[ARM] Handle UNSPEC_VOLATILE in rtx costs and don't recurse inside the unspec
gcc/config/arm/arm.c (official-gcc.git, blob fa9543138bcc60490f4e0bbf31e10caeff58c7e5)
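The commit title above refers to handling UNSPEC_VOLATILE in the RTX cost hooks. As a rough illustration of the idea only (a hypothetical sketch, not the actual patch hunk; the helper name and the one-instruction cost are assumptions), such handling charges a fixed cost for the unspec and returns true so the generic cost walker does not recurse into its operands:

/* Hypothetical sketch: cost an UNSPEC/UNSPEC_VOLATILE without recursing.  */
static bool
example_unspec_cost (rtx x, enum rtx_code outer_code ATTRIBUTE_UNUSED,
                     bool speed_p ATTRIBUTE_UNUSED, int *cost)
{
  gcc_assert (GET_CODE (x) == UNSPEC || GET_CODE (x) == UNSPEC_VOLATILE);
  *cost = COSTS_N_INSNS (1);   /* Assumed cost of a single instruction.  */
  return true;                 /* TRUE tells the caller not to recurse into XVEC (x, 0).  */
}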
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2015 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "hash-table.h"
27 #include "tm.h"
28 #include "rtl.h"
29 #include "hash-set.h"
30 #include "machmode.h"
31 #include "vec.h"
32 #include "double-int.h"
33 #include "input.h"
34 #include "alias.h"
35 #include "symtab.h"
36 #include "wide-int.h"
37 #include "inchash.h"
38 #include "tree.h"
39 #include "fold-const.h"
40 #include "stringpool.h"
41 #include "stor-layout.h"
42 #include "calls.h"
43 #include "varasm.h"
44 #include "obstack.h"
45 #include "regs.h"
46 #include "hard-reg-set.h"
47 #include "insn-config.h"
48 #include "conditions.h"
49 #include "output.h"
50 #include "insn-attr.h"
51 #include "flags.h"
52 #include "reload.h"
53 #include "function.h"
54 #include "hashtab.h"
55 #include "statistics.h"
56 #include "real.h"
57 #include "fixed-value.h"
58 #include "expmed.h"
59 #include "dojump.h"
60 #include "explow.h"
61 #include "emit-rtl.h"
62 #include "stmt.h"
63 #include "expr.h"
64 #include "insn-codes.h"
65 #include "optabs.h"
66 #include "diagnostic-core.h"
67 #include "recog.h"
68 #include "predict.h"
69 #include "dominance.h"
70 #include "cfg.h"
71 #include "cfgrtl.h"
72 #include "cfganal.h"
73 #include "lcm.h"
74 #include "cfgbuild.h"
75 #include "cfgcleanup.h"
76 #include "basic-block.h"
77 #include "hash-map.h"
78 #include "is-a.h"
79 #include "plugin-api.h"
80 #include "ipa-ref.h"
81 #include "cgraph.h"
82 #include "ggc.h"
83 #include "except.h"
84 #include "tm_p.h"
85 #include "target.h"
86 #include "sched-int.h"
87 #include "target-def.h"
88 #include "debug.h"
89 #include "langhooks.h"
90 #include "df.h"
91 #include "intl.h"
92 #include "libfuncs.h"
93 #include "params.h"
94 #include "opts.h"
95 #include "dumpfile.h"
96 #include "gimple-expr.h"
97 #include "builtins.h"
98 #include "tm-constrs.h"
99 #include "rtl-iter.h"
100 #include "sched-int.h"
102 /* Forward definitions of types. */
103 typedef struct minipool_node Mnode;
104 typedef struct minipool_fixup Mfix;
106 void (*arm_lang_output_object_attributes_hook)(void);
108 struct four_ints
110 int i[4];
113 /* Forward function declarations. */
114 static bool arm_const_not_ok_for_debug_p (rtx);
115 static bool arm_needs_doubleword_align (machine_mode, const_tree);
116 static int arm_compute_static_chain_stack_bytes (void);
117 static arm_stack_offsets *arm_get_frame_offsets (void);
118 static void arm_add_gc_roots (void);
119 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
120 HOST_WIDE_INT, rtx, rtx, int, int);
121 static unsigned bit_count (unsigned long);
122 static int arm_address_register_rtx_p (rtx, int);
123 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
124 static bool is_called_in_ARM_mode (tree);
125 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
126 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
127 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
128 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
129 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
130 inline static int thumb1_index_register_rtx_p (rtx, int);
131 static int thumb_far_jump_used_p (void);
132 static bool thumb_force_lr_save (void);
133 static unsigned arm_size_return_regs (void);
134 static bool arm_assemble_integer (rtx, unsigned int, int);
135 static void arm_print_operand (FILE *, rtx, int);
136 static void arm_print_operand_address (FILE *, rtx);
137 static bool arm_print_operand_punct_valid_p (unsigned char code);
138 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
139 static arm_cc get_arm_condition_code (rtx);
140 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
141 static const char *output_multi_immediate (rtx *, const char *, const char *,
142 int, HOST_WIDE_INT);
143 static const char *shift_op (rtx, HOST_WIDE_INT *);
144 static struct machine_function *arm_init_machine_status (void);
145 static void thumb_exit (FILE *, int);
146 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
147 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
148 static Mnode *add_minipool_forward_ref (Mfix *);
149 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
150 static Mnode *add_minipool_backward_ref (Mfix *);
151 static void assign_minipool_offsets (Mfix *);
152 static void arm_print_value (FILE *, rtx);
153 static void dump_minipool (rtx_insn *);
154 static int arm_barrier_cost (rtx_insn *);
155 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
156 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
157 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
158 machine_mode, rtx);
159 static void arm_reorg (void);
160 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
161 static unsigned long arm_compute_save_reg0_reg12_mask (void);
162 static unsigned long arm_compute_save_reg_mask (void);
163 static unsigned long arm_isr_value (tree);
164 static unsigned long arm_compute_func_type (void);
165 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
166 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
167 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
168 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
169 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
170 #endif
171 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
172 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
173 static int arm_comp_type_attributes (const_tree, const_tree);
174 static void arm_set_default_type_attributes (tree);
175 static int arm_adjust_cost (rtx_insn *, rtx, rtx_insn *, int);
176 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
177 static int optimal_immediate_sequence (enum rtx_code code,
178 unsigned HOST_WIDE_INT val,
179 struct four_ints *return_sequence);
180 static int optimal_immediate_sequence_1 (enum rtx_code code,
181 unsigned HOST_WIDE_INT val,
182 struct four_ints *return_sequence,
183 int i);
184 static int arm_get_strip_length (int);
185 static bool arm_function_ok_for_sibcall (tree, tree);
186 static machine_mode arm_promote_function_mode (const_tree,
187 machine_mode, int *,
188 const_tree, int);
189 static bool arm_return_in_memory (const_tree, const_tree);
190 static rtx arm_function_value (const_tree, const_tree, bool);
191 static rtx arm_libcall_value_1 (machine_mode);
192 static rtx arm_libcall_value (machine_mode, const_rtx);
193 static bool arm_function_value_regno_p (const unsigned int);
194 static void arm_internal_label (FILE *, const char *, unsigned long);
195 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
196 tree);
197 static bool arm_have_conditional_execution (void);
198 static bool arm_cannot_force_const_mem (machine_mode, rtx);
199 static bool arm_legitimate_constant_p (machine_mode, rtx);
200 static bool arm_rtx_costs_1 (rtx, enum rtx_code, int*, bool);
201 static bool arm_size_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *);
202 static bool arm_slowmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
203 static bool arm_fastmul_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
204 static bool arm_xscale_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
205 static bool arm_9e_rtx_costs (rtx, enum rtx_code, enum rtx_code, int *, bool);
206 static bool arm_rtx_costs (rtx, int, int, int, int *, bool);
207 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
208 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
209 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
210 static void emit_constant_insn (rtx cond, rtx pattern);
211 static rtx_insn *emit_set_insn (rtx, rtx);
212 static rtx emit_multi_reg_push (unsigned long, unsigned long);
213 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
214 tree, bool);
215 static rtx arm_function_arg (cumulative_args_t, machine_mode,
216 const_tree, bool);
217 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
218 const_tree, bool);
219 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
220 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
221 const_tree);
222 static rtx aapcs_libcall_value (machine_mode);
223 static int aapcs_select_return_coproc (const_tree, const_tree);
225 #ifdef OBJECT_FORMAT_ELF
226 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
227 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
228 #endif
229 #ifndef ARM_PE
230 static void arm_encode_section_info (tree, rtx, int);
231 #endif
233 static void arm_file_end (void);
234 static void arm_file_start (void);
236 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
237 tree, int *, int);
238 static bool arm_pass_by_reference (cumulative_args_t,
239 machine_mode, const_tree, bool);
240 static bool arm_promote_prototypes (const_tree);
241 static bool arm_default_short_enums (void);
242 static bool arm_align_anon_bitfield (void);
243 static bool arm_return_in_msb (const_tree);
244 static bool arm_must_pass_in_stack (machine_mode, const_tree);
245 static bool arm_return_in_memory (const_tree, const_tree);
246 #if ARM_UNWIND_INFO
247 static void arm_unwind_emit (FILE *, rtx_insn *);
248 static bool arm_output_ttype (rtx);
249 static void arm_asm_emit_except_personality (rtx);
250 static void arm_asm_init_sections (void);
251 #endif
252 static rtx arm_dwarf_register_span (rtx);
254 static tree arm_cxx_guard_type (void);
255 static bool arm_cxx_guard_mask_bit (void);
256 static tree arm_get_cookie_size (tree);
257 static bool arm_cookie_has_size (void);
258 static bool arm_cxx_cdtor_returns_this (void);
259 static bool arm_cxx_key_method_may_be_inline (void);
260 static void arm_cxx_determine_class_data_visibility (tree);
261 static bool arm_cxx_class_data_always_comdat (void);
262 static bool arm_cxx_use_aeabi_atexit (void);
263 static void arm_init_libfuncs (void);
264 static tree arm_build_builtin_va_list (void);
265 static void arm_expand_builtin_va_start (tree, rtx);
266 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
267 static void arm_option_override (void);
268 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
269 static bool arm_macro_fusion_p (void);
270 static bool arm_cannot_copy_insn_p (rtx_insn *);
271 static int arm_issue_rate (void);
272 static int arm_first_cycle_multipass_dfa_lookahead (void);
273 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
274 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
275 static bool arm_output_addr_const_extra (FILE *, rtx);
276 static bool arm_allocate_stack_slots_for_args (void);
277 static bool arm_warn_func_return (tree);
278 static const char *arm_invalid_parameter_type (const_tree t);
279 static const char *arm_invalid_return_type (const_tree t);
280 static tree arm_promoted_type (const_tree t);
281 static tree arm_convert_to_type (tree type, tree expr);
282 static bool arm_scalar_mode_supported_p (machine_mode);
283 static bool arm_frame_pointer_required (void);
284 static bool arm_can_eliminate (const int, const int);
285 static void arm_asm_trampoline_template (FILE *);
286 static void arm_trampoline_init (rtx, tree, rtx);
287 static rtx arm_trampoline_adjust_address (rtx);
288 static rtx arm_pic_static_addr (rtx orig, rtx reg);
289 static bool cortex_a9_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
290 static bool xscale_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
291 static bool fa726te_sched_adjust_cost (rtx_insn *, rtx, rtx_insn *, int *);
292 static bool arm_array_mode_supported_p (machine_mode,
293 unsigned HOST_WIDE_INT);
294 static machine_mode arm_preferred_simd_mode (machine_mode);
295 static bool arm_class_likely_spilled_p (reg_class_t);
296 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
297 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
298 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
299 const_tree type,
300 int misalignment,
301 bool is_packed);
302 static void arm_conditional_register_usage (void);
303 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
304 static unsigned int arm_autovectorize_vector_sizes (void);
305 static int arm_default_branch_cost (bool, bool);
306 static int arm_cortex_a5_branch_cost (bool, bool);
307 static int arm_cortex_m_branch_cost (bool, bool);
308 static int arm_cortex_m7_branch_cost (bool, bool);
310 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
311 const unsigned char *sel);
313 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
315 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
316 tree vectype,
317 int misalign ATTRIBUTE_UNUSED);
318 static unsigned arm_add_stmt_cost (void *data, int count,
319 enum vect_cost_for_stmt kind,
320 struct _stmt_vec_info *stmt_info,
321 int misalign,
322 enum vect_cost_model_location where);
324 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
325 bool op0_preserve_value);
326 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
328 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
330 /* Table of machine attributes. */
331 static const struct attribute_spec arm_attribute_table[] =
333 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
334 affects_type_identity } */
335 /* Function calls made to this symbol must be done indirectly, because
336 it may lie outside of the 26 bit addressing range of a normal function
337 call. */
338 { "long_call", 0, 0, false, true, true, NULL, false },
339 /* Whereas these functions are always known to reside within the 26 bit
340 addressing range. */
341 { "short_call", 0, 0, false, true, true, NULL, false },
342 /* Specify the procedure call conventions for a function. */
343 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
344 false },
345 /* Interrupt Service Routines have special prologue and epilogue requirements. */
346 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
347 false },
348 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
349 false },
350 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
351 false },
352 #ifdef ARM_PE
353 /* ARM/PE has three new attributes:
354 interfacearm - ?
355 dllexport - for exporting a function/variable that will live in a dll
356 dllimport - for importing a function/variable from a dll
358 Microsoft allows multiple declspecs in one __declspec, separating
359 them with spaces. We do NOT support this. Instead, use __declspec
360 multiple times.
362 { "dllimport", 0, 0, true, false, false, NULL, false },
363 { "dllexport", 0, 0, true, false, false, NULL, false },
364 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
365 false },
366 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
367 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
368 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
369 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
370 false },
371 #endif
372 { NULL, 0, 0, false, false, false, NULL, false }
375 /* Initialize the GCC target structure. */
376 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
377 #undef TARGET_MERGE_DECL_ATTRIBUTES
378 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
379 #endif
381 #undef TARGET_LEGITIMIZE_ADDRESS
382 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
384 #undef TARGET_LRA_P
385 #define TARGET_LRA_P hook_bool_void_true
387 #undef TARGET_ATTRIBUTE_TABLE
388 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
390 #undef TARGET_ASM_FILE_START
391 #define TARGET_ASM_FILE_START arm_file_start
392 #undef TARGET_ASM_FILE_END
393 #define TARGET_ASM_FILE_END arm_file_end
395 #undef TARGET_ASM_ALIGNED_SI_OP
396 #define TARGET_ASM_ALIGNED_SI_OP NULL
397 #undef TARGET_ASM_INTEGER
398 #define TARGET_ASM_INTEGER arm_assemble_integer
400 #undef TARGET_PRINT_OPERAND
401 #define TARGET_PRINT_OPERAND arm_print_operand
402 #undef TARGET_PRINT_OPERAND_ADDRESS
403 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
404 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
405 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
407 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
408 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
410 #undef TARGET_ASM_FUNCTION_PROLOGUE
411 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
413 #undef TARGET_ASM_FUNCTION_EPILOGUE
414 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
416 #undef TARGET_OPTION_OVERRIDE
417 #define TARGET_OPTION_OVERRIDE arm_option_override
419 #undef TARGET_COMP_TYPE_ATTRIBUTES
420 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
422 #undef TARGET_SCHED_MACRO_FUSION_P
423 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
425 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
426 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
428 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
429 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
431 #undef TARGET_SCHED_ADJUST_COST
432 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
434 #undef TARGET_SCHED_REORDER
435 #define TARGET_SCHED_REORDER arm_sched_reorder
437 #undef TARGET_REGISTER_MOVE_COST
438 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
440 #undef TARGET_MEMORY_MOVE_COST
441 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
443 #undef TARGET_ENCODE_SECTION_INFO
444 #ifdef ARM_PE
445 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
446 #else
447 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
448 #endif
450 #undef TARGET_STRIP_NAME_ENCODING
451 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
453 #undef TARGET_ASM_INTERNAL_LABEL
454 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
456 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
457 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
459 #undef TARGET_FUNCTION_VALUE
460 #define TARGET_FUNCTION_VALUE arm_function_value
462 #undef TARGET_LIBCALL_VALUE
463 #define TARGET_LIBCALL_VALUE arm_libcall_value
465 #undef TARGET_FUNCTION_VALUE_REGNO_P
466 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
468 #undef TARGET_ASM_OUTPUT_MI_THUNK
469 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
470 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
471 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
473 #undef TARGET_RTX_COSTS
474 #define TARGET_RTX_COSTS arm_rtx_costs
475 #undef TARGET_ADDRESS_COST
476 #define TARGET_ADDRESS_COST arm_address_cost
478 #undef TARGET_SHIFT_TRUNCATION_MASK
479 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
480 #undef TARGET_VECTOR_MODE_SUPPORTED_P
481 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
482 #undef TARGET_ARRAY_MODE_SUPPORTED_P
483 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
484 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
485 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
486 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
487 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
488 arm_autovectorize_vector_sizes
490 #undef TARGET_MACHINE_DEPENDENT_REORG
491 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
493 #undef TARGET_INIT_BUILTINS
494 #define TARGET_INIT_BUILTINS arm_init_builtins
495 #undef TARGET_EXPAND_BUILTIN
496 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
497 #undef TARGET_BUILTIN_DECL
498 #define TARGET_BUILTIN_DECL arm_builtin_decl
500 #undef TARGET_INIT_LIBFUNCS
501 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
503 #undef TARGET_PROMOTE_FUNCTION_MODE
504 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
505 #undef TARGET_PROMOTE_PROTOTYPES
506 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
507 #undef TARGET_PASS_BY_REFERENCE
508 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
509 #undef TARGET_ARG_PARTIAL_BYTES
510 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
511 #undef TARGET_FUNCTION_ARG
512 #define TARGET_FUNCTION_ARG arm_function_arg
513 #undef TARGET_FUNCTION_ARG_ADVANCE
514 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
515 #undef TARGET_FUNCTION_ARG_BOUNDARY
516 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
518 #undef TARGET_SETUP_INCOMING_VARARGS
519 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
521 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
522 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
524 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
525 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
526 #undef TARGET_TRAMPOLINE_INIT
527 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
528 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
529 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
531 #undef TARGET_WARN_FUNC_RETURN
532 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
534 #undef TARGET_DEFAULT_SHORT_ENUMS
535 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
537 #undef TARGET_ALIGN_ANON_BITFIELD
538 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
540 #undef TARGET_NARROW_VOLATILE_BITFIELD
541 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
543 #undef TARGET_CXX_GUARD_TYPE
544 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
546 #undef TARGET_CXX_GUARD_MASK_BIT
547 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
549 #undef TARGET_CXX_GET_COOKIE_SIZE
550 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
552 #undef TARGET_CXX_COOKIE_HAS_SIZE
553 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
555 #undef TARGET_CXX_CDTOR_RETURNS_THIS
556 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
558 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
559 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
561 #undef TARGET_CXX_USE_AEABI_ATEXIT
562 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
564 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
565 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
566 arm_cxx_determine_class_data_visibility
568 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
569 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
571 #undef TARGET_RETURN_IN_MSB
572 #define TARGET_RETURN_IN_MSB arm_return_in_msb
574 #undef TARGET_RETURN_IN_MEMORY
575 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
577 #undef TARGET_MUST_PASS_IN_STACK
578 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
580 #if ARM_UNWIND_INFO
581 #undef TARGET_ASM_UNWIND_EMIT
582 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
584 /* EABI unwinding tables use a different format for the typeinfo tables. */
585 #undef TARGET_ASM_TTYPE
586 #define TARGET_ASM_TTYPE arm_output_ttype
588 #undef TARGET_ARM_EABI_UNWINDER
589 #define TARGET_ARM_EABI_UNWINDER true
591 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
592 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
594 #undef TARGET_ASM_INIT_SECTIONS
595 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
596 #endif /* ARM_UNWIND_INFO */
598 #undef TARGET_DWARF_REGISTER_SPAN
599 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
601 #undef TARGET_CANNOT_COPY_INSN_P
602 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
604 #ifdef HAVE_AS_TLS
605 #undef TARGET_HAVE_TLS
606 #define TARGET_HAVE_TLS true
607 #endif
609 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
610 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
612 #undef TARGET_LEGITIMATE_CONSTANT_P
613 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
615 #undef TARGET_CANNOT_FORCE_CONST_MEM
616 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
618 #undef TARGET_MAX_ANCHOR_OFFSET
619 #define TARGET_MAX_ANCHOR_OFFSET 4095
 621 /* The minimum is set such that the block for a particular anchor
 622 spans offsets -4088 through 4095, i.e. 4088 + 1 + 4095 = 8184 bytes,
 623 which is divisible by eight, ensuring natural spacing of anchors. */
624 #undef TARGET_MIN_ANCHOR_OFFSET
625 #define TARGET_MIN_ANCHOR_OFFSET -4088
627 #undef TARGET_SCHED_ISSUE_RATE
628 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
630 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
631 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
632 arm_first_cycle_multipass_dfa_lookahead
634 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
635 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
636 arm_first_cycle_multipass_dfa_lookahead_guard
638 #undef TARGET_MANGLE_TYPE
639 #define TARGET_MANGLE_TYPE arm_mangle_type
641 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
642 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
644 #undef TARGET_BUILD_BUILTIN_VA_LIST
645 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
646 #undef TARGET_EXPAND_BUILTIN_VA_START
647 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
648 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
649 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
651 #ifdef HAVE_AS_TLS
652 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
653 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
654 #endif
656 #undef TARGET_LEGITIMATE_ADDRESS_P
657 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
659 #undef TARGET_PREFERRED_RELOAD_CLASS
660 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
662 #undef TARGET_INVALID_PARAMETER_TYPE
663 #define TARGET_INVALID_PARAMETER_TYPE arm_invalid_parameter_type
665 #undef TARGET_INVALID_RETURN_TYPE
666 #define TARGET_INVALID_RETURN_TYPE arm_invalid_return_type
668 #undef TARGET_PROMOTED_TYPE
669 #define TARGET_PROMOTED_TYPE arm_promoted_type
671 #undef TARGET_CONVERT_TO_TYPE
672 #define TARGET_CONVERT_TO_TYPE arm_convert_to_type
674 #undef TARGET_SCALAR_MODE_SUPPORTED_P
675 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
677 #undef TARGET_FRAME_POINTER_REQUIRED
678 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
680 #undef TARGET_CAN_ELIMINATE
681 #define TARGET_CAN_ELIMINATE arm_can_eliminate
683 #undef TARGET_CONDITIONAL_REGISTER_USAGE
684 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
686 #undef TARGET_CLASS_LIKELY_SPILLED_P
687 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
689 #undef TARGET_VECTORIZE_BUILTINS
690 #define TARGET_VECTORIZE_BUILTINS
692 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
693 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
694 arm_builtin_vectorized_function
696 #undef TARGET_VECTOR_ALIGNMENT
697 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
699 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
700 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
701 arm_vector_alignment_reachable
703 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
704 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
705 arm_builtin_support_vector_misalignment
707 #undef TARGET_PREFERRED_RENAME_CLASS
708 #define TARGET_PREFERRED_RENAME_CLASS \
709 arm_preferred_rename_class
711 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
712 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
713 arm_vectorize_vec_perm_const_ok
715 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
716 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
717 arm_builtin_vectorization_cost
718 #undef TARGET_VECTORIZE_ADD_STMT_COST
719 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
721 #undef TARGET_CANONICALIZE_COMPARISON
722 #define TARGET_CANONICALIZE_COMPARISON \
723 arm_canonicalize_comparison
725 #undef TARGET_ASAN_SHADOW_OFFSET
726 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
728 #undef MAX_INSN_PER_IT_BLOCK
729 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
731 #undef TARGET_CAN_USE_DOLOOP_P
732 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
734 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
735 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
737 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
738 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
740 #undef TARGET_SCHED_FUSION_PRIORITY
741 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
743 struct gcc_target targetm = TARGET_INITIALIZER;
745 /* Obstack for minipool constant handling. */
746 static struct obstack minipool_obstack;
747 static char * minipool_startobj;
749 /* The maximum number of insns skipped which
750 will be conditionalised if possible. */
751 static int max_insns_skipped = 5;
753 extern FILE * asm_out_file;
755 /* True if we are currently building a constant table. */
756 int making_const_table;
758 /* The processor for which instructions should be scheduled. */
759 enum processor_type arm_tune = arm_none;
761 /* The current tuning set. */
762 const struct tune_params *current_tune;
764 /* Which floating point hardware to schedule for. */
765 int arm_fpu_attr;
 767 /* Which floating point hardware to use. */
768 const struct arm_fpu_desc *arm_fpu_desc;
770 /* Used for Thumb call_via trampolines. */
771 rtx thumb_call_via_label[14];
772 static int thumb_call_reg_needed;
774 /* The bits in this mask specify which
775 instructions we are allowed to generate. */
776 unsigned long insn_flags = 0;
778 /* The bits in this mask specify which instruction scheduling options should
779 be used. */
780 unsigned long tune_flags = 0;
782 /* The highest ARM architecture version supported by the
783 target. */
784 enum base_architecture arm_base_arch = BASE_ARCH_0;
786 /* The following are used in the arm.md file as equivalents to bits
787 in the above two flag variables. */
789 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
790 int arm_arch3m = 0;
792 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
793 int arm_arch4 = 0;
795 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
796 int arm_arch4t = 0;
798 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
799 int arm_arch5 = 0;
801 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
802 int arm_arch5e = 0;
804 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
805 int arm_arch6 = 0;
807 /* Nonzero if this chip supports the ARM 6K extensions. */
808 int arm_arch6k = 0;
810 /* Nonzero if instructions present in ARMv6-M can be used. */
811 int arm_arch6m = 0;
813 /* Nonzero if this chip supports the ARM 7 extensions. */
814 int arm_arch7 = 0;
816 /* Nonzero if instructions not present in the 'M' profile can be used. */
817 int arm_arch_notm = 0;
819 /* Nonzero if instructions present in ARMv7E-M can be used. */
820 int arm_arch7em = 0;
822 /* Nonzero if instructions present in ARMv8 can be used. */
823 int arm_arch8 = 0;
825 /* Nonzero if this chip can benefit from load scheduling. */
826 int arm_ld_sched = 0;
828 /* Nonzero if this chip is a StrongARM. */
829 int arm_tune_strongarm = 0;
831 /* Nonzero if this chip supports Intel Wireless MMX technology. */
832 int arm_arch_iwmmxt = 0;
834 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
835 int arm_arch_iwmmxt2 = 0;
837 /* Nonzero if this chip is an XScale. */
838 int arm_arch_xscale = 0;
 840 /* Nonzero if tuning for XScale. */
841 int arm_tune_xscale = 0;
843 /* Nonzero if we want to tune for stores that access the write-buffer.
844 This typically means an ARM6 or ARM7 with MMU or MPU. */
845 int arm_tune_wbuf = 0;
847 /* Nonzero if tuning for Cortex-A9. */
848 int arm_tune_cortex_a9 = 0;
850 /* Nonzero if we should define __THUMB_INTERWORK__ in the
851 preprocessor.
852 XXX This is a bit of a hack, it's intended to help work around
853 problems in GLD which doesn't understand that armv5t code is
854 interworking clean. */
855 int arm_cpp_interwork = 0;
857 /* Nonzero if chip supports Thumb 2. */
858 int arm_arch_thumb2;
860 /* Nonzero if chip supports integer division instruction. */
861 int arm_arch_arm_hwdiv;
862 int arm_arch_thumb_hwdiv;
864 /* Nonzero if chip disallows volatile memory access in IT block. */
865 int arm_arch_no_volatile_ce;
 867 /* Nonzero if we should use Neon to handle 64-bit operations rather
868 than core registers. */
869 int prefer_neon_for_64bits = 0;
871 /* Nonzero if we shouldn't use literal pools. */
872 bool arm_disable_literal_pool = false;
874 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference,
875 we must report the mode of the memory reference from
876 TARGET_PRINT_OPERAND to TARGET_PRINT_OPERAND_ADDRESS. */
877 machine_mode output_memory_reference_mode;
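/* Editorial illustration (not part of arm.c): a minimal, self-contained
   sketch of the contract described in the comment above -- the operand
   printer records the access mode in a file-scope variable so the address
   printer, which only sees the address, can emit the correct
   auto-increment amount.  All names and types below are invented.  */
#include <stdio.h>

static int ex_output_memory_reference_size;   /* stands in for the recorded mode */

static void
ex_print_operand_address (FILE *stream, const char *base_reg)
{
  /* Post-increment by the size of the access recorded by the caller.  */
  fprintf (stream, "[%s], #%d", base_reg, ex_output_memory_reference_size);
}

static void
ex_print_mem_operand (FILE *stream, int access_size, const char *base_reg)
{
  ex_output_memory_reference_size = access_size;  /* stash for the address printer */
  ex_print_operand_address (stream, base_reg);
}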
879 /* The register number to be used for the PIC offset register. */
880 unsigned arm_pic_register = INVALID_REGNUM;
882 enum arm_pcs arm_pcs_default;
884 /* For an explanation of these variables, see final_prescan_insn below. */
885 int arm_ccfsm_state;
886 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
887 enum arm_cond_code arm_current_cc;
889 rtx arm_target_insn;
890 int arm_target_label;
891 /* The number of conditionally executed insns, including the current insn. */
892 int arm_condexec_count = 0;
893 /* A bitmask specifying the patterns for the IT block.
894 Zero means do not output an IT block before this insn. */
895 int arm_condexec_mask = 0;
896 /* The number of bits used in arm_condexec_mask. */
897 int arm_condexec_masklen = 0;
899 /* Nonzero if chip supports the ARMv8 CRC instructions. */
900 int arm_arch_crc = 0;
 903 /* Nonzero if the core has a very small, high-latency multiply unit. */
903 int arm_m_profile_small_mul = 0;
905 /* The condition codes of the ARM, and the inverse function. */
906 static const char * const arm_condition_codes[] =
908 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
909 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
912 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
913 int arm_regs_in_sequence[] =
915 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
918 #define ARM_LSL_NAME (TARGET_UNIFIED_ASM ? "lsl" : "asl")
919 #define streq(string1, string2) (strcmp (string1, string2) == 0)
921 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
922 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
923 | (1 << PIC_OFFSET_TABLE_REGNUM)))
925 /* Initialization code. */
927 struct processors
929 const char *const name;
930 enum processor_type core;
931 const char *arch;
932 enum base_architecture base_arch;
933 const unsigned long flags;
934 const struct tune_params *const tune;
938 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
939 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
941 num_slots, \
942 l1_size, \
943 l1_line_size \
946 /* arm generic vectorizer costs. */
947 static const
948 struct cpu_vec_costs arm_default_vec_cost = {
949 1, /* scalar_stmt_cost. */
 950 1, /* scalar_load_cost. */
951 1, /* scalar_store_cost. */
952 1, /* vec_stmt_cost. */
953 1, /* vec_to_scalar_cost. */
954 1, /* scalar_to_vec_cost. */
955 1, /* vec_align_load_cost. */
956 1, /* vec_unalign_load_cost. */
957 1, /* vec_unalign_store_cost. */
958 1, /* vec_store_cost. */
959 3, /* cond_taken_branch_cost. */
960 1, /* cond_not_taken_branch_cost. */
963 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
964 #include "aarch-cost-tables.h"
968 const struct cpu_cost_table cortexa9_extra_costs =
970 /* ALU */
972 0, /* arith. */
973 0, /* logical. */
974 0, /* shift. */
975 COSTS_N_INSNS (1), /* shift_reg. */
976 COSTS_N_INSNS (1), /* arith_shift. */
977 COSTS_N_INSNS (2), /* arith_shift_reg. */
978 0, /* log_shift. */
979 COSTS_N_INSNS (1), /* log_shift_reg. */
980 COSTS_N_INSNS (1), /* extend. */
981 COSTS_N_INSNS (2), /* extend_arith. */
982 COSTS_N_INSNS (1), /* bfi. */
983 COSTS_N_INSNS (1), /* bfx. */
984 0, /* clz. */
985 0, /* rev. */
986 0, /* non_exec. */
987 true /* non_exec_costs_exec. */
990 /* MULT SImode */
992 COSTS_N_INSNS (3), /* simple. */
993 COSTS_N_INSNS (3), /* flag_setting. */
994 COSTS_N_INSNS (2), /* extend. */
995 COSTS_N_INSNS (3), /* add. */
996 COSTS_N_INSNS (2), /* extend_add. */
997 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
999 /* MULT DImode */
1001 0, /* simple (N/A). */
1002 0, /* flag_setting (N/A). */
1003 COSTS_N_INSNS (4), /* extend. */
1004 0, /* add (N/A). */
1005 COSTS_N_INSNS (4), /* extend_add. */
1006 0 /* idiv (N/A). */
1009 /* LD/ST */
1011 COSTS_N_INSNS (2), /* load. */
1012 COSTS_N_INSNS (2), /* load_sign_extend. */
1013 COSTS_N_INSNS (2), /* ldrd. */
1014 COSTS_N_INSNS (2), /* ldm_1st. */
1015 1, /* ldm_regs_per_insn_1st. */
1016 2, /* ldm_regs_per_insn_subsequent. */
1017 COSTS_N_INSNS (5), /* loadf. */
1018 COSTS_N_INSNS (5), /* loadd. */
1019 COSTS_N_INSNS (1), /* load_unaligned. */
1020 COSTS_N_INSNS (2), /* store. */
1021 COSTS_N_INSNS (2), /* strd. */
1022 COSTS_N_INSNS (2), /* stm_1st. */
1023 1, /* stm_regs_per_insn_1st. */
1024 2, /* stm_regs_per_insn_subsequent. */
1025 COSTS_N_INSNS (1), /* storef. */
1026 COSTS_N_INSNS (1), /* stored. */
1027 COSTS_N_INSNS (1), /* store_unaligned. */
1028 COSTS_N_INSNS (1), /* loadv. */
1029 COSTS_N_INSNS (1) /* storev. */
1032 /* FP SFmode */
1034 COSTS_N_INSNS (14), /* div. */
1035 COSTS_N_INSNS (4), /* mult. */
1036 COSTS_N_INSNS (7), /* mult_addsub. */
1037 COSTS_N_INSNS (30), /* fma. */
1038 COSTS_N_INSNS (3), /* addsub. */
1039 COSTS_N_INSNS (1), /* fpconst. */
1040 COSTS_N_INSNS (1), /* neg. */
1041 COSTS_N_INSNS (3), /* compare. */
1042 COSTS_N_INSNS (3), /* widen. */
1043 COSTS_N_INSNS (3), /* narrow. */
1044 COSTS_N_INSNS (3), /* toint. */
1045 COSTS_N_INSNS (3), /* fromint. */
1046 COSTS_N_INSNS (3) /* roundint. */
1048 /* FP DFmode */
1050 COSTS_N_INSNS (24), /* div. */
1051 COSTS_N_INSNS (5), /* mult. */
1052 COSTS_N_INSNS (8), /* mult_addsub. */
1053 COSTS_N_INSNS (30), /* fma. */
1054 COSTS_N_INSNS (3), /* addsub. */
1055 COSTS_N_INSNS (1), /* fpconst. */
1056 COSTS_N_INSNS (1), /* neg. */
1057 COSTS_N_INSNS (3), /* compare. */
1058 COSTS_N_INSNS (3), /* widen. */
1059 COSTS_N_INSNS (3), /* narrow. */
1060 COSTS_N_INSNS (3), /* toint. */
1061 COSTS_N_INSNS (3), /* fromint. */
1062 COSTS_N_INSNS (3) /* roundint. */
1065 /* Vector */
1067 COSTS_N_INSNS (1) /* alu. */
1071 const struct cpu_cost_table cortexa8_extra_costs =
1073 /* ALU */
1075 0, /* arith. */
1076 0, /* logical. */
1077 COSTS_N_INSNS (1), /* shift. */
1078 0, /* shift_reg. */
1079 COSTS_N_INSNS (1), /* arith_shift. */
1080 0, /* arith_shift_reg. */
1081 COSTS_N_INSNS (1), /* log_shift. */
1082 0, /* log_shift_reg. */
1083 0, /* extend. */
1084 0, /* extend_arith. */
1085 0, /* bfi. */
1086 0, /* bfx. */
1087 0, /* clz. */
1088 0, /* rev. */
1089 0, /* non_exec. */
1090 true /* non_exec_costs_exec. */
1093 /* MULT SImode */
1095 COSTS_N_INSNS (1), /* simple. */
1096 COSTS_N_INSNS (1), /* flag_setting. */
1097 COSTS_N_INSNS (1), /* extend. */
1098 COSTS_N_INSNS (1), /* add. */
1099 COSTS_N_INSNS (1), /* extend_add. */
1100 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1102 /* MULT DImode */
1104 0, /* simple (N/A). */
1105 0, /* flag_setting (N/A). */
1106 COSTS_N_INSNS (2), /* extend. */
1107 0, /* add (N/A). */
1108 COSTS_N_INSNS (2), /* extend_add. */
1109 0 /* idiv (N/A). */
1112 /* LD/ST */
1114 COSTS_N_INSNS (1), /* load. */
1115 COSTS_N_INSNS (1), /* load_sign_extend. */
1116 COSTS_N_INSNS (1), /* ldrd. */
1117 COSTS_N_INSNS (1), /* ldm_1st. */
1118 1, /* ldm_regs_per_insn_1st. */
1119 2, /* ldm_regs_per_insn_subsequent. */
1120 COSTS_N_INSNS (1), /* loadf. */
1121 COSTS_N_INSNS (1), /* loadd. */
1122 COSTS_N_INSNS (1), /* load_unaligned. */
1123 COSTS_N_INSNS (1), /* store. */
1124 COSTS_N_INSNS (1), /* strd. */
1125 COSTS_N_INSNS (1), /* stm_1st. */
1126 1, /* stm_regs_per_insn_1st. */
1127 2, /* stm_regs_per_insn_subsequent. */
1128 COSTS_N_INSNS (1), /* storef. */
1129 COSTS_N_INSNS (1), /* stored. */
1130 COSTS_N_INSNS (1), /* store_unaligned. */
1131 COSTS_N_INSNS (1), /* loadv. */
1132 COSTS_N_INSNS (1) /* storev. */
1135 /* FP SFmode */
1137 COSTS_N_INSNS (36), /* div. */
1138 COSTS_N_INSNS (11), /* mult. */
1139 COSTS_N_INSNS (20), /* mult_addsub. */
1140 COSTS_N_INSNS (30), /* fma. */
1141 COSTS_N_INSNS (9), /* addsub. */
1142 COSTS_N_INSNS (3), /* fpconst. */
1143 COSTS_N_INSNS (3), /* neg. */
1144 COSTS_N_INSNS (6), /* compare. */
1145 COSTS_N_INSNS (4), /* widen. */
1146 COSTS_N_INSNS (4), /* narrow. */
1147 COSTS_N_INSNS (8), /* toint. */
1148 COSTS_N_INSNS (8), /* fromint. */
1149 COSTS_N_INSNS (8) /* roundint. */
1151 /* FP DFmode */
1153 COSTS_N_INSNS (64), /* div. */
1154 COSTS_N_INSNS (16), /* mult. */
1155 COSTS_N_INSNS (25), /* mult_addsub. */
1156 COSTS_N_INSNS (30), /* fma. */
1157 COSTS_N_INSNS (9), /* addsub. */
1158 COSTS_N_INSNS (3), /* fpconst. */
1159 COSTS_N_INSNS (3), /* neg. */
1160 COSTS_N_INSNS (6), /* compare. */
1161 COSTS_N_INSNS (6), /* widen. */
1162 COSTS_N_INSNS (6), /* narrow. */
1163 COSTS_N_INSNS (8), /* toint. */
1164 COSTS_N_INSNS (8), /* fromint. */
1165 COSTS_N_INSNS (8) /* roundint. */
1168 /* Vector */
1170 COSTS_N_INSNS (1) /* alu. */
1174 const struct cpu_cost_table cortexa5_extra_costs =
1176 /* ALU */
1178 0, /* arith. */
1179 0, /* logical. */
1180 COSTS_N_INSNS (1), /* shift. */
1181 COSTS_N_INSNS (1), /* shift_reg. */
1182 COSTS_N_INSNS (1), /* arith_shift. */
1183 COSTS_N_INSNS (1), /* arith_shift_reg. */
1184 COSTS_N_INSNS (1), /* log_shift. */
1185 COSTS_N_INSNS (1), /* log_shift_reg. */
1186 COSTS_N_INSNS (1), /* extend. */
1187 COSTS_N_INSNS (1), /* extend_arith. */
1188 COSTS_N_INSNS (1), /* bfi. */
1189 COSTS_N_INSNS (1), /* bfx. */
1190 COSTS_N_INSNS (1), /* clz. */
1191 COSTS_N_INSNS (1), /* rev. */
1192 0, /* non_exec. */
1193 true /* non_exec_costs_exec. */
1197 /* MULT SImode */
1199 0, /* simple. */
1200 COSTS_N_INSNS (1), /* flag_setting. */
1201 COSTS_N_INSNS (1), /* extend. */
1202 COSTS_N_INSNS (1), /* add. */
1203 COSTS_N_INSNS (1), /* extend_add. */
1204 COSTS_N_INSNS (7) /* idiv. */
1206 /* MULT DImode */
1208 0, /* simple (N/A). */
1209 0, /* flag_setting (N/A). */
1210 COSTS_N_INSNS (1), /* extend. */
1211 0, /* add. */
1212 COSTS_N_INSNS (2), /* extend_add. */
1213 0 /* idiv (N/A). */
1216 /* LD/ST */
1218 COSTS_N_INSNS (1), /* load. */
1219 COSTS_N_INSNS (1), /* load_sign_extend. */
1220 COSTS_N_INSNS (6), /* ldrd. */
1221 COSTS_N_INSNS (1), /* ldm_1st. */
1222 1, /* ldm_regs_per_insn_1st. */
1223 2, /* ldm_regs_per_insn_subsequent. */
1224 COSTS_N_INSNS (2), /* loadf. */
1225 COSTS_N_INSNS (4), /* loadd. */
1226 COSTS_N_INSNS (1), /* load_unaligned. */
1227 COSTS_N_INSNS (1), /* store. */
1228 COSTS_N_INSNS (3), /* strd. */
1229 COSTS_N_INSNS (1), /* stm_1st. */
1230 1, /* stm_regs_per_insn_1st. */
1231 2, /* stm_regs_per_insn_subsequent. */
1232 COSTS_N_INSNS (2), /* storef. */
1233 COSTS_N_INSNS (2), /* stored. */
1234 COSTS_N_INSNS (1), /* store_unaligned. */
1235 COSTS_N_INSNS (1), /* loadv. */
1236 COSTS_N_INSNS (1) /* storev. */
1239 /* FP SFmode */
1241 COSTS_N_INSNS (15), /* div. */
1242 COSTS_N_INSNS (3), /* mult. */
1243 COSTS_N_INSNS (7), /* mult_addsub. */
1244 COSTS_N_INSNS (7), /* fma. */
1245 COSTS_N_INSNS (3), /* addsub. */
1246 COSTS_N_INSNS (3), /* fpconst. */
1247 COSTS_N_INSNS (3), /* neg. */
1248 COSTS_N_INSNS (3), /* compare. */
1249 COSTS_N_INSNS (3), /* widen. */
1250 COSTS_N_INSNS (3), /* narrow. */
1251 COSTS_N_INSNS (3), /* toint. */
1252 COSTS_N_INSNS (3), /* fromint. */
1253 COSTS_N_INSNS (3) /* roundint. */
1255 /* FP DFmode */
1257 COSTS_N_INSNS (30), /* div. */
1258 COSTS_N_INSNS (6), /* mult. */
1259 COSTS_N_INSNS (10), /* mult_addsub. */
1260 COSTS_N_INSNS (7), /* fma. */
1261 COSTS_N_INSNS (3), /* addsub. */
1262 COSTS_N_INSNS (3), /* fpconst. */
1263 COSTS_N_INSNS (3), /* neg. */
1264 COSTS_N_INSNS (3), /* compare. */
1265 COSTS_N_INSNS (3), /* widen. */
1266 COSTS_N_INSNS (3), /* narrow. */
1267 COSTS_N_INSNS (3), /* toint. */
1268 COSTS_N_INSNS (3), /* fromint. */
1269 COSTS_N_INSNS (3) /* roundint. */
1272 /* Vector */
1274 COSTS_N_INSNS (1) /* alu. */
1279 const struct cpu_cost_table cortexa7_extra_costs =
1281 /* ALU */
1283 0, /* arith. */
1284 0, /* logical. */
1285 COSTS_N_INSNS (1), /* shift. */
1286 COSTS_N_INSNS (1), /* shift_reg. */
1287 COSTS_N_INSNS (1), /* arith_shift. */
1288 COSTS_N_INSNS (1), /* arith_shift_reg. */
1289 COSTS_N_INSNS (1), /* log_shift. */
1290 COSTS_N_INSNS (1), /* log_shift_reg. */
1291 COSTS_N_INSNS (1), /* extend. */
1292 COSTS_N_INSNS (1), /* extend_arith. */
1293 COSTS_N_INSNS (1), /* bfi. */
1294 COSTS_N_INSNS (1), /* bfx. */
1295 COSTS_N_INSNS (1), /* clz. */
1296 COSTS_N_INSNS (1), /* rev. */
1297 0, /* non_exec. */
1298 true /* non_exec_costs_exec. */
1302 /* MULT SImode */
1304 0, /* simple. */
1305 COSTS_N_INSNS (1), /* flag_setting. */
1306 COSTS_N_INSNS (1), /* extend. */
1307 COSTS_N_INSNS (1), /* add. */
1308 COSTS_N_INSNS (1), /* extend_add. */
1309 COSTS_N_INSNS (7) /* idiv. */
1311 /* MULT DImode */
1313 0, /* simple (N/A). */
1314 0, /* flag_setting (N/A). */
1315 COSTS_N_INSNS (1), /* extend. */
1316 0, /* add. */
1317 COSTS_N_INSNS (2), /* extend_add. */
1318 0 /* idiv (N/A). */
1321 /* LD/ST */
1323 COSTS_N_INSNS (1), /* load. */
1324 COSTS_N_INSNS (1), /* load_sign_extend. */
1325 COSTS_N_INSNS (3), /* ldrd. */
1326 COSTS_N_INSNS (1), /* ldm_1st. */
1327 1, /* ldm_regs_per_insn_1st. */
1328 2, /* ldm_regs_per_insn_subsequent. */
1329 COSTS_N_INSNS (2), /* loadf. */
1330 COSTS_N_INSNS (2), /* loadd. */
1331 COSTS_N_INSNS (1), /* load_unaligned. */
1332 COSTS_N_INSNS (1), /* store. */
1333 COSTS_N_INSNS (3), /* strd. */
1334 COSTS_N_INSNS (1), /* stm_1st. */
1335 1, /* stm_regs_per_insn_1st. */
1336 2, /* stm_regs_per_insn_subsequent. */
1337 COSTS_N_INSNS (2), /* storef. */
1338 COSTS_N_INSNS (2), /* stored. */
1339 COSTS_N_INSNS (1), /* store_unaligned. */
1340 COSTS_N_INSNS (1), /* loadv. */
1341 COSTS_N_INSNS (1) /* storev. */
1344 /* FP SFmode */
1346 COSTS_N_INSNS (15), /* div. */
1347 COSTS_N_INSNS (3), /* mult. */
1348 COSTS_N_INSNS (7), /* mult_addsub. */
1349 COSTS_N_INSNS (7), /* fma. */
1350 COSTS_N_INSNS (3), /* addsub. */
1351 COSTS_N_INSNS (3), /* fpconst. */
1352 COSTS_N_INSNS (3), /* neg. */
1353 COSTS_N_INSNS (3), /* compare. */
1354 COSTS_N_INSNS (3), /* widen. */
1355 COSTS_N_INSNS (3), /* narrow. */
1356 COSTS_N_INSNS (3), /* toint. */
1357 COSTS_N_INSNS (3), /* fromint. */
1358 COSTS_N_INSNS (3) /* roundint. */
1360 /* FP DFmode */
1362 COSTS_N_INSNS (30), /* div. */
1363 COSTS_N_INSNS (6), /* mult. */
1364 COSTS_N_INSNS (10), /* mult_addsub. */
1365 COSTS_N_INSNS (7), /* fma. */
1366 COSTS_N_INSNS (3), /* addsub. */
1367 COSTS_N_INSNS (3), /* fpconst. */
1368 COSTS_N_INSNS (3), /* neg. */
1369 COSTS_N_INSNS (3), /* compare. */
1370 COSTS_N_INSNS (3), /* widen. */
1371 COSTS_N_INSNS (3), /* narrow. */
1372 COSTS_N_INSNS (3), /* toint. */
1373 COSTS_N_INSNS (3), /* fromint. */
1374 COSTS_N_INSNS (3) /* roundint. */
1377 /* Vector */
1379 COSTS_N_INSNS (1) /* alu. */
1383 const struct cpu_cost_table cortexa12_extra_costs =
1385 /* ALU */
1387 0, /* arith. */
1388 0, /* logical. */
1389 0, /* shift. */
1390 COSTS_N_INSNS (1), /* shift_reg. */
1391 COSTS_N_INSNS (1), /* arith_shift. */
1392 COSTS_N_INSNS (1), /* arith_shift_reg. */
1393 COSTS_N_INSNS (1), /* log_shift. */
1394 COSTS_N_INSNS (1), /* log_shift_reg. */
1395 0, /* extend. */
1396 COSTS_N_INSNS (1), /* extend_arith. */
1397 0, /* bfi. */
1398 COSTS_N_INSNS (1), /* bfx. */
1399 COSTS_N_INSNS (1), /* clz. */
1400 COSTS_N_INSNS (1), /* rev. */
1401 0, /* non_exec. */
1402 true /* non_exec_costs_exec. */
1404 /* MULT SImode */
1407 COSTS_N_INSNS (2), /* simple. */
1408 COSTS_N_INSNS (3), /* flag_setting. */
1409 COSTS_N_INSNS (2), /* extend. */
1410 COSTS_N_INSNS (3), /* add. */
1411 COSTS_N_INSNS (2), /* extend_add. */
1412 COSTS_N_INSNS (18) /* idiv. */
1414 /* MULT DImode */
1416 0, /* simple (N/A). */
1417 0, /* flag_setting (N/A). */
1418 COSTS_N_INSNS (3), /* extend. */
1419 0, /* add (N/A). */
1420 COSTS_N_INSNS (3), /* extend_add. */
1421 0 /* idiv (N/A). */
1424 /* LD/ST */
1426 COSTS_N_INSNS (3), /* load. */
1427 COSTS_N_INSNS (3), /* load_sign_extend. */
1428 COSTS_N_INSNS (3), /* ldrd. */
1429 COSTS_N_INSNS (3), /* ldm_1st. */
1430 1, /* ldm_regs_per_insn_1st. */
1431 2, /* ldm_regs_per_insn_subsequent. */
1432 COSTS_N_INSNS (3), /* loadf. */
1433 COSTS_N_INSNS (3), /* loadd. */
1434 0, /* load_unaligned. */
1435 0, /* store. */
1436 0, /* strd. */
1437 0, /* stm_1st. */
1438 1, /* stm_regs_per_insn_1st. */
1439 2, /* stm_regs_per_insn_subsequent. */
1440 COSTS_N_INSNS (2), /* storef. */
1441 COSTS_N_INSNS (2), /* stored. */
1442 0, /* store_unaligned. */
1443 COSTS_N_INSNS (1), /* loadv. */
1444 COSTS_N_INSNS (1) /* storev. */
1447 /* FP SFmode */
1449 COSTS_N_INSNS (17), /* div. */
1450 COSTS_N_INSNS (4), /* mult. */
1451 COSTS_N_INSNS (8), /* mult_addsub. */
1452 COSTS_N_INSNS (8), /* fma. */
1453 COSTS_N_INSNS (4), /* addsub. */
1454 COSTS_N_INSNS (2), /* fpconst. */
1455 COSTS_N_INSNS (2), /* neg. */
1456 COSTS_N_INSNS (2), /* compare. */
1457 COSTS_N_INSNS (4), /* widen. */
1458 COSTS_N_INSNS (4), /* narrow. */
1459 COSTS_N_INSNS (4), /* toint. */
1460 COSTS_N_INSNS (4), /* fromint. */
1461 COSTS_N_INSNS (4) /* roundint. */
1463 /* FP DFmode */
1465 COSTS_N_INSNS (31), /* div. */
1466 COSTS_N_INSNS (4), /* mult. */
1467 COSTS_N_INSNS (8), /* mult_addsub. */
1468 COSTS_N_INSNS (8), /* fma. */
1469 COSTS_N_INSNS (4), /* addsub. */
1470 COSTS_N_INSNS (2), /* fpconst. */
1471 COSTS_N_INSNS (2), /* neg. */
1472 COSTS_N_INSNS (2), /* compare. */
1473 COSTS_N_INSNS (4), /* widen. */
1474 COSTS_N_INSNS (4), /* narrow. */
1475 COSTS_N_INSNS (4), /* toint. */
1476 COSTS_N_INSNS (4), /* fromint. */
1477 COSTS_N_INSNS (4) /* roundint. */
1480 /* Vector */
1482 COSTS_N_INSNS (1) /* alu. */
1486 const struct cpu_cost_table cortexa15_extra_costs =
1488 /* ALU */
1490 0, /* arith. */
1491 0, /* logical. */
1492 0, /* shift. */
1493 0, /* shift_reg. */
1494 COSTS_N_INSNS (1), /* arith_shift. */
1495 COSTS_N_INSNS (1), /* arith_shift_reg. */
1496 COSTS_N_INSNS (1), /* log_shift. */
1497 COSTS_N_INSNS (1), /* log_shift_reg. */
1498 0, /* extend. */
1499 COSTS_N_INSNS (1), /* extend_arith. */
1500 COSTS_N_INSNS (1), /* bfi. */
1501 0, /* bfx. */
1502 0, /* clz. */
1503 0, /* rev. */
1504 0, /* non_exec. */
1505 true /* non_exec_costs_exec. */
1507 /* MULT SImode */
1510 COSTS_N_INSNS (2), /* simple. */
1511 COSTS_N_INSNS (3), /* flag_setting. */
1512 COSTS_N_INSNS (2), /* extend. */
1513 COSTS_N_INSNS (2), /* add. */
1514 COSTS_N_INSNS (2), /* extend_add. */
1515 COSTS_N_INSNS (18) /* idiv. */
1517 /* MULT DImode */
1519 0, /* simple (N/A). */
1520 0, /* flag_setting (N/A). */
1521 COSTS_N_INSNS (3), /* extend. */
1522 0, /* add (N/A). */
1523 COSTS_N_INSNS (3), /* extend_add. */
1524 0 /* idiv (N/A). */
1527 /* LD/ST */
1529 COSTS_N_INSNS (3), /* load. */
1530 COSTS_N_INSNS (3), /* load_sign_extend. */
1531 COSTS_N_INSNS (3), /* ldrd. */
1532 COSTS_N_INSNS (4), /* ldm_1st. */
1533 1, /* ldm_regs_per_insn_1st. */
1534 2, /* ldm_regs_per_insn_subsequent. */
1535 COSTS_N_INSNS (4), /* loadf. */
1536 COSTS_N_INSNS (4), /* loadd. */
1537 0, /* load_unaligned. */
1538 0, /* store. */
1539 0, /* strd. */
1540 COSTS_N_INSNS (1), /* stm_1st. */
1541 1, /* stm_regs_per_insn_1st. */
1542 2, /* stm_regs_per_insn_subsequent. */
1543 0, /* storef. */
1544 0, /* stored. */
1545 0, /* store_unaligned. */
1546 COSTS_N_INSNS (1), /* loadv. */
1547 COSTS_N_INSNS (1) /* storev. */
1550 /* FP SFmode */
1552 COSTS_N_INSNS (17), /* div. */
1553 COSTS_N_INSNS (4), /* mult. */
1554 COSTS_N_INSNS (8), /* mult_addsub. */
1555 COSTS_N_INSNS (8), /* fma. */
1556 COSTS_N_INSNS (4), /* addsub. */
1557 COSTS_N_INSNS (2), /* fpconst. */
1558 COSTS_N_INSNS (2), /* neg. */
1559 COSTS_N_INSNS (5), /* compare. */
1560 COSTS_N_INSNS (4), /* widen. */
1561 COSTS_N_INSNS (4), /* narrow. */
1562 COSTS_N_INSNS (4), /* toint. */
1563 COSTS_N_INSNS (4), /* fromint. */
1564 COSTS_N_INSNS (4) /* roundint. */
1566 /* FP DFmode */
1568 COSTS_N_INSNS (31), /* div. */
1569 COSTS_N_INSNS (4), /* mult. */
1570 COSTS_N_INSNS (8), /* mult_addsub. */
1571 COSTS_N_INSNS (8), /* fma. */
1572 COSTS_N_INSNS (4), /* addsub. */
1573 COSTS_N_INSNS (2), /* fpconst. */
1574 COSTS_N_INSNS (2), /* neg. */
1575 COSTS_N_INSNS (2), /* compare. */
1576 COSTS_N_INSNS (4), /* widen. */
1577 COSTS_N_INSNS (4), /* narrow. */
1578 COSTS_N_INSNS (4), /* toint. */
1579 COSTS_N_INSNS (4), /* fromint. */
1580 COSTS_N_INSNS (4) /* roundint. */
1583 /* Vector */
1585 COSTS_N_INSNS (1) /* alu. */
1589 const struct cpu_cost_table v7m_extra_costs =
1591 /* ALU */
1593 0, /* arith. */
1594 0, /* logical. */
1595 0, /* shift. */
1596 0, /* shift_reg. */
1597 0, /* arith_shift. */
1598 COSTS_N_INSNS (1), /* arith_shift_reg. */
1599 0, /* log_shift. */
1600 COSTS_N_INSNS (1), /* log_shift_reg. */
1601 0, /* extend. */
1602 COSTS_N_INSNS (1), /* extend_arith. */
1603 0, /* bfi. */
1604 0, /* bfx. */
1605 0, /* clz. */
1606 0, /* rev. */
1607 COSTS_N_INSNS (1), /* non_exec. */
1608 false /* non_exec_costs_exec. */
1611 /* MULT SImode */
1613 COSTS_N_INSNS (1), /* simple. */
1614 COSTS_N_INSNS (1), /* flag_setting. */
1615 COSTS_N_INSNS (2), /* extend. */
1616 COSTS_N_INSNS (1), /* add. */
1617 COSTS_N_INSNS (3), /* extend_add. */
1618 COSTS_N_INSNS (8) /* idiv. */
1620 /* MULT DImode */
1622 0, /* simple (N/A). */
1623 0, /* flag_setting (N/A). */
1624 COSTS_N_INSNS (2), /* extend. */
1625 0, /* add (N/A). */
1626 COSTS_N_INSNS (3), /* extend_add. */
1627 0 /* idiv (N/A). */
1630 /* LD/ST */
1632 COSTS_N_INSNS (2), /* load. */
1633 0, /* load_sign_extend. */
1634 COSTS_N_INSNS (3), /* ldrd. */
1635 COSTS_N_INSNS (2), /* ldm_1st. */
1636 1, /* ldm_regs_per_insn_1st. */
1637 1, /* ldm_regs_per_insn_subsequent. */
1638 COSTS_N_INSNS (2), /* loadf. */
1639 COSTS_N_INSNS (3), /* loadd. */
1640 COSTS_N_INSNS (1), /* load_unaligned. */
1641 COSTS_N_INSNS (2), /* store. */
1642 COSTS_N_INSNS (3), /* strd. */
1643 COSTS_N_INSNS (2), /* stm_1st. */
1644 1, /* stm_regs_per_insn_1st. */
1645 1, /* stm_regs_per_insn_subsequent. */
1646 COSTS_N_INSNS (2), /* storef. */
1647 COSTS_N_INSNS (3), /* stored. */
1648 COSTS_N_INSNS (1), /* store_unaligned. */
1649 COSTS_N_INSNS (1), /* loadv. */
1650 COSTS_N_INSNS (1) /* storev. */
1653 /* FP SFmode */
1655 COSTS_N_INSNS (7), /* div. */
1656 COSTS_N_INSNS (2), /* mult. */
1657 COSTS_N_INSNS (5), /* mult_addsub. */
1658 COSTS_N_INSNS (3), /* fma. */
1659 COSTS_N_INSNS (1), /* addsub. */
1660 0, /* fpconst. */
1661 0, /* neg. */
1662 0, /* compare. */
1663 0, /* widen. */
1664 0, /* narrow. */
1665 0, /* toint. */
1666 0, /* fromint. */
1667 0 /* roundint. */
1669 /* FP DFmode */
1671 COSTS_N_INSNS (15), /* div. */
1672 COSTS_N_INSNS (5), /* mult. */
1673 COSTS_N_INSNS (7), /* mult_addsub. */
1674 COSTS_N_INSNS (7), /* fma. */
1675 COSTS_N_INSNS (3), /* addsub. */
1676 0, /* fpconst. */
1677 0, /* neg. */
1678 0, /* compare. */
1679 0, /* widen. */
1680 0, /* narrow. */
1681 0, /* toint. */
1682 0, /* fromint. */
1683 0 /* roundint. */
1686 /* Vector */
1688 COSTS_N_INSNS (1) /* alu. */
1692 const struct tune_params arm_slowmul_tune =
1694 arm_slowmul_rtx_costs,
1695 NULL, /* Insn extra costs. */
1696 NULL, /* Sched adj cost. */
1697 arm_default_branch_cost,
1698 &arm_default_vec_cost,
1699 3, /* Constant limit. */
1700 5, /* Max cond insns. */
1701 8, /* Memset max inline. */
1702 1, /* Issue rate. */
1703 ARM_PREFETCH_NOT_BENEFICIAL,
1704 tune_params::PREF_CONST_POOL_TRUE,
1705 tune_params::PREF_LDRD_FALSE,
1706 tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
1707 tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
1708 tune_params::DISPARAGE_FLAGS_NEITHER,
1709 tune_params::PREF_NEON_64_FALSE,
1710 tune_params::PREF_NEON_STRINGOPS_FALSE,
1711 tune_params::FUSE_NOTHING,
1712 tune_params::SCHED_AUTOPREF_OFF
1715 const struct tune_params arm_fastmul_tune =
1717 arm_fastmul_rtx_costs,
1718 NULL, /* Insn extra costs. */
1719 NULL, /* Sched adj cost. */
1720 arm_default_branch_cost,
1721 &arm_default_vec_cost,
1722 1, /* Constant limit. */
1723 5, /* Max cond insns. */
1724 8, /* Memset max inline. */
1725 1, /* Issue rate. */
1726 ARM_PREFETCH_NOT_BENEFICIAL,
1727 tune_params::PREF_CONST_POOL_TRUE,
1728 tune_params::PREF_LDRD_FALSE,
1729 tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
1730 tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
1731 tune_params::DISPARAGE_FLAGS_NEITHER,
1732 tune_params::PREF_NEON_64_FALSE,
1733 tune_params::PREF_NEON_STRINGOPS_FALSE,
1734 tune_params::FUSE_NOTHING,
1735 tune_params::SCHED_AUTOPREF_OFF
1738 /* StrongARM has early execution of branches, so a sequence that is worth
1739 skipping is shorter. Set max_insns_skipped to a lower value. */
1741 const struct tune_params arm_strongarm_tune =
1743 arm_fastmul_rtx_costs,
1744 NULL, /* Insn extra costs. */
1745 NULL, /* Sched adj cost. */
1746 arm_default_branch_cost,
1747 &arm_default_vec_cost,
1748 1, /* Constant limit. */
1749 3, /* Max cond insns. */
1750 8, /* Memset max inline. */
1751 1, /* Issue rate. */
1752 ARM_PREFETCH_NOT_BENEFICIAL,
1753 tune_params::PREF_CONST_POOL_TRUE,
1754 tune_params::PREF_LDRD_FALSE,
1755 tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
1756 tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
1757 tune_params::DISPARAGE_FLAGS_NEITHER,
1758 tune_params::PREF_NEON_64_FALSE,
1759 tune_params::PREF_NEON_STRINGOPS_FALSE,
1760 tune_params::FUSE_NOTHING,
1761 tune_params::SCHED_AUTOPREF_OFF
1764 const struct tune_params arm_xscale_tune =
1766 arm_xscale_rtx_costs,
1767 NULL, /* Insn extra costs. */
1768 xscale_sched_adjust_cost,
1769 arm_default_branch_cost,
1770 &arm_default_vec_cost,
1771 2, /* Constant limit. */
1772 3, /* Max cond insns. */
1773 8, /* Memset max inline. */
1774 1, /* Issue rate. */
1775 ARM_PREFETCH_NOT_BENEFICIAL,
1776 tune_params::PREF_CONST_POOL_TRUE,
1777 tune_params::PREF_LDRD_FALSE,
1778 tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
1779 tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
1780 tune_params::DISPARAGE_FLAGS_NEITHER,
1781 tune_params::PREF_NEON_64_FALSE,
1782 tune_params::PREF_NEON_STRINGOPS_FALSE,
1783 tune_params::FUSE_NOTHING,
1784 tune_params::SCHED_AUTOPREF_OFF
1787 const struct tune_params arm_9e_tune =
1789 arm_9e_rtx_costs,
1790 NULL, /* Insn extra costs. */
1791 NULL, /* Sched adj cost. */
1792 arm_default_branch_cost,
1793 &arm_default_vec_cost,
1794 1, /* Constant limit. */
1795 5, /* Max cond insns. */
1796 8, /* Memset max inline. */
1797 1, /* Issue rate. */
1798 ARM_PREFETCH_NOT_BENEFICIAL,
1799 tune_params::PREF_CONST_POOL_TRUE,
1800 tune_params::PREF_LDRD_FALSE,
1801 tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
1802 tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
1803 tune_params::DISPARAGE_FLAGS_NEITHER,
1804 tune_params::PREF_NEON_64_FALSE,
1805 tune_params::PREF_NEON_STRINGOPS_FALSE,
1806 tune_params::FUSE_NOTHING,
1807 tune_params::SCHED_AUTOPREF_OFF
1810 const struct tune_params arm_marvell_pj4_tune =
1812 arm_9e_rtx_costs,
1813 NULL, /* Insn extra costs. */
1814 NULL, /* Sched adj cost. */
1815 arm_default_branch_cost,
1816 &arm_default_vec_cost,
1817 1, /* Constant limit. */
1818 5, /* Max cond insns. */
1819 8, /* Memset max inline. */
1820 2, /* Issue rate. */
1821 ARM_PREFETCH_NOT_BENEFICIAL,
1822 tune_params::PREF_CONST_POOL_TRUE,
1823 tune_params::PREF_LDRD_FALSE,
1824 tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
1825 tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
1826 tune_params::DISPARAGE_FLAGS_NEITHER,
1827 tune_params::PREF_NEON_64_FALSE,
1828 tune_params::PREF_NEON_STRINGOPS_FALSE,
1829 tune_params::FUSE_NOTHING,
1830 tune_params::SCHED_AUTOPREF_OFF
1833 const struct tune_params arm_v6t2_tune =
1835 arm_9e_rtx_costs,
1836 NULL, /* Insn extra costs. */
1837 NULL, /* Sched adj cost. */
1838 arm_default_branch_cost,
1839 &arm_default_vec_cost,
1840 1, /* Constant limit. */
1841 5, /* Max cond insns. */
1842 8, /* Memset max inline. */
1843 1, /* Issue rate. */
1844 ARM_PREFETCH_NOT_BENEFICIAL,
1845 tune_params::PREF_CONST_POOL_FALSE,
1846 tune_params::PREF_LDRD_FALSE,
1847 tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
1848 tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
1849 tune_params::DISPARAGE_FLAGS_NEITHER,
1850 tune_params::PREF_NEON_64_FALSE,
1851 tune_params::PREF_NEON_STRINGOPS_FALSE,
1852 tune_params::FUSE_NOTHING,
1853 tune_params::SCHED_AUTOPREF_OFF
1857 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1858 const struct tune_params arm_cortex_tune =
1860 arm_9e_rtx_costs,
1861 &generic_extra_costs,
1862 NULL, /* Sched adj cost. */
1863 arm_default_branch_cost,
1864 &arm_default_vec_cost,
1865 1, /* Constant limit. */
1866 5, /* Max cond insns. */
1867 8, /* Memset max inline. */
1868 2, /* Issue rate. */
1869 ARM_PREFETCH_NOT_BENEFICIAL,
1870 tune_params::PREF_CONST_POOL_FALSE,
1871 tune_params::PREF_LDRD_FALSE,
1872 tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
1873 tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
1874 tune_params::DISPARAGE_FLAGS_NEITHER,
1875 tune_params::PREF_NEON_64_FALSE,
1876 tune_params::PREF_NEON_STRINGOPS_FALSE,
1877 tune_params::FUSE_NOTHING,
1878 tune_params::SCHED_AUTOPREF_OFF
1881 const struct tune_params arm_cortex_a8_tune =
1883 arm_9e_rtx_costs,
1884 &cortexa8_extra_costs,
1885 NULL, /* Sched adj cost. */
1886 arm_default_branch_cost,
1887 &arm_default_vec_cost,
1888 1, /* Constant limit. */
1889 5, /* Max cond insns. */
1890 8, /* Memset max inline. */
1891 2, /* Issue rate. */
1892 ARM_PREFETCH_NOT_BENEFICIAL,
1893 tune_params::PREF_CONST_POOL_FALSE,
1894 tune_params::PREF_LDRD_FALSE,
1895 tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
1896 tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
1897 tune_params::DISPARAGE_FLAGS_NEITHER,
1898 tune_params::PREF_NEON_64_FALSE,
1899 tune_params::PREF_NEON_STRINGOPS_TRUE,
1900 tune_params::FUSE_NOTHING,
1901 tune_params::SCHED_AUTOPREF_OFF
1904 const struct tune_params arm_cortex_a7_tune =
1906 arm_9e_rtx_costs,
1907 &cortexa7_extra_costs,
1908 NULL, /* Sched adj cost. */
1909 arm_default_branch_cost,
1910 &arm_default_vec_cost,
1911 1, /* Constant limit. */
1912 5, /* Max cond insns. */
1913 8, /* Memset max inline. */
1914 2, /* Issue rate. */
1915 ARM_PREFETCH_NOT_BENEFICIAL,
1916 tune_params::PREF_CONST_POOL_FALSE,
1917 tune_params::PREF_LDRD_FALSE,
1918 tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
1919 tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
1920 tune_params::DISPARAGE_FLAGS_NEITHER,
1921 tune_params::PREF_NEON_64_FALSE,
1922 tune_params::PREF_NEON_STRINGOPS_TRUE,
1923 tune_params::FUSE_NOTHING,
1924 tune_params::SCHED_AUTOPREF_OFF
1927 const struct tune_params arm_cortex_a15_tune =
1929 arm_9e_rtx_costs,
1930 &cortexa15_extra_costs,
1931 NULL, /* Sched adj cost. */
1932 arm_default_branch_cost,
1933 &arm_default_vec_cost,
1934 1, /* Constant limit. */
1935 2, /* Max cond insns. */
1936 8, /* Memset max inline. */
1937 3, /* Issue rate. */
1938 ARM_PREFETCH_NOT_BENEFICIAL,
1939 tune_params::PREF_CONST_POOL_FALSE,
1940 tune_params::PREF_LDRD_TRUE,
1941 tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
1942 tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
1943 tune_params::DISPARAGE_FLAGS_ALL,
1944 tune_params::PREF_NEON_64_FALSE,
1945 tune_params::PREF_NEON_STRINGOPS_TRUE,
1946 tune_params::FUSE_NOTHING,
1947 tune_params::SCHED_AUTOPREF_FULL
1950 const struct tune_params arm_cortex_a53_tune =
1952 arm_9e_rtx_costs,
1953 &cortexa53_extra_costs,
1954 NULL, /* Sched adj cost. */
1955 arm_default_branch_cost,
1956 &arm_default_vec_cost,
1957 1, /* Constant limit. */
1958 5, /* Max cond insns. */
1959 8, /* Memset max inline. */
1960 2, /* Issue rate. */
1961 ARM_PREFETCH_NOT_BENEFICIAL,
1962 tune_params::PREF_CONST_POOL_FALSE,
1963 tune_params::PREF_LDRD_FALSE,
1964 tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
1965 tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
1966 tune_params::DISPARAGE_FLAGS_NEITHER,
1967 tune_params::PREF_NEON_64_FALSE,
1968 tune_params::PREF_NEON_STRINGOPS_TRUE,
1969 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
1970 tune_params::SCHED_AUTOPREF_OFF
1973 const struct tune_params arm_cortex_a57_tune =
1975 arm_9e_rtx_costs,
1976 &cortexa57_extra_costs,
1977 NULL, /* Sched adj cost. */
1978 arm_default_branch_cost,
1979 &arm_default_vec_cost,
1980 1, /* Constant limit. */
1981 2, /* Max cond insns. */
1982 8, /* Memset max inline. */
1983 3, /* Issue rate. */
1984 ARM_PREFETCH_NOT_BENEFICIAL,
1985 tune_params::PREF_CONST_POOL_FALSE,
1986 tune_params::PREF_LDRD_TRUE,
1987 tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
1988 tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
1989 tune_params::DISPARAGE_FLAGS_ALL,
1990 tune_params::PREF_NEON_64_FALSE,
1991 tune_params::PREF_NEON_STRINGOPS_TRUE,
1992 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
1993 tune_params::SCHED_AUTOPREF_FULL
1996 const struct tune_params arm_xgene1_tune =
1998 arm_9e_rtx_costs,
1999 &xgene1_extra_costs,
2000 NULL, /* Sched adj cost. */
2001 arm_default_branch_cost,
2002 &arm_default_vec_cost,
2003 1, /* Constant limit. */
2004 2, /* Max cond insns. */
2005 32, /* Memset max inline. */
2006 4, /* Issue rate. */
2007 ARM_PREFETCH_NOT_BENEFICIAL,
2008 tune_params::PREF_CONST_POOL_FALSE,
2009 tune_params::PREF_LDRD_TRUE,
2010 tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
2011 tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
2012 tune_params::DISPARAGE_FLAGS_ALL,
2013 tune_params::PREF_NEON_64_FALSE,
2014 tune_params::PREF_NEON_STRINGOPS_FALSE,
2015 tune_params::FUSE_NOTHING,
2016 tune_params::SCHED_AUTOPREF_OFF
2019 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2020 less appealing. Set max_insns_skipped to a low value. */
2022 const struct tune_params arm_cortex_a5_tune =
2024 arm_9e_rtx_costs,
2025 &cortexa5_extra_costs,
2026 NULL, /* Sched adj cost. */
2027 arm_cortex_a5_branch_cost,
2028 &arm_default_vec_cost,
2029 1, /* Constant limit. */
2030 1, /* Max cond insns. */
2031 8, /* Memset max inline. */
2032 2, /* Issue rate. */
2033 ARM_PREFETCH_NOT_BENEFICIAL,
2034 tune_params::PREF_CONST_POOL_FALSE,
2035 tune_params::PREF_LDRD_FALSE,
2036 tune_params::LOG_OP_NON_SC_FALSE, /* Thumb. */
2037 tune_params::LOG_OP_NON_SC_FALSE, /* ARM. */
2038 tune_params::DISPARAGE_FLAGS_NEITHER,
2039 tune_params::PREF_NEON_64_FALSE,
2040 tune_params::PREF_NEON_STRINGOPS_TRUE,
2041 tune_params::FUSE_NOTHING,
2042 tune_params::SCHED_AUTOPREF_OFF
2045 const struct tune_params arm_cortex_a9_tune =
2047 arm_9e_rtx_costs,
2048 &cortexa9_extra_costs,
2049 cortex_a9_sched_adjust_cost,
2050 arm_default_branch_cost,
2051 &arm_default_vec_cost,
2052 1, /* Constant limit. */
2053 5, /* Max cond insns. */
2054 8, /* Memset max inline. */
2055 2, /* Issue rate. */
2056 ARM_PREFETCH_BENEFICIAL(4,32,32),
2057 tune_params::PREF_CONST_POOL_FALSE,
2058 tune_params::PREF_LDRD_FALSE,
2059 tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
2060 tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
2061 tune_params::DISPARAGE_FLAGS_NEITHER,
2062 tune_params::PREF_NEON_64_FALSE,
2063 tune_params::PREF_NEON_STRINGOPS_FALSE,
2064 tune_params::FUSE_NOTHING,
2065 tune_params::SCHED_AUTOPREF_OFF
2068 const struct tune_params arm_cortex_a12_tune =
2070 arm_9e_rtx_costs,
2071 &cortexa12_extra_costs,
2072 NULL, /* Sched adj cost. */
2073 arm_default_branch_cost,
2074 &arm_default_vec_cost, /* Vectorizer costs. */
2075 1, /* Constant limit. */
2076 2, /* Max cond insns. */
2077 8, /* Memset max inline. */
2078 2, /* Issue rate. */
2079 ARM_PREFETCH_NOT_BENEFICIAL,
2080 tune_params::PREF_CONST_POOL_FALSE,
2081 tune_params::PREF_LDRD_TRUE,
2082 tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
2083 tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
2084 tune_params::DISPARAGE_FLAGS_ALL,
2085 tune_params::PREF_NEON_64_FALSE,
2086 tune_params::PREF_NEON_STRINGOPS_TRUE,
2087 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2088 tune_params::SCHED_AUTOPREF_OFF
2091 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2092 cycle to execute each. An LDR from the constant pool also takes two cycles
2093 to execute, but mildly increases pipelining opportunity (consecutive
2094 loads/stores can be pipelined together, saving one cycle), and may also
2095 improve icache utilisation. Hence we prefer the constant pool for such
2096 processors. */
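/* As a rough illustration of the trade-off described above (the exact
   cycle counts are core-dependent and are only an assumption for this
   example), a 32-bit constant can be materialised either as

	movw	r0, #0x5678		@ 1 cycle
	movt	r0, #0x1234		@ 1 cycle

   or as a single literal-pool load

	ldr	r0, .LC0		@ 2 cycles, but can pipeline with
					@ adjacent loads/stores
	...
   .LC0:	.word	0x12345678

   The PREF_CONST_POOL_TRUE setting below biases the rtx costs towards
   the second form on these cores.  */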
2098 const struct tune_params arm_v7m_tune =
2100 arm_9e_rtx_costs,
2101 &v7m_extra_costs,
2102 NULL, /* Sched adj cost. */
2103 arm_cortex_m_branch_cost,
2104 &arm_default_vec_cost,
2105 1, /* Constant limit. */
2106 2, /* Max cond insns. */
2107 8, /* Memset max inline. */
2108 1, /* Issue rate. */
2109 ARM_PREFETCH_NOT_BENEFICIAL,
2110 tune_params::PREF_CONST_POOL_TRUE,
2111 tune_params::PREF_LDRD_FALSE,
2112 tune_params::LOG_OP_NON_SC_FALSE, /* Thumb. */
2113 tune_params::LOG_OP_NON_SC_FALSE, /* ARM. */
2114 tune_params::DISPARAGE_FLAGS_NEITHER,
2115 tune_params::PREF_NEON_64_FALSE,
2116 tune_params::PREF_NEON_STRINGOPS_FALSE,
2117 tune_params::FUSE_NOTHING,
2118 tune_params::SCHED_AUTOPREF_OFF
2121 /* Cortex-M7 tuning. */
2123 const struct tune_params arm_cortex_m7_tune =
2125 arm_9e_rtx_costs,
2126 &v7m_extra_costs,
2127 NULL, /* Sched adj cost. */
2128 arm_cortex_m7_branch_cost,
2129 &arm_default_vec_cost,
2130 0, /* Constant limit. */
2131 1, /* Max cond insns. */
2132 8, /* Memset max inline. */
2133 2, /* Issue rate. */
2134 ARM_PREFETCH_NOT_BENEFICIAL,
2135 tune_params::PREF_CONST_POOL_TRUE,
2136 tune_params::PREF_LDRD_FALSE,
2137 tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
2138 tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
2139 tune_params::DISPARAGE_FLAGS_NEITHER,
2140 tune_params::PREF_NEON_64_FALSE,
2141 tune_params::PREF_NEON_STRINGOPS_FALSE,
2142 tune_params::FUSE_NOTHING,
2143 tune_params::SCHED_AUTOPREF_OFF
2146 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2147 arm_v6t2_tune. It is used for cortex-m0, cortex-m1 and cortex-m0plus. */
2148 const struct tune_params arm_v6m_tune =
2150 arm_9e_rtx_costs,
2151 NULL, /* Insn extra costs. */
2152 NULL, /* Sched adj cost. */
2153 arm_default_branch_cost,
2154 &arm_default_vec_cost, /* Vectorizer costs. */
2155 1, /* Constant limit. */
2156 5, /* Max cond insns. */
2157 8, /* Memset max inline. */
2158 1, /* Issue rate. */
2159 ARM_PREFETCH_NOT_BENEFICIAL,
2160 tune_params::PREF_CONST_POOL_FALSE,
2161 tune_params::PREF_LDRD_FALSE,
2162 tune_params::LOG_OP_NON_SC_FALSE, /* Thumb. */
2163 tune_params::LOG_OP_NON_SC_FALSE, /* ARM. */
2164 tune_params::DISPARAGE_FLAGS_NEITHER,
2165 tune_params::PREF_NEON_64_FALSE,
2166 tune_params::PREF_NEON_STRINGOPS_FALSE,
2167 tune_params::FUSE_NOTHING,
2168 tune_params::SCHED_AUTOPREF_OFF
2171 const struct tune_params arm_fa726te_tune =
2173 arm_9e_rtx_costs,
2174 NULL, /* Insn extra costs. */
2175 fa726te_sched_adjust_cost,
2176 arm_default_branch_cost,
2177 &arm_default_vec_cost,
2178 1, /* Constant limit. */
2179 5, /* Max cond insns. */
2180 8, /* Memset max inline. */
2181 2, /* Issue rate. */
2182 ARM_PREFETCH_NOT_BENEFICIAL,
2183 tune_params::PREF_CONST_POOL_TRUE,
2184 tune_params::PREF_LDRD_FALSE,
2185 tune_params::LOG_OP_NON_SC_TRUE, /* Thumb. */
2186 tune_params::LOG_OP_NON_SC_TRUE, /* ARM. */
2187 tune_params::DISPARAGE_FLAGS_NEITHER,
2188 tune_params::PREF_NEON_64_FALSE,
2189 tune_params::PREF_NEON_STRINGOPS_FALSE,
2190 tune_params::FUSE_NOTHING,
2191 tune_params::SCHED_AUTOPREF_OFF
2195 /* Not all of these give usefully different compilation alternatives,
2196 but there is no simple way of generalizing them. */
2197 static const struct processors all_cores[] =
2199 /* ARM Cores */
2200 #define ARM_CORE(NAME, X, IDENT, ARCH, FLAGS, COSTS) \
2201 {NAME, IDENT, #ARCH, BASE_ARCH_##ARCH, \
2202 FLAGS | FL_FOR_ARCH##ARCH, &arm_##COSTS##_tune},
2203 #include "arm-cores.def"
2204 #undef ARM_CORE
2205 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2208 static const struct processors all_architectures[] =
2210 /* ARM Architectures */
2211 /* We don't specify tuning costs here as it will be figured out
2212 from the core. */
2214 #define ARM_ARCH(NAME, CORE, ARCH, FLAGS) \
2215 {NAME, CORE, #ARCH, BASE_ARCH_##ARCH, FLAGS, NULL},
2216 #include "arm-arches.def"
2217 #undef ARM_ARCH
2218 {NULL, arm_none, NULL, BASE_ARCH_0, 0, NULL}
2222 /* These are populated as commandline arguments are processed, or NULL
2223 if not specified. */
2224 static const struct processors *arm_selected_arch;
2225 static const struct processors *arm_selected_cpu;
2226 static const struct processors *arm_selected_tune;
2228 /* The name of the preprocessor macro to define for this architecture. */
2230 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
2232 /* Available values for -mfpu=. */
2234 static const struct arm_fpu_desc all_fpus[] =
2236 #define ARM_FPU(NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO) \
2237 { NAME, MODEL, REV, VFP_REGS, NEON, FP16, CRYPTO },
2238 #include "arm-fpus.def"
2239 #undef ARM_FPU
2243 /* Supported TLS relocations. */
2245 enum tls_reloc {
2246 TLS_GD32,
2247 TLS_LDM32,
2248 TLS_LDO32,
2249 TLS_IE32,
2250 TLS_LE32,
2251 TLS_DESCSEQ /* GNU scheme */
2254 /* The maximum number of insns to be used when loading a constant. */
2255 inline static int
2256 arm_constant_limit (bool size_p)
2258 return size_p ? 1 : current_tune->constant_limit;
2261 /* Emit an insn that's a simple single-set. Both the operands must be known
2262 to be valid. */
2263 inline static rtx_insn *
2264 emit_set_insn (rtx x, rtx y)
2266 return emit_insn (gen_rtx_SET (x, y));
2269 /* Return the number of bits set in VALUE. */
2270 static unsigned
2271 bit_count (unsigned long value)
2273 unsigned long count = 0;
2275 while (value)
2277 count++;
2278 value &= value - 1; /* Clear the least-significant set bit. */
2281 return count;
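/* Worked example of the loop above (illustration only): for
   value = 0b101100 successive iterations leave
     0b101100 -> 0b101000 -> 0b100000 -> 0
   so the loop runs once per set bit and the function returns 3.  */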
2284 typedef struct
2286 machine_mode mode;
2287 const char *name;
2288 } arm_fixed_mode_set;
2290 /* A small helper for setting fixed-point libfuncs.  */
2292 static void
2293 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2294 const char *funcname, const char *modename,
2295 int num_suffix)
2297 char buffer[50];
2299 if (num_suffix == 0)
2300 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2301 else
2302 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2304 set_optab_libfunc (optable, mode, buffer);
2307 static void
2308 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2309 machine_mode from, const char *funcname,
2310 const char *toname, const char *fromname)
2312 char buffer[50];
2313 const char *maybe_suffix_2 = "";
2315 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2316 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2317 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2318 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2319 maybe_suffix_2 = "2";
2321 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2322 maybe_suffix_2);
2324 set_conv_libfunc (optable, to, from, buffer);
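/* For illustration, the helpers above build names following the
   fixed-bit.h conventions, e.g. (hypothetical calls):

     arm_set_fixed_optab_libfunc (add_optab, SQmode, "add", "sq", 3);
	registers "__gnu_addsq3", while

     arm_set_fixed_conv_libfunc (fract_optab, SQmode, SImode,
				 "fract", "sq", "si");
	registers "__gnu_fractsisq" -- no "2" suffix, because SImode is
	not a fixed-point mode.  */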
2327 /* Set up library functions unique to ARM. */
2329 static void
2330 arm_init_libfuncs (void)
2332 /* For Linux, we have access to kernel support for atomic operations. */
2333 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2334 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2336 /* There are no special library functions unless we are using the
2337 ARM BPABI. */
2338 if (!TARGET_BPABI)
2339 return;
2341 /* The functions below are described in Section 4 of the "Run-Time
2342 ABI for the ARM architecture", Version 1.0. */
2344 /* Double-precision floating-point arithmetic. Table 2. */
2345 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2346 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2347 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2348 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2349 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
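/* As a concrete illustration (assuming a soft-float configuration in
   which DFmode arithmetic goes through libcalls):

     double f (double a, double b) { return a + b; }

   now calls __aeabi_dadd rather than the generic libgcc __adddf3,
   because of the optab override above.  */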
2351 /* Double-precision comparisons. Table 3. */
2352 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2353 set_optab_libfunc (ne_optab, DFmode, NULL);
2354 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2355 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2356 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2357 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2358 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2360 /* Single-precision floating-point arithmetic. Table 4. */
2361 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2362 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2363 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2364 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2365 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2367 /* Single-precision comparisons. Table 5. */
2368 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2369 set_optab_libfunc (ne_optab, SFmode, NULL);
2370 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2371 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2372 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2373 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2374 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2376 /* Floating-point to integer conversions. Table 6. */
2377 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2378 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2379 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2380 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2381 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2382 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2383 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2384 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2386 /* Conversions between floating types. Table 7. */
2387 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2388 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2390 /* Integer to floating-point conversions. Table 8. */
2391 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2392 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2393 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2394 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2395 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2396 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2397 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2398 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2400 /* Long long. Table 9. */
2401 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2402 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2403 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2404 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2405 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2406 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2407 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2408 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2410 /* Integer (32/32->32) division. \S 4.3.1. */
2411 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2412 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2414 /* The divmod functions are designed so that they can be used for
2415 plain division, even though they return both the quotient and the
2416 remainder. The quotient is returned in the usual location (i.e.,
2417 r0 for SImode, {r0, r1} for DImode), just as would be expected
2418 for an ordinary division routine. Because the AAPCS calling
2419 conventions specify that all of { r0, r1, r2, r3 } are
2420 call-clobbered registers, there is no need to tell the compiler
2421 explicitly that those registers are clobbered by these
2422 routines. */
2423 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2424 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2426 /* For SImode division the ABI provides div-without-mod routines,
2427 which are faster. */
2428 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2429 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2431 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2432 divmod libcalls instead. */
2433 set_optab_libfunc (smod_optab, DImode, NULL);
2434 set_optab_libfunc (umod_optab, DImode, NULL);
2435 set_optab_libfunc (smod_optab, SImode, NULL);
2436 set_optab_libfunc (umod_optab, SImode, NULL);
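/* Taken together, the settings above mean that, for example (illustrative
   source only, on a core without hardware divide):

     int quot_rem (int a, int b, int *rem)
     {
       *rem = a % b;
       return a / b;
     }

   ends up calling __aeabi_idivmod (quotient in r0, remainder in r1, as
   described in the Run-Time ABI), while a plain division with no
   remainder uses the faster __aeabi_idiv entry point registered above.  */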
2438 /* Half-precision float operations. The compiler handles all operations
2439 with NULL libfuncs by converting to SFmode.  */
2440 switch (arm_fp16_format)
2442 case ARM_FP16_FORMAT_IEEE:
2443 case ARM_FP16_FORMAT_ALTERNATIVE:
2445 /* Conversions. */
2446 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2447 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2448 ? "__gnu_f2h_ieee"
2449 : "__gnu_f2h_alternative"));
2450 set_conv_libfunc (sext_optab, SFmode, HFmode,
2451 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2452 ? "__gnu_h2f_ieee"
2453 : "__gnu_h2f_alternative"));
2455 /* Arithmetic. */
2456 set_optab_libfunc (add_optab, HFmode, NULL);
2457 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2458 set_optab_libfunc (smul_optab, HFmode, NULL);
2459 set_optab_libfunc (neg_optab, HFmode, NULL);
2460 set_optab_libfunc (sub_optab, HFmode, NULL);
2462 /* Comparisons. */
2463 set_optab_libfunc (eq_optab, HFmode, NULL);
2464 set_optab_libfunc (ne_optab, HFmode, NULL);
2465 set_optab_libfunc (lt_optab, HFmode, NULL);
2466 set_optab_libfunc (le_optab, HFmode, NULL);
2467 set_optab_libfunc (ge_optab, HFmode, NULL);
2468 set_optab_libfunc (gt_optab, HFmode, NULL);
2469 set_optab_libfunc (unord_optab, HFmode, NULL);
2470 break;
2472 default:
2473 break;
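/* Consequently, with -mfp16-format=ieee and no FP16 hardware, something
   like (illustrative only):

     __fp16 scale (__fp16 x) { return x * 2.0f; }

   is compiled as a call to __gnu_h2f_ieee, a single-precision multiply,
   and a call to __gnu_f2h_ieee on the way back; the arithmetic itself
   needs no HFmode libfuncs because they are left NULL above.  */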
2476 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2478 const arm_fixed_mode_set fixed_arith_modes[] =
2480 { QQmode, "qq" },
2481 { UQQmode, "uqq" },
2482 { HQmode, "hq" },
2483 { UHQmode, "uhq" },
2484 { SQmode, "sq" },
2485 { USQmode, "usq" },
2486 { DQmode, "dq" },
2487 { UDQmode, "udq" },
2488 { TQmode, "tq" },
2489 { UTQmode, "utq" },
2490 { HAmode, "ha" },
2491 { UHAmode, "uha" },
2492 { SAmode, "sa" },
2493 { USAmode, "usa" },
2494 { DAmode, "da" },
2495 { UDAmode, "uda" },
2496 { TAmode, "ta" },
2497 { UTAmode, "uta" }
2499 const arm_fixed_mode_set fixed_conv_modes[] =
2501 { QQmode, "qq" },
2502 { UQQmode, "uqq" },
2503 { HQmode, "hq" },
2504 { UHQmode, "uhq" },
2505 { SQmode, "sq" },
2506 { USQmode, "usq" },
2507 { DQmode, "dq" },
2508 { UDQmode, "udq" },
2509 { TQmode, "tq" },
2510 { UTQmode, "utq" },
2511 { HAmode, "ha" },
2512 { UHAmode, "uha" },
2513 { SAmode, "sa" },
2514 { USAmode, "usa" },
2515 { DAmode, "da" },
2516 { UDAmode, "uda" },
2517 { TAmode, "ta" },
2518 { UTAmode, "uta" },
2519 { QImode, "qi" },
2520 { HImode, "hi" },
2521 { SImode, "si" },
2522 { DImode, "di" },
2523 { TImode, "ti" },
2524 { SFmode, "sf" },
2525 { DFmode, "df" }
2527 unsigned int i, j;
2529 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2531 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2532 "add", fixed_arith_modes[i].name, 3);
2533 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2534 "ssadd", fixed_arith_modes[i].name, 3);
2535 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2536 "usadd", fixed_arith_modes[i].name, 3);
2537 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2538 "sub", fixed_arith_modes[i].name, 3);
2539 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2540 "sssub", fixed_arith_modes[i].name, 3);
2541 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2542 "ussub", fixed_arith_modes[i].name, 3);
2543 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2544 "mul", fixed_arith_modes[i].name, 3);
2545 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2546 "ssmul", fixed_arith_modes[i].name, 3);
2547 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2548 "usmul", fixed_arith_modes[i].name, 3);
2549 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2550 "div", fixed_arith_modes[i].name, 3);
2551 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2552 "udiv", fixed_arith_modes[i].name, 3);
2553 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2554 "ssdiv", fixed_arith_modes[i].name, 3);
2555 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2556 "usdiv", fixed_arith_modes[i].name, 3);
2557 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2558 "neg", fixed_arith_modes[i].name, 2);
2559 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2560 "ssneg", fixed_arith_modes[i].name, 2);
2561 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2562 "usneg", fixed_arith_modes[i].name, 2);
2563 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2564 "ashl", fixed_arith_modes[i].name, 3);
2565 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2566 "ashr", fixed_arith_modes[i].name, 3);
2567 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2568 "lshr", fixed_arith_modes[i].name, 3);
2569 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2570 "ssashl", fixed_arith_modes[i].name, 3);
2571 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2572 "usashl", fixed_arith_modes[i].name, 3);
2573 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2574 "cmp", fixed_arith_modes[i].name, 2);
2577 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2578 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2580 if (i == j
2581 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2582 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2583 continue;
2585 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2586 fixed_conv_modes[j].mode, "fract",
2587 fixed_conv_modes[i].name,
2588 fixed_conv_modes[j].name);
2589 arm_set_fixed_conv_libfunc (satfract_optab,
2590 fixed_conv_modes[i].mode,
2591 fixed_conv_modes[j].mode, "satfract",
2592 fixed_conv_modes[i].name,
2593 fixed_conv_modes[j].name);
2594 arm_set_fixed_conv_libfunc (fractuns_optab,
2595 fixed_conv_modes[i].mode,
2596 fixed_conv_modes[j].mode, "fractuns",
2597 fixed_conv_modes[i].name,
2598 fixed_conv_modes[j].name);
2599 arm_set_fixed_conv_libfunc (satfractuns_optab,
2600 fixed_conv_modes[i].mode,
2601 fixed_conv_modes[j].mode, "satfractuns",
2602 fixed_conv_modes[i].name,
2603 fixed_conv_modes[j].name);
2607 if (TARGET_AAPCS_BASED)
2608 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2611 /* On AAPCS systems, this is the "struct __va_list". */
2612 static GTY(()) tree va_list_type;
2614 /* Return the type to use as __builtin_va_list. */
2615 static tree
2616 arm_build_builtin_va_list (void)
2618 tree va_list_name;
2619 tree ap_field;
2621 if (!TARGET_AAPCS_BASED)
2622 return std_build_builtin_va_list ();
2624 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2625 defined as:
2627 struct __va_list
2629 void *__ap;
2632 The C Library ABI further reinforces this definition in \S
2633 4.1.
2635 We must follow this definition exactly. The structure tag
2636 name is visible in C++ mangled names, and thus forms a part
2637 of the ABI. The field name may be used by people who
2638 #include <stdarg.h>. */
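/* For illustration of the mangling impact mentioned above: the ARM C++
   ABI treats va_list as if it were std::__va_list, so a declaration such
   as "void f (va_list)" mangles to "_Z1fSt9__va_list" on AAPCS
   targets.  */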
2639 /* Create the type. */
2640 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2641 /* Give it the required name. */
2642 va_list_name = build_decl (BUILTINS_LOCATION,
2643 TYPE_DECL,
2644 get_identifier ("__va_list"),
2645 va_list_type);
2646 DECL_ARTIFICIAL (va_list_name) = 1;
2647 TYPE_NAME (va_list_type) = va_list_name;
2648 TYPE_STUB_DECL (va_list_type) = va_list_name;
2649 /* Create the __ap field. */
2650 ap_field = build_decl (BUILTINS_LOCATION,
2651 FIELD_DECL,
2652 get_identifier ("__ap"),
2653 ptr_type_node);
2654 DECL_ARTIFICIAL (ap_field) = 1;
2655 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2656 TYPE_FIELDS (va_list_type) = ap_field;
2657 /* Compute its layout. */
2658 layout_type (va_list_type);
2660 return va_list_type;
2663 /* Return an expression of type "void *" pointing to the next
2664 available argument in a variable-argument list. VALIST is the
2665 user-level va_list object, of type __builtin_va_list. */
2666 static tree
2667 arm_extract_valist_ptr (tree valist)
2669 if (TREE_TYPE (valist) == error_mark_node)
2670 return error_mark_node;
2672 /* On an AAPCS target, the pointer is stored within "struct
2673 va_list". */
2674 if (TARGET_AAPCS_BASED)
2676 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2677 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2678 valist, ap_field, NULL_TREE);
2681 return valist;
2684 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2685 static void
2686 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2688 valist = arm_extract_valist_ptr (valist);
2689 std_expand_builtin_va_start (valist, nextarg);
2692 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2693 static tree
2694 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2695 gimple_seq *post_p)
2697 valist = arm_extract_valist_ptr (valist);
2698 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2701 /* Check any incompatible options that the user has specified. */
2702 static void
2703 arm_option_check_internal (struct gcc_options *opts)
2705 /* Make sure that the processor choice does not conflict with any of the
2706 other command line choices. */
2707 if (TREE_TARGET_ARM (opts) && !(insn_flags & FL_NOTM))
2708 error ("target CPU does not support ARM mode");
2710 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
2711 from here where no function is being compiled currently. */
2712 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TREE_TARGET_ARM (opts))
2713 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2715 if (TREE_TARGET_ARM (opts) && TARGET_CALLEE_INTERWORKING)
2716 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2718 /* If this target is normally configured to use APCS frames, warn if they
2719 are turned off and debugging is turned on. */
2720 if (TREE_TARGET_ARM (opts)
2721 && write_symbols != NO_DEBUG
2722 && !TARGET_APCS_FRAME
2723 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2724 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2726 /* iWMMXt unsupported under Thumb mode. */
2727 if (TREE_TARGET_THUMB (opts) && TARGET_IWMMXT)
2728 error ("iWMMXt unsupported under Thumb mode");
2730 if (TARGET_HARD_TP && TREE_TARGET_THUMB1 (opts))
2731 error ("can not use -mtp=cp15 with 16-bit Thumb");
2733 if (TREE_TARGET_THUMB (opts) && TARGET_VXWORKS_RTP && flag_pic)
2735 error ("RTP PIC is incompatible with Thumb");
2736 flag_pic = 0;
2739 /* We only support -mslow-flash-data on armv7-m targets. */
2740 if (target_slow_flash_data
2741 && ((!(arm_arch7 && !arm_arch_notm) && !arm_arch7em)
2742 || (TREE_TARGET_THUMB1 (opts) || flag_pic || TARGET_NEON)))
2743 error ("-mslow-flash-data only supports non-pic code on armv7-m targets");
2746 /* Set params depending on attributes and optimization options. */
2747 static void
2748 arm_option_params_internal (struct gcc_options *opts)
2750 /* If we are not using the default (ARM mode) section anchor offset
2751 ranges, then set the correct ranges now. */
2752 if (TREE_TARGET_THUMB1 (opts))
2754 /* Thumb-1 LDR instructions cannot have negative offsets.
2755 Permissible positive offset ranges are 5-bit (for byte loads),
2756 6-bit (for halfword loads), or 7-bit (for word loads).
2757 Empirical results suggest a 7-bit anchor range gives the best
2758 overall code size. */
2759 targetm.min_anchor_offset = 0;
2760 targetm.max_anchor_offset = 127;
2762 else if (TREE_TARGET_THUMB2 (opts))
2764 /* The minimum is set such that the total size of the block
2765 for a particular anchor is 248 + 1 + 4095 bytes, which is
2766 divisible by eight, ensuring natural spacing of anchors. */
2767 targetm.min_anchor_offset = -248;
2768 targetm.max_anchor_offset = 4095;
2770 else
2772 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2773 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
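/* For illustration (register names and offsets are only an example), the
   anchor ranges above let a block of small globals share one base
   address on Thumb-1:

	ldr	r3, .LANCHOR0		@ section anchor address
	ldr	r0, [r3]		@ first variable,  offset 0
	ldr	r1, [r3, #4]		@ second variable, offset 4

   with each offset kept inside the 0..127 window selected above, instead
   of materialising a separate address for every variable.  */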
2776 if (optimize_size)
2778 /* If optimizing for size, bump the number of instructions that we
2779 are prepared to conditionally execute (even on a StrongARM). */
2780 max_insns_skipped = 6;
2782 /* For THUMB2, we limit the conditional sequence to one IT block. */
2783 if (TREE_TARGET_THUMB2 (opts))
2784 max_insns_skipped = opts->x_arm_restrict_it ? 1 : 4;
2786 else
2787 max_insns_skipped = current_tune->max_insns_skipped;
2790 /* Reset options between modes that the user has specified. */
2791 static void
2792 arm_option_override_internal (struct gcc_options *opts,
2793 struct gcc_options *opts_set)
2795 if (TREE_TARGET_THUMB (opts) && !(insn_flags & FL_THUMB))
2797 warning (0, "target CPU does not support THUMB instructions");
2798 opts->x_target_flags &= ~MASK_THUMB;
2801 if (TARGET_APCS_FRAME && TREE_TARGET_THUMB (opts))
2803 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2804 opts->x_target_flags &= ~MASK_APCS_FRAME;
2807 /* Callee super interworking implies thumb interworking. Adding
2808 this to the flags here simplifies the logic elsewhere. */
2809 if (TREE_TARGET_THUMB (opts) && TARGET_CALLEE_INTERWORKING)
2810 opts->x_target_flags |= MASK_INTERWORK;
2812 if (! opts_set->x_arm_restrict_it)
2813 opts->x_arm_restrict_it = arm_arch8;
2815 if (!TREE_TARGET_THUMB2 (opts))
2816 opts->x_arm_restrict_it = 0;
2818 if (TREE_TARGET_THUMB1 (opts))
2820 /* Don't warn since it's on by default in -O2. */
2821 opts->x_flag_schedule_insns = 0;
2824 /* Disable shrink-wrap when optimizing function for size, since it tends to
2825 generate additional returns. */
2826 if (optimize_function_for_size_p (cfun) && TREE_TARGET_THUMB2 (opts))
2827 opts->x_flag_shrink_wrap = false;
2829 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
2830 - epilogue_insns - does not accurately model the corresponding insns
2831 emitted in the asm file. In particular, see the comment in thumb_exit
2832 'Find out how many of the (return) argument registers we can corrupt'.
2833 As a consequence, the epilogue may clobber registers without fipa-ra
2834 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
2835 TODO: Accurately model clobbers for epilogue_insns and reenable
2836 fipa-ra. */
2837 if (TREE_TARGET_THUMB1 (opts))
2838 opts->x_flag_ipa_ra = 0;
2840 /* Thumb2 inline assembly code should always use unified syntax.
2841 This will apply to ARM and Thumb1 eventually. */
2842 opts->x_inline_asm_unified = TREE_TARGET_THUMB2 (opts);
2845 /* Fix up any incompatible options that the user has specified. */
2846 static void
2847 arm_option_override (void)
2849 arm_selected_arch = NULL;
2850 arm_selected_cpu = NULL;
2851 arm_selected_tune = NULL;
2853 if (global_options_set.x_arm_arch_option)
2854 arm_selected_arch = &all_architectures[arm_arch_option];
2856 if (global_options_set.x_arm_cpu_option)
2858 arm_selected_cpu = &all_cores[(int) arm_cpu_option];
2859 arm_selected_tune = &all_cores[(int) arm_cpu_option];
2862 if (global_options_set.x_arm_tune_option)
2863 arm_selected_tune = &all_cores[(int) arm_tune_option];
2865 #ifdef SUBTARGET_OVERRIDE_OPTIONS
2866 SUBTARGET_OVERRIDE_OPTIONS;
2867 #endif
2869 if (arm_selected_arch)
2871 if (arm_selected_cpu)
2873 /* Check for conflict between mcpu and march. */
2874 if ((arm_selected_cpu->flags ^ arm_selected_arch->flags) & ~FL_TUNE)
2876 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
2877 arm_selected_cpu->name, arm_selected_arch->name);
2878 /* -march wins for code generation.
2879 -mcpu wins for default tuning. */
2880 if (!arm_selected_tune)
2881 arm_selected_tune = arm_selected_cpu;
2883 arm_selected_cpu = arm_selected_arch;
2885 else
2886 /* -mcpu wins. */
2887 arm_selected_arch = NULL;
2889 else
2890 /* Pick a CPU based on the architecture. */
2891 arm_selected_cpu = arm_selected_arch;
2894 /* If the user did not specify a processor, choose one for them. */
2895 if (!arm_selected_cpu)
2897 const struct processors * sel;
2898 unsigned int sought;
2900 arm_selected_cpu = &all_cores[TARGET_CPU_DEFAULT];
2901 if (!arm_selected_cpu->name)
2903 #ifdef SUBTARGET_CPU_DEFAULT
2904 /* Use the subtarget default CPU if none was specified by
2905 configure. */
2906 arm_selected_cpu = &all_cores[SUBTARGET_CPU_DEFAULT];
2907 #endif
2908 /* Default to ARM6. */
2909 if (!arm_selected_cpu->name)
2910 arm_selected_cpu = &all_cores[arm6];
2913 sel = arm_selected_cpu;
2914 insn_flags = sel->flags;
2916 /* Now check to see if the user has specified some command line
2917 switches that require certain abilities from the cpu.  */
2918 sought = 0;
2920 if (TARGET_INTERWORK || TARGET_THUMB)
2922 sought |= (FL_THUMB | FL_MODE32);
2924 /* There are no ARM processors that support both APCS-26 and
2925 interworking. Therefore we force FL_MODE26 to be removed
2926 from insn_flags here (if it was set), so that the search
2927 below will always be able to find a compatible processor. */
2928 insn_flags &= ~FL_MODE26;
2931 if (sought != 0 && ((sought & insn_flags) != sought))
2933 /* Try to locate a CPU type that supports all of the abilities
2934 of the default CPU, plus the extra abilities requested by
2935 the user. */
2936 for (sel = all_cores; sel->name != NULL; sel++)
2937 if ((sel->flags & sought) == (sought | insn_flags))
2938 break;
2940 if (sel->name == NULL)
2942 unsigned current_bit_count = 0;
2943 const struct processors * best_fit = NULL;
2945 /* Ideally we would like to issue an error message here
2946 saying that it was not possible to find a CPU compatible
2947 with the default CPU, but which also supports the command
2948 line options specified by the programmer, and so they
2949 ought to use the -mcpu=<name> command line option to
2950 override the default CPU type.
2952 If we cannot find a cpu that has both the
2953 characteristics of the default cpu and the given
2954 command line options we scan the array again looking
2955 for a best match. */
2956 for (sel = all_cores; sel->name != NULL; sel++)
2957 if ((sel->flags & sought) == sought)
2959 unsigned count;
2961 count = bit_count (sel->flags & insn_flags);
2963 if (count >= current_bit_count)
2965 best_fit = sel;
2966 current_bit_count = count;
2970 gcc_assert (best_fit);
2971 sel = best_fit;
2974 arm_selected_cpu = sel;
2978 gcc_assert (arm_selected_cpu);
2979 /* The selected cpu may be an architecture, so lookup tuning by core ID. */
2980 if (!arm_selected_tune)
2981 arm_selected_tune = &all_cores[arm_selected_cpu->core];
2983 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_selected_cpu->arch);
2984 insn_flags = arm_selected_cpu->flags;
2985 arm_base_arch = arm_selected_cpu->base_arch;
2987 arm_tune = arm_selected_tune->core;
2988 tune_flags = arm_selected_tune->flags;
2989 current_tune = arm_selected_tune->tune;
2991 /* TBD: Dwarf info for apcs frame is not handled yet. */
2992 if (TARGET_APCS_FRAME)
2993 flag_shrink_wrap = false;
2995 /* BPABI targets use linker tricks to allow interworking on cores
2996 without thumb support. */
2997 if (TARGET_INTERWORK && !((insn_flags & FL_THUMB) || TARGET_BPABI))
2999 warning (0, "target CPU does not support interworking" );
3000 target_flags &= ~MASK_INTERWORK;
3003 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3005 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3006 target_flags |= MASK_APCS_FRAME;
3009 if (TARGET_POKE_FUNCTION_NAME)
3010 target_flags |= MASK_APCS_FRAME;
3012 if (TARGET_APCS_REENT && flag_pic)
3013 error ("-fpic and -mapcs-reent are incompatible");
3015 if (TARGET_APCS_REENT)
3016 warning (0, "APCS reentrant code not supported. Ignored");
3018 if (TARGET_APCS_FLOAT)
3019 warning (0, "passing floating point arguments in fp regs not yet supported");
3021 /* Initialize boolean versions of the flags, for use in the arm.md file. */
3022 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
3023 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
3024 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
3025 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
3026 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
3027 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
3028 arm_arch6k = (insn_flags & FL_ARCH6K) != 0;
3029 arm_arch_notm = (insn_flags & FL_NOTM) != 0;
3030 arm_arch6m = arm_arch6 && !arm_arch_notm;
3031 arm_arch7 = (insn_flags & FL_ARCH7) != 0;
3032 arm_arch7em = (insn_flags & FL_ARCH7EM) != 0;
3033 arm_arch8 = (insn_flags & FL_ARCH8) != 0;
3034 arm_arch_thumb2 = (insn_flags & FL_THUMB2) != 0;
3035 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
3037 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
3038 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
3039 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
3040 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
3041 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
3042 arm_arch_iwmmxt2 = (insn_flags & FL_IWMMXT2) != 0;
3043 arm_arch_thumb_hwdiv = (insn_flags & FL_THUMB_DIV) != 0;
3044 arm_arch_arm_hwdiv = (insn_flags & FL_ARM_DIV) != 0;
3045 arm_arch_no_volatile_ce = (insn_flags & FL_NO_VOLATILE_CE) != 0;
3046 arm_tune_cortex_a9 = (arm_tune == cortexa9) != 0;
3047 arm_arch_crc = (insn_flags & FL_CRC32) != 0;
3048 arm_m_profile_small_mul = (insn_flags & FL_SMALLMUL) != 0;
3050 /* V5 code we generate is completely interworking capable, so we turn off
3051 TARGET_INTERWORK here to avoid many tests later on. */
3053 /* XXX However, we must pass the right pre-processor defines to CPP
3054 or GLD can get confused. This is a hack. */
3055 if (TARGET_INTERWORK)
3056 arm_cpp_interwork = 1;
3058 if (arm_arch5)
3059 target_flags &= ~MASK_INTERWORK;
3061 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3062 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3064 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3065 error ("iwmmxt abi requires an iwmmxt capable cpu");
3067 if (!global_options_set.x_arm_fpu_index)
3069 const char *target_fpu_name;
3070 bool ok;
3072 #ifdef FPUTYPE_DEFAULT
3073 target_fpu_name = FPUTYPE_DEFAULT;
3074 #else
3075 target_fpu_name = "vfp";
3076 #endif
3078 ok = opt_enum_arg_to_value (OPT_mfpu_, target_fpu_name, &arm_fpu_index,
3079 CL_TARGET);
3080 gcc_assert (ok);
3083 arm_fpu_desc = &all_fpus[arm_fpu_index];
3085 switch (arm_fpu_desc->model)
3087 case ARM_FP_MODEL_VFP:
3088 arm_fpu_attr = FPU_VFP;
3089 break;
3091 default:
3092 gcc_unreachable();
3095 if (TARGET_AAPCS_BASED)
3097 if (TARGET_CALLER_INTERWORKING)
3098 error ("AAPCS does not support -mcaller-super-interworking");
3099 else
3100 if (TARGET_CALLEE_INTERWORKING)
3101 error ("AAPCS does not support -mcallee-super-interworking");
3104 /* iWMMXt and NEON are incompatible. */
3105 if (TARGET_IWMMXT && TARGET_NEON)
3106 error ("iWMMXt and NEON are incompatible");
3108 /* __fp16 support currently assumes the core has ldrh. */
3109 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3110 sorry ("__fp16 and no ldrh");
3112 /* If soft-float is specified then don't use FPU. */
3113 if (TARGET_SOFT_FLOAT)
3114 arm_fpu_attr = FPU_NONE;
3116 if (TARGET_AAPCS_BASED)
3118 if (arm_abi == ARM_ABI_IWMMXT)
3119 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3120 else if (arm_float_abi == ARM_FLOAT_ABI_HARD
3121 && TARGET_HARD_FLOAT
3122 && TARGET_VFP)
3123 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3124 else
3125 arm_pcs_default = ARM_PCS_AAPCS;
3127 else
3129 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
3130 sorry ("-mfloat-abi=hard and VFP");
3132 if (arm_abi == ARM_ABI_APCS)
3133 arm_pcs_default = ARM_PCS_APCS;
3134 else
3135 arm_pcs_default = ARM_PCS_ATPCS;
3138 /* For arm2/3 there is no need to do any scheduling if we are doing
3139 software floating-point. */
3140 if (TARGET_SOFT_FLOAT && (tune_flags & FL_MODE32) == 0)
3141 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3143 /* Use the cp15 method if it is available. */
3144 if (target_thread_pointer == TP_AUTO)
3146 if (arm_arch6k && !TARGET_THUMB1)
3147 target_thread_pointer = TP_CP15;
3148 else
3149 target_thread_pointer = TP_SOFT;
3152 /* Override the default structure alignment for AAPCS ABI. */
3153 if (!global_options_set.x_arm_structure_size_boundary)
3155 if (TARGET_AAPCS_BASED)
3156 arm_structure_size_boundary = 8;
3158 else
3160 if (arm_structure_size_boundary != 8
3161 && arm_structure_size_boundary != 32
3162 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3164 if (ARM_DOUBLEWORD_ALIGN)
3165 warning (0,
3166 "structure size boundary can only be set to 8, 32 or 64");
3167 else
3168 warning (0, "structure size boundary can only be set to 8 or 32");
3169 arm_structure_size_boundary
3170 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
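/* To illustrate the effect of this option (example only): with
   -mstructure-size-boundary=32, a type such as

     struct tiny { char c; };

   is padded and aligned to 32 bits (sizeof == 4), whereas the AAPCS
   default of 8 leaves it at a single byte.  */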
3174 /* If stack checking is disabled, we can use r10 as the PIC register,
3175 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3176 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3178 if (TARGET_VXWORKS_RTP)
3179 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3180 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3183 if (flag_pic && TARGET_VXWORKS_RTP)
3184 arm_pic_register = 9;
3186 if (arm_pic_register_string != NULL)
3188 int pic_register = decode_reg_name (arm_pic_register_string);
3190 if (!flag_pic)
3191 warning (0, "-mpic-register= is useless without -fpic");
3193 /* Prevent the user from choosing an obviously stupid PIC register. */
3194 else if (pic_register < 0 || call_used_regs[pic_register]
3195 || pic_register == HARD_FRAME_POINTER_REGNUM
3196 || pic_register == STACK_POINTER_REGNUM
3197 || pic_register >= PC_REGNUM
3198 || (TARGET_VXWORKS_RTP
3199 && (unsigned int) pic_register != arm_pic_register))
3200 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3201 else
3202 arm_pic_register = pic_register;
3205 if (TARGET_VXWORKS_RTP
3206 && !global_options_set.x_arm_pic_data_is_text_relative)
3207 arm_pic_data_is_text_relative = 0;
3209 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3210 if (fix_cm3_ldrd == 2)
3212 if (arm_selected_cpu->core == cortexm3)
3213 fix_cm3_ldrd = 1;
3214 else
3215 fix_cm3_ldrd = 0;
3218 /* Enable -munaligned-access by default for
3219 - all ARMv6 architecture-based processors
3220 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3221      - ARMv8 architecture-based processors.
3223 Disable -munaligned-access by default for
3224 - all pre-ARMv6 architecture-based processors
3225 - ARMv6-M architecture-based processors. */
3227 if (unaligned_access == 2)
3229 if (arm_arch6 && (arm_arch_notm || arm_arch7))
3230 unaligned_access = 1;
3231 else
3232 unaligned_access = 0;
3234 else if (unaligned_access == 1
3235 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3237 warning (0, "target CPU does not support unaligned accesses");
3238 unaligned_access = 0;
3241 /* Hot/Cold partitioning is not currently supported, since we can't
3242 handle literal pool placement in that case. */
3243 if (flag_reorder_blocks_and_partition)
3245 inform (input_location,
3246 "-freorder-blocks-and-partition not supported on this architecture");
3247 flag_reorder_blocks_and_partition = 0;
3248 flag_reorder_blocks = 1;
3251 if (flag_pic)
3252 /* Hoisting PIC address calculations more aggressively provides a small,
3253 but measurable, size reduction for PIC code. Therefore, we decrease
3254 the bar for unrestricted expression hoisting to the cost of PIC address
3255 calculation, which is 2 instructions. */
3256 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3257 global_options.x_param_values,
3258 global_options_set.x_param_values);
3260 /* ARM EABI defaults to strict volatile bitfields. */
3261 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3262 && abi_version_at_least(2))
3263 flag_strict_volatile_bitfields = 1;
3265   /* Enable sw prefetching at -O3 for CPUs that have prefetch and where we
3266      have deemed it beneficial (signified by setting
3267      prefetch.num_slots to 1 or more).  */
3268 if (flag_prefetch_loop_arrays < 0
3269 && HAVE_prefetch
3270 && optimize >= 3
3271 && current_tune->prefetch.num_slots > 0)
3272 flag_prefetch_loop_arrays = 1;
3274 /* Set up parameters to be used in prefetching algorithm. Do not
3275 override the defaults unless we are tuning for a core we have
3276 researched values for. */
3277 if (current_tune->prefetch.num_slots > 0)
3278 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3279 current_tune->prefetch.num_slots,
3280 global_options.x_param_values,
3281 global_options_set.x_param_values);
3282 if (current_tune->prefetch.l1_cache_line_size >= 0)
3283 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3284 current_tune->prefetch.l1_cache_line_size,
3285 global_options.x_param_values,
3286 global_options_set.x_param_values);
3287 if (current_tune->prefetch.l1_cache_size >= 0)
3288 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3289 current_tune->prefetch.l1_cache_size,
3290 global_options.x_param_values,
3291 global_options_set.x_param_values);
3293   /* Use Neon to perform 64-bit operations rather than core
3294 registers. */
3295 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3296 if (use_neon_for_64bits == 1)
3297 prefer_neon_for_64bits = true;
3299 /* Use the alternative scheduling-pressure algorithm by default. */
3300 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3301 global_options.x_param_values,
3302 global_options_set.x_param_values);
3304 /* Look through ready list and all of queue for instructions
3305 relevant for L2 auto-prefetcher. */
3306 int param_sched_autopref_queue_depth;
3308 switch (current_tune->sched_autopref)
3310 case tune_params::SCHED_AUTOPREF_OFF:
3311 param_sched_autopref_queue_depth = -1;
3312 break;
3314 case tune_params::SCHED_AUTOPREF_RANK:
3315 param_sched_autopref_queue_depth = 0;
3316 break;
3318 case tune_params::SCHED_AUTOPREF_FULL:
3319 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3320 break;
3322 default:
3323 gcc_unreachable ();
3326 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3327 param_sched_autopref_queue_depth,
3328 global_options.x_param_values,
3329 global_options_set.x_param_values);
3331 /* Currently, for slow flash data, we just disable literal pools. */
3332 if (target_slow_flash_data)
3333 arm_disable_literal_pool = true;
3335   /* Disable scheduling fusion by default if the target is not an armv7
3336      processor or does not prefer ldrd/strd.  */
3337 if (flag_schedule_fusion == 2
3338 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3339 flag_schedule_fusion = 0;
3341 arm_option_override_internal (&global_options, &global_options_set);
3342 arm_option_check_internal (&global_options);
3343 arm_option_params_internal (&global_options);
3345 /* Register global variables with the garbage collector. */
3346 arm_add_gc_roots ();
3349 static void
3350 arm_add_gc_roots (void)
3352 gcc_obstack_init(&minipool_obstack);
3353 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3356 /* A table of known ARM exception types.
3357 For use with the interrupt function attribute. */
3359 typedef struct
3361 const char *const arg;
3362 const unsigned long return_value;
3364 isr_attribute_arg;
3366 static const isr_attribute_arg isr_attribute_args [] =
3368 { "IRQ", ARM_FT_ISR },
3369 { "irq", ARM_FT_ISR },
3370 { "FIQ", ARM_FT_FIQ },
3371 { "fiq", ARM_FT_FIQ },
3372 { "ABORT", ARM_FT_ISR },
3373 { "abort", ARM_FT_ISR },
3374 { "ABORT", ARM_FT_ISR },
3375 { "abort", ARM_FT_ISR },
3376 { "UNDEF", ARM_FT_EXCEPTION },
3377 { "undef", ARM_FT_EXCEPTION },
3378 { "SWI", ARM_FT_EXCEPTION },
3379 { "swi", ARM_FT_EXCEPTION },
3380 { NULL, ARM_FT_NORMAL }
3383 /* Returns the (interrupt) function type of the current
3384 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3386 static unsigned long
3387 arm_isr_value (tree argument)
3389 const isr_attribute_arg * ptr;
3390 const char * arg;
3392 if (!arm_arch_notm)
3393 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3395 /* No argument - default to IRQ. */
3396 if (argument == NULL_TREE)
3397 return ARM_FT_ISR;
3399 /* Get the value of the argument. */
3400 if (TREE_VALUE (argument) == NULL_TREE
3401 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3402 return ARM_FT_UNKNOWN;
3404 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3406 /* Check it against the list of known arguments. */
3407 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3408 if (streq (arg, ptr->arg))
3409 return ptr->return_value;
3411 /* An unrecognized interrupt type. */
3412 return ARM_FT_UNKNOWN;
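/* Illustrative sketch (not part of the build): how the strings in
   isr_attribute_args above are expected to appear in user code.  The
   handler names are hypothetical.  */
#if 0
void fiq_handler (void) __attribute__ ((interrupt ("FIQ")));
void swi_handler (void) __attribute__ ((isr ("SWI")));
void irq_handler (void) __attribute__ ((interrupt));	/* No argument: defaults to IRQ.  */
#endif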
3415 /* Computes the type of the current function. */
3417 static unsigned long
3418 arm_compute_func_type (void)
3420 unsigned long type = ARM_FT_UNKNOWN;
3421 tree a;
3422 tree attr;
3424 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3426 /* Decide if the current function is volatile. Such functions
3427 never return, and many memory cycles can be saved by not storing
3428 register values that will never be needed again. This optimization
3429 was added to speed up context switching in a kernel application. */
3430 if (optimize > 0
3431 && (TREE_NOTHROW (current_function_decl)
3432 || !(flag_unwind_tables
3433 || (flag_exceptions
3434 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3435 && TREE_THIS_VOLATILE (current_function_decl))
3436 type |= ARM_FT_VOLATILE;
3438 if (cfun->static_chain_decl != NULL)
3439 type |= ARM_FT_NESTED;
3441 attr = DECL_ATTRIBUTES (current_function_decl);
3443 a = lookup_attribute ("naked", attr);
3444 if (a != NULL_TREE)
3445 type |= ARM_FT_NAKED;
3447 a = lookup_attribute ("isr", attr);
3448 if (a == NULL_TREE)
3449 a = lookup_attribute ("interrupt", attr);
3451 if (a == NULL_TREE)
3452 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3453 else
3454 type |= arm_isr_value (TREE_VALUE (a));
3456 return type;
3459 /* Returns the type of the current function. */
3461 unsigned long
3462 arm_current_func_type (void)
3464 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3465 cfun->machine->func_type = arm_compute_func_type ();
3467 return cfun->machine->func_type;
3470 bool
3471 arm_allocate_stack_slots_for_args (void)
3473 /* Naked functions should not allocate stack slots for arguments. */
3474 return !IS_NAKED (arm_current_func_type ());
3477 static bool
3478 arm_warn_func_return (tree decl)
3480 /* Naked functions are implemented entirely in assembly, including the
3481 return sequence, so suppress warnings about this. */
3482 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
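/* Illustrative sketch (not part of the build): a "naked" function as the
   two hooks above treat it -- no stack slots are allocated for its
   arguments and no warning is issued for the missing return sequence.
   The symbol names are hypothetical.  */
#if 0
void __attribute__ ((naked))
start_entry (void)
{
  /* The body is entirely hand-written assembly; GCC emits no prologue,
     epilogue or return sequence.  */
  __asm__ volatile ("b\tmain");
}
#endif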
3486 /* Output assembler code for a block containing the constant parts
3487 of a trampoline, leaving space for the variable parts.
3489 On the ARM, (if r8 is the static chain regnum, and remembering that
3490 referencing pc adds an offset of 8) the trampoline looks like:
3491 ldr r8, [pc, #0]
3492 ldr pc, [pc]
3493 .word static chain value
3494 .word function's address
3495 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3497 static void
3498 arm_asm_trampoline_template (FILE *f)
3500 if (TARGET_ARM)
3502 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3503 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3505 else if (TARGET_THUMB2)
3507 /* The Thumb-2 trampoline is similar to the arm implementation.
3508 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3509 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3510 STATIC_CHAIN_REGNUM, PC_REGNUM);
3511 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3513 else
3515 ASM_OUTPUT_ALIGN (f, 2);
3516 fprintf (f, "\t.code\t16\n");
3517 fprintf (f, ".Ltrampoline_start:\n");
3518 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3519 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3520 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3521 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3522 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3523 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3525 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3526 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3529 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3531 static void
3532 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3534 rtx fnaddr, mem, a_tramp;
3536 emit_block_move (m_tramp, assemble_trampoline_template (),
3537 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3539 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3540 emit_move_insn (mem, chain_value);
3542 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3543 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3544 emit_move_insn (mem, fnaddr);
3546 a_tramp = XEXP (m_tramp, 0);
3547 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3548 LCT_NORMAL, VOIDmode, 2, a_tramp, Pmode,
3549 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
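/* Illustrative sketch (not part of the build): taking the address of a
   GNU C nested function is the typical way a trampoline like the one
   initialized above comes into existence, since the stub must load the
   static chain register before jumping to the real code.  The function
   names are hypothetical.  */
#if 0
int
call_with_base (int base)
{
  int add_base (int x) { return x + base; }	/* Needs the static chain.  */
  int (*fn) (int) = add_base;			/* Address taken => trampoline.  */
  return fn (1);
}
#endif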
3552 /* Thumb trampolines should be entered in thumb mode, so set
3553 the bottom bit of the address. */
3555 static rtx
3556 arm_trampoline_adjust_address (rtx addr)
3558 if (TARGET_THUMB)
3559 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3560 NULL, 0, OPTAB_LIB_WIDEN);
3561 return addr;
3564 /* Return 1 if it is possible to return using a single instruction.
3565 If SIBLING is non-null, this is a test for a return before a sibling
3566 call. SIBLING is the call insn, so we can examine its register usage. */
3569 use_return_insn (int iscond, rtx sibling)
3571 int regno;
3572 unsigned int func_type;
3573 unsigned long saved_int_regs;
3574 unsigned HOST_WIDE_INT stack_adjust;
3575 arm_stack_offsets *offsets;
3577 /* Never use a return instruction before reload has run. */
3578 if (!reload_completed)
3579 return 0;
3581 func_type = arm_current_func_type ();
3583 /* Naked, volatile and stack alignment functions need special
3584 consideration. */
3585 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3586 return 0;
3588 /* So do interrupt functions that use the frame pointer and Thumb
3589 interrupt functions. */
3590 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3591 return 0;
3593 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3594 && !optimize_function_for_size_p (cfun))
3595 return 0;
3597 offsets = arm_get_frame_offsets ();
3598 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3600 /* As do variadic functions. */
3601 if (crtl->args.pretend_args_size
3602 || cfun->machine->uses_anonymous_args
3603 /* Or if the function calls __builtin_eh_return () */
3604 || crtl->calls_eh_return
3605 /* Or if the function calls alloca */
3606 || cfun->calls_alloca
3607 /* Or if there is a stack adjustment. However, if the stack pointer
3608 is saved on the stack, we can use a pre-incrementing stack load. */
3609 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3610 && stack_adjust == 4)))
3611 return 0;
3613 saved_int_regs = offsets->saved_regs_mask;
3615 /* Unfortunately, the insn
3617 ldmib sp, {..., sp, ...}
3619 triggers a bug on most SA-110 based devices, such that the stack
3620 pointer won't be correctly restored if the instruction takes a
3621 page fault. We work around this problem by popping r3 along with
3622 the other registers, since that is never slower than executing
3623 another instruction.
3625 We test for !arm_arch5 here, because code for any architecture
3626 less than this could potentially be run on one of the buggy
3627 chips. */
3628 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3630 /* Validate that r3 is a call-clobbered register (always true in
3631 the default abi) ... */
3632 if (!call_used_regs[3])
3633 return 0;
3635 /* ... that it isn't being used for a return value ... */
3636 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3637 return 0;
3639 /* ... or for a tail-call argument ... */
3640 if (sibling)
3642 gcc_assert (CALL_P (sibling));
3644 if (find_regno_fusage (sibling, USE, 3))
3645 return 0;
3648 /* ... and that there are no call-saved registers in r0-r2
3649 (always true in the default ABI). */
3650 if (saved_int_regs & 0x7)
3651 return 0;
3654 /* Can't be done if interworking with Thumb, and any registers have been
3655 stacked. */
3656 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
3657 return 0;
3659 /* On StrongARM, conditional returns are expensive if they aren't
3660 taken and multiple registers have been stacked. */
3661 if (iscond && arm_tune_strongarm)
3663 /* Conditional return when just the LR is stored is a simple
3664 conditional-load instruction; that's not expensive.
3665 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
3666 return 0;
3668 if (flag_pic
3669 && arm_pic_register != INVALID_REGNUM
3670 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
3671 return 0;
3674 /* If there are saved registers but the LR isn't saved, then we need
3675 two instructions for the return. */
3676 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
3677 return 0;
3679 /* Can't be done if any of the VFP regs are pushed,
3680 since this also requires an insn. */
3681 if (TARGET_HARD_FLOAT && TARGET_VFP)
3682 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
3683 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
3684 return 0;
3686 if (TARGET_REALLY_IWMMXT)
3687 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
3688 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
3689 return 0;
3691 return 1;
3694 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
3695 shrink-wrapping if possible. This is the case if we need to emit a
3696 prologue, which we can test by looking at the offsets. */
3697 bool
3698 use_simple_return_p (void)
3700 arm_stack_offsets *offsets;
3702 offsets = arm_get_frame_offsets ();
3703 return offsets->outgoing_args != 0;
3706 /* Return TRUE if int I is a valid immediate ARM constant. */
3709 const_ok_for_arm (HOST_WIDE_INT i)
3711 int lowbit;
3713 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
3714 be all zero, or all one. */
3715 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
3716 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
3717 != ((~(unsigned HOST_WIDE_INT) 0)
3718 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
3719 return FALSE;
3721 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
3723 /* Fast return for 0 and small values. We must do this for zero, since
3724 the code below can't handle that one case. */
3725 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
3726 return TRUE;
3728 /* Get the number of trailing zeros. */
3729 lowbit = ffs((int) i) - 1;
3731 /* Only even shifts are allowed in ARM mode so round down to the
3732 nearest even number. */
3733 if (TARGET_ARM)
3734 lowbit &= ~1;
3736 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
3737 return TRUE;
3739 if (TARGET_ARM)
3741 /* Allow rotated constants in ARM mode. */
3742 if (lowbit <= 4
3743 && ((i & ~0xc000003f) == 0
3744 || (i & ~0xf000000f) == 0
3745 || (i & ~0xfc000003) == 0))
3746 return TRUE;
3748 else
3750 HOST_WIDE_INT v;
3752 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
3753 v = i & 0xff;
3754 v |= v << 16;
3755 if (i == v || i == (v | (v << 8)))
3756 return TRUE;
3758 /* Allow repeated pattern 0xXY00XY00. */
3759 v = i & 0xff00;
3760 v |= v << 16;
3761 if (i == v)
3762 return TRUE;
3765 return FALSE;
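/* Illustrative sketch (not part of the build): a standalone restatement
   of the ARM-mode rule checked above -- a constant is representable when
   some even right-rotation of an 8-bit value produces it.  This models
   the check only; it is not the implementation used here.  */
#if 0
static int
is_arm_immediate (unsigned int x)
{
  /* Rotating X left by the candidate rotation recovers the 8-bit field.  */
  for (int rot = 0; rot < 32; rot += 2)
    if ((((x << rot) | (x >> ((32 - rot) & 31))) & ~0xffu) == 0)
      return 1;
  return 0;
}
#endif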
3768 /* Return true if I is a valid constant for the operation CODE. */
3770 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
3772 if (const_ok_for_arm (i))
3773 return 1;
3775 switch (code)
3777 case SET:
3778 /* See if we can use movw. */
3779 if (arm_arch_thumb2 && (i & 0xffff0000) == 0)
3780 return 1;
3781 else
3782 /* Otherwise, try mvn. */
3783 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3785 case PLUS:
3786 /* See if we can use addw or subw. */
3787 if (TARGET_THUMB2
3788 && ((i & 0xfffff000) == 0
3789 || ((-i) & 0xfffff000) == 0))
3790 return 1;
3791 /* else fall through. */
3793 case COMPARE:
3794 case EQ:
3795 case NE:
3796 case GT:
3797 case LE:
3798 case LT:
3799 case GE:
3800 case GEU:
3801 case LTU:
3802 case GTU:
3803 case LEU:
3804 case UNORDERED:
3805 case ORDERED:
3806 case UNEQ:
3807 case UNGE:
3808 case UNLT:
3809 case UNGT:
3810 case UNLE:
3811 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
3813 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
3814 case XOR:
3815 return 0;
3817 case IOR:
3818 if (TARGET_THUMB2)
3819 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3820 return 0;
3822 case AND:
3823 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
3825 default:
3826 gcc_unreachable ();
3830 /* Return true if I is a valid di mode constant for the operation CODE. */
3832 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
3834 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
3835 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
3836 rtx hi = GEN_INT (hi_val);
3837 rtx lo = GEN_INT (lo_val);
3839 if (TARGET_THUMB1)
3840 return 0;
3842 switch (code)
3844 case AND:
3845 case IOR:
3846 case XOR:
3847 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
3848 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
3849 case PLUS:
3850 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
3852 default:
3853 return 0;
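/* Illustrative sketch (not part of the build): a DImode mask accepted by
   the check above on a 32-bit (non-Thumb-1) target.  The high half is
   all ones and the low half is a valid AND immediate, so the operation
   can be split into two SImode insns.  */
#if 0
static int
example_dimode_and (void)
{
  return const_ok_for_dimode_op ((HOST_WIDE_INT) 0xffffffff00000000ULL, AND);
}
#endif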
3857 /* Emit a sequence of insns to handle a large constant.
3858 CODE is the code of the operation required, it can be any of SET, PLUS,
3859 IOR, AND, XOR, MINUS;
3860 MODE is the mode in which the operation is being performed;
3861 VAL is the integer to operate on;
3862 SOURCE is the other operand (a register, or a null-pointer for SET);
3863 SUBTARGETS means it is safe to create scratch registers if that will
3864 either produce a simpler sequence, or we will want to cse the values.
3865 Return value is the number of insns emitted. */
3867 /* ??? Tweak this for thumb2. */
3869 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
3870 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
3872 rtx cond;
3874 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
3875 cond = COND_EXEC_TEST (PATTERN (insn));
3876 else
3877 cond = NULL_RTX;
3879 if (subtargets || code == SET
3880 || (REG_P (target) && REG_P (source)
3881 && REGNO (target) != REGNO (source)))
3883 /* After arm_reorg has been called, we can't fix up expensive
3884 constants by pushing them into memory so we must synthesize
3885 them in-line, regardless of the cost. This is only likely to
3886 be more costly on chips that have load delay slots and we are
3887 compiling without running the scheduler (so no splitting
3888 occurred before the final instruction emission).
3890 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
3892 if (!cfun->machine->after_arm_reorg
3893 && !cond
3894 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
3895 1, 0)
3896 > (arm_constant_limit (optimize_function_for_size_p (cfun))
3897 + (code != SET))))
3899 if (code == SET)
3901 /* Currently SET is the only monadic value for CODE; all
3902 the rest are dyadic. */
3903 if (TARGET_USE_MOVT)
3904 arm_emit_movpair (target, GEN_INT (val));
3905 else
3906 emit_set_insn (target, GEN_INT (val));
3908 return 1;
3910 else
3912 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
3914 if (TARGET_USE_MOVT)
3915 arm_emit_movpair (temp, GEN_INT (val));
3916 else
3917 emit_set_insn (temp, GEN_INT (val));
3919 /* For MINUS, the value is subtracted from, since we never
3920 have subtraction of a constant. */
3921 if (code == MINUS)
3922 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
3923 else
3924 emit_set_insn (target,
3925 gen_rtx_fmt_ee (code, mode, source, temp));
3926 return 2;
3931 return arm_gen_constant (code, mode, cond, val, target, source, subtargets, 1);
3935 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
3936 ARM/THUMB2 immediates and add up to VAL.
3937 The function return value gives the number of insns required. */
3938 static int
3939 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
3940 struct four_ints *return_sequence)
3942 int best_consecutive_zeros = 0;
3943 int i;
3944 int best_start = 0;
3945 int insns1, insns2;
3946 struct four_ints tmp_sequence;
3948 /* If we aren't targeting ARM, the best place to start is always at
3949 the bottom, otherwise look more closely. */
3950 if (TARGET_ARM)
3952 for (i = 0; i < 32; i += 2)
3954 int consecutive_zeros = 0;
3956 if (!(val & (3 << i)))
3958 while ((i < 32) && !(val & (3 << i)))
3960 consecutive_zeros += 2;
3961 i += 2;
3963 if (consecutive_zeros > best_consecutive_zeros)
3965 best_consecutive_zeros = consecutive_zeros;
3966 best_start = i - consecutive_zeros;
3968 i -= 2;
3973 /* So long as it won't require any more insns to do so, it's
3974 desirable to emit a small constant (in bits 0...9) in the last
3975 insn. This way there is more chance that it can be combined with
3976 a later addressing insn to form a pre-indexed load or store
3977 operation. Consider:
3979 *((volatile int *)0xe0000100) = 1;
3980 *((volatile int *)0xe0000110) = 2;
3982 We want this to wind up as:
3984 mov rA, #0xe0000000
3985 mov rB, #1
3986 str rB, [rA, #0x100]
3987 mov rB, #2
3988 str rB, [rA, #0x110]
3990 rather than having to synthesize both large constants from scratch.
3992 Therefore, we calculate how many insns would be required to emit
3993 the constant starting from `best_start', and also starting from
3994 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
3995 yield a shorter sequence, we may as well use zero. */
3996 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
3997 if (best_start != 0
3998 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < val))
4000 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4001 if (insns2 <= insns1)
4003 *return_sequence = tmp_sequence;
4004 insns1 = insns2;
4008 return insns1;
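/* Illustrative sketch (not part of the build): one possible use of the
   routine above.  For the value from the str/mov example in the comment,
   an ARM-mode split might come back as two immediates, 0xe0000000 and
   0x00000100, with a count of 2.  */
#if 0
static int
example_split_count (void)
{
  struct four_ints seq;
  return optimal_immediate_sequence (SET, 0xe0000100, &seq);
}
#endif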
4011 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4012 static int
4013 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4014 struct four_ints *return_sequence, int i)
4016 int remainder = val & 0xffffffff;
4017 int insns = 0;
4019 /* Try and find a way of doing the job in either two or three
4020 instructions.
4022 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4023 location. We start at position I. This may be the MSB, or
4024 optimal_immediate_sequence may have positioned it at the largest block
4025 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4026 wrapping around to the top of the word when we drop off the bottom.
4027 In the worst case this code should produce no more than four insns.
4029 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4030 constants, shifted to any arbitrary location. We should always start
4031 at the MSB. */
4034 int end;
4035 unsigned int b1, b2, b3, b4;
4036 unsigned HOST_WIDE_INT result;
4037 int loc;
4039 gcc_assert (insns < 4);
4041 if (i <= 0)
4042 i += 32;
4044 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4045 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4047 loc = i;
4048 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4049 /* We can use addw/subw for the last 12 bits. */
4050 result = remainder;
4051 else
4053 /* Use an 8-bit shifted/rotated immediate. */
4054 end = i - 8;
4055 if (end < 0)
4056 end += 32;
4057 result = remainder & ((0x0ff << end)
4058 | ((i < end) ? (0xff >> (32 - end))
4059 : 0));
4060 i -= 8;
4063 else
4065 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4066 arbitrary shifts. */
4067 i -= TARGET_ARM ? 2 : 1;
4068 continue;
4071 /* Next, see if we can do a better job with a thumb2 replicated
4072 constant.
4074 We do it this way around to catch the cases like 0x01F001E0 where
4075 two 8-bit immediates would work, but a replicated constant would
4076 make it worse.
4078 TODO: 16-bit constants that don't clear all the bits, but still win.
4079 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4080 if (TARGET_THUMB2)
4082 b1 = (remainder & 0xff000000) >> 24;
4083 b2 = (remainder & 0x00ff0000) >> 16;
4084 b3 = (remainder & 0x0000ff00) >> 8;
4085 b4 = remainder & 0xff;
4087 if (loc > 24)
4089 /* The 8-bit immediate already found clears b1 (and maybe b2),
4090 but must leave b3 and b4 alone. */
4092 /* First try to find a 32-bit replicated constant that clears
4093 almost everything. We can assume that we can't do it in one,
4094 or else we wouldn't be here. */
4095 unsigned int tmp = b1 & b2 & b3 & b4;
4096 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4097 + (tmp << 24);
4098 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4099 + (tmp == b3) + (tmp == b4);
4100 if (tmp
4101 && (matching_bytes >= 3
4102 || (matching_bytes == 2
4103 && const_ok_for_op (remainder & ~tmp2, code))))
4105 /* At least 3 of the bytes match, and the fourth has at
4106 least as many bits set, or two of the bytes match
4107 and it will only require one more insn to finish. */
4108 result = tmp2;
4109 i = tmp != b1 ? 32
4110 : tmp != b2 ? 24
4111 : tmp != b3 ? 16
4112 : 8;
4115 /* Second, try to find a 16-bit replicated constant that can
4116 leave three of the bytes clear. If b2 or b4 is already
4117 zero, then we can. If the 8-bit from above would not
4118 clear b2 anyway, then we still win. */
4119 else if (b1 == b3 && (!b2 || !b4
4120 || (remainder & 0x00ff0000 & ~result)))
4122 result = remainder & 0xff00ff00;
4123 i = 24;
4126 else if (loc > 16)
4128 /* The 8-bit immediate already found clears b2 (and maybe b3)
4129 and we don't get here unless b1 is already clear, but it will
4130 leave b4 unchanged. */
4132 /* If we can clear b2 and b4 at once, then we win, since the
4133 8-bits couldn't possibly reach that far. */
4134 if (b2 == b4)
4136 result = remainder & 0x00ff00ff;
4137 i = 16;
4142 return_sequence->i[insns++] = result;
4143 remainder &= ~result;
4145 if (code == SET || code == MINUS)
4146 code = PLUS;
4148 while (remainder);
4150 return insns;
4153 /* Emit an instruction with the indicated PATTERN. If COND is
4154 non-NULL, conditionalize the execution of the instruction on COND
4155 being true. */
4157 static void
4158 emit_constant_insn (rtx cond, rtx pattern)
4160 if (cond)
4161 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4162 emit_insn (pattern);
4165 /* As above, but extra parameter GENERATE which, if clear, suppresses
4166 RTL generation. */
4168 static int
4169 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4170 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
4171 int generate)
4173 int can_invert = 0;
4174 int can_negate = 0;
4175 int final_invert = 0;
4176 int i;
4177 int set_sign_bit_copies = 0;
4178 int clear_sign_bit_copies = 0;
4179 int clear_zero_bit_copies = 0;
4180 int set_zero_bit_copies = 0;
4181 int insns = 0, neg_insns, inv_insns;
4182 unsigned HOST_WIDE_INT temp1, temp2;
4183 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4184 struct four_ints *immediates;
4185 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4187 /* Find out which operations are safe for a given CODE. Also do a quick
4188 check for degenerate cases; these can occur when DImode operations
4189 are split. */
4190 switch (code)
4192 case SET:
4193 can_invert = 1;
4194 break;
4196 case PLUS:
4197 can_negate = 1;
4198 break;
4200 case IOR:
4201 if (remainder == 0xffffffff)
4203 if (generate)
4204 emit_constant_insn (cond,
4205 gen_rtx_SET (target,
4206 GEN_INT (ARM_SIGN_EXTEND (val))));
4207 return 1;
4210 if (remainder == 0)
4212 if (reload_completed && rtx_equal_p (target, source))
4213 return 0;
4215 if (generate)
4216 emit_constant_insn (cond, gen_rtx_SET (target, source));
4217 return 1;
4219 break;
4221 case AND:
4222 if (remainder == 0)
4224 if (generate)
4225 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4226 return 1;
4228 if (remainder == 0xffffffff)
4230 if (reload_completed && rtx_equal_p (target, source))
4231 return 0;
4232 if (generate)
4233 emit_constant_insn (cond, gen_rtx_SET (target, source));
4234 return 1;
4236 can_invert = 1;
4237 break;
4239 case XOR:
4240 if (remainder == 0)
4242 if (reload_completed && rtx_equal_p (target, source))
4243 return 0;
4244 if (generate)
4245 emit_constant_insn (cond, gen_rtx_SET (target, source));
4246 return 1;
4249 if (remainder == 0xffffffff)
4251 if (generate)
4252 emit_constant_insn (cond,
4253 gen_rtx_SET (target,
4254 gen_rtx_NOT (mode, source)));
4255 return 1;
4257 final_invert = 1;
4258 break;
4260 case MINUS:
4261 /* We treat MINUS as (val - source), since (source - val) is always
4262 passed as (source + (-val)). */
4263 if (remainder == 0)
4265 if (generate)
4266 emit_constant_insn (cond,
4267 gen_rtx_SET (target,
4268 gen_rtx_NEG (mode, source)));
4269 return 1;
4271 if (const_ok_for_arm (val))
4273 if (generate)
4274 emit_constant_insn (cond,
4275 gen_rtx_SET (target,
4276 gen_rtx_MINUS (mode, GEN_INT (val),
4277 source)));
4278 return 1;
4281 break;
4283 default:
4284 gcc_unreachable ();
4287 /* If we can do it in one insn get out quickly. */
4288 if (const_ok_for_op (val, code))
4290 if (generate)
4291 emit_constant_insn (cond,
4292 gen_rtx_SET (target,
4293 (source
4294 ? gen_rtx_fmt_ee (code, mode, source,
4295 GEN_INT (val))
4296 : GEN_INT (val))));
4297 return 1;
4300 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4301 insn. */
4302 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4303 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4305 if (generate)
4307 if (mode == SImode && i == 16)
4308 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4309 smaller insn. */
4310 emit_constant_insn (cond,
4311 gen_zero_extendhisi2
4312 (target, gen_lowpart (HImode, source)));
4313 else
4314 /* Extz only supports SImode, but we can coerce the operands
4315 into that mode. */
4316 emit_constant_insn (cond,
4317 gen_extzv_t2 (gen_lowpart (SImode, target),
4318 gen_lowpart (SImode, source),
4319 GEN_INT (i), const0_rtx));
4322 return 1;
4325 /* Calculate a few attributes that may be useful for specific
4326 optimizations. */
4327 /* Count number of leading zeros. */
4328 for (i = 31; i >= 0; i--)
4330 if ((remainder & (1 << i)) == 0)
4331 clear_sign_bit_copies++;
4332 else
4333 break;
4336 /* Count number of leading 1's. */
4337 for (i = 31; i >= 0; i--)
4339 if ((remainder & (1 << i)) != 0)
4340 set_sign_bit_copies++;
4341 else
4342 break;
4345 /* Count number of trailing zeros. */
4346 for (i = 0; i <= 31; i++)
4348 if ((remainder & (1 << i)) == 0)
4349 clear_zero_bit_copies++;
4350 else
4351 break;
4354 /* Count number of trailing 1's. */
4355 for (i = 0; i <= 31; i++)
4357 if ((remainder & (1 << i)) != 0)
4358 set_zero_bit_copies++;
4359 else
4360 break;
4363 switch (code)
4365 case SET:
4366 /* See if we can do this by sign_extending a constant that is known
4367 to be negative. This is a good way of doing it, since the shift
4368 may well merge into a subsequent insn. */
4369 if (set_sign_bit_copies > 1)
4371 if (const_ok_for_arm
4372 (temp1 = ARM_SIGN_EXTEND (remainder
4373 << (set_sign_bit_copies - 1))))
4375 if (generate)
4377 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4378 emit_constant_insn (cond,
4379 gen_rtx_SET (new_src, GEN_INT (temp1)));
4380 emit_constant_insn (cond,
4381 gen_ashrsi3 (target, new_src,
4382 GEN_INT (set_sign_bit_copies - 1)));
4384 return 2;
4386 /* For an inverted constant, we will need to set the low bits;
4387 these will be shifted out of harm's way. */
4388 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4389 if (const_ok_for_arm (~temp1))
4391 if (generate)
4393 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4394 emit_constant_insn (cond,
4395 gen_rtx_SET (new_src, GEN_INT (temp1)));
4396 emit_constant_insn (cond,
4397 gen_ashrsi3 (target, new_src,
4398 GEN_INT (set_sign_bit_copies - 1)));
4400 return 2;
4404 /* See if we can calculate the value as the difference between two
4405 valid immediates. */
4406 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4408 int topshift = clear_sign_bit_copies & ~1;
4410 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4411 & (0xff000000 >> topshift));
4413 /* If temp1 is zero, then that means the 9 most significant
4414 bits of remainder were 1 and we've caused it to overflow.
4415 When topshift is 0 we don't need to do anything since we
4416 can borrow from 'bit 32'. */
4417 if (temp1 == 0 && topshift != 0)
4418 temp1 = 0x80000000 >> (topshift - 1);
4420 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4422 if (const_ok_for_arm (temp2))
4424 if (generate)
4426 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4427 emit_constant_insn (cond,
4428 gen_rtx_SET (new_src, GEN_INT (temp1)));
4429 emit_constant_insn (cond,
4430 gen_addsi3 (target, new_src,
4431 GEN_INT (-temp2)));
4434 return 2;
4438 /* See if we can generate this by setting the bottom (or the top)
4439 16 bits, and then shifting these into the other half of the
4440 word. We only look for the simplest cases; to do more would cost
4441 too much. Be careful, however, not to generate this when the
4442 alternative would take fewer insns. */
4443 if (val & 0xffff0000)
4445 temp1 = remainder & 0xffff0000;
4446 temp2 = remainder & 0x0000ffff;
4448 /* Overlaps outside this range are best done using other methods. */
4449 for (i = 9; i < 24; i++)
4451 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4452 && !const_ok_for_arm (temp2))
4454 rtx new_src = (subtargets
4455 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4456 : target);
4457 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4458 source, subtargets, generate);
4459 source = new_src;
4460 if (generate)
4461 emit_constant_insn
4462 (cond,
4463 gen_rtx_SET
4464 (target,
4465 gen_rtx_IOR (mode,
4466 gen_rtx_ASHIFT (mode, source,
4467 GEN_INT (i)),
4468 source)));
4469 return insns + 1;
4473 /* Don't duplicate cases already considered. */
4474 for (i = 17; i < 24; i++)
4476 if (((temp1 | (temp1 >> i)) == remainder)
4477 && !const_ok_for_arm (temp1))
4479 rtx new_src = (subtargets
4480 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4481 : target);
4482 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4483 source, subtargets, generate);
4484 source = new_src;
4485 if (generate)
4486 emit_constant_insn
4487 (cond,
4488 gen_rtx_SET (target,
4489 gen_rtx_IOR
4490 (mode,
4491 gen_rtx_LSHIFTRT (mode, source,
4492 GEN_INT (i)),
4493 source)));
4494 return insns + 1;
4498 break;
4500 case IOR:
4501 case XOR:
4502 /* If we have IOR or XOR, and the constant can be loaded in a
4503 single instruction, and we can find a temporary to put it in,
4504 then this can be done in two instructions instead of 3-4. */
4505 if (subtargets
4506 /* TARGET can't be NULL if SUBTARGETS is 0 */
4507 || (reload_completed && !reg_mentioned_p (target, source)))
4509 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4511 if (generate)
4513 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4515 emit_constant_insn (cond,
4516 gen_rtx_SET (sub, GEN_INT (val)));
4517 emit_constant_insn (cond,
4518 gen_rtx_SET (target,
4519 gen_rtx_fmt_ee (code, mode,
4520 source, sub)));
4522 return 2;
4526 if (code == XOR)
4527 break;
4529 /* Convert.
4530 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4531 followed by 0s in the remainder, e.g. 0xfff00000)
4532 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4534 This can be done in 2 instructions by using shifts with mov or mvn.
4535 e.g. for
4536 x = x | 0xfff00000;
4537 we generate.
4538 mvn r0, r0, asl #12
4539 mvn r0, r0, lsr #12 */
4540 if (set_sign_bit_copies > 8
4541 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
4543 if (generate)
4545 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4546 rtx shift = GEN_INT (set_sign_bit_copies);
4548 emit_constant_insn
4549 (cond,
4550 gen_rtx_SET (sub,
4551 gen_rtx_NOT (mode,
4552 gen_rtx_ASHIFT (mode,
4553 source,
4554 shift))));
4555 emit_constant_insn
4556 (cond,
4557 gen_rtx_SET (target,
4558 gen_rtx_NOT (mode,
4559 gen_rtx_LSHIFTRT (mode, sub,
4560 shift))));
4562 return 2;
4565 /* Convert
4566 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4568 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4570 E.g. for r0 = r0 | 0xfff
4571 mvn r0, r0, lsr #12
4572 mvn r0, r0, asl #12
4575 if (set_zero_bit_copies > 8
4576 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4578 if (generate)
4580 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4581 rtx shift = GEN_INT (set_zero_bit_copies);
4583 emit_constant_insn
4584 (cond,
4585 gen_rtx_SET (sub,
4586 gen_rtx_NOT (mode,
4587 gen_rtx_LSHIFTRT (mode,
4588 source,
4589 shift))));
4590 emit_constant_insn
4591 (cond,
4592 gen_rtx_SET (target,
4593 gen_rtx_NOT (mode,
4594 gen_rtx_ASHIFT (mode, sub,
4595 shift))));
4597 return 2;
4600 /* This will never be reached for Thumb2 because orn is a valid
4601 instruction. This is for Thumb1 and the ARM 32 bit cases.
4603 x = y | constant (such that ~constant is a valid constant)
4604 Transform this to
4605 x = ~(~y & ~constant).
4607 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4609 if (generate)
4611 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4612 emit_constant_insn (cond,
4613 gen_rtx_SET (sub,
4614 gen_rtx_NOT (mode, source)));
4615 source = sub;
4616 if (subtargets)
4617 sub = gen_reg_rtx (mode);
4618 emit_constant_insn (cond,
4619 gen_rtx_SET (sub,
4620 gen_rtx_AND (mode, source,
4621 GEN_INT (temp1))));
4622 emit_constant_insn (cond,
4623 gen_rtx_SET (target,
4624 gen_rtx_NOT (mode, sub)));
4626 return 3;
4628 break;
4630 case AND:
4631 /* See if two shifts will do 2 or more insn's worth of work. */
4632 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4634 HOST_WIDE_INT shift_mask = ((0xffffffff
4635 << (32 - clear_sign_bit_copies))
4636 & 0xffffffff);
4638 if ((remainder | shift_mask) != 0xffffffff)
4640 HOST_WIDE_INT new_val
4641 = ARM_SIGN_EXTEND (remainder | shift_mask);
4643 if (generate)
4645 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4646 insns = arm_gen_constant (AND, SImode, cond, new_val,
4647 new_src, source, subtargets, 1);
4648 source = new_src;
4650 else
4652 rtx targ = subtargets ? NULL_RTX : target;
4653 insns = arm_gen_constant (AND, mode, cond, new_val,
4654 targ, source, subtargets, 0);
4658 if (generate)
4660 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4661 rtx shift = GEN_INT (clear_sign_bit_copies);
4663 emit_insn (gen_ashlsi3 (new_src, source, shift));
4664 emit_insn (gen_lshrsi3 (target, new_src, shift));
4667 return insns + 2;
4670 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
4672 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
4674 if ((remainder | shift_mask) != 0xffffffff)
4676 HOST_WIDE_INT new_val
4677 = ARM_SIGN_EXTEND (remainder | shift_mask);
4678 if (generate)
4680 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4682 insns = arm_gen_constant (AND, mode, cond, new_val,
4683 new_src, source, subtargets, 1);
4684 source = new_src;
4686 else
4688 rtx targ = subtargets ? NULL_RTX : target;
4690 insns = arm_gen_constant (AND, mode, cond, new_val,
4691 targ, source, subtargets, 0);
4695 if (generate)
4697 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4698 rtx shift = GEN_INT (clear_zero_bit_copies);
4700 emit_insn (gen_lshrsi3 (new_src, source, shift));
4701 emit_insn (gen_ashlsi3 (target, new_src, shift));
4704 return insns + 2;
4707 break;
4709 default:
4710 break;
4713 /* Calculate what the instruction sequences would be if we generated it
4714 normally, negated, or inverted. */
4715 if (code == AND)
4716 /* AND cannot be split into multiple insns, so invert and use BIC. */
4717 insns = 99;
4718 else
4719 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
4721 if (can_negate)
4722 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
4723 &neg_immediates);
4724 else
4725 neg_insns = 99;
4727 if (can_invert || final_invert)
4728 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
4729 &inv_immediates);
4730 else
4731 inv_insns = 99;
4733 immediates = &pos_immediates;
4735 /* Is the negated immediate sequence more efficient? */
4736 if (neg_insns < insns && neg_insns <= inv_insns)
4738 insns = neg_insns;
4739 immediates = &neg_immediates;
4741 else
4742 can_negate = 0;
4744 /* Is the inverted immediate sequence more efficient?
4745 We must allow for an extra NOT instruction for XOR operations, although
4746 there is some chance that the final 'mvn' will get optimized later. */
4747 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
4749 insns = inv_insns;
4750 immediates = &inv_immediates;
4752 else
4754 can_invert = 0;
4755 final_invert = 0;
4758 /* Now output the chosen sequence as instructions. */
4759 if (generate)
4761 for (i = 0; i < insns; i++)
4763 rtx new_src, temp1_rtx;
4765 temp1 = immediates->i[i];
4767 if (code == SET || code == MINUS)
4768 new_src = (subtargets ? gen_reg_rtx (mode) : target);
4769 else if ((final_invert || i < (insns - 1)) && subtargets)
4770 new_src = gen_reg_rtx (mode);
4771 else
4772 new_src = target;
4774 if (can_invert)
4775 temp1 = ~temp1;
4776 else if (can_negate)
4777 temp1 = -temp1;
4779 temp1 = trunc_int_for_mode (temp1, mode);
4780 temp1_rtx = GEN_INT (temp1);
4782 if (code == SET)
4784 else if (code == MINUS)
4785 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
4786 else
4787 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
4789 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
4790 source = new_src;
4792 if (code == SET)
4794 can_negate = can_invert;
4795 can_invert = 0;
4796 code = PLUS;
4798 else if (code == MINUS)
4799 code = PLUS;
4803 if (final_invert)
4805 if (generate)
4806 emit_constant_insn (cond, gen_rtx_SET (target,
4807 gen_rtx_NOT (mode, source)));
4808 insns++;
4811 return insns;
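/* Illustrative sketch (not part of the build): the "difference between
   two valid immediates" path above.  0x000fffff (twenty low bits set) is
   not an 8-bit rotated immediate, but 0x00100000 and 1 both are, so the
   expansion is expected to be something like
	mov	rD, #0x00100000
	sub	rD, rD, #1
   where rD stands for whatever register is chosen.  */
#if 0
static void
example_build_fffff (void)
{
  rtx reg = gen_reg_rtx (SImode);
  arm_gen_constant (SET, SImode, NULL_RTX, 0x000fffff, reg, NULL_RTX, 0, 1);
}
#endif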
4814 /* Canonicalize a comparison so that we are more likely to recognize it.
4815 This can be done for a few constant compares, where we can make the
4816 immediate value easier to load. */
4818 static void
4819 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
4820 bool op0_preserve_value)
4822 machine_mode mode;
4823 unsigned HOST_WIDE_INT i, maxval;
4825 mode = GET_MODE (*op0);
4826 if (mode == VOIDmode)
4827 mode = GET_MODE (*op1);
4829 maxval = (((unsigned HOST_WIDE_INT) 1) << (GET_MODE_BITSIZE(mode) - 1)) - 1;
4831 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
4832 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
4833 reversed or (for constant OP1) adjusted to GE/LT. Similarly
4834 for GTU/LEU in Thumb mode. */
4835 if (mode == DImode)
4838 if (*code == GT || *code == LE
4839 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
4841 /* Missing comparison. First try to use an available
4842 comparison. */
4843 if (CONST_INT_P (*op1))
4845 i = INTVAL (*op1);
4846 switch (*code)
4848 case GT:
4849 case LE:
4850 if (i != maxval
4851 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4853 *op1 = GEN_INT (i + 1);
4854 *code = *code == GT ? GE : LT;
4855 return;
4857 break;
4858 case GTU:
4859 case LEU:
4860 if (i != ~((unsigned HOST_WIDE_INT) 0)
4861 && arm_const_double_by_immediates (GEN_INT (i + 1)))
4863 *op1 = GEN_INT (i + 1);
4864 *code = *code == GTU ? GEU : LTU;
4865 return;
4867 break;
4868 default:
4869 gcc_unreachable ();
4873 /* If that did not work, reverse the condition. */
4874 if (!op0_preserve_value)
4876 std::swap (*op0, *op1);
4877 *code = (int)swap_condition ((enum rtx_code)*code);
4880 return;
4883 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
4884 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
4885 to facilitate possible combining with a cmp into 'ands'. */
4886 if (mode == SImode
4887 && GET_CODE (*op0) == ZERO_EXTEND
4888 && GET_CODE (XEXP (*op0, 0)) == SUBREG
4889 && GET_MODE (XEXP (*op0, 0)) == QImode
4890 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
4891 && subreg_lowpart_p (XEXP (*op0, 0))
4892 && *op1 == const0_rtx)
4893 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
4894 GEN_INT (255));
4896 /* Comparisons smaller than DImode. Only adjust comparisons against
4897 an out-of-range constant. */
4898 if (!CONST_INT_P (*op1)
4899 || const_ok_for_arm (INTVAL (*op1))
4900 || const_ok_for_arm (- INTVAL (*op1)))
4901 return;
4903 i = INTVAL (*op1);
4905 switch (*code)
4907 case EQ:
4908 case NE:
4909 return;
4911 case GT:
4912 case LE:
4913 if (i != maxval
4914 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4916 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
4917 *code = *code == GT ? GE : LT;
4918 return;
4920 break;
4922 case GE:
4923 case LT:
4924 if (i != ~maxval
4925 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4927 *op1 = GEN_INT (i - 1);
4928 *code = *code == GE ? GT : LE;
4929 return;
4931 break;
4933 case GTU:
4934 case LEU:
4935 if (i != ~((unsigned HOST_WIDE_INT) 0)
4936 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
4938 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
4939 *code = *code == GTU ? GEU : LTU;
4940 return;
4942 break;
4944 case GEU:
4945 case LTU:
4946 if (i != 0
4947 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
4949 *op1 = GEN_INT (i - 1);
4950 *code = *code == GEU ? GTU : LEU;
4951 return;
4953 break;
4955 default:
4956 gcc_unreachable ();
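/* Illustrative sketch (not part of the build): the SImode adjustment
   above at work.  0x00ffffff is not a valid ARM immediate (nor is its
   negation), but 0x01000000 is, so this comparison may be canonicalized
   from GT to GE against the easier constant.  */
#if 0
int
example_above_16m (int x)
{
  return x > 0x00ffffff;	/* May become x >= 0x01000000.  */
}
#endif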
4961 /* Define how to find the value returned by a function. */
4963 static rtx
4964 arm_function_value(const_tree type, const_tree func,
4965 bool outgoing ATTRIBUTE_UNUSED)
4967 machine_mode mode;
4968 int unsignedp ATTRIBUTE_UNUSED;
4969 rtx r ATTRIBUTE_UNUSED;
4971 mode = TYPE_MODE (type);
4973 if (TARGET_AAPCS_BASED)
4974 return aapcs_allocate_return_reg (mode, type, func);
4976 /* Promote integer types. */
4977 if (INTEGRAL_TYPE_P (type))
4978 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
4980 /* Promotes small structs returned in a register to full-word size
4981 for big-endian AAPCS. */
4982 if (arm_return_in_msb (type))
4984 HOST_WIDE_INT size = int_size_in_bytes (type);
4985 if (size % UNITS_PER_WORD != 0)
4987 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
4988 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
4992 return arm_libcall_value_1 (mode);
4995 /* libcall hashtable helpers. */
4997 struct libcall_hasher : typed_noop_remove <rtx_def>
4999 typedef const rtx_def *value_type;
5000 typedef const rtx_def *compare_type;
5001 static inline hashval_t hash (const rtx_def *);
5002 static inline bool equal (const rtx_def *, const rtx_def *);
5003 static inline void remove (rtx_def *);
5006 inline bool
5007 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5009 return rtx_equal_p (p1, p2);
5012 inline hashval_t
5013 libcall_hasher::hash (const rtx_def *p1)
5015 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5018 typedef hash_table<libcall_hasher> libcall_table_type;
5020 static void
5021 add_libcall (libcall_table_type *htab, rtx libcall)
5023 *htab->find_slot (libcall, INSERT) = libcall;
5026 static bool
5027 arm_libcall_uses_aapcs_base (const_rtx libcall)
5029 static bool init_done = false;
5030 static libcall_table_type *libcall_htab = NULL;
5032 if (!init_done)
5034 init_done = true;
5036 libcall_htab = new libcall_table_type (31);
5037 add_libcall (libcall_htab,
5038 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5039 add_libcall (libcall_htab,
5040 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5041 add_libcall (libcall_htab,
5042 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5043 add_libcall (libcall_htab,
5044 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5046 add_libcall (libcall_htab,
5047 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5048 add_libcall (libcall_htab,
5049 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5050 add_libcall (libcall_htab,
5051 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5052 add_libcall (libcall_htab,
5053 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5055 add_libcall (libcall_htab,
5056 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5057 add_libcall (libcall_htab,
5058 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5059 add_libcall (libcall_htab,
5060 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5061 add_libcall (libcall_htab,
5062 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5063 add_libcall (libcall_htab,
5064 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5065 add_libcall (libcall_htab,
5066 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5067 add_libcall (libcall_htab,
5068 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5069 add_libcall (libcall_htab,
5070 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5072 /* Values from double-precision helper functions are returned in core
5073 registers if the selected core only supports single-precision
5074 arithmetic, even if we are using the hard-float ABI. The same is
5075 true for single-precision helpers, but we will never be using the
5076 hard-float ABI on a CPU which doesn't support single-precision
5077 operations in hardware. */
5078 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5079 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5080 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5081 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5082 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5083 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5084 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5085 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5086 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5087 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5088 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5089 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5090 SFmode));
5091 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5092 DFmode));
5095 return libcall && libcall_htab->find (libcall) != NULL;
5098 static rtx
5099 arm_libcall_value_1 (machine_mode mode)
5101 if (TARGET_AAPCS_BASED)
5102 return aapcs_libcall_value (mode);
5103 else if (TARGET_IWMMXT_ABI
5104 && arm_vector_mode_supported_p (mode))
5105 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5106 else
5107 return gen_rtx_REG (mode, ARG_REGISTER (1));
5110 /* Define how to find the value returned by a library function
5111 assuming the value has mode MODE. */
5113 static rtx
5114 arm_libcall_value (machine_mode mode, const_rtx libcall)
5116 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5117 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5119 /* The following libcalls return their result in integer registers,
5120 even though they return a floating point value. */
5121 if (arm_libcall_uses_aapcs_base (libcall))
5122 return gen_rtx_REG (mode, ARG_REGISTER(1));
5126 return arm_libcall_value_1 (mode);
5129 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5131 static bool
5132 arm_function_value_regno_p (const unsigned int regno)
5134 if (regno == ARG_REGISTER (1)
5135 || (TARGET_32BIT
5136 && TARGET_AAPCS_BASED
5137 && TARGET_VFP
5138 && TARGET_HARD_FLOAT
5139 && regno == FIRST_VFP_REGNUM)
5140 || (TARGET_IWMMXT_ABI
5141 && regno == FIRST_IWMMXT_REGNUM))
5142 return true;
5144 return false;
5147 /* Determine the amount of memory needed to store the possible return
5148 registers of an untyped call. */
5150 arm_apply_result_size (void)
5152 int size = 16;
5154 if (TARGET_32BIT)
5156 if (TARGET_HARD_FLOAT_ABI && TARGET_VFP)
5157 size += 32;
5158 if (TARGET_IWMMXT_ABI)
5159 size += 8;
5162 return size;
5165 /* Decide whether TYPE should be returned in memory (true)
5166 or in a register (false). FNTYPE is the type of the function making
5167 the call. */
5168 static bool
5169 arm_return_in_memory (const_tree type, const_tree fntype)
5171 HOST_WIDE_INT size;
5173 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5175 if (TARGET_AAPCS_BASED)
5177 /* Simple, non-aggregate types (i.e. not including vectors and
5178 complex) are always returned in a register (or registers).
5179 We don't care about which register here, so we can short-cut
5180 some of the detail. */
5181 if (!AGGREGATE_TYPE_P (type)
5182 && TREE_CODE (type) != VECTOR_TYPE
5183 && TREE_CODE (type) != COMPLEX_TYPE)
5184 return false;
5186 /* Any return value that is no larger than one word can be
5187 returned in r0. */
5188 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5189 return false;
5191 /* Check any available co-processors to see if they accept the
5192 type as a register candidate (VFP, for example, can return
5193 some aggregates in consecutive registers). These aren't
5194 available if the call is variadic. */
5195 if (aapcs_select_return_coproc (type, fntype) >= 0)
5196 return false;
5198 /* Vector values should be returned using ARM registers, not
5199 memory (unless they're over 16 bytes, which will break since
5200 we only have four call-clobbered registers to play with). */
5201 if (TREE_CODE (type) == VECTOR_TYPE)
5202 return (size < 0 || size > (4 * UNITS_PER_WORD));
5204 /* The rest go in memory. */
5205 return true;
5208 if (TREE_CODE (type) == VECTOR_TYPE)
5209 return (size < 0 || size > (4 * UNITS_PER_WORD));
5211 if (!AGGREGATE_TYPE_P (type) &&
5212 (TREE_CODE (type) != VECTOR_TYPE))
5213 /* All simple types are returned in registers. */
5214 return false;
5216 if (arm_abi != ARM_ABI_APCS)
5218 /* ATPCS and later return aggregate types in memory only if they are
5219 larger than a word (or are variable size). */
5220 return (size < 0 || size > UNITS_PER_WORD);
5223 /* For the arm-wince targets we choose to be compatible with Microsoft's
5224 ARM and Thumb compilers, which always return aggregates in memory. */
5225 #ifndef ARM_WINCE
5226 /* All structures/unions bigger than one word are returned in memory.
5227 Also catch the case where int_size_in_bytes returns -1. In this case
5228 the aggregate is either huge or of variable size, and in either case
5229 we will want to return it via memory and not in a register. */
5230 if (size < 0 || size > UNITS_PER_WORD)
5231 return true;
5233 if (TREE_CODE (type) == RECORD_TYPE)
5235 tree field;
5237 /* For a struct the APCS says that we only return in a register
5238 if the type is 'integer like' and every addressable element
5239 has an offset of zero. For practical purposes this means
5240 that the structure can have at most one non bit-field element
5241 and that this element must be the first one in the structure. */
5243 /* Find the first field, ignoring non FIELD_DECL things which will
5244 have been created by C++. */
5245 for (field = TYPE_FIELDS (type);
5246 field && TREE_CODE (field) != FIELD_DECL;
5247 field = DECL_CHAIN (field))
5248 continue;
5250 if (field == NULL)
5251 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5253 /* Check that the first field is valid for returning in a register. */
5255 /* ... Floats are not allowed */
5256 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5257 return true;
5259 /* ... Aggregates that are not themselves valid for returning in
5260 a register are not allowed. */
5261 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5262 return true;
5264 /* Now check the remaining fields, if any. Only bitfields are allowed,
5265 since they are not addressable. */
5266 for (field = DECL_CHAIN (field);
5267 field;
5268 field = DECL_CHAIN (field))
5270 if (TREE_CODE (field) != FIELD_DECL)
5271 continue;
5273 if (!DECL_BIT_FIELD_TYPE (field))
5274 return true;
5277 return false;
5280 if (TREE_CODE (type) == UNION_TYPE)
5282 tree field;
5284 /* Unions can be returned in registers if every element is
5285 integral, or can be returned in an integer register. */
5286 for (field = TYPE_FIELDS (type);
5287 field;
5288 field = DECL_CHAIN (field))
5290 if (TREE_CODE (field) != FIELD_DECL)
5291 continue;
5293 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5294 return true;
5296 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5297 return true;
5300 return false;
5302 #endif /* not ARM_WINCE */
5304 /* Return all other types in memory. */
5305 return true;
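/* Illustrative sketch (not part of the build): the AAPCS rules above.
   A composite no larger than one word comes back in r0, while a larger
   plain-integer aggregate is returned through memory.  The type names
   are hypothetical.  */
#if 0
struct one_word { int a; };		/* 4 bytes: returned in r0.  */
struct two_words { int a, b; };		/* 8 bytes: returned via memory.  */
#endif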
5308 const struct pcs_attribute_arg
5310 const char *arg;
5311 enum arm_pcs value;
5312 } pcs_attribute_args[] =
5314 {"aapcs", ARM_PCS_AAPCS},
5315 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5316 #if 0
5317 /* We could recognize these, but changes would be needed elsewhere
5318 * to implement them. */
5319 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5320 {"atpcs", ARM_PCS_ATPCS},
5321 {"apcs", ARM_PCS_APCS},
5322 #endif
5323 {NULL, ARM_PCS_UNKNOWN}
5326 static enum arm_pcs
5327 arm_pcs_from_attribute (tree attr)
5329 const struct pcs_attribute_arg *ptr;
5330 const char *arg;
5332 /* Get the value of the argument. */
5333 if (TREE_VALUE (attr) == NULL_TREE
5334 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5335 return ARM_PCS_UNKNOWN;
5337 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5339 /* Check it against the list of known arguments. */
5340 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5341 if (streq (arg, ptr->arg))
5342 return ptr->value;
5344 /* An unrecognized PCS variant. */
5345 return ARM_PCS_UNKNOWN;
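/* Illustrative sketch (not part of the build): how the "pcs" attribute
   strings recognized above appear in user code on an AAPCS-based target.
   The function name is hypothetical.  */
#if 0
double scale (double x, double k) __attribute__ ((pcs ("aapcs-vfp")));
#endif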
5348 /* Get the PCS variant to use for this call. TYPE is the function's type
5349 specification, DECL is the specific declaration. DECL may be null if
5350 the call could be indirect or if this is a library call. */
5351 static enum arm_pcs
5352 arm_get_pcs_model (const_tree type, const_tree decl)
5354 bool user_convention = false;
5355 enum arm_pcs user_pcs = arm_pcs_default;
5356 tree attr;
5358 gcc_assert (type);
5360 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5361 if (attr)
5363 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5364 user_convention = true;
5367 if (TARGET_AAPCS_BASED)
5369 /* Detect varargs functions. These always use the base rules
5370 (no argument is ever a candidate for a co-processor
5371 register). */
5372 bool base_rules = stdarg_p (type);
5374 if (user_convention)
5376 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5377 sorry ("non-AAPCS derived PCS variant");
5378 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5379 error ("variadic functions must use the base AAPCS variant");
5382 if (base_rules)
5383 return ARM_PCS_AAPCS;
5384 else if (user_convention)
5385 return user_pcs;
5386 else if (decl && flag_unit_at_a_time)
5388 /* Local functions never leak outside this compilation unit,
5389 so we are free to use whatever conventions are
5390 appropriate. */
5391 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5392 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5393 if (i && i->local)
5394 return ARM_PCS_AAPCS_LOCAL;
5397 else if (user_convention && user_pcs != arm_pcs_default)
5398 sorry ("PCS variant");
5400 /* For everything else we use the target's default. */
5401 return arm_pcs_default;
5405 static void
5406 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5407 const_tree fntype ATTRIBUTE_UNUSED,
5408 rtx libcall ATTRIBUTE_UNUSED,
5409 const_tree fndecl ATTRIBUTE_UNUSED)
5411 /* Record the unallocated VFP registers. */
5412 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5413 pcum->aapcs_vfp_reg_alloc = 0;
5416 /* Walk down the type tree of TYPE counting consecutive base elements.
5417 If *MODEP is VOIDmode, then set it to the first valid floating point
5418 type. If a non-floating point type is found, or if a floating point
5419 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5420 otherwise return the count in the sub-tree. */
5421 static int
5422 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5424 machine_mode mode;
5425 HOST_WIDE_INT size;
5427 switch (TREE_CODE (type))
5429 case REAL_TYPE:
5430 mode = TYPE_MODE (type);
5431 if (mode != DFmode && mode != SFmode)
5432 return -1;
5434 if (*modep == VOIDmode)
5435 *modep = mode;
5437 if (*modep == mode)
5438 return 1;
5440 break;
5442 case COMPLEX_TYPE:
5443 mode = TYPE_MODE (TREE_TYPE (type));
5444 if (mode != DFmode && mode != SFmode)
5445 return -1;
5447 if (*modep == VOIDmode)
5448 *modep = mode;
5450 if (*modep == mode)
5451 return 2;
5453 break;
5455 case VECTOR_TYPE:
5456 /* Use V2SImode and V4SImode as representatives of all 64-bit
5457 and 128-bit vector types, whether or not those modes are
5458 supported with the present options. */
5459 size = int_size_in_bytes (type);
5460 switch (size)
5462 case 8:
5463 mode = V2SImode;
5464 break;
5465 case 16:
5466 mode = V4SImode;
5467 break;
5468 default:
5469 return -1;
5472 if (*modep == VOIDmode)
5473 *modep = mode;
5475 /* Vector modes are considered to be opaque: two vectors are
5476 equivalent for the purposes of being homogeneous aggregates
5477 if they are the same size. */
5478 if (*modep == mode)
5479 return 1;
5481 break;
5483 case ARRAY_TYPE:
5485 int count;
5486 tree index = TYPE_DOMAIN (type);
5488 /* Can't handle incomplete types nor sizes that are not
5489 fixed. */
5490 if (!COMPLETE_TYPE_P (type)
5491 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5492 return -1;
5494 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5495 if (count == -1
5496 || !index
5497 || !TYPE_MAX_VALUE (index)
5498 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5499 || !TYPE_MIN_VALUE (index)
5500 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5501 || count < 0)
5502 return -1;
5504 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5505 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5507 /* There must be no padding. */
5508 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5509 return -1;
5511 return count;
5514 case RECORD_TYPE:
5516 int count = 0;
5517 int sub_count;
5518 tree field;
5520 /* Can't handle incomplete types nor sizes that are not
5521 fixed. */
5522 if (!COMPLETE_TYPE_P (type)
5523 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5524 return -1;
5526 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5528 if (TREE_CODE (field) != FIELD_DECL)
5529 continue;
5531 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5532 if (sub_count < 0)
5533 return -1;
5534 count += sub_count;
5537 /* There must be no padding. */
5538 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5539 return -1;
5541 return count;
5544 case UNION_TYPE:
5545 case QUAL_UNION_TYPE:
5547 /* These aren't very interesting except in a degenerate case. */
5548 int count = 0;
5549 int sub_count;
5550 tree field;
5552 /* Can't handle incomplete types nor sizes that are not
5553 fixed. */
5554 if (!COMPLETE_TYPE_P (type)
5555 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5556 return -1;
5558 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5560 if (TREE_CODE (field) != FIELD_DECL)
5561 continue;
5563 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5564 if (sub_count < 0)
5565 return -1;
5566 count = count > sub_count ? count : sub_count;
5569 /* There must be no padding. */
5570 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5571 return -1;
5573 return count;
5576 default:
5577 break;
5580 return -1;
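/* Illustrative example (hypothetical types, not from this file) of the
   classification above:

     struct s4f { float x, y, z, w; };   - count 4, *modep == SFmode
     double    d2[2];                    - count 2, *modep == DFmode
     struct sfi { float f; int i; };     - returns -1 (int member is not FP)

   A positive count of at most 4 is what later makes a type a candidate
   for the VFP argument and return registers.  */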
5583 /* Return true if PCS_VARIANT should use VFP registers. */
5584 static bool
5585 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5587 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5589 static bool seen_thumb1_vfp = false;
5591 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5593 sorry ("Thumb-1 hard-float VFP ABI");
5594 /* sorry() is not immediately fatal, so only display this once. */
5595 seen_thumb1_vfp = true;
5598 return true;
5601 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5602 return false;
5604 return (TARGET_32BIT && TARGET_VFP && TARGET_HARD_FLOAT &&
5605 (TARGET_VFP_DOUBLE || !is_double));
5608 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5609 suitable for passing or returning in VFP registers for the PCS
5610 variant selected. If it is, then *BASE_MODE is updated to contain
5611 a machine mode describing each element of the argument's type and
5612 *COUNT to hold the number of such elements. */
5613 static bool
5614 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5615 machine_mode mode, const_tree type,
5616 machine_mode *base_mode, int *count)
5618 machine_mode new_mode = VOIDmode;
5620 /* If we have the type information, prefer that to working things
5621 out from the mode. */
5622 if (type)
5624 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5626 if (ag_count > 0 && ag_count <= 4)
5627 *count = ag_count;
5628 else
5629 return false;
5631 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5632 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5633 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5635 *count = 1;
5636 new_mode = mode;
5638 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
5640 *count = 2;
5641 new_mode = (mode == DCmode ? DFmode : SFmode);
5643 else
5644 return false;
5647 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
5648 return false;
5650 *base_mode = new_mode;
5651 return true;
5654 static bool
5655 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
5656 machine_mode mode, const_tree type)
5658 int count ATTRIBUTE_UNUSED;
5659 machine_mode ag_mode ATTRIBUTE_UNUSED;
5661 if (!use_vfp_abi (pcs_variant, false))
5662 return false;
5663 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5664 &ag_mode, &count);
5667 static bool
5668 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5669 const_tree type)
5671 if (!use_vfp_abi (pcum->pcs_variant, false))
5672 return false;
5674 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
5675 &pcum->aapcs_vfp_rmode,
5676 &pcum->aapcs_vfp_rcount);
5679 static bool
5680 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
5681 const_tree type ATTRIBUTE_UNUSED)
5683 int shift = GET_MODE_SIZE (pcum->aapcs_vfp_rmode) / GET_MODE_SIZE (SFmode);
5684 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
5685 int regno;
5687 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
5688 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
5690 pcum->aapcs_vfp_reg_alloc = mask << regno;
5691 if (mode == BLKmode
5692 || (mode == TImode && ! TARGET_NEON)
5693 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
5695 int i;
5696 int rcount = pcum->aapcs_vfp_rcount;
5697 int rshift = shift;
5698 machine_mode rmode = pcum->aapcs_vfp_rmode;
5699 rtx par;
5700 if (!TARGET_NEON)
5702 /* Avoid using unsupported vector modes. */
5703 if (rmode == V2SImode)
5704 rmode = DImode;
5705 else if (rmode == V4SImode)
5707 rmode = DImode;
5708 rcount *= 2;
5709 rshift /= 2;
5712 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
5713 for (i = 0; i < rcount; i++)
5715 rtx tmp = gen_rtx_REG (rmode,
5716 FIRST_VFP_REGNUM + regno + i * rshift);
5717 tmp = gen_rtx_EXPR_LIST
5718 (VOIDmode, tmp,
5719 GEN_INT (i * GET_MODE_SIZE (rmode)));
5720 XVECEXP (par, 0, i) = tmp;
5723 pcum->aapcs_reg = par;
5725 else
5726 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
5727 return true;
5729 return false;
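/* Worked example (editorial, derived from the code above): for a
   homogeneous aggregate of two doubles, shift is
   GET_MODE_SIZE (DFmode) / GET_MODE_SIZE (SFmode) == 2 and mask is
   (1 << (2 * 2)) - 1 == 0xf, so the loop scans even-numbered S registers
   looking for four consecutive free ones, e.g. s0-s3 (d0-d1).  */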
5732 static rtx
5733 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
5734 machine_mode mode,
5735 const_tree type ATTRIBUTE_UNUSED)
5737 if (!use_vfp_abi (pcs_variant, false))
5738 return NULL;
5740 if (mode == BLKmode || (mode == TImode && !TARGET_NEON))
5742 int count;
5743 machine_mode ag_mode;
5744 int i;
5745 rtx par;
5746 int shift;
5748 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
5749 &ag_mode, &count);
5751 if (!TARGET_NEON)
5753 if (ag_mode == V2SImode)
5754 ag_mode = DImode;
5755 else if (ag_mode == V4SImode)
5757 ag_mode = DImode;
5758 count *= 2;
5761 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
5762 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
5763 for (i = 0; i < count; i++)
5765 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
5766 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
5767 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
5768 XVECEXP (par, 0, i) = tmp;
5771 return par;
5774 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
5777 static void
5778 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5779 machine_mode mode ATTRIBUTE_UNUSED,
5780 const_tree type ATTRIBUTE_UNUSED)
5782 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
5783 pcum->aapcs_vfp_reg_alloc = 0;
5784 return;
5787 #define AAPCS_CP(X) \
5789 aapcs_ ## X ## _cum_init, \
5790 aapcs_ ## X ## _is_call_candidate, \
5791 aapcs_ ## X ## _allocate, \
5792 aapcs_ ## X ## _is_return_candidate, \
5793 aapcs_ ## X ## _allocate_return_reg, \
5794 aapcs_ ## X ## _advance \
5797 /* Table of co-processors that can be used to pass arguments in
5798 registers. Ideally no argument should be a candidate for more than
5799 one co-processor table entry, but the table is processed in order
5800 and stops after the first match. If that entry then fails to put
5801 the argument into a co-processor register, the argument will go on
5802 the stack. */
5803 static struct
5805 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
5806 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
5808 /* Return true if an argument of mode MODE (or type TYPE if MODE is
5809 BLKmode) is a candidate for this co-processor's registers; this
5810 function should ignore any position-dependent state in
5811 CUMULATIVE_ARGS and only use call-type dependent information. */
5812 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5814 /* Return true if the argument does get a co-processor register; it
5815 should set aapcs_reg to an RTX for the allocated register, in the form
5816 required for a return from FUNCTION_ARG. */
5817 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5819 /* Return true if a result of mode MODE (or type TYPE if MODE is
5820 BLKmode) can be returned in this co-processor's registers. */
5821 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
5823 /* Allocate and return an RTX element to hold the return type of a
5824 call; this routine must not fail and will only be called if
5825 is_return_candidate returned true with the same parameters. */
5826 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
5828 /* Finish processing this argument and prepare to start processing
5829 the next one. */
5830 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
5831 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
5833 AAPCS_CP(vfp)
5836 #undef AAPCS_CP
5838 static int
5839 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
5840 const_tree type)
5842 int i;
5844 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5845 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
5846 return i;
5848 return -1;
5851 static int
5852 aapcs_select_return_coproc (const_tree type, const_tree fntype)
5854 /* We aren't passed a decl, so we can't check that a call is local.
5855 However, it isn't clear that that would be a win anyway, since it
5856 might limit some tail-calling opportunities. */
5857 enum arm_pcs pcs_variant;
5859 if (fntype)
5861 const_tree fndecl = NULL_TREE;
5863 if (TREE_CODE (fntype) == FUNCTION_DECL)
5865 fndecl = fntype;
5866 fntype = TREE_TYPE (fntype);
5869 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5871 else
5872 pcs_variant = arm_pcs_default;
5874 if (pcs_variant != ARM_PCS_AAPCS)
5876 int i;
5878 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5879 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
5880 TYPE_MODE (type),
5881 type))
5882 return i;
5884 return -1;
5887 static rtx
5888 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
5889 const_tree fntype)
5891 /* We aren't passed a decl, so we can't check that a call is local.
5892 However, it isn't clear that that would be a win anyway, since it
5893 might limit some tail-calling opportunities. */
5894 enum arm_pcs pcs_variant;
5895 int unsignedp ATTRIBUTE_UNUSED;
5897 if (fntype)
5899 const_tree fndecl = NULL_TREE;
5901 if (TREE_CODE (fntype) == FUNCTION_DECL)
5903 fndecl = fntype;
5904 fntype = TREE_TYPE (fntype);
5907 pcs_variant = arm_get_pcs_model (fntype, fndecl);
5909 else
5910 pcs_variant = arm_pcs_default;
5912 /* Promote integer types. */
5913 if (type && INTEGRAL_TYPE_P (type))
5914 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
5916 if (pcs_variant != ARM_PCS_AAPCS)
5918 int i;
5920 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
5921 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
5922 type))
5923 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
5924 mode, type);
5927 /* Promotes small structs returned in a register to full-word size
5928 for big-endian AAPCS. */
5929 if (type && arm_return_in_msb (type))
5931 HOST_WIDE_INT size = int_size_in_bytes (type);
5932 if (size % UNITS_PER_WORD != 0)
5934 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5935 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
5939 return gen_rtx_REG (mode, R0_REGNUM);
5942 static rtx
5943 aapcs_libcall_value (machine_mode mode)
5945 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
5946 && GET_MODE_SIZE (mode) <= 4)
5947 mode = SImode;
5949 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
5952 /* Lay out a function argument using the AAPCS rules. The rule
5953 numbers referred to here are those in the AAPCS. */
5954 static void
5955 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
5956 const_tree type, bool named)
5958 int nregs, nregs2;
5959 int ncrn;
5961 /* We only need to do this once per argument. */
5962 if (pcum->aapcs_arg_processed)
5963 return;
5965 pcum->aapcs_arg_processed = true;
5967 /* Special case: if named is false then we are handling an incoming
5968 anonymous argument which is on the stack. */
5969 if (!named)
5970 return;
5972 /* Is this a potential co-processor register candidate? */
5973 if (pcum->pcs_variant != ARM_PCS_AAPCS)
5975 int slot = aapcs_select_call_coproc (pcum, mode, type);
5976 pcum->aapcs_cprc_slot = slot;
5978 /* We don't have to apply any of the rules from part B of the
5979 preparation phase, these are handled elsewhere in the
5980 compiler. */
5982 if (slot >= 0)
5984 /* A Co-processor register candidate goes either in its own
5985 class of registers or on the stack. */
5986 if (!pcum->aapcs_cprc_failed[slot])
5988 /* C1.cp - Try to allocate the argument to co-processor
5989 registers. */
5990 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
5991 return;
5993 /* C2.cp - Put the argument on the stack and note that we
5994 can't assign any more candidates in this slot. We also
5995 need to note that we have allocated stack space, so that
5996 we won't later try to split a non-cprc candidate between
5997 core registers and the stack. */
5998 pcum->aapcs_cprc_failed[slot] = true;
5999 pcum->can_split = false;
6002 /* We didn't get a register, so this argument goes on the
6003 stack. */
6004 gcc_assert (pcum->can_split == false);
6005 return;
6009 /* C3 - For double-word aligned arguments, round the NCRN up to the
6010 next even number. */
6011 ncrn = pcum->aapcs_ncrn;
6012 if ((ncrn & 1) && arm_needs_doubleword_align (mode, type))
6013 ncrn++;
6015 nregs = ARM_NUM_REGS2(mode, type);
6017 /* Sigh, this test should really assert that nregs > 0, but a GCC
6018 extension allows empty structs and then gives them empty size; it
6019 then allows such a structure to be passed by value. For some of
6020 the code below we have to pretend that such an argument has
6021 non-zero size so that we 'locate' it correctly either in
6022 registers or on the stack. */
6023 gcc_assert (nregs >= 0);
6025 nregs2 = nregs ? nregs : 1;
6027 /* C4 - Argument fits entirely in core registers. */
6028 if (ncrn + nregs2 <= NUM_ARG_REGS)
6030 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6031 pcum->aapcs_next_ncrn = ncrn + nregs;
6032 return;
6035 /* C5 - Some core registers left and there are no arguments already
6036 on the stack: split this argument between the remaining core
6037 registers and the stack. */
6038 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6040 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6041 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6042 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6043 return;
6046 /* C6 - NCRN is set to 4. */
6047 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6049 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
6050 return;
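/* Worked example (editorial) of rules C3/C4 above: for a base-AAPCS call
   such as f (int a, double b), a is allocated to r0 (NCRN becomes 1);
   b needs doubleword alignment, so C3 rounds NCRN up to 2 and C4 places
   it in r2/r3.  If three integer arguments had already used r0-r2, the
   double would skip r3 (C3 rounds NCRN to 4) and go on the stack via C6.  */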
6053 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6054 for a call to a function whose data type is FNTYPE.
6055 For a library call, FNTYPE is NULL. */
6056 void
6057 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6058 rtx libname,
6059 tree fndecl ATTRIBUTE_UNUSED)
6061 /* Determine the PCS variant to use for this call. */
6062 if (fntype)
6063 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6064 else
6065 pcum->pcs_variant = arm_pcs_default;
6067 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6069 if (arm_libcall_uses_aapcs_base (libname))
6070 pcum->pcs_variant = ARM_PCS_AAPCS;
6072 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6073 pcum->aapcs_reg = NULL_RTX;
6074 pcum->aapcs_partial = 0;
6075 pcum->aapcs_arg_processed = false;
6076 pcum->aapcs_cprc_slot = -1;
6077 pcum->can_split = true;
6079 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6081 int i;
6083 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6085 pcum->aapcs_cprc_failed[i] = false;
6086 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6089 return;
6092 /* Legacy ABIs */
6094 /* On the ARM, the offset starts at 0. */
6095 pcum->nregs = 0;
6096 pcum->iwmmxt_nregs = 0;
6097 pcum->can_split = true;
6099 /* Varargs vectors are treated the same as long long.
6100 named_count avoids having to change the way arm handles 'named'. */
6101 pcum->named_count = 0;
6102 pcum->nargs = 0;
6104 if (TARGET_REALLY_IWMMXT && fntype)
6106 tree fn_arg;
6108 for (fn_arg = TYPE_ARG_TYPES (fntype);
6109 fn_arg;
6110 fn_arg = TREE_CHAIN (fn_arg))
6111 pcum->named_count += 1;
6113 if (! pcum->named_count)
6114 pcum->named_count = INT_MAX;
6118 /* Return true if mode/type need doubleword alignment. */
6119 static bool
6120 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6122 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
6123 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
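/* For example, DImode and DFmode (64-bit alignment) need doubleword
   alignment here while SImode does not; a struct given a larger alignment
   with __attribute__ ((aligned)) would also qualify via TYPE_ALIGN.  */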
6127 /* Determine where to put an argument to a function.
6128 Value is zero to push the argument on the stack,
6129 or a hard register in which to store the argument.
6131 MODE is the argument's machine mode.
6132 TYPE is the data type of the argument (as a tree).
6133 This is null for libcalls where that information may
6134 not be available.
6135 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6136 the preceding args and about the function being called.
6137 NAMED is nonzero if this argument is a named parameter
6138 (otherwise it is an extra parameter matching an ellipsis).
6140 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6141 other arguments are passed on the stack. If (NAMED == 0) (which happens
6142 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6143 defined), say it is passed on the stack (function_prologue will
6144 indeed make it be passed on the stack if necessary). */
6146 static rtx
6147 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6148 const_tree type, bool named)
6150 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6151 int nregs;
6153 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6154 a call insn (op3 of a call_value insn). */
6155 if (mode == VOIDmode)
6156 return const0_rtx;
6158 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6160 aapcs_layout_arg (pcum, mode, type, named);
6161 return pcum->aapcs_reg;
6164 /* Varargs vectors are treated the same as long long.
6165 named_count avoids having to change the way arm handles 'named'. */
6166 if (TARGET_IWMMXT_ABI
6167 && arm_vector_mode_supported_p (mode)
6168 && pcum->named_count > pcum->nargs + 1)
6170 if (pcum->iwmmxt_nregs <= 9)
6171 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6172 else
6174 pcum->can_split = false;
6175 return NULL_RTX;
6179 /* Put doubleword aligned quantities in even register pairs. */
6180 if (pcum->nregs & 1
6181 && ARM_DOUBLEWORD_ALIGN
6182 && arm_needs_doubleword_align (mode, type))
6183 pcum->nregs++;
6185 /* Only allow splitting an arg between regs and memory if all preceding
6186 args were allocated to regs. For args passed by reference we only count
6187 the reference pointer. */
6188 if (pcum->can_split)
6189 nregs = 1;
6190 else
6191 nregs = ARM_NUM_REGS2 (mode, type);
6193 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6194 return NULL_RTX;
6196 return gen_rtx_REG (mode, pcum->nregs);
6199 static unsigned int
6200 arm_function_arg_boundary (machine_mode mode, const_tree type)
6202 return (ARM_DOUBLEWORD_ALIGN && arm_needs_doubleword_align (mode, type)
6203 ? DOUBLEWORD_ALIGNMENT
6204 : PARM_BOUNDARY);
6207 static int
6208 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6209 tree type, bool named)
6211 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6212 int nregs = pcum->nregs;
6214 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6216 aapcs_layout_arg (pcum, mode, type, named);
6217 return pcum->aapcs_partial;
6220 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6221 return 0;
6223 if (NUM_ARG_REGS > nregs
6224 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6225 && pcum->can_split)
6226 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6228 return 0;
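/* Worked example (editorial) of the legacy split above: an 8-byte
   argument arriving when r0-r2 are already in use gives
   (NUM_ARG_REGS - 3) * UNITS_PER_WORD == 4, i.e. 4 bytes in r3 and the
   remainder on the stack, provided pcum->can_split is still true.  */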
6231 /* Update the data in PCUM to advance over an argument
6232 of mode MODE and data type TYPE.
6233 (TYPE is null for libcalls where that information may not be available.) */
6235 static void
6236 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6237 const_tree type, bool named)
6239 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6241 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6243 aapcs_layout_arg (pcum, mode, type, named);
6245 if (pcum->aapcs_cprc_slot >= 0)
6247 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6248 type);
6249 pcum->aapcs_cprc_slot = -1;
6252 /* Generic stuff. */
6253 pcum->aapcs_arg_processed = false;
6254 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6255 pcum->aapcs_reg = NULL_RTX;
6256 pcum->aapcs_partial = 0;
6258 else
6260 pcum->nargs += 1;
6261 if (arm_vector_mode_supported_p (mode)
6262 && pcum->named_count > pcum->nargs
6263 && TARGET_IWMMXT_ABI)
6264 pcum->iwmmxt_nregs += 1;
6265 else
6266 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6270 /* Variable sized types are passed by reference. This is a GCC
6271 extension to the ARM ABI. */
6273 static bool
6274 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6275 machine_mode mode ATTRIBUTE_UNUSED,
6276 const_tree type, bool named ATTRIBUTE_UNUSED)
6278 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6281 /* Encode the current state of the #pragma [no_]long_calls. */
6282 typedef enum
6284 OFF, /* No #pragma [no_]long_calls is in effect. */
6285 LONG, /* #pragma long_calls is in effect. */
6286 SHORT /* #pragma no_long_calls is in effect. */
6287 } arm_pragma_enum;
6289 static arm_pragma_enum arm_pragma_long_calls = OFF;
6291 void
6292 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6294 arm_pragma_long_calls = LONG;
6297 void
6298 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6300 arm_pragma_long_calls = SHORT;
6303 void
6304 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6306 arm_pragma_long_calls = OFF;
6309 /* Handle an attribute requiring a FUNCTION_DECL;
6310 arguments as in struct attribute_spec.handler. */
6311 static tree
6312 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6313 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6315 if (TREE_CODE (*node) != FUNCTION_DECL)
6317 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6318 name);
6319 *no_add_attrs = true;
6322 return NULL_TREE;
6325 /* Handle an "interrupt" or "isr" attribute;
6326 arguments as in struct attribute_spec.handler. */
6327 static tree
6328 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6329 bool *no_add_attrs)
6331 if (DECL_P (*node))
6333 if (TREE_CODE (*node) != FUNCTION_DECL)
6335 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6336 name);
6337 *no_add_attrs = true;
6339 /* FIXME: the argument if any is checked for type attributes;
6340 should it be checked for decl ones? */
6342 else
6344 if (TREE_CODE (*node) == FUNCTION_TYPE
6345 || TREE_CODE (*node) == METHOD_TYPE)
6347 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6349 warning (OPT_Wattributes, "%qE attribute ignored",
6350 name);
6351 *no_add_attrs = true;
6354 else if (TREE_CODE (*node) == POINTER_TYPE
6355 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6356 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6357 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6359 *node = build_variant_type_copy (*node);
6360 TREE_TYPE (*node) = build_type_attribute_variant
6361 (TREE_TYPE (*node),
6362 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6363 *no_add_attrs = true;
6365 else
6367 /* Possibly pass this attribute on from the type to a decl. */
6368 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6369 | (int) ATTR_FLAG_FUNCTION_NEXT
6370 | (int) ATTR_FLAG_ARRAY_NEXT))
6372 *no_add_attrs = true;
6373 return tree_cons (name, args, NULL_TREE);
6375 else
6377 warning (OPT_Wattributes, "%qE attribute ignored",
6378 name);
6383 return NULL_TREE;
6386 /* Handle a "pcs" attribute; arguments as in struct
6387 attribute_spec.handler. */
6388 static tree
6389 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6390 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6392 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6394 warning (OPT_Wattributes, "%qE attribute ignored", name);
6395 *no_add_attrs = true;
6397 return NULL_TREE;
6400 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6401 /* Handle the "notshared" attribute. This attribute is another way of
6402 requesting hidden visibility. ARM's compiler supports
6403 "__declspec(notshared)"; we support the same thing via an
6404 attribute. */
6406 static tree
6407 arm_handle_notshared_attribute (tree *node,
6408 tree name ATTRIBUTE_UNUSED,
6409 tree args ATTRIBUTE_UNUSED,
6410 int flags ATTRIBUTE_UNUSED,
6411 bool *no_add_attrs)
6413 tree decl = TYPE_NAME (*node);
6415 if (decl)
6417 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6418 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6419 *no_add_attrs = false;
6421 return NULL_TREE;
6423 #endif
6425 /* Return 0 if the attributes for two types are incompatible, 1 if they
6426 are compatible, and 2 if they are nearly compatible (which causes a
6427 warning to be generated). */
6428 static int
6429 arm_comp_type_attributes (const_tree type1, const_tree type2)
6431 int l1, l2, s1, s2;
6433 /* Check for mismatch of non-default calling convention. */
6434 if (TREE_CODE (type1) != FUNCTION_TYPE)
6435 return 1;
6437 /* Check for mismatched call attributes. */
6438 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
6439 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
6440 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
6441 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
6443 /* Only bother to check if an attribute is defined. */
6444 if (l1 | l2 | s1 | s2)
6446 /* If one type has an attribute, the other must have the same attribute. */
6447 if ((l1 != l2) || (s1 != s2))
6448 return 0;
6450 /* Disallow mixed attributes. */
6451 if ((l1 & s2) || (l2 & s1))
6452 return 0;
6455 /* Check for mismatched ISR attribute. */
6456 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
6457 if (! l1)
6458 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
6459 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
6460 if (! l2)
6461 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
6462 if (l1 != l2)
6463 return 0;
6465 return 1;
6468 /* Assigns default attributes to newly defined type. This is used to
6469 set short_call/long_call attributes for function types of
6470 functions defined inside corresponding #pragma scopes. */
6471 static void
6472 arm_set_default_type_attributes (tree type)
6474 /* Add __attribute__ ((long_call)) to all functions, when
6475 inside #pragma long_calls or __attribute__ ((short_call)),
6476 when inside #pragma no_long_calls. */
6477 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
6479 tree type_attr_list, attr_name;
6480 type_attr_list = TYPE_ATTRIBUTES (type);
6482 if (arm_pragma_long_calls == LONG)
6483 attr_name = get_identifier ("long_call");
6484 else if (arm_pragma_long_calls == SHORT)
6485 attr_name = get_identifier ("short_call");
6486 else
6487 return;
6489 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
6490 TYPE_ATTRIBUTES (type) = type_attr_list;
6494 /* Return true if DECL is known to be linked into section SECTION. */
6496 static bool
6497 arm_function_in_section_p (tree decl, section *section)
6499 /* We can only be certain about the prevailing symbol definition. */
6500 if (!decl_binds_to_current_def_p (decl))
6501 return false;
6503 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
6504 if (!DECL_SECTION_NAME (decl))
6506 /* Make sure that we will not create a unique section for DECL. */
6507 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
6508 return false;
6511 return function_section (decl) == section;
6514 /* Return nonzero if a 32-bit "long_call" should be generated for
6515 a call from the current function to DECL. We generate a long_call
6516 if the function:
6518 a. has an __attribute__ ((long_call))
6519 or b. is within the scope of a #pragma long_calls
6520 or c. the -mlong-calls command line switch has been specified
6522 However we do not generate a long call if the function:
6524 d. has an __attribute__ ((short_call))
6525 or e. is inside the scope of a #pragma no_long_calls
6526 or f. is defined in the same section as the current function. */
6528 bool
6529 arm_is_long_call_p (tree decl)
6531 tree attrs;
6533 if (!decl)
6534 return TARGET_LONG_CALLS;
6536 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
6537 if (lookup_attribute ("short_call", attrs))
6538 return false;
6540 /* For "f", be conservative, and only cater for cases in which the
6541 whole of the current function is placed in the same section. */
6542 if (!flag_reorder_blocks_and_partition
6543 && TREE_CODE (decl) == FUNCTION_DECL
6544 && arm_function_in_section_p (decl, current_function_section ()))
6545 return false;
6547 if (lookup_attribute ("long_call", attrs))
6548 return true;
6550 return TARGET_LONG_CALLS;
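/* Illustrative example (hypothetical declarations, not from this file)
   of the attributes tested above:

     void far_helper (void) __attribute__ ((long_call));
     void near_helper (void) __attribute__ ((short_call));

   far_helper is always reached through a full 32-bit address, while
   near_helper keeps a plain BL even under -mlong-calls.  */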
6553 /* Return nonzero if it is ok to make a tail-call to DECL. */
6554 static bool
6555 arm_function_ok_for_sibcall (tree decl, tree exp)
6557 unsigned long func_type;
6559 if (cfun->machine->sibcall_blocked)
6560 return false;
6562 /* Never tailcall something if we are generating code for Thumb-1. */
6563 if (TARGET_THUMB1)
6564 return false;
6566 /* The PIC register is live on entry to VxWorks PLT entries, so we
6567 must make the call before restoring the PIC register. */
6568 if (TARGET_VXWORKS_RTP && flag_pic && !targetm.binds_local_p (decl))
6569 return false;
6571 /* If we are interworking and the function is not declared static
6572 then we can't tail-call it unless we know that it exists in this
6573 compilation unit (since it might be a Thumb routine). */
6574 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
6575 && !TREE_ASM_WRITTEN (decl))
6576 return false;
6578 func_type = arm_current_func_type ();
6579 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
6580 if (IS_INTERRUPT (func_type))
6581 return false;
6583 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
6585 /* Check that the return value locations are the same. For
6586 example that we aren't returning a value from the sibling in
6587 a VFP register but then need to transfer it to a core
6588 register. */
6589 rtx a, b;
6591 a = arm_function_value (TREE_TYPE (exp), decl, false);
6592 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
6593 cfun->decl, false);
6594 if (!rtx_equal_p (a, b))
6595 return false;
6598 /* Never tailcall if function may be called with a misaligned SP. */
6599 if (IS_STACKALIGN (func_type))
6600 return false;
6602 /* The AAPCS says that, on bare-metal, calls to unresolved weak
6603 references should become a NOP. Don't convert such calls into
6604 sibling calls. */
6605 if (TARGET_AAPCS_BASED
6606 && arm_abi == ARM_ABI_AAPCS
6607 && decl
6608 && DECL_WEAK (decl))
6609 return false;
6611 /* Everything else is ok. */
6612 return true;
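/* Illustrative example (editorial) of the return-location check above:
   if the current function returns a float in r0 (base AAPCS) but the
   callee was declared __attribute__ ((pcs ("aapcs-vfp"))) and so returns
   in s0, the rtx_equal_p test fails and the call is not turned into a
   sibcall.  */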
6616 /* Addressing mode support functions. */
6618 /* Return nonzero if X is a legitimate immediate operand when compiling
6619 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
6620 int
6621 legitimate_pic_operand_p (rtx x)
6623 if (GET_CODE (x) == SYMBOL_REF
6624 || (GET_CODE (x) == CONST
6625 && GET_CODE (XEXP (x, 0)) == PLUS
6626 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
6627 return 0;
6629 return 1;
6632 /* Record that the current function needs a PIC register. Initialize
6633 cfun->machine->pic_reg if we have not already done so. */
6635 static void
6636 require_pic_register (void)
6638 /* A lot of the logic here is made obscure by the fact that this
6639 routine gets called as part of the rtx cost estimation process.
6640 We don't want those calls to affect any assumptions about the real
6641 function; and further, we can't call entry_of_function() until we
6642 start the real expansion process. */
6643 if (!crtl->uses_pic_offset_table)
6645 gcc_assert (can_create_pseudo_p ());
6646 if (arm_pic_register != INVALID_REGNUM
6647 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
6649 if (!cfun->machine->pic_reg)
6650 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
6652 /* Play games to avoid marking the function as needing pic
6653 if we are being called as part of the cost-estimation
6654 process. */
6655 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6656 crtl->uses_pic_offset_table = 1;
6658 else
6660 rtx_insn *seq, *insn;
6662 if (!cfun->machine->pic_reg)
6663 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
6665 /* Play games to avoid marking the function as needing pic
6666 if we are being called as part of the cost-estimation
6667 process. */
6668 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
6670 crtl->uses_pic_offset_table = 1;
6671 start_sequence ();
6673 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
6674 && arm_pic_register > LAST_LO_REGNUM)
6675 emit_move_insn (cfun->machine->pic_reg,
6676 gen_rtx_REG (Pmode, arm_pic_register));
6677 else
6678 arm_load_pic_register (0UL);
6680 seq = get_insns ();
6681 end_sequence ();
6683 for (insn = seq; insn; insn = NEXT_INSN (insn))
6684 if (INSN_P (insn))
6685 INSN_LOCATION (insn) = prologue_location;
6687 /* We can be called during expansion of PHI nodes, where
6688 we can't yet emit instructions directly in the final
6689 insn stream. Queue the insns on the entry edge, they will
6690 be committed after everything else is expanded. */
6691 insert_insn_on_edge (seq,
6692 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
6699 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
6701 if (GET_CODE (orig) == SYMBOL_REF
6702 || GET_CODE (orig) == LABEL_REF)
6704 rtx insn;
6706 if (reg == 0)
6708 gcc_assert (can_create_pseudo_p ());
6709 reg = gen_reg_rtx (Pmode);
6712 /* VxWorks does not impose a fixed gap between segments; the run-time
6713 gap can be different from the object-file gap. We therefore can't
6714 use GOTOFF unless we are absolutely sure that the symbol is in the
6715 same segment as the GOT. Unfortunately, the flexibility of linker
6716 scripts means that we can't be sure of that in general, so assume
6717 that GOTOFF is never valid on VxWorks. */
6718 if ((GET_CODE (orig) == LABEL_REF
6719 || (GET_CODE (orig) == SYMBOL_REF &&
6720 SYMBOL_REF_LOCAL_P (orig)))
6721 && NEED_GOT_RELOC
6722 && arm_pic_data_is_text_relative)
6723 insn = arm_pic_static_addr (orig, reg);
6724 else
6726 rtx pat;
6727 rtx mem;
6729 /* If this function doesn't have a pic register, create one now. */
6730 require_pic_register ();
6732 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
6734 /* Make the MEM as close to a constant as possible. */
6735 mem = SET_SRC (pat);
6736 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
6737 MEM_READONLY_P (mem) = 1;
6738 MEM_NOTRAP_P (mem) = 1;
6740 insn = emit_insn (pat);
6743 /* Put a REG_EQUAL note on this insn, so that it can be optimized
6744 by loop. */
6745 set_unique_reg_note (insn, REG_EQUAL, orig);
6747 return reg;
6749 else if (GET_CODE (orig) == CONST)
6751 rtx base, offset;
6753 if (GET_CODE (XEXP (orig, 0)) == PLUS
6754 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
6755 return orig;
6757 /* Handle the case where we have: const (UNSPEC_TLS). */
6758 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
6759 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
6760 return orig;
6762 /* Handle the case where we have:
6763 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
6764 CONST_INT. */
6765 if (GET_CODE (XEXP (orig, 0)) == PLUS
6766 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
6767 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
6769 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
6770 return orig;
6773 if (reg == 0)
6775 gcc_assert (can_create_pseudo_p ());
6776 reg = gen_reg_rtx (Pmode);
6779 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
6781 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
6782 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
6783 base == reg ? 0 : reg);
6785 if (CONST_INT_P (offset))
6787 /* The base register doesn't really matter, we only want to
6788 test the index for the appropriate mode. */
6789 if (!arm_legitimate_index_p (mode, offset, SET, 0))
6791 gcc_assert (can_create_pseudo_p ());
6792 offset = force_reg (Pmode, offset);
6795 if (CONST_INT_P (offset))
6796 return plus_constant (Pmode, base, INTVAL (offset));
6799 if (GET_MODE_SIZE (mode) > 4
6800 && (GET_MODE_CLASS (mode) == MODE_INT
6801 || TARGET_SOFT_FLOAT))
6803 emit_insn (gen_addsi3 (reg, base, offset));
6804 return reg;
6807 return gen_rtx_PLUS (Pmode, base, offset);
6810 return orig;
6814 /* Find a spare register to use during the prolog of a function. */
6816 static int
6817 thumb_find_work_register (unsigned long pushed_regs_mask)
6819 int reg;
6821 /* Check the argument registers first as these are call-used. The
6822 register allocation order means that sometimes r3 might be used
6823 but earlier argument registers might not, so check them all. */
6824 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
6825 if (!df_regs_ever_live_p (reg))
6826 return reg;
6828 /* Before going on to check the call-saved registers we can try a couple
6829 more ways of deducing that r3 is available. The first is when we are
6830 pushing anonymous arguments onto the stack and we have less than 4
6831 registers worth of fixed arguments(*). In this case r3 will be part of
6832 the variable argument list and so we can be sure that it will be
6833 pushed right at the start of the function. Hence it will be available
6834 for the rest of the prologue.
6835 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
6836 if (cfun->machine->uses_anonymous_args
6837 && crtl->args.pretend_args_size > 0)
6838 return LAST_ARG_REGNUM;
6840 /* The other case is when we have fixed arguments but less than 4 registers
6841 worth. In this case r3 might be used in the body of the function, but
6842 it is not being used to convey an argument into the function. In theory
6843 we could just check crtl->args.size to see how many bytes are
6844 being passed in argument registers, but it seems that it is unreliable.
6845 Sometimes it will have the value 0 when in fact arguments are being
6846 passed. (See testcase execute/20021111-1.c for an example). So we also
6847 check the args_info.nregs field as well. The problem with this field is
6848 that it makes no allowances for arguments that are passed to the
6849 function but which are not used. Hence we could miss an opportunity
6850 when a function has an unused argument in r3. But it is better to be
6851 safe than to be sorry. */
6852 if (! cfun->machine->uses_anonymous_args
6853 && crtl->args.size >= 0
6854 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
6855 && (TARGET_AAPCS_BASED
6856 ? crtl->args.info.aapcs_ncrn < 4
6857 : crtl->args.info.nregs < 4))
6858 return LAST_ARG_REGNUM;
6860 /* Otherwise look for a call-saved register that is going to be pushed. */
6861 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
6862 if (pushed_regs_mask & (1 << reg))
6863 return reg;
6865 if (TARGET_THUMB2)
6867 /* Thumb-2 can use high regs. */
6868 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
6869 if (pushed_regs_mask & (1 << reg))
6870 return reg;
6872 /* Something went wrong - thumb_compute_save_reg_mask()
6873 should have arranged for a suitable register to be pushed. */
6874 gcc_unreachable ();
6877 static GTY(()) int pic_labelno;
6879 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
6880 low register. */
6882 void
6883 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
6885 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
6887 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
6888 return;
6890 gcc_assert (flag_pic);
6892 pic_reg = cfun->machine->pic_reg;
6893 if (TARGET_VXWORKS_RTP)
6895 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
6896 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6897 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
6899 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
6901 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
6902 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
6904 else
6906 /* We use an UNSPEC rather than a LABEL_REF because this label
6907 never appears in the code stream. */
6909 labelno = GEN_INT (pic_labelno++);
6910 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6911 l1 = gen_rtx_CONST (VOIDmode, l1);
6913 /* On the ARM the PC register contains 'dot + 8' at the time of the
6914 addition, on the Thumb it is 'dot + 4'. */
6915 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6916 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
6917 UNSPEC_GOTSYM_OFF);
6918 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
6920 if (TARGET_32BIT)
6922 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6924 else /* TARGET_THUMB1 */
6926 if (arm_pic_register != INVALID_REGNUM
6927 && REGNO (pic_reg) > LAST_LO_REGNUM)
6929 /* We will have pushed the pic register, so we should always be
6930 able to find a work register. */
6931 pic_tmp = gen_rtx_REG (SImode,
6932 thumb_find_work_register (saved_regs));
6933 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
6934 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
6935 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
6937 else if (arm_pic_register != INVALID_REGNUM
6938 && arm_pic_register > LAST_LO_REGNUM
6939 && REGNO (pic_reg) <= LAST_LO_REGNUM)
6941 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6942 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
6943 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
6945 else
6946 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
6950 /* Need to emit this whether or not we obey regdecls,
6951 since setjmp/longjmp can cause life info to screw up. */
6952 emit_use (pic_reg);
6955 /* Generate code to load the address of a static var when flag_pic is set. */
6956 static rtx
6957 arm_pic_static_addr (rtx orig, rtx reg)
6959 rtx l1, labelno, offset_rtx, insn;
6961 gcc_assert (flag_pic);
6963 /* We use an UNSPEC rather than a LABEL_REF because this label
6964 never appears in the code stream. */
6965 labelno = GEN_INT (pic_labelno++);
6966 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
6967 l1 = gen_rtx_CONST (VOIDmode, l1);
6969 /* On the ARM the PC register contains 'dot + 8' at the time of the
6970 addition, on the Thumb it is 'dot + 4'. */
6971 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
6972 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
6973 UNSPEC_SYMBOL_OFFSET);
6974 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
6976 insn = emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
6977 return insn;
6980 /* Return nonzero if X is valid as an ARM state addressing register. */
6981 static int
6982 arm_address_register_rtx_p (rtx x, int strict_p)
6984 int regno;
6986 if (!REG_P (x))
6987 return 0;
6989 regno = REGNO (x);
6991 if (strict_p)
6992 return ARM_REGNO_OK_FOR_BASE_P (regno);
6994 return (regno <= LAST_ARM_REGNUM
6995 || regno >= FIRST_PSEUDO_REGISTER
6996 || regno == FRAME_POINTER_REGNUM
6997 || regno == ARG_POINTER_REGNUM);
7000 /* Return TRUE if this rtx is the difference of a symbol and a label,
7001 and will reduce to a PC-relative relocation in the object file.
7002 Expressions like this can be left alone when generating PIC, rather
7003 than forced through the GOT. */
7004 static int
7005 pcrel_constant_p (rtx x)
7007 if (GET_CODE (x) == MINUS)
7008 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7010 return FALSE;
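/* For example, (minus (symbol_ref "x") (label_ref L)) is accepted here:
   the assembler can resolve it to a PC-relative value, so it does not
   need to go through the GOT when generating PIC.  */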
7013 /* Return true if X will surely end up in an index register after the next
7014 splitting pass. */
7015 static bool
7016 will_be_in_index_register (const_rtx x)
7018 /* arm.md: calculate_pic_address will split this into a register. */
7019 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7022 /* Return nonzero if X is a valid ARM state address operand. */
7023 int
7024 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7025 int strict_p)
7027 bool use_ldrd;
7028 enum rtx_code code = GET_CODE (x);
7030 if (arm_address_register_rtx_p (x, strict_p))
7031 return 1;
7033 use_ldrd = (TARGET_LDRD
7034 && (mode == DImode
7035 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
7037 if (code == POST_INC || code == PRE_DEC
7038 || ((code == PRE_INC || code == POST_DEC)
7039 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7040 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7042 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7043 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7044 && GET_CODE (XEXP (x, 1)) == PLUS
7045 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7047 rtx addend = XEXP (XEXP (x, 1), 1);
7049 /* Don't allow ldrd post increment by register because it's hard
7050 to fixup invalid register choices. */
7051 if (use_ldrd
7052 && GET_CODE (x) == POST_MODIFY
7053 && REG_P (addend))
7054 return 0;
7056 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7057 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7060 /* After reload constants split into minipools will have addresses
7061 from a LABEL_REF. */
7062 else if (reload_completed
7063 && (code == LABEL_REF
7064 || (code == CONST
7065 && GET_CODE (XEXP (x, 0)) == PLUS
7066 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7067 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7068 return 1;
7070 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7071 return 0;
7073 else if (code == PLUS)
7075 rtx xop0 = XEXP (x, 0);
7076 rtx xop1 = XEXP (x, 1);
7078 return ((arm_address_register_rtx_p (xop0, strict_p)
7079 && ((CONST_INT_P (xop1)
7080 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7081 || (!strict_p && will_be_in_index_register (xop1))))
7082 || (arm_address_register_rtx_p (xop1, strict_p)
7083 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7086 #if 0
7087 /* Reload currently can't handle MINUS, so disable this for now */
7088 else if (GET_CODE (x) == MINUS)
7090 rtx xop0 = XEXP (x, 0);
7091 rtx xop1 = XEXP (x, 1);
7093 return (arm_address_register_rtx_p (xop0, strict_p)
7094 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7096 #endif
7098 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7099 && code == SYMBOL_REF
7100 && CONSTANT_POOL_ADDRESS_P (x)
7101 && ! (flag_pic
7102 && symbol_mentioned_p (get_pool_constant (x))
7103 && ! pcrel_constant_p (get_pool_constant (x))))
7104 return 1;
7106 return 0;
7109 /* Return nonzero if X is a valid Thumb-2 address operand. */
7110 static int
7111 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7113 bool use_ldrd;
7114 enum rtx_code code = GET_CODE (x);
7116 if (arm_address_register_rtx_p (x, strict_p))
7117 return 1;
7119 use_ldrd = (TARGET_LDRD
7120 && (mode == DImode
7121 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
7123 if (code == POST_INC || code == PRE_DEC
7124 || ((code == PRE_INC || code == POST_DEC)
7125 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7126 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7128 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7129 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7130 && GET_CODE (XEXP (x, 1)) == PLUS
7131 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7133 /* Thumb-2 only has autoincrement by constant. */
7134 rtx addend = XEXP (XEXP (x, 1), 1);
7135 HOST_WIDE_INT offset;
7137 if (!CONST_INT_P (addend))
7138 return 0;
7140 offset = INTVAL(addend);
7141 if (GET_MODE_SIZE (mode) <= 4)
7142 return (offset > -256 && offset < 256);
7144 return (use_ldrd && offset > -1024 && offset < 1024
7145 && (offset & 3) == 0);
7148 /* After reload constants split into minipools will have addresses
7149 from a LABEL_REF. */
7150 else if (reload_completed
7151 && (code == LABEL_REF
7152 || (code == CONST
7153 && GET_CODE (XEXP (x, 0)) == PLUS
7154 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7155 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7156 return 1;
7158 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7159 return 0;
7161 else if (code == PLUS)
7163 rtx xop0 = XEXP (x, 0);
7164 rtx xop1 = XEXP (x, 1);
7166 return ((arm_address_register_rtx_p (xop0, strict_p)
7167 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7168 || (!strict_p && will_be_in_index_register (xop1))))
7169 || (arm_address_register_rtx_p (xop1, strict_p)
7170 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7173 /* Normally we can assign constant values to target registers without
7174 the help of the constant pool.  But there are cases where we have to
7175 use the constant pool, for example:
7176 1) assigning a label to a register;
7177 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7179 A constant pool access of the form:
7180 (set (reg r0) (mem (symbol_ref (".LC0"))))
7181 will cause the use of the literal pool (later, in function arm_reorg).
7182 So here we mark such a form as invalid; the compiler will then
7183 adjust it into:
7184 (set (reg r0) (symbol_ref (".LC0")))
7185 (set (reg r0) (mem (reg r0))).
7186 No extra register is required, and (mem (reg r0)) won't cause the use
7187 of literal pools. */
7188 else if (arm_disable_literal_pool && code == SYMBOL_REF
7189 && CONSTANT_POOL_ADDRESS_P (x))
7190 return 0;
7192 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7193 && code == SYMBOL_REF
7194 && CONSTANT_POOL_ADDRESS_P (x)
7195 && ! (flag_pic
7196 && symbol_mentioned_p (get_pool_constant (x))
7197 && ! pcrel_constant_p (get_pool_constant (x))))
7198 return 1;
7200 return 0;
7203 /* Return nonzero if INDEX is valid for an address index operand in
7204 ARM state. */
7205 static int
7206 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7207 int strict_p)
7209 HOST_WIDE_INT range;
7210 enum rtx_code code = GET_CODE (index);
7212 /* Standard coprocessor addressing modes. */
7213 if (TARGET_HARD_FLOAT
7214 && TARGET_VFP
7215 && (mode == SFmode || mode == DFmode))
7216 return (code == CONST_INT && INTVAL (index) < 1024
7217 && INTVAL (index) > -1024
7218 && (INTVAL (index) & 3) == 0);
7220 /* For quad modes, we restrict the constant offset to be slightly less
7221 than what the instruction format permits. We do this because for
7222 quad mode moves, we will actually decompose them into two separate
7223 double-mode reads or writes. INDEX must therefore be a valid
7224 (double-mode) offset and so should INDEX+8. */
7225 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7226 return (code == CONST_INT
7227 && INTVAL (index) < 1016
7228 && INTVAL (index) > -1024
7229 && (INTVAL (index) & 3) == 0);
7231 /* We have no such constraint on double mode offsets, so we permit the
7232 full range of the instruction format. */
7233 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7234 return (code == CONST_INT
7235 && INTVAL (index) < 1024
7236 && INTVAL (index) > -1024
7237 && (INTVAL (index) & 3) == 0);
7239 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7240 return (code == CONST_INT
7241 && INTVAL (index) < 1024
7242 && INTVAL (index) > -1024
7243 && (INTVAL (index) & 3) == 0);
7245 if (arm_address_register_rtx_p (index, strict_p)
7246 && (GET_MODE_SIZE (mode) <= 4))
7247 return 1;
7249 if (mode == DImode || mode == DFmode)
7251 if (code == CONST_INT)
7253 HOST_WIDE_INT val = INTVAL (index);
7255 if (TARGET_LDRD)
7256 return val > -256 && val < 256;
7257 else
7258 return val > -4096 && val < 4092;
7261 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7264 if (GET_MODE_SIZE (mode) <= 4
7265 && ! (arm_arch4
7266 && (mode == HImode
7267 || mode == HFmode
7268 || (mode == QImode && outer == SIGN_EXTEND))))
7270 if (code == MULT)
7272 rtx xiop0 = XEXP (index, 0);
7273 rtx xiop1 = XEXP (index, 1);
7275 return ((arm_address_register_rtx_p (xiop0, strict_p)
7276 && power_of_two_operand (xiop1, SImode))
7277 || (arm_address_register_rtx_p (xiop1, strict_p)
7278 && power_of_two_operand (xiop0, SImode)));
7280 else if (code == LSHIFTRT || code == ASHIFTRT
7281 || code == ASHIFT || code == ROTATERT)
7283 rtx op = XEXP (index, 1);
7285 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7286 && CONST_INT_P (op)
7287 && INTVAL (op) > 0
7288 && INTVAL (op) <= 31);
7292 /* For ARM v4 we may be doing a sign-extend operation during the
7293 load. */
7294 if (arm_arch4)
7296 if (mode == HImode
7297 || mode == HFmode
7298 || (outer == SIGN_EXTEND && mode == QImode))
7299 range = 256;
7300 else
7301 range = 4096;
7303 else
7304 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7306 return (code == CONST_INT
7307 && INTVAL (index) < range
7308 && INTVAL (index) > -range);
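/* Illustrative examples (editorial) of SImode indices accepted above in
   ARM state:

     ldr r0, [r1, #4092]        @ immediate within +/-4095
     ldr r0, [r1, r2, lsl #3]   @ register shifted by 1..31

   whereas HImode (and sign-extended QImode) accesses on ARMv4 and later
   are limited to the 8-bit +/-255 offset range of ldrh/ldrsb.  */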
7311 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7312 index operand. i.e. 1, 2, 4 or 8. */
7313 static bool
7314 thumb2_index_mul_operand (rtx op)
7316 HOST_WIDE_INT val;
7318 if (!CONST_INT_P (op))
7319 return false;
7321 val = INTVAL(op);
7322 return (val == 1 || val == 2 || val == 4 || val == 8);
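/* A multiplier of 1, 2, 4 or 8 corresponds to the Thumb-2 scaled-index
   form, e.g. ldr r0, [r1, r2, lsl #2]; the equivalent ASHIFT
   representation handled below allows shift counts of 1 to 3.  */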
7325 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
7326 static int
7327 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
7329 enum rtx_code code = GET_CODE (index);
7331 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
7332 /* Standard coprocessor addressing modes. */
7333 if (TARGET_HARD_FLOAT
7334 && TARGET_VFP
7335 && (mode == SFmode || mode == DFmode))
7336 return (code == CONST_INT && INTVAL (index) < 1024
7337 /* Thumb-2 allows only > -256 index range for its core register
7338 load/stores. Since we allow SF/DF in core registers, we have
7339 to use the intersection between -256~4096 (core) and -1024~1024
7340 (coprocessor). */
7341 && INTVAL (index) > -256
7342 && (INTVAL (index) & 3) == 0);
7344 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7346 /* For DImode assume values will usually live in core regs
7347 and only allow LDRD addressing modes. */
7348 if (!TARGET_LDRD || mode != DImode)
7349 return (code == CONST_INT
7350 && INTVAL (index) < 1024
7351 && INTVAL (index) > -1024
7352 && (INTVAL (index) & 3) == 0);
7355 /* For quad modes, we restrict the constant offset to be slightly less
7356 than what the instruction format permits. We do this because for
7357 quad mode moves, we will actually decompose them into two separate
7358 double-mode reads or writes. INDEX must therefore be a valid
7359 (double-mode) offset and so should INDEX+8. */
7360 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7361 return (code == CONST_INT
7362 && INTVAL (index) < 1016
7363 && INTVAL (index) > -1024
7364 && (INTVAL (index) & 3) == 0);
7366 /* We have no such constraint on double mode offsets, so we permit the
7367 full range of the instruction format. */
7368 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7369 return (code == CONST_INT
7370 && INTVAL (index) < 1024
7371 && INTVAL (index) > -1024
7372 && (INTVAL (index) & 3) == 0);
7374 if (arm_address_register_rtx_p (index, strict_p)
7375 && (GET_MODE_SIZE (mode) <= 4))
7376 return 1;
7378 if (mode == DImode || mode == DFmode)
7380 if (code == CONST_INT)
7382 HOST_WIDE_INT val = INTVAL (index);
7383 /* ??? Can we assume ldrd for thumb2? */
7384 /* Thumb-2 ldrd only has reg+const addressing modes. */
7385 /* ldrd supports offsets of +-1020.
7386 However the ldr fallback does not. */
7387 return val > -256 && val < 256 && (val & 3) == 0;
7389 else
7390 return 0;
7393 if (code == MULT)
7395 rtx xiop0 = XEXP (index, 0);
7396 rtx xiop1 = XEXP (index, 1);
7398 return ((arm_address_register_rtx_p (xiop0, strict_p)
7399 && thumb2_index_mul_operand (xiop1))
7400 || (arm_address_register_rtx_p (xiop1, strict_p)
7401 && thumb2_index_mul_operand (xiop0)));
7403 else if (code == ASHIFT)
7405 rtx op = XEXP (index, 1);
7407 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7408 && CONST_INT_P (op)
7409 && INTVAL (op) > 0
7410 && INTVAL (op) <= 3);
7413 return (code == CONST_INT
7414 && INTVAL (index) < 4096
7415 && INTVAL (index) > -256);
7418 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
7419 static int
7420 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
7422 int regno;
7424 if (!REG_P (x))
7425 return 0;
7427 regno = REGNO (x);
7429 if (strict_p)
7430 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
7432 return (regno <= LAST_LO_REGNUM
7433 || regno > LAST_VIRTUAL_REGISTER
7434 || regno == FRAME_POINTER_REGNUM
7435 || (GET_MODE_SIZE (mode) >= 4
7436 && (regno == STACK_POINTER_REGNUM
7437 || regno >= FIRST_PSEUDO_REGISTER
7438 || x == hard_frame_pointer_rtx
7439 || x == arg_pointer_rtx)));
7442 /* Return nonzero if x is a legitimate index register. This is the case
7443 for any base register that can access a QImode object. */
7444 inline static int
7445 thumb1_index_register_rtx_p (rtx x, int strict_p)
7447 return thumb1_base_register_rtx_p (x, QImode, strict_p);
7450 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
7452 The AP may be eliminated to either the SP or the FP, so we use the
7453 least common denominator, e.g. SImode, and offsets from 0 to 64.
7455 ??? Verify whether the above is the right approach.
7457 ??? Also, the FP may be eliminated to the SP, so perhaps that
7458 needs special handling also.
7460 ??? Look at how the mips16 port solves this problem. It probably uses
7461 better ways to solve some of these problems.
7463 Although it is not incorrect, we don't accept QImode and HImode
7464 addresses based on the frame pointer or arg pointer until the
7465 reload pass starts. This is so that eliminating such addresses
7466 into stack based ones won't produce impossible code. */
7468 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7470 /* ??? Not clear if this is right. Experiment. */
7471 if (GET_MODE_SIZE (mode) < 4
7472 && !(reload_in_progress || reload_completed)
7473 && (reg_mentioned_p (frame_pointer_rtx, x)
7474 || reg_mentioned_p (arg_pointer_rtx, x)
7475 || reg_mentioned_p (virtual_incoming_args_rtx, x)
7476 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
7477 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
7478 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
7479 return 0;
7481 /* Accept any base register. SP only in SImode or larger. */
7482 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
7483 return 1;
7485 /* This is PC relative data before arm_reorg runs. */
7486 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
7487 && GET_CODE (x) == SYMBOL_REF
7488 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
7489 return 1;
7491 /* This is PC relative data after arm_reorg runs. */
7492 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
7493 && reload_completed
7494 && (GET_CODE (x) == LABEL_REF
7495 || (GET_CODE (x) == CONST
7496 && GET_CODE (XEXP (x, 0)) == PLUS
7497 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7498 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7499 return 1;
7501 /* Post-inc indexing only supported for SImode and larger. */
7502 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
7503 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
7504 return 1;
7506 else if (GET_CODE (x) == PLUS)
7508 /* REG+REG address can be any two index registers. */
7509 /* We disallow FRAME+REG addressing since we know that FRAME
7510 will be replaced with STACK, and SP relative addressing only
7511 permits SP+OFFSET. */
7512 if (GET_MODE_SIZE (mode) <= 4
7513 && XEXP (x, 0) != frame_pointer_rtx
7514 && XEXP (x, 1) != frame_pointer_rtx
7515 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7516 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
7517 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
7518 return 1;
7520 /* REG+const has 5-7 bit offset for non-SP registers. */
7521 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
7522 || XEXP (x, 0) == arg_pointer_rtx)
7523 && CONST_INT_P (XEXP (x, 1))
7524 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
7525 return 1;
7527 /* REG+const has 10-bit offset for SP, but only SImode and
7528 larger are supported. */
7529 /* ??? Should probably check for DI/DFmode overflow here
7530 just like GO_IF_LEGITIMATE_OFFSET does. */
7531 else if (REG_P (XEXP (x, 0))
7532 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
7533 && GET_MODE_SIZE (mode) >= 4
7534 && CONST_INT_P (XEXP (x, 1))
7535 && INTVAL (XEXP (x, 1)) >= 0
7536 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
7537 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7538 return 1;
7540 else if (REG_P (XEXP (x, 0))
7541 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
7542 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
7543 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
7544 && REGNO (XEXP (x, 0))
7545 <= LAST_VIRTUAL_POINTER_REGISTER))
7546 && GET_MODE_SIZE (mode) >= 4
7547 && CONST_INT_P (XEXP (x, 1))
7548 && (INTVAL (XEXP (x, 1)) & 3) == 0)
7549 return 1;
7552 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7553 && GET_MODE_SIZE (mode) == 4
7554 && GET_CODE (x) == SYMBOL_REF
7555 && CONSTANT_POOL_ADDRESS_P (x)
7556 && ! (flag_pic
7557 && symbol_mentioned_p (get_pool_constant (x))
7558 && ! pcrel_constant_p (get_pool_constant (x))))
7559 return 1;
7561 return 0;
7564 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
7565 instruction of mode MODE. */
7567 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
7569 switch (GET_MODE_SIZE (mode))
7571 case 1:
7572 return val >= 0 && val < 32;
7574 case 2:
7575 return val >= 0 && val < 64 && (val & 1) == 0;
7577 default:
7578 return (val >= 0
7579 && (val + GET_MODE_SIZE (mode)) <= 128
7580 && (val & 3) == 0);
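/* Worked examples of the ranges above: a QImode access accepts byte
   offsets 0..31; an HImode access accepts even offsets 0..62; an SImode
   access accepts word-aligned offsets 0..124; and a DImode access is
   limited to 0..120 so that its second word (VAL + 4) still fits.
   These match the 5-bit scaled immediate fields of the 16-bit Thumb
   load/store encodings.  */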
7584 bool
7585 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
7587 if (TARGET_ARM)
7588 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
7589 else if (TARGET_THUMB2)
7590 return thumb2_legitimate_address_p (mode, x, strict_p);
7591 else /* if (TARGET_THUMB1) */
7592 return thumb1_legitimate_address_p (mode, x, strict_p);
7595 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
7597 Given an rtx X being reloaded into a reg required to be
7598 in class CLASS, return the class of reg to actually use.
7599 In general this is just CLASS, but for the Thumb core registers and
7600 immediate constants we prefer a LO_REGS class or a subset. */
7602 static reg_class_t
7603 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
7605 if (TARGET_32BIT)
7606 return rclass;
7607 else
7609 if (rclass == GENERAL_REGS)
7610 return LO_REGS;
7611 else
7612 return rclass;
7616 /* Build the SYMBOL_REF for __tls_get_addr. */
7618 static GTY(()) rtx tls_get_addr_libfunc;
7620 static rtx
7621 get_tls_get_addr (void)
7623 if (!tls_get_addr_libfunc)
7624 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
7625 return tls_get_addr_libfunc;
7629 arm_load_tp (rtx target)
7631 if (!target)
7632 target = gen_reg_rtx (SImode);
7634 if (TARGET_HARD_TP)
7636 /* Can return in any reg. */
7637 emit_insn (gen_load_tp_hard (target));
7639 else
7641 /* Always returned in r0. Immediately copy the result into a pseudo,
7642 otherwise other uses of r0 (e.g. setting up function arguments) may
7643 clobber the value. */
7645 rtx tmp;
7647 emit_insn (gen_load_tp_soft ());
7649 tmp = gen_rtx_REG (SImode, R0_REGNUM);
7650 emit_move_insn (target, tmp);
7652 return target;
7655 static rtx
7656 load_tls_operand (rtx x, rtx reg)
7658 rtx tmp;
7660 if (reg == NULL_RTX)
7661 reg = gen_reg_rtx (SImode);
7663 tmp = gen_rtx_CONST (SImode, x);
7665 emit_move_insn (reg, tmp);
7667 return reg;
7670 static rtx
7671 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
7673 rtx insns, label, labelno, sum;
7675 gcc_assert (reloc != TLS_DESCSEQ);
7676 start_sequence ();
7678 labelno = GEN_INT (pic_labelno++);
7679 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7680 label = gen_rtx_CONST (VOIDmode, label);
7682 sum = gen_rtx_UNSPEC (Pmode,
7683 gen_rtvec (4, x, GEN_INT (reloc), label,
7684 GEN_INT (TARGET_ARM ? 8 : 4)),
7685 UNSPEC_TLS);
7686 reg = load_tls_operand (sum, reg);
7688 if (TARGET_ARM)
7689 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
7690 else
7691 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7693 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
7694 LCT_PURE, /* LCT_CONST? */
7695 Pmode, 1, reg, Pmode);
7697 insns = get_insns ();
7698 end_sequence ();
7700 return insns;
7703 static rtx
7704 arm_tls_descseq_addr (rtx x, rtx reg)
7706 rtx labelno = GEN_INT (pic_labelno++);
7707 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7708 rtx sum = gen_rtx_UNSPEC (Pmode,
7709 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
7710 gen_rtx_CONST (VOIDmode, label),
7711 GEN_INT (!TARGET_ARM)),
7712 UNSPEC_TLS);
7713 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
7715 emit_insn (gen_tlscall (x, labelno));
7716 if (!reg)
7717 reg = gen_reg_rtx (SImode);
7718 else
7719 gcc_assert (REGNO (reg) != R0_REGNUM);
7721 emit_move_insn (reg, reg0);
7723 return reg;
7727 legitimize_tls_address (rtx x, rtx reg)
7729 rtx dest, tp, label, labelno, sum, insns, ret, eqv, addend;
7730 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
7732 switch (model)
7734 case TLS_MODEL_GLOBAL_DYNAMIC:
7735 if (TARGET_GNU2_TLS)
7737 reg = arm_tls_descseq_addr (x, reg);
7739 tp = arm_load_tp (NULL_RTX);
7741 dest = gen_rtx_PLUS (Pmode, tp, reg);
7743 else
7745 /* Original scheme */
7746 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
7747 dest = gen_reg_rtx (Pmode);
7748 emit_libcall_block (insns, dest, ret, x);
7750 return dest;
7752 case TLS_MODEL_LOCAL_DYNAMIC:
7753 if (TARGET_GNU2_TLS)
7755 reg = arm_tls_descseq_addr (x, reg);
7757 tp = arm_load_tp (NULL_RTX);
7759 dest = gen_rtx_PLUS (Pmode, tp, reg);
7761 else
7763 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
7765 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
7766 share the LDM result with other LD model accesses. */
7767 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
7768 UNSPEC_TLS);
7769 dest = gen_reg_rtx (Pmode);
7770 emit_libcall_block (insns, dest, ret, eqv);
7772 /* Load the addend. */
7773 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
7774 GEN_INT (TLS_LDO32)),
7775 UNSPEC_TLS);
7776 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
7777 dest = gen_rtx_PLUS (Pmode, dest, addend);
7779 return dest;
7781 case TLS_MODEL_INITIAL_EXEC:
7782 labelno = GEN_INT (pic_labelno++);
7783 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7784 label = gen_rtx_CONST (VOIDmode, label);
7785 sum = gen_rtx_UNSPEC (Pmode,
7786 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
7787 GEN_INT (TARGET_ARM ? 8 : 4)),
7788 UNSPEC_TLS);
7789 reg = load_tls_operand (sum, reg);
7791 if (TARGET_ARM)
7792 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
7793 else if (TARGET_THUMB2)
7794 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
7795 else
7797 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
7798 emit_move_insn (reg, gen_const_mem (SImode, reg));
7801 tp = arm_load_tp (NULL_RTX);
7803 return gen_rtx_PLUS (Pmode, tp, reg);
7805 case TLS_MODEL_LOCAL_EXEC:
7806 tp = arm_load_tp (NULL_RTX);
7808 reg = gen_rtx_UNSPEC (Pmode,
7809 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
7810 UNSPEC_TLS);
7811 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
7813 return gen_rtx_PLUS (Pmode, tp, reg);
7815 default:
7816 abort ();
7820 /* Try machine-dependent ways of modifying an illegitimate address
7821 to be legitimate. If we find one, return the new, valid address. */
7823 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7825 if (arm_tls_referenced_p (x))
7827 rtx addend = NULL;
7829 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
7831 addend = XEXP (XEXP (x, 0), 1);
7832 x = XEXP (XEXP (x, 0), 0);
7835 if (GET_CODE (x) != SYMBOL_REF)
7836 return x;
7838 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
7840 x = legitimize_tls_address (x, NULL_RTX);
7842 if (addend)
7844 x = gen_rtx_PLUS (SImode, x, addend);
7845 orig_x = x;
7847 else
7848 return x;
7851 if (!TARGET_ARM)
7853 /* TODO: legitimize_address for Thumb2. */
7854 if (TARGET_THUMB2)
7855 return x;
7856 return thumb_legitimize_address (x, orig_x, mode);
7859 if (GET_CODE (x) == PLUS)
7861 rtx xop0 = XEXP (x, 0);
7862 rtx xop1 = XEXP (x, 1);
7864 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
7865 xop0 = force_reg (SImode, xop0);
7867 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
7868 && !symbol_mentioned_p (xop1))
7869 xop1 = force_reg (SImode, xop1);
7871 if (ARM_BASE_REGISTER_RTX_P (xop0)
7872 && CONST_INT_P (xop1))
7874 HOST_WIDE_INT n, low_n;
7875 rtx base_reg, val;
7876 n = INTVAL (xop1);
7878 /* VFP addressing modes actually allow greater offsets, but for
7879 now we just stick with the lowest common denominator. */
7880 if (mode == DImode
7881 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
7883 low_n = n & 0x0f;
7884 n &= ~0x0f;
7885 if (low_n > 4)
7887 n += 16;
7888 low_n -= 16;
7891 else
7893 low_n = ((mode) == TImode ? 0
7894 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
7895 n -= low_n;
7898 base_reg = gen_reg_rtx (SImode);
7899 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
7900 emit_move_insn (base_reg, val);
7901 x = plus_constant (Pmode, base_reg, low_n);
7903 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7904 x = gen_rtx_PLUS (SImode, xop0, xop1);
7907 /* XXX We don't allow MINUS any more -- see comment in
7908 arm_legitimate_address_outer_p (). */
7909 else if (GET_CODE (x) == MINUS)
7911 rtx xop0 = XEXP (x, 0);
7912 rtx xop1 = XEXP (x, 1);
7914 if (CONSTANT_P (xop0))
7915 xop0 = force_reg (SImode, xop0);
7917 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
7918 xop1 = force_reg (SImode, xop1);
7920 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
7921 x = gen_rtx_MINUS (SImode, xop0, xop1);
7924 /* Make sure to take full advantage of the pre-indexed addressing mode
7925 with absolute addresses, which often allows the base register to be
7926 factored out across multiple adjacent memory references and might
7927 even allow the minipool to be avoided entirely. */
7928 else if (CONST_INT_P (x) && optimize > 0)
7930 unsigned int bits;
7931 HOST_WIDE_INT mask, base, index;
7932 rtx base_reg;
7934 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
7935 use an 8-bit index. So let's use a 12-bit index for SImode only and
7936 hope that arm_gen_constant will enable ldrb to use more bits. */
7937 bits = (mode == SImode) ? 12 : 8;
7938 mask = (1 << bits) - 1;
7939 base = INTVAL (x) & ~mask;
7940 index = INTVAL (x) & mask;
7941 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
7943 /* It'll most probably be more efficient to generate the base
7944 with more bits set and use a negative index instead. */
7945 base |= mask;
7946 index -= mask;
7948 base_reg = force_reg (SImode, GEN_INT (base));
7949 x = plus_constant (Pmode, base_reg, index);
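/* For instance, with MODE == SImode the constant address 0x12345 splits
   into a base of 0x12000 (forced into a register) plus a 12-bit index
   of 0x345, so the load itself can use a plain immediate offset.  When
   the masked base would need many set bits to materialise, the code
   above instead rounds the base up and uses a negative index.  */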
7952 if (flag_pic)
7954 /* We need to find and carefully transform any SYMBOL and LABEL
7955 references, so go back to the original address expression. */
7956 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
7958 if (new_x != orig_x)
7959 x = new_x;
7962 return x;
7966 /* Try machine-dependent ways of modifying an illegitimate Thumb address
7967 to be legitimate. If we find one, return the new, valid address. */
7969 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
7971 if (GET_CODE (x) == PLUS
7972 && CONST_INT_P (XEXP (x, 1))
7973 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
7974 || INTVAL (XEXP (x, 1)) < 0))
7976 rtx xop0 = XEXP (x, 0);
7977 rtx xop1 = XEXP (x, 1);
7978 HOST_WIDE_INT offset = INTVAL (xop1);
7980 /* Try and fold the offset into a biasing of the base register and
7981 then offsetting that. Don't do this when optimizing for space
7982 since it can cause too many CSEs. */
7983 if (optimize_size && offset >= 0
7984 && offset < 256 + 31 * GET_MODE_SIZE (mode))
7986 HOST_WIDE_INT delta;
7988 if (offset >= 256)
7989 delta = offset - (256 - GET_MODE_SIZE (mode));
7990 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
7991 delta = 31 * GET_MODE_SIZE (mode);
7992 else
7993 delta = offset & (~31 * GET_MODE_SIZE (mode));
7995 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
7996 NULL_RTX);
7997 x = plus_constant (Pmode, xop0, delta);
7999 else if (offset < 0 && offset > -256)
8000 /* Small negative offsets are best done with a subtract before the
8001 dereference; forcing these into a register normally takes two
8002 instructions. */
8003 x = force_operand (x, NULL_RTX);
8004 else
8006 /* For the remaining cases, force the constant into a register. */
8007 xop1 = force_reg (SImode, xop1);
8008 x = gen_rtx_PLUS (SImode, xop0, xop1);
8011 else if (GET_CODE (x) == PLUS
8012 && s_register_operand (XEXP (x, 1), SImode)
8013 && !s_register_operand (XEXP (x, 0), SImode))
8015 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8017 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
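/* A worked example of the offset-folding branch above, assuming
   MODE == SImode when optimizing for size: an offset of 300 is rebased
   as (base + 252) + 48, so the residual offset of 48 fits the native
   word-aligned load/store offset range and the 252 bias can typically
   be added with a single 8-bit immediate add.  */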
8020 if (flag_pic)
8022 /* We need to find and carefully transform any SYMBOL and LABEL
8023 references, so go back to the original address expression. */
8024 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8026 if (new_x != orig_x)
8027 x = new_x;
8030 return x;
8033 /* Return TRUE if X contains any TLS symbol references. */
8035 bool
8036 arm_tls_referenced_p (rtx x)
8038 if (! TARGET_HAVE_TLS)
8039 return false;
8041 subrtx_iterator::array_type array;
8042 FOR_EACH_SUBRTX (iter, array, x, ALL)
8044 const_rtx x = *iter;
8045 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8046 return true;
8048 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8049 TLS offsets, not real symbol references. */
8050 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8051 iter.skip_subrtxes ();
8053 return false;
8056 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8058 On the ARM, allow any integer (invalid ones are removed later by insn
8059 patterns), nice doubles and symbol_refs which refer to the function's
8060 constant pool XXX.
8062 When generating pic allow anything. */
8064 static bool
8065 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8067 return flag_pic || !label_mentioned_p (x);
8070 static bool
8071 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8073 return (CONST_INT_P (x)
8074 || CONST_DOUBLE_P (x)
8075 || CONSTANT_ADDRESS_P (x)
8076 || flag_pic);
8079 static bool
8080 arm_legitimate_constant_p (machine_mode mode, rtx x)
8082 return (!arm_cannot_force_const_mem (mode, x)
8083 && (TARGET_32BIT
8084 ? arm_legitimate_constant_p_1 (mode, x)
8085 : thumb_legitimate_constant_p (mode, x)));
8088 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8090 static bool
8091 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8093 rtx base, offset;
8095 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8097 split_const (x, &base, &offset);
8098 if (GET_CODE (base) == SYMBOL_REF
8099 && !offset_within_block_p (base, INTVAL (offset)))
8100 return true;
8102 return arm_tls_referenced_p (x);
8105 #define REG_OR_SUBREG_REG(X) \
8106 (REG_P (X) \
8107 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8109 #define REG_OR_SUBREG_RTX(X) \
8110 (REG_P (X) ? (X) : SUBREG_REG (X))
8112 static inline int
8113 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8115 machine_mode mode = GET_MODE (x);
8116 int total, words;
8118 switch (code)
8120 case ASHIFT:
8121 case ASHIFTRT:
8122 case LSHIFTRT:
8123 case ROTATERT:
8124 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8126 case PLUS:
8127 case MINUS:
8128 case COMPARE:
8129 case NEG:
8130 case NOT:
8131 return COSTS_N_INSNS (1);
8133 case MULT:
8134 if (CONST_INT_P (XEXP (x, 1)))
8136 int cycles = 0;
8137 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8139 while (i)
8141 i >>= 2;
8142 cycles++;
8144 return COSTS_N_INSNS (2) + cycles;
8146 return COSTS_N_INSNS (1) + 16;
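/* The loop above charges one unit per two significant bits of a constant
   multiplier.  For example, multiplying by 100 (0x64) shifts
   0x64 -> 0x19 -> 0x6 -> 0x1 -> 0, i.e. four iterations, giving a cost
   of COSTS_N_INSNS (2) + 4.  */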
8148 case SET:
8149 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8150 the mode. */
8151 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8152 return (COSTS_N_INSNS (words)
8153 + 4 * ((MEM_P (SET_SRC (x)))
8154 + MEM_P (SET_DEST (x))));
8156 case CONST_INT:
8157 if (outer == SET)
8159 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8160 return 0;
8161 if (thumb_shiftable_const (INTVAL (x)))
8162 return COSTS_N_INSNS (2);
8163 return COSTS_N_INSNS (3);
8165 else if ((outer == PLUS || outer == COMPARE)
8166 && INTVAL (x) < 256 && INTVAL (x) > -256)
8167 return 0;
8168 else if ((outer == IOR || outer == XOR || outer == AND)
8169 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8170 return COSTS_N_INSNS (1);
8171 else if (outer == AND)
8173 int i;
8174 /* This duplicates the tests in the andsi3 expander. */
8175 for (i = 9; i <= 31; i++)
8176 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8177 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8178 return COSTS_N_INSNS (2);
8180 else if (outer == ASHIFT || outer == ASHIFTRT
8181 || outer == LSHIFTRT)
8182 return 0;
8183 return COSTS_N_INSNS (2);
8185 case CONST:
8186 case CONST_DOUBLE:
8187 case LABEL_REF:
8188 case SYMBOL_REF:
8189 return COSTS_N_INSNS (3);
8191 case UDIV:
8192 case UMOD:
8193 case DIV:
8194 case MOD:
8195 return 100;
8197 case TRUNCATE:
8198 return 99;
8200 case AND:
8201 case XOR:
8202 case IOR:
8203 /* XXX guess. */
8204 return 8;
8206 case MEM:
8207 /* XXX another guess. */
8208 /* Memory costs quite a lot for the first word, but subsequent words
8209 load at the equivalent of a single insn each. */
8210 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8211 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8212 ? 4 : 0));
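/* Worked examples of the formula above (UNITS_PER_WORD is 4 on ARM):
   an SImode load costs 10, a DImode load costs 10 + 4 = 14, and either
   costs 4 more if it is a reference to the constant pool.  */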
8214 case IF_THEN_ELSE:
8215 /* XXX a guess. */
8216 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8217 return 14;
8218 return 2;
8220 case SIGN_EXTEND:
8221 case ZERO_EXTEND:
8222 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8223 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8225 if (mode == SImode)
8226 return total;
8228 if (arm_arch6)
8229 return total + COSTS_N_INSNS (1);
8231 /* Assume a two-shift sequence. Increase the cost slightly so
8232 we prefer actual shifts over an extend operation. */
8233 return total + 1 + COSTS_N_INSNS (2);
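/* For instance, without the v6 extend instructions a QImode -> SImode
   sign extension is typically synthesized as a shift left by 24 followed
   by an arithmetic shift right by 24, which is why the cost above
   assumes a two-shift sequence.  */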
8235 default:
8236 return 99;
8240 static inline bool
8241 arm_rtx_costs_1 (rtx x, enum rtx_code outer, int* total, bool speed)
8243 machine_mode mode = GET_MODE (x);
8244 enum rtx_code subcode;
8245 rtx operand;
8246 enum rtx_code code = GET_CODE (x);
8247 *total = 0;
8249 switch (code)
8251 case MEM:
8252 /* Memory costs quite a lot for the first word, but subsequent words
8253 load at the equivalent of a single insn each. */
8254 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8255 return true;
8257 case DIV:
8258 case MOD:
8259 case UDIV:
8260 case UMOD:
8261 if (TARGET_HARD_FLOAT && mode == SFmode)
8262 *total = COSTS_N_INSNS (2);
8263 else if (TARGET_HARD_FLOAT && mode == DFmode && !TARGET_VFP_SINGLE)
8264 *total = COSTS_N_INSNS (4);
8265 else
8266 *total = COSTS_N_INSNS (20);
8267 return false;
8269 case ROTATE:
8270 if (REG_P (XEXP (x, 1)))
8271 *total = COSTS_N_INSNS (1); /* Need to subtract from 32 */
8272 else if (!CONST_INT_P (XEXP (x, 1)))
8273 *total = rtx_cost (XEXP (x, 1), code, 1, speed);
8275 /* Fall through */
8276 case ROTATERT:
8277 if (mode != SImode)
8279 *total += COSTS_N_INSNS (4);
8280 return true;
8283 /* Fall through */
8284 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
8285 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8286 if (mode == DImode)
8288 *total += COSTS_N_INSNS (3);
8289 return true;
8292 *total += COSTS_N_INSNS (1);
8293 /* Increase the cost of complex shifts because they aren't any faster
8294 and reduce dual-issue opportunities. */
8295 if (arm_tune_cortex_a9
8296 && outer != SET && !CONST_INT_P (XEXP (x, 1)))
8297 ++*total;
8299 return true;
8301 case MINUS:
8302 if (mode == DImode)
8304 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8305 if (CONST_INT_P (XEXP (x, 0))
8306 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8308 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8309 return true;
8312 if (CONST_INT_P (XEXP (x, 1))
8313 && const_ok_for_arm (INTVAL (XEXP (x, 1))))
8315 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8316 return true;
8319 return false;
8322 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8324 if (TARGET_HARD_FLOAT
8325 && (mode == SFmode
8326 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8328 *total = COSTS_N_INSNS (1);
8329 if (CONST_DOUBLE_P (XEXP (x, 0))
8330 && arm_const_double_rtx (XEXP (x, 0)))
8332 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8333 return true;
8336 if (CONST_DOUBLE_P (XEXP (x, 1))
8337 && arm_const_double_rtx (XEXP (x, 1)))
8339 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8340 return true;
8343 return false;
8345 *total = COSTS_N_INSNS (20);
8346 return false;
8349 *total = COSTS_N_INSNS (1);
8350 if (CONST_INT_P (XEXP (x, 0))
8351 && const_ok_for_arm (INTVAL (XEXP (x, 0))))
8353 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8354 return true;
8357 subcode = GET_CODE (XEXP (x, 1));
8358 if (subcode == ASHIFT || subcode == ASHIFTRT
8359 || subcode == LSHIFTRT
8360 || subcode == ROTATE || subcode == ROTATERT)
8362 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8363 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8364 return true;
8367 /* A shift as a part of RSB costs no more than RSB itself. */
8368 if (GET_CODE (XEXP (x, 0)) == MULT
8369 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8371 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed);
8372 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8373 return true;
8376 if (subcode == MULT
8377 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode))
8379 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8380 *total += rtx_cost (XEXP (XEXP (x, 1), 0), subcode, 0, speed);
8381 return true;
8384 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMPARE
8385 || GET_RTX_CLASS (GET_CODE (XEXP (x, 1))) == RTX_COMM_COMPARE)
8387 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8388 if (REG_P (XEXP (XEXP (x, 1), 0))
8389 && REGNO (XEXP (XEXP (x, 1), 0)) != CC_REGNUM)
8390 *total += COSTS_N_INSNS (1);
8392 return true;
8395 /* Fall through */
8397 case PLUS:
8398 if (code == PLUS && arm_arch6 && mode == SImode
8399 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
8400 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
8402 *total = COSTS_N_INSNS (1);
8403 *total += rtx_cost (XEXP (XEXP (x, 0), 0), GET_CODE (XEXP (x, 0)),
8404 0, speed);
8405 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8406 return true;
8409 /* MLA: All arguments must be registers. We filter out
8410 multiplication by a power of two, so that we fall through to
8411 the code below. */
8412 if (GET_CODE (XEXP (x, 0)) == MULT
8413 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8415 /* The cost comes from the cost of the multiply. */
8416 return false;
8419 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8421 if (TARGET_HARD_FLOAT
8422 && (mode == SFmode
8423 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8425 *total = COSTS_N_INSNS (1);
8426 if (CONST_DOUBLE_P (XEXP (x, 1))
8427 && arm_const_double_rtx (XEXP (x, 1)))
8429 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8430 return true;
8433 return false;
8436 *total = COSTS_N_INSNS (20);
8437 return false;
8440 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
8441 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
8443 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 1), code, 1, speed);
8444 if (REG_P (XEXP (XEXP (x, 0), 0))
8445 && REGNO (XEXP (XEXP (x, 0), 0)) != CC_REGNUM)
8446 *total += COSTS_N_INSNS (1);
8447 return true;
8450 /* Fall through */
8452 case AND: case XOR: case IOR:
8454 /* Normally the frame registers will be split into reg+const during
8455 reload, so it is a bad idea to combine them with other instructions,
8456 since then they might not be moved outside of loops. As a compromise
8457 we allow integration with ops that have a constant as their second
8458 operand. */
8459 if (REG_OR_SUBREG_REG (XEXP (x, 0))
8460 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
8461 && !CONST_INT_P (XEXP (x, 1)))
8462 *total = COSTS_N_INSNS (1);
8464 if (mode == DImode)
8466 *total += COSTS_N_INSNS (2);
8467 if (CONST_INT_P (XEXP (x, 1))
8468 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8470 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8471 return true;
8474 return false;
8477 *total += COSTS_N_INSNS (1);
8478 if (CONST_INT_P (XEXP (x, 1))
8479 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8481 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8482 return true;
8484 subcode = GET_CODE (XEXP (x, 0));
8485 if (subcode == ASHIFT || subcode == ASHIFTRT
8486 || subcode == LSHIFTRT
8487 || subcode == ROTATE || subcode == ROTATERT)
8489 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8490 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8491 return true;
8494 if (subcode == MULT
8495 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8497 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8498 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8499 return true;
8502 if (subcode == UMIN || subcode == UMAX
8503 || subcode == SMIN || subcode == SMAX)
8505 *total = COSTS_N_INSNS (3);
8506 return true;
8509 return false;
8511 case MULT:
8512 /* This should have been handled by the CPU specific routines. */
8513 gcc_unreachable ();
8515 case TRUNCATE:
8516 if (arm_arch3m && mode == SImode
8517 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
8518 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
8519 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
8520 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
8521 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
8522 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
8524 *total = rtx_cost (XEXP (XEXP (x, 0), 0), LSHIFTRT, 0, speed);
8525 return true;
8527 *total = COSTS_N_INSNS (2); /* Plus the cost of the MULT */
8528 return false;
8530 case NEG:
8531 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8533 if (TARGET_HARD_FLOAT
8534 && (mode == SFmode
8535 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8537 *total = COSTS_N_INSNS (1);
8538 return false;
8540 *total = COSTS_N_INSNS (2);
8541 return false;
8544 /* Fall through */
8545 case NOT:
8546 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
8547 if (mode == SImode && code == NOT)
8549 subcode = GET_CODE (XEXP (x, 0));
8550 if (subcode == ASHIFT || subcode == ASHIFTRT
8551 || subcode == LSHIFTRT
8552 || subcode == ROTATE || subcode == ROTATERT
8553 || (subcode == MULT
8554 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode)))
8556 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8557 /* Register shifts cost an extra cycle. */
8558 if (!CONST_INT_P (XEXP (XEXP (x, 0), 1)))
8559 *total += COSTS_N_INSNS (1) + rtx_cost (XEXP (XEXP (x, 0), 1),
8560 subcode, 1, speed);
8561 return true;
8565 return false;
8567 case IF_THEN_ELSE:
8568 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8570 *total = COSTS_N_INSNS (4);
8571 return true;
8574 operand = XEXP (x, 0);
8576 if (!((GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMPARE
8577 || GET_RTX_CLASS (GET_CODE (operand)) == RTX_COMM_COMPARE)
8578 && REG_P (XEXP (operand, 0))
8579 && REGNO (XEXP (operand, 0)) == CC_REGNUM))
8580 *total += COSTS_N_INSNS (1);
8581 *total += (rtx_cost (XEXP (x, 1), code, 1, speed)
8582 + rtx_cost (XEXP (x, 2), code, 2, speed));
8583 return true;
8585 case NE:
8586 if (mode == SImode && XEXP (x, 1) == const0_rtx)
8588 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8589 return true;
8591 goto scc_insn;
8593 case GE:
8594 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8595 && mode == SImode && XEXP (x, 1) == const0_rtx)
8597 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8598 return true;
8600 goto scc_insn;
8602 case LT:
8603 if ((!REG_P (XEXP (x, 0)) || REGNO (XEXP (x, 0)) != CC_REGNUM)
8604 && mode == SImode && XEXP (x, 1) == const0_rtx)
8606 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8607 return true;
8609 goto scc_insn;
8611 case EQ:
8612 case GT:
8613 case LE:
8614 case GEU:
8615 case LTU:
8616 case GTU:
8617 case LEU:
8618 case UNORDERED:
8619 case ORDERED:
8620 case UNEQ:
8621 case UNGE:
8622 case UNLT:
8623 case UNGT:
8624 case UNLE:
8625 scc_insn:
8626 /* SCC insns. If the comparison has already been performed, they
8627 cost 2 instructions. Otherwise they need an additional
8628 comparison before them. */
8629 *total = COSTS_N_INSNS (2);
8630 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8632 return true;
8635 /* Fall through */
8636 case COMPARE:
8637 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM)
8639 *total = 0;
8640 return true;
8643 *total += COSTS_N_INSNS (1);
8644 if (CONST_INT_P (XEXP (x, 1))
8645 && const_ok_for_op (INTVAL (XEXP (x, 1)), code))
8647 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8648 return true;
8651 subcode = GET_CODE (XEXP (x, 0));
8652 if (subcode == ASHIFT || subcode == ASHIFTRT
8653 || subcode == LSHIFTRT
8654 || subcode == ROTATE || subcode == ROTATERT)
8656 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8657 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8658 return true;
8661 if (subcode == MULT
8662 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8664 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8665 *total += rtx_cost (XEXP (XEXP (x, 0), 0), subcode, 0, speed);
8666 return true;
8669 return false;
8671 case UMIN:
8672 case UMAX:
8673 case SMIN:
8674 case SMAX:
8675 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, speed);
8676 if (!CONST_INT_P (XEXP (x, 1))
8677 || !const_ok_for_arm (INTVAL (XEXP (x, 1))))
8678 *total += rtx_cost (XEXP (x, 1), code, 1, speed);
8679 return true;
8681 case ABS:
8682 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
8684 if (TARGET_HARD_FLOAT
8685 && (mode == SFmode
8686 || (mode == DFmode && !TARGET_VFP_SINGLE)))
8688 *total = COSTS_N_INSNS (1);
8689 return false;
8691 *total = COSTS_N_INSNS (20);
8692 return false;
8694 *total = COSTS_N_INSNS (1);
8695 if (mode == DImode)
8696 *total += COSTS_N_INSNS (3);
8697 return false;
8699 case SIGN_EXTEND:
8700 case ZERO_EXTEND:
8701 *total = 0;
8702 if (GET_MODE_CLASS (mode) == MODE_INT)
8704 rtx op = XEXP (x, 0);
8705 machine_mode opmode = GET_MODE (op);
8707 if (mode == DImode)
8708 *total += COSTS_N_INSNS (1);
8710 if (opmode != SImode)
8712 if (MEM_P (op))
8714 /* If !arm_arch4, we use one of the extendhisi2_mem
8715 or movhi_bytes patterns for HImode. For a QImode
8716 sign extension, we first zero-extend from memory
8717 and then perform a shift sequence. */
8718 if (!arm_arch4 && (opmode != QImode || code == SIGN_EXTEND))
8719 *total += COSTS_N_INSNS (2);
8721 else if (arm_arch6)
8722 *total += COSTS_N_INSNS (1);
8724 /* We don't have the necessary insn, so we need to perform some
8725 other operation. */
8726 else if (TARGET_ARM && code == ZERO_EXTEND && mode == QImode)
8727 /* An and with constant 255. */
8728 *total += COSTS_N_INSNS (1);
8729 else
8730 /* A shift sequence. Increase costs slightly to avoid
8731 combining two shifts into an extend operation. */
8732 *total += COSTS_N_INSNS (2) + 1;
8735 return false;
8738 switch (GET_MODE (XEXP (x, 0)))
8740 case V8QImode:
8741 case V4HImode:
8742 case V2SImode:
8743 case V4QImode:
8744 case V2HImode:
8745 *total = COSTS_N_INSNS (1);
8746 return false;
8748 default:
8749 gcc_unreachable ();
8751 gcc_unreachable ();
8753 case ZERO_EXTRACT:
8754 case SIGN_EXTRACT:
8755 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, speed);
8756 return true;
8758 case CONST_INT:
8759 if (const_ok_for_arm (INTVAL (x))
8760 || const_ok_for_arm (~INTVAL (x)))
8761 *total = COSTS_N_INSNS (1);
8762 else
8763 *total = COSTS_N_INSNS (arm_gen_constant (SET, mode, NULL_RTX,
8764 INTVAL (x), NULL_RTX,
8765 NULL_RTX, 0, 0));
8766 return true;
8768 case CONST:
8769 case LABEL_REF:
8770 case SYMBOL_REF:
8771 *total = COSTS_N_INSNS (3);
8772 return true;
8774 case HIGH:
8775 *total = COSTS_N_INSNS (1);
8776 return true;
8778 case LO_SUM:
8779 *total = COSTS_N_INSNS (1);
8780 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
8781 return true;
8783 case CONST_DOUBLE:
8784 if (TARGET_HARD_FLOAT && vfp3_const_double_rtx (x)
8785 && (mode == SFmode || !TARGET_VFP_SINGLE))
8786 *total = COSTS_N_INSNS (1);
8787 else
8788 *total = COSTS_N_INSNS (4);
8789 return true;
8791 case SET:
8792 /* The vec_extract patterns accept memory operands that require an
8793 address reload. Account for the cost of that reload to give the
8794 auto-inc-dec pass an incentive to try to replace them. */
8795 if (TARGET_NEON && MEM_P (SET_DEST (x))
8796 && GET_CODE (SET_SRC (x)) == VEC_SELECT)
8798 *total = rtx_cost (SET_DEST (x), code, 0, speed);
8799 if (!neon_vector_mem_operand (SET_DEST (x), 2, true))
8800 *total += COSTS_N_INSNS (1);
8801 return true;
8803 /* Likewise for the vec_set patterns. */
8804 if (TARGET_NEON && GET_CODE (SET_SRC (x)) == VEC_MERGE
8805 && GET_CODE (XEXP (SET_SRC (x), 0)) == VEC_DUPLICATE
8806 && MEM_P (XEXP (XEXP (SET_SRC (x), 0), 0)))
8808 rtx mem = XEXP (XEXP (SET_SRC (x), 0), 0);
8809 *total = rtx_cost (mem, code, 0, speed);
8810 if (!neon_vector_mem_operand (mem, 2, true))
8811 *total += COSTS_N_INSNS (1);
8812 return true;
8814 return false;
8816 case UNSPEC:
8817 /* We cost this as high as a memory access so that it can be
8818 hoisted out of loops. */
8819 if (XINT (x, 1) == UNSPEC_PIC_UNIFIED)
8821 *total = COSTS_N_INSNS (2 + ARM_NUM_REGS (mode));
8823 return true;
8825 case CONST_VECTOR:
8826 if (TARGET_NEON
8827 && TARGET_HARD_FLOAT
8828 && outer == SET
8829 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
8830 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
8831 *total = COSTS_N_INSNS (1);
8832 else
8833 *total = COSTS_N_INSNS (4);
8834 return true;
8836 default:
8837 *total = COSTS_N_INSNS (4);
8838 return false;
8842 /* Estimates the size cost of thumb1 instructions.
8843 For now most of the code is copied from thumb1_rtx_costs. We need more
8844 fine-grained tuning when we have more related test cases. */
8845 static inline int
8846 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8848 machine_mode mode = GET_MODE (x);
8849 int words;
8851 switch (code)
8853 case ASHIFT:
8854 case ASHIFTRT:
8855 case LSHIFTRT:
8856 case ROTATERT:
8857 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8859 case PLUS:
8860 case MINUS:
8861 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/
8862 shiftsub1 operations generated by RTL expansion, especially during
8863 the expansion of multiplication. */
8864 if ((GET_CODE (XEXP (x, 0)) == MULT
8865 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8866 || (GET_CODE (XEXP (x, 1)) == MULT
8867 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8868 return COSTS_N_INSNS (2);
8869 /* Deliberately fall through for a normal RTX. */
8870 case COMPARE:
8871 case NEG:
8872 case NOT:
8873 return COSTS_N_INSNS (1);
8875 case MULT:
8876 if (CONST_INT_P (XEXP (x, 1)))
8878 /* The Thumb-1 mul instruction can't operate on a constant; we must
8879 load it into a register first. */
8880 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8881 /* For targets with a very small, high-latency multiply
8882 unit, we prefer to synthesize the mult with up to 5 instructions,
8883 giving a good balance between size and performance. */
8884 if (arm_arch6m && arm_m_profile_small_mul)
8885 return COSTS_N_INSNS (5);
8886 else
8887 return COSTS_N_INSNS (1) + const_size;
8889 return COSTS_N_INSNS (1);
8891 case SET:
8892 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8893 the mode. */
8894 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8895 return COSTS_N_INSNS (words)
8896 + COSTS_N_INSNS (1) * (satisfies_constraint_J (SET_SRC (x))
8897 || satisfies_constraint_K (SET_SRC (x))
8898 /* thumb1_movdi_insn. */
8899 || ((words > 1) && MEM_P (SET_SRC (x))));
8901 case CONST_INT:
8902 if (outer == SET)
8904 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
8905 return COSTS_N_INSNS (1);
8906 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
8907 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
8908 return COSTS_N_INSNS (2);
8909 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
8910 if (thumb_shiftable_const (INTVAL (x)))
8911 return COSTS_N_INSNS (2);
8912 return COSTS_N_INSNS (3);
8914 else if ((outer == PLUS || outer == COMPARE)
8915 && INTVAL (x) < 256 && INTVAL (x) > -256)
8916 return 0;
8917 else if ((outer == IOR || outer == XOR || outer == AND)
8918 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8919 return COSTS_N_INSNS (1);
8920 else if (outer == AND)
8922 int i;
8923 /* This duplicates the tests in the andsi3 expander. */
8924 for (i = 9; i <= 31; i++)
8925 if ((((HOST_WIDE_INT) 1) << i) - 1 == INTVAL (x)
8926 || (((HOST_WIDE_INT) 1) << i) - 1 == ~INTVAL (x))
8927 return COSTS_N_INSNS (2);
8929 else if (outer == ASHIFT || outer == ASHIFTRT
8930 || outer == LSHIFTRT)
8931 return 0;
8932 return COSTS_N_INSNS (2);
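/* For example, when setting a register, the constant 510 (255 << 1) is
   not an 8-bit immediate but is a shiftable constant, so it costs
   COSTS_N_INSNS (2) (e.g. a move of 255 followed by a shift), whereas an
   arbitrary constant such as 0x12345 falls through to COSTS_N_INSNS (3).  */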
8934 case CONST:
8935 case CONST_DOUBLE:
8936 case LABEL_REF:
8937 case SYMBOL_REF:
8938 return COSTS_N_INSNS (3);
8940 case UDIV:
8941 case UMOD:
8942 case DIV:
8943 case MOD:
8944 return 100;
8946 case TRUNCATE:
8947 return 99;
8949 case AND:
8950 case XOR:
8951 case IOR:
8952 return COSTS_N_INSNS (1);
8954 case MEM:
8955 return (COSTS_N_INSNS (1)
8956 + COSTS_N_INSNS (1)
8957 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8958 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8959 ? COSTS_N_INSNS (1) : 0));
8961 case IF_THEN_ELSE:
8962 /* XXX a guess. */
8963 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8964 return 14;
8965 return 2;
8967 case ZERO_EXTEND:
8968 /* XXX still guessing. */
8969 switch (GET_MODE (XEXP (x, 0)))
8971 case QImode:
8972 return (1 + (mode == DImode ? 4 : 0)
8973 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8975 case HImode:
8976 return (4 + (mode == DImode ? 4 : 0)
8977 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8979 case SImode:
8980 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
8982 default:
8983 return 99;
8986 default:
8987 return 99;
8991 /* RTX costs when optimizing for size. */
8992 static bool
8993 arm_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
8994 int *total)
8996 machine_mode mode = GET_MODE (x);
8997 if (TARGET_THUMB1)
8999 *total = thumb1_size_rtx_costs (x, code, outer_code);
9000 return true;
9003 /* FIXME: This makes no attempt to prefer narrow Thumb-2 instructions. */
9004 switch (code)
9006 case MEM:
9007 /* A memory access costs 1 insn if the mode is small or the address is
9008 a single register; otherwise it costs one insn per word. */
9009 if (REG_P (XEXP (x, 0)))
9010 *total = COSTS_N_INSNS (1);
9011 else if (flag_pic
9012 && GET_CODE (XEXP (x, 0)) == PLUS
9013 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9014 /* This will be split into two instructions.
9015 See arm.md:calculate_pic_address. */
9016 *total = COSTS_N_INSNS (2);
9017 else
9018 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9019 return true;
9021 case DIV:
9022 case MOD:
9023 case UDIV:
9024 case UMOD:
9025 /* Needs a libcall, so it costs about this. */
9026 *total = COSTS_N_INSNS (2);
9027 return false;
9029 case ROTATE:
9030 if (mode == SImode && REG_P (XEXP (x, 1)))
9032 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code, 0, false);
9033 return true;
9035 /* Fall through */
9036 case ROTATERT:
9037 case ASHIFT:
9038 case LSHIFTRT:
9039 case ASHIFTRT:
9040 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9042 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code, 0, false);
9043 return true;
9045 else if (mode == SImode)
9047 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code, 0, false);
9048 /* Slightly disparage register shifts, but not by much. */
9049 if (!CONST_INT_P (XEXP (x, 1)))
9050 *total += 1 + rtx_cost (XEXP (x, 1), code, 1, false);
9051 return true;
9054 /* Needs a libcall. */
9055 *total = COSTS_N_INSNS (2);
9056 return false;
9058 case MINUS:
9059 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9060 && (mode == SFmode || !TARGET_VFP_SINGLE))
9062 *total = COSTS_N_INSNS (1);
9063 return false;
9066 if (mode == SImode)
9068 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
9069 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
9071 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
9072 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
9073 || subcode1 == ROTATE || subcode1 == ROTATERT
9074 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
9075 || subcode1 == ASHIFTRT)
9077 /* It's just the cost of the two operands. */
9078 *total = 0;
9079 return false;
9082 *total = COSTS_N_INSNS (1);
9083 return false;
9086 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9087 return false;
9089 case PLUS:
9090 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9091 && (mode == SFmode || !TARGET_VFP_SINGLE))
9093 *total = COSTS_N_INSNS (1);
9094 return false;
9097 /* A shift as a part of ADD costs nothing. */
9098 if (GET_CODE (XEXP (x, 0)) == MULT
9099 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9101 *total = COSTS_N_INSNS (TARGET_THUMB2 ? 2 : 1);
9102 *total += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, false);
9103 *total += rtx_cost (XEXP (x, 1), code, 1, false);
9104 return true;
9107 /* Fall through */
9108 case AND: case XOR: case IOR:
9109 if (mode == SImode)
9111 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9113 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
9114 || subcode == LSHIFTRT || subcode == ASHIFTRT
9115 || (code == AND && subcode == NOT))
9117 /* It's just the cost of the two operands. */
9118 *total = 0;
9119 return false;
9123 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9124 return false;
9126 case MULT:
9127 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9128 return false;
9130 case NEG:
9131 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9132 && (mode == SFmode || !TARGET_VFP_SINGLE))
9134 *total = COSTS_N_INSNS (1);
9135 return false;
9138 /* Fall through */
9139 case NOT:
9140 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9142 return false;
9144 case IF_THEN_ELSE:
9145 *total = 0;
9146 return false;
9148 case COMPARE:
9149 if (cc_register (XEXP (x, 0), VOIDmode))
9150 *total = 0;
9151 else
9152 *total = COSTS_N_INSNS (1);
9153 return false;
9155 case ABS:
9156 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9157 && (mode == SFmode || !TARGET_VFP_SINGLE))
9158 *total = COSTS_N_INSNS (1);
9159 else
9160 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
9161 return false;
9163 case SIGN_EXTEND:
9164 case ZERO_EXTEND:
9165 return arm_rtx_costs_1 (x, outer_code, total, 0);
9167 case CONST_INT:
9168 if (const_ok_for_arm (INTVAL (x)))
9169 /* A multiplication by a constant requires another instruction
9170 to load the constant to a register. */
9171 *total = COSTS_N_INSNS ((outer_code == SET || outer_code == MULT)
9172 ? 1 : 0);
9173 else if (const_ok_for_arm (~INTVAL (x)))
9174 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
9175 else if (const_ok_for_arm (-INTVAL (x)))
9177 if (outer_code == COMPARE || outer_code == PLUS
9178 || outer_code == MINUS)
9179 *total = 0;
9180 else
9181 *total = COSTS_N_INSNS (1);
9183 else
9184 *total = COSTS_N_INSNS (2);
9185 return true;
9187 case CONST:
9188 case LABEL_REF:
9189 case SYMBOL_REF:
9190 *total = COSTS_N_INSNS (2);
9191 return true;
9193 case CONST_DOUBLE:
9194 *total = COSTS_N_INSNS (4);
9195 return true;
9197 case CONST_VECTOR:
9198 if (TARGET_NEON
9199 && TARGET_HARD_FLOAT
9200 && outer_code == SET
9201 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
9202 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
9203 *total = COSTS_N_INSNS (1);
9204 else
9205 *total = COSTS_N_INSNS (4);
9206 return true;
9208 case HIGH:
9209 case LO_SUM:
9210 /* We prefer constant pool entries to MOVW/MOVT pairs, so bump the
9211 cost of these slightly. */
9212 *total = COSTS_N_INSNS (1) + 1;
9213 return true;
9215 case SET:
9216 return false;
9218 default:
9219 if (mode != VOIDmode)
9220 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9221 else
9222 *total = COSTS_N_INSNS (4); /* Who knows? */
9223 return false;
9227 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9228 operand, then return the operand that is being shifted. If the shift
9229 is not by a constant, then set SHIFT_REG to point to the operand.
9230 Return NULL if OP is not a shifter operand. */
9231 static rtx
9232 shifter_op_p (rtx op, rtx *shift_reg)
9234 enum rtx_code code = GET_CODE (op);
9236 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9237 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9238 return XEXP (op, 0);
9239 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9240 return XEXP (op, 0);
9241 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9242 || code == ASHIFTRT)
9244 if (!CONST_INT_P (XEXP (op, 1)))
9245 *shift_reg = XEXP (op, 1);
9246 return XEXP (op, 0);
9249 return NULL;
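/* Usage sketch: for (mult (reg X) (const_int 4)) -- a shift by 2 in
   disguise -- this returns X and leaves *SHIFT_REG untouched, while for
   (ashift (reg X) (reg Y)) it returns X and sets *SHIFT_REG to Y.  */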
9252 static bool
9253 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9255 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9256 rtx_code code = GET_CODE (x);
9257 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9259 switch (XINT (x, 1))
9261 case UNSPEC_UNALIGNED_LOAD:
9262 /* We can only do unaligned loads into the integer unit, and we can't
9263 use LDM or LDRD. */
9264 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9265 if (speed_p)
9266 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9267 + extra_cost->ldst.load_unaligned);
9269 #ifdef NOT_YET
9270 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9271 ADDR_SPACE_GENERIC, speed_p);
9272 #endif
9273 return true;
9275 case UNSPEC_UNALIGNED_STORE:
9276 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9277 if (speed_p)
9278 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9279 + extra_cost->ldst.store_unaligned);
9281 *cost += rtx_cost (XVECEXP (x, 0, 0), UNSPEC, 0, speed_p);
9282 #ifdef NOT_YET
9283 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9284 ADDR_SPACE_GENERIC, speed_p);
9285 #endif
9286 return true;
9288 case UNSPEC_VRINTZ:
9289 case UNSPEC_VRINTP:
9290 case UNSPEC_VRINTM:
9291 case UNSPEC_VRINTR:
9292 case UNSPEC_VRINTX:
9293 case UNSPEC_VRINTA:
9294 *cost = COSTS_N_INSNS (1);
9295 if (speed_p)
9296 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9298 return true;
9299 default:
9300 *cost = COSTS_N_INSNS (2);
9301 break;
9303 return true;
9306 /* Cost of a libcall. We assume one insn per argument, an amount for the
9307 call (one insn for -Os) and then one for processing the result. */
9308 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
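/* For example, LIBCALL_COST (2) evaluates to COSTS_N_INSNS (20) when
   optimizing for speed (two argument insns plus 18 for the call and
   result handling) and to COSTS_N_INSNS (4) when optimizing for size.  */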
9310 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9311 do \
9313 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9314 if (shift_op != NULL \
9315 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9317 if (shift_reg) \
9319 if (speed_p) \
9320 *cost += extra_cost->alu.arith_shift_reg; \
9321 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p); \
9323 else if (speed_p) \
9324 *cost += extra_cost->alu.arith_shift; \
9326 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p) \
9327 + rtx_cost (XEXP (x, 1 - IDX), \
9328 OP, 1, speed_p)); \
9329 return true; \
9332 while (0);
9334 /* RTX costs. Make an estimate of the cost of executing the operation
9335 X, which is contained within an operation with code OUTER_CODE.
9336 SPEED_P indicates whether the cost desired is the performance cost,
9337 or the size cost. The estimate is stored in COST and the return
9338 value is TRUE if the cost calculation is final, or FALSE if the
9339 caller should recurse through the operands of X to add additional
9340 costs.
9342 We currently make no attempt to model the size savings of Thumb-2
9343 16-bit instructions. At the normal points in compilation where
9344 this code is called we have no measure of whether the condition
9345 flags are live or not, and thus no realistic way to determine what
9346 the size will eventually be. */
9347 static bool
9348 arm_new_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
9349 const struct cpu_cost_table *extra_cost,
9350 int *cost, bool speed_p)
9352 machine_mode mode = GET_MODE (x);
9354 if (TARGET_THUMB1)
9356 if (speed_p)
9357 *cost = thumb1_rtx_costs (x, code, outer_code);
9358 else
9359 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9360 return true;
9363 switch (code)
9365 case SET:
9366 *cost = 0;
9367 /* SET RTXs don't have a mode so we get it from the destination. */
9368 mode = GET_MODE (SET_DEST (x));
9370 if (REG_P (SET_SRC (x))
9371 && REG_P (SET_DEST (x)))
9373 /* Assume that most copies can be done with a single insn,
9374 unless we don't have HW FP, in which case everything
9375 larger than word mode will require two insns. */
9376 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9377 && GET_MODE_SIZE (mode) > 4)
9378 || mode == DImode)
9379 ? 2 : 1);
9380 /* Conditional register moves can be encoded
9381 in 16 bits in Thumb mode. */
9382 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9383 *cost >>= 1;
9385 return true;
9388 if (CONST_INT_P (SET_SRC (x)))
9390 /* Handle CONST_INT here, since the value doesn't have a mode
9391 and we would otherwise be unable to work out the true cost. */
9392 *cost = rtx_cost (SET_DEST (x), SET, 0, speed_p);
9393 outer_code = SET;
9394 /* Slightly lower the cost of setting a core reg to a constant.
9395 This helps break up chains and allows for better scheduling. */
9396 if (REG_P (SET_DEST (x))
9397 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9398 *cost -= 1;
9399 x = SET_SRC (x);
9400 /* Immediate moves with an immediate in the range [0, 255] can be
9401 encoded in 16 bits in Thumb mode. */
9402 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9403 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9404 *cost >>= 1;
9405 goto const_int_cost;
9408 return false;
9410 case MEM:
9411 /* A memory access costs 1 insn if the mode is small or the address is
9412 a single register; otherwise it costs one insn per word. */
9413 if (REG_P (XEXP (x, 0)))
9414 *cost = COSTS_N_INSNS (1);
9415 else if (flag_pic
9416 && GET_CODE (XEXP (x, 0)) == PLUS
9417 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9418 /* This will be split into two instructions.
9419 See arm.md:calculate_pic_address. */
9420 *cost = COSTS_N_INSNS (2);
9421 else
9422 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9424 /* For speed optimizations, add the costs of the address and
9425 accessing memory. */
9426 if (speed_p)
9427 #ifdef NOT_YET
9428 *cost += (extra_cost->ldst.load
9429 + arm_address_cost (XEXP (x, 0), mode,
9430 ADDR_SPACE_GENERIC, speed_p));
9431 #else
9432 *cost += extra_cost->ldst.load;
9433 #endif
9434 return true;
9436 case PARALLEL:
9438 /* Calculations of LDM costs are complex. We assume an initial cost
9439 (ldm_1st) which covers loading up to ldm_regs_per_insn_1st
9440 registers; then each additional block of
9441 ldm_regs_per_insn_subsequent registers costs one more insn. The
9442 formula for N regs is thus:
9444 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9445 + ldm_regs_per_insn_subsequent - 1)
9446 / ldm_regs_per_insn_subsequent).
9448 Additional costs may also be added for addressing. A similar
9449 formula is used for STM. */
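/* A worked example with hypothetical tuning numbers: loading 6 registers
   on a core with ldm_regs_per_insn_1st == 2 and
   ldm_regs_per_insn_subsequent == 2 costs
   ldm_1st + COSTS_N_INSNS ((MAX (6 - 2, 0) + 2 - 1) / 2)
   = ldm_1st + COSTS_N_INSNS (2).  */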
9451 bool is_ldm = load_multiple_operation (x, SImode);
9452 bool is_stm = store_multiple_operation (x, SImode);
9454 *cost = COSTS_N_INSNS (1);
9456 if (is_ldm || is_stm)
9458 if (speed_p)
9460 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9461 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9462 ? extra_cost->ldst.ldm_regs_per_insn_1st
9463 : extra_cost->ldst.stm_regs_per_insn_1st;
9464 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9465 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9466 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9468 *cost += regs_per_insn_1st
9469 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9470 + regs_per_insn_sub - 1)
9471 / regs_per_insn_sub);
9472 return true;
9476 return false;
9478 case DIV:
9479 case UDIV:
9480 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9481 && (mode == SFmode || !TARGET_VFP_SINGLE))
9482 *cost = COSTS_N_INSNS (speed_p
9483 ? extra_cost->fp[mode != SFmode].div : 1);
9484 else if (mode == SImode && TARGET_IDIV)
9485 *cost = COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 1);
9486 else
9487 *cost = LIBCALL_COST (2);
9488 return false; /* All arguments must be in registers. */
9490 case MOD:
9491 case UMOD:
9492 *cost = LIBCALL_COST (2);
9493 return false; /* All arguments must be in registers. */
9495 case ROTATE:
9496 if (mode == SImode && REG_P (XEXP (x, 1)))
9498 *cost = (COSTS_N_INSNS (2)
9499 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9500 if (speed_p)
9501 *cost += extra_cost->alu.shift_reg;
9502 return true;
9504 /* Fall through */
9505 case ROTATERT:
9506 case ASHIFT:
9507 case LSHIFTRT:
9508 case ASHIFTRT:
9509 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9511 *cost = (COSTS_N_INSNS (3)
9512 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9513 if (speed_p)
9514 *cost += 2 * extra_cost->alu.shift;
9515 return true;
9517 else if (mode == SImode)
9519 *cost = (COSTS_N_INSNS (1)
9520 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9521 /* Slightly disparage register shifts at -Os, but not by much. */
9522 if (!CONST_INT_P (XEXP (x, 1)))
9523 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9524 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9525 return true;
9527 else if (GET_MODE_CLASS (mode) == MODE_INT
9528 && GET_MODE_SIZE (mode) < 4)
9530 if (code == ASHIFT)
9532 *cost = (COSTS_N_INSNS (1)
9533 + rtx_cost (XEXP (x, 0), code, 0, speed_p));
9534 /* Slightly disparage register shifts at -Os, but not by
9535 much. */
9536 if (!CONST_INT_P (XEXP (x, 1)))
9537 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9538 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
9540 else if (code == LSHIFTRT || code == ASHIFTRT)
9542 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9544 /* Can use SBFX/UBFX. */
9545 *cost = COSTS_N_INSNS (1);
9546 if (speed_p)
9547 *cost += extra_cost->alu.bfx;
9548 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9550 else
9552 *cost = COSTS_N_INSNS (2);
9553 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9554 if (speed_p)
9556 if (CONST_INT_P (XEXP (x, 1)))
9557 *cost += 2 * extra_cost->alu.shift;
9558 else
9559 *cost += (extra_cost->alu.shift
9560 + extra_cost->alu.shift_reg);
9562 else
9563 /* Slightly disparage register shifts. */
9564 *cost += !CONST_INT_P (XEXP (x, 1));
9567 else /* Rotates. */
9569 *cost = COSTS_N_INSNS (3 + !CONST_INT_P (XEXP (x, 1)));
9570 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
9571 if (speed_p)
9573 if (CONST_INT_P (XEXP (x, 1)))
9574 *cost += (2 * extra_cost->alu.shift
9575 + extra_cost->alu.log_shift);
9576 else
9577 *cost += (extra_cost->alu.shift
9578 + extra_cost->alu.shift_reg
9579 + extra_cost->alu.log_shift_reg);
9582 return true;
9585 *cost = LIBCALL_COST (2);
9586 return false;
9588 case BSWAP:
9589 if (arm_arch6)
9591 if (mode == SImode)
9593 *cost = COSTS_N_INSNS (1);
9594 if (speed_p)
9595 *cost += extra_cost->alu.rev;
9597 return false;
9600 else
9602 /* No rev instruction available. Look at arm_legacy_rev
9603 and thumb_legacy_rev for the form of RTL used then. */
9604 if (TARGET_THUMB)
9606 *cost = COSTS_N_INSNS (10);
9608 if (speed_p)
9610 *cost += 6 * extra_cost->alu.shift;
9611 *cost += 3 * extra_cost->alu.logical;
9614 else
9616 *cost = COSTS_N_INSNS (5);
9618 if (speed_p)
9620 *cost += 2 * extra_cost->alu.shift;
9621 *cost += extra_cost->alu.arith_shift;
9622 *cost += 2 * extra_cost->alu.logical;
9625 return true;
9627 return false;
9629 case MINUS:
9630 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9631 && (mode == SFmode || !TARGET_VFP_SINGLE))
9633 *cost = COSTS_N_INSNS (1);
9634 if (GET_CODE (XEXP (x, 0)) == MULT
9635 || GET_CODE (XEXP (x, 1)) == MULT)
9637 rtx mul_op0, mul_op1, sub_op;
9639 if (speed_p)
9640 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9642 if (GET_CODE (XEXP (x, 0)) == MULT)
9644 mul_op0 = XEXP (XEXP (x, 0), 0);
9645 mul_op1 = XEXP (XEXP (x, 0), 1);
9646 sub_op = XEXP (x, 1);
9648 else
9650 mul_op0 = XEXP (XEXP (x, 1), 0);
9651 mul_op1 = XEXP (XEXP (x, 1), 1);
9652 sub_op = XEXP (x, 0);
9655 /* The first operand of the multiply may be optionally
9656 negated. */
9657 if (GET_CODE (mul_op0) == NEG)
9658 mul_op0 = XEXP (mul_op0, 0);
9660 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9661 + rtx_cost (mul_op1, code, 0, speed_p)
9662 + rtx_cost (sub_op, code, 0, speed_p));
9664 return true;
9667 if (speed_p)
9668 *cost += extra_cost->fp[mode != SFmode].addsub;
9669 return false;
9672 if (mode == SImode)
9674 rtx shift_by_reg = NULL;
9675 rtx shift_op;
9676 rtx non_shift_op;
9678 *cost = COSTS_N_INSNS (1);
9680 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9681 if (shift_op == NULL)
9683 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9684 non_shift_op = XEXP (x, 0);
9686 else
9687 non_shift_op = XEXP (x, 1);
9689 if (shift_op != NULL)
9691 if (shift_by_reg != NULL)
9693 if (speed_p)
9694 *cost += extra_cost->alu.arith_shift_reg;
9695 *cost += rtx_cost (shift_by_reg, code, 0, speed_p);
9697 else if (speed_p)
9698 *cost += extra_cost->alu.arith_shift;
9700 *cost += (rtx_cost (shift_op, code, 0, speed_p)
9701 + rtx_cost (non_shift_op, code, 0, speed_p));
9702 return true;
9705 if (arm_arch_thumb2
9706 && GET_CODE (XEXP (x, 1)) == MULT)
9708 /* MLS. */
9709 if (speed_p)
9710 *cost += extra_cost->mult[0].add;
9711 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9712 + rtx_cost (XEXP (XEXP (x, 1), 0), MULT, 0, speed_p)
9713 + rtx_cost (XEXP (XEXP (x, 1), 1), MULT, 1, speed_p));
9714 return true;
9717 if (CONST_INT_P (XEXP (x, 0)))
9719 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9720 INTVAL (XEXP (x, 0)), NULL_RTX,
9721 NULL_RTX, 1, 0);
9722 *cost = COSTS_N_INSNS (insns);
9723 if (speed_p)
9724 *cost += insns * extra_cost->alu.arith;
9725 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9726 return true;
9728 else if (speed_p)
9729 *cost += extra_cost->alu.arith;
9731 return false;
9734 if (GET_MODE_CLASS (mode) == MODE_INT
9735 && GET_MODE_SIZE (mode) < 4)
9737 rtx shift_op, shift_reg;
9738 shift_reg = NULL;
9740 /* We check both sides of the MINUS for shifter operands since,
9741 unlike PLUS, it's not commutative. */
9743 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9744 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9746 /* Slightly disparage, as we might need to widen the result. */
9747 *cost = 1 + COSTS_N_INSNS (1);
9748 if (speed_p)
9749 *cost += extra_cost->alu.arith;
9751 if (CONST_INT_P (XEXP (x, 0)))
9753 *cost += rtx_cost (XEXP (x, 1), code, 1, speed_p);
9754 return true;
9757 return false;
9760 if (mode == DImode)
9762 *cost = COSTS_N_INSNS (2);
9764 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9766 rtx op1 = XEXP (x, 1);
9768 if (speed_p)
9769 *cost += 2 * extra_cost->alu.arith;
9771 if (GET_CODE (op1) == ZERO_EXTEND)
9772 *cost += rtx_cost (XEXP (op1, 0), ZERO_EXTEND, 0, speed_p);
9773 else
9774 *cost += rtx_cost (op1, MINUS, 1, speed_p);
9775 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND,
9776 0, speed_p);
9777 return true;
9779 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9781 if (speed_p)
9782 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9783 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), SIGN_EXTEND,
9784 0, speed_p)
9785 + rtx_cost (XEXP (x, 1), MINUS, 1, speed_p));
9786 return true;
9788 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9789 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9791 if (speed_p)
9792 *cost += (extra_cost->alu.arith
9793 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9794 ? extra_cost->alu.arith
9795 : extra_cost->alu.arith_shift));
9796 *cost += (rtx_cost (XEXP (x, 0), MINUS, 0, speed_p)
9797 + rtx_cost (XEXP (XEXP (x, 1), 0),
9798 GET_CODE (XEXP (x, 1)), 0, speed_p));
9799 return true;
9802 if (speed_p)
9803 *cost += 2 * extra_cost->alu.arith;
9804 return false;
9807 /* Vector mode? */
9809 *cost = LIBCALL_COST (2);
9810 return false;
9812 case PLUS:
9813 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9814 && (mode == SFmode || !TARGET_VFP_SINGLE))
9816 *cost = COSTS_N_INSNS (1);
9817 if (GET_CODE (XEXP (x, 0)) == MULT)
9819 rtx mul_op0, mul_op1, add_op;
9821 if (speed_p)
9822 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9824 mul_op0 = XEXP (XEXP (x, 0), 0);
9825 mul_op1 = XEXP (XEXP (x, 0), 1);
9826 add_op = XEXP (x, 1);
9828 *cost += (rtx_cost (mul_op0, code, 0, speed_p)
9829 + rtx_cost (mul_op1, code, 0, speed_p)
9830 + rtx_cost (add_op, code, 0, speed_p));
9832 return true;
9835 if (speed_p)
9836 *cost += extra_cost->fp[mode != SFmode].addsub;
9837 return false;
9839 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9841 *cost = LIBCALL_COST (2);
9842 return false;
9845 /* Narrow modes can be synthesized in SImode, but the range
9846 of useful sub-operations is limited. Check for shift operations
9847 on one of the operands. Only left shifts can be used in the
9848 narrow modes. */
9849 if (GET_MODE_CLASS (mode) == MODE_INT
9850 && GET_MODE_SIZE (mode) < 4)
9852 rtx shift_op, shift_reg;
9853 shift_reg = NULL;
9855 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9857 if (CONST_INT_P (XEXP (x, 1)))
9859 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9860 INTVAL (XEXP (x, 1)), NULL_RTX,
9861 NULL_RTX, 1, 0);
9862 *cost = COSTS_N_INSNS (insns);
9863 if (speed_p)
9864 *cost += insns * extra_cost->alu.arith;
9865 /* Slightly penalize a narrow operation as the result may
9866 need widening. */
9867 *cost += 1 + rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9868 return true;
9871 /* Slightly penalize a narrow operation as the result may
9872 need widening. */
9873 *cost = 1 + COSTS_N_INSNS (1);
9874 if (speed_p)
9875 *cost += extra_cost->alu.arith;
9877 return false;
9880 if (mode == SImode)
9882 rtx shift_op, shift_reg;
9884 *cost = COSTS_N_INSNS (1);
9885 if (TARGET_INT_SIMD
9886 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9887 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9889 /* UXTA[BH] or SXTA[BH]. */
9890 if (speed_p)
9891 *cost += extra_cost->alu.extend_arith;
9892 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
9893 speed_p)
9894 + rtx_cost (XEXP (x, 1), PLUS, 0, speed_p));
9895 return true;
9898 shift_reg = NULL;
9899 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9900 if (shift_op != NULL)
9902 if (shift_reg)
9904 if (speed_p)
9905 *cost += extra_cost->alu.arith_shift_reg;
9906 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
9908 else if (speed_p)
9909 *cost += extra_cost->alu.arith_shift;
9911 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
9912 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9913 return true;
9915 if (GET_CODE (XEXP (x, 0)) == MULT)
9917 rtx mul_op = XEXP (x, 0);
9919 *cost = COSTS_N_INSNS (1);
9921 if (TARGET_DSP_MULTIPLY
9922 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9923 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9924 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9925 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9926 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9927 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9928 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9929 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9930 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9931 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9932 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9933 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9934 == 16))))))
9936 /* SMLA[BT][BT]. */
9937 if (speed_p)
9938 *cost += extra_cost->mult[0].extend_add;
9939 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0),
9940 SIGN_EXTEND, 0, speed_p)
9941 + rtx_cost (XEXP (XEXP (mul_op, 1), 0),
9942 SIGN_EXTEND, 0, speed_p)
9943 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9944 return true;
9947 if (speed_p)
9948 *cost += extra_cost->mult[0].add;
9949 *cost += (rtx_cost (XEXP (mul_op, 0), MULT, 0, speed_p)
9950 + rtx_cost (XEXP (mul_op, 1), MULT, 1, speed_p)
9951 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9952 return true;
9954 if (CONST_INT_P (XEXP (x, 1)))
9956 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9957 INTVAL (XEXP (x, 1)), NULL_RTX,
9958 NULL_RTX, 1, 0);
9959 *cost = COSTS_N_INSNS (insns);
9960 if (speed_p)
9961 *cost += insns * extra_cost->alu.arith;
9962 *cost += rtx_cost (XEXP (x, 0), PLUS, 0, speed_p);
9963 return true;
9965 else if (speed_p)
9966 *cost += extra_cost->alu.arith;
9968 return false;
9971 if (mode == DImode)
9973 if (arm_arch3m
9974 && GET_CODE (XEXP (x, 0)) == MULT
9975 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9976 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9977 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9978 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9980 *cost = COSTS_N_INSNS (1);
9981 if (speed_p)
9982 *cost += extra_cost->mult[1].extend_add;
9983 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
9984 ZERO_EXTEND, 0, speed_p)
9985 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0),
9986 ZERO_EXTEND, 0, speed_p)
9987 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
9988 return true;
9991 *cost = COSTS_N_INSNS (2);
9993 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9994 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9996 if (speed_p)
9997 *cost += (extra_cost->alu.arith
9998 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9999 ? extra_cost->alu.arith
10000 : extra_cost->alu.arith_shift));
10002 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), ZERO_EXTEND, 0,
10003 speed_p)
10004 + rtx_cost (XEXP (x, 1), PLUS, 1, speed_p));
10005 return true;
10008 if (speed_p)
10009 *cost += 2 * extra_cost->alu.arith;
10010 return false;
10013 /* Vector mode? */
10014 *cost = LIBCALL_COST (2);
10015 return false;
10016 case IOR:
10017 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10019 *cost = COSTS_N_INSNS (1);
10020 if (speed_p)
10021 *cost += extra_cost->alu.rev;
10023 return true;
10025 /* Fall through. */
10026 case AND: case XOR:
10027 if (mode == SImode)
10029 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10030 rtx op0 = XEXP (x, 0);
10031 rtx shift_op, shift_reg;
10033 *cost = COSTS_N_INSNS (1);
10035 if (subcode == NOT
10036 && (code == AND
10037 || (code == IOR && TARGET_THUMB2)))
10038 op0 = XEXP (op0, 0);
10040 shift_reg = NULL;
10041 shift_op = shifter_op_p (op0, &shift_reg);
10042 if (shift_op != NULL)
10044 if (shift_reg)
10046 if (speed_p)
10047 *cost += extra_cost->alu.log_shift_reg;
10048 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10050 else if (speed_p)
10051 *cost += extra_cost->alu.log_shift;
10053 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10054 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10055 return true;
10058 if (CONST_INT_P (XEXP (x, 1)))
10060 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10061 INTVAL (XEXP (x, 1)), NULL_RTX,
10062 NULL_RTX, 1, 0);
10064 *cost = COSTS_N_INSNS (insns);
10065 if (speed_p)
10066 *cost += insns * extra_cost->alu.logical;
10067 *cost += rtx_cost (op0, code, 0, speed_p);
10068 return true;
10071 if (speed_p)
10072 *cost += extra_cost->alu.logical;
10073 *cost += (rtx_cost (op0, code, 0, speed_p)
10074 + rtx_cost (XEXP (x, 1), code, 1, speed_p));
10075 return true;
10078 if (mode == DImode)
10080 rtx op0 = XEXP (x, 0);
10081 enum rtx_code subcode = GET_CODE (op0);
10083 *cost = COSTS_N_INSNS (2);
10085 if (subcode == NOT
10086 && (code == AND
10087 || (code == IOR && TARGET_THUMB2)))
10088 op0 = XEXP (op0, 0);
10090 if (GET_CODE (op0) == ZERO_EXTEND)
10092 if (speed_p)
10093 *cost += 2 * extra_cost->alu.logical;
10095 *cost += (rtx_cost (XEXP (op0, 0), ZERO_EXTEND, 0, speed_p)
10096 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10097 return true;
10099 else if (GET_CODE (op0) == SIGN_EXTEND)
10101 if (speed_p)
10102 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10104 *cost += (rtx_cost (XEXP (op0, 0), SIGN_EXTEND, 0, speed_p)
10105 + rtx_cost (XEXP (x, 1), code, 0, speed_p));
10106 return true;
10109 if (speed_p)
10110 *cost += 2 * extra_cost->alu.logical;
10112 return true;
10114 /* Vector mode? */
10116 *cost = LIBCALL_COST (2);
10117 return false;
10119 case MULT:
10120 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10121 && (mode == SFmode || !TARGET_VFP_SINGLE))
10123 rtx op0 = XEXP (x, 0);
10125 *cost = COSTS_N_INSNS (1);
10127 if (GET_CODE (op0) == NEG)
10128 op0 = XEXP (op0, 0);
10130 if (speed_p)
10131 *cost += extra_cost->fp[mode != SFmode].mult;
10133 *cost += (rtx_cost (op0, MULT, 0, speed_p)
10134 + rtx_cost (XEXP (x, 1), MULT, 1, speed_p));
10135 return true;
10137 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10139 *cost = LIBCALL_COST (2);
10140 return false;
10143 if (mode == SImode)
10145 *cost = COSTS_N_INSNS (1);
10146 if (TARGET_DSP_MULTIPLY
10147 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10148 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10149 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10150 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10151 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10152 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10153 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10154 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10155 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10156 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10157 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10158 && (INTVAL (XEXP (XEXP (x, 1), 1))
10159 == 16))))))
10161 /* SMUL[TB][TB]. */
10162 if (speed_p)
10163 *cost += extra_cost->mult[0].extend;
10164 *cost += (rtx_cost (XEXP (x, 0), SIGN_EXTEND, 0, speed_p)
10165 + rtx_cost (XEXP (x, 1), SIGN_EXTEND, 0, speed_p));
10166 return true;
10168 if (speed_p)
10169 *cost += extra_cost->mult[0].simple;
10170 return false;
10173 if (mode == DImode)
10175 if (arm_arch3m
10176 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10177 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10178 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10179 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10181 *cost = COSTS_N_INSNS (1);
10182 if (speed_p)
10183 *cost += extra_cost->mult[1].extend;
10184 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0),
10185 ZERO_EXTEND, 0, speed_p)
10186 + rtx_cost (XEXP (XEXP (x, 1), 0),
10187 ZERO_EXTEND, 0, speed_p));
10188 return true;
10191 *cost = LIBCALL_COST (2);
10192 return false;
10195 /* Vector mode? */
10196 *cost = LIBCALL_COST (2);
10197 return false;
10199 case NEG:
10200 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10201 && (mode == SFmode || !TARGET_VFP_SINGLE))
10203 *cost = COSTS_N_INSNS (1);
10204 if (speed_p)
10205 *cost += extra_cost->fp[mode != SFmode].neg;
10207 return false;
10209 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10211 *cost = LIBCALL_COST (1);
10212 return false;
10215 if (mode == SImode)
10217 if (GET_CODE (XEXP (x, 0)) == ABS)
10219 *cost = COSTS_N_INSNS (2);
10220 /* Assume the non-flag-changing variant. */
10221 if (speed_p)
10222 *cost += (extra_cost->alu.log_shift
10223 + extra_cost->alu.arith_shift);
10224 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), ABS, 0, speed_p);
10225 return true;
10228 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10229 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10231 *cost = COSTS_N_INSNS (2);
10232 /* No extra cost for MOV imm and MVN imm. */
10233 /* If the comparison op is using the flags, there's no further
10234 cost, otherwise we need to add the cost of the comparison. */
10235 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10236 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10237 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10239 *cost += (COSTS_N_INSNS (1)
10240 + rtx_cost (XEXP (XEXP (x, 0), 0), COMPARE, 0,
10241 speed_p)
10242 + rtx_cost (XEXP (XEXP (x, 0), 1), COMPARE, 1,
10243 speed_p));
10244 if (speed_p)
10245 *cost += extra_cost->alu.arith;
10247 return true;
10249 *cost = COSTS_N_INSNS (1);
10250 if (speed_p)
10251 *cost += extra_cost->alu.arith;
10252 return false;
10255 if (GET_MODE_CLASS (mode) == MODE_INT
10256 && GET_MODE_SIZE (mode) < 4)
10258 /* Slightly disparage, as we might need an extend operation. */
10259 *cost = 1 + COSTS_N_INSNS (1);
10260 if (speed_p)
10261 *cost += extra_cost->alu.arith;
10262 return false;
10265 if (mode == DImode)
10267 *cost = COSTS_N_INSNS (2);
10268 if (speed_p)
10269 *cost += 2 * extra_cost->alu.arith;
10270 return false;
10273 /* Vector mode? */
10274 *cost = LIBCALL_COST (1);
10275 return false;
10277 case NOT:
10278 if (mode == SImode)
10280 rtx shift_op;
10281 rtx shift_reg = NULL;
10283 *cost = COSTS_N_INSNS (1);
10284 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10286 if (shift_op)
10288 if (shift_reg != NULL)
10290 if (speed_p)
10291 *cost += extra_cost->alu.log_shift_reg;
10292 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10294 else if (speed_p)
10295 *cost += extra_cost->alu.log_shift;
10296 *cost += rtx_cost (shift_op, ASHIFT, 0, speed_p);
10297 return true;
10300 if (speed_p)
10301 *cost += extra_cost->alu.logical;
10302 return false;
10304 if (mode == DImode)
10306 *cost = COSTS_N_INSNS (2);
10307 return false;
10310 /* Vector mode? */
10312 *cost += LIBCALL_COST (1);
10313 return false;
10315 case IF_THEN_ELSE:
10317 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10319 *cost = COSTS_N_INSNS (4);
10320 return true;
10322 int op1cost = rtx_cost (XEXP (x, 1), SET, 1, speed_p);
10323 int op2cost = rtx_cost (XEXP (x, 2), SET, 1, speed_p);
10325 *cost = rtx_cost (XEXP (x, 0), IF_THEN_ELSE, 0, speed_p);
10326 /* Assume that if one arm of the if_then_else is a register,
10327 that it will be tied with the result and eliminate the
10328 conditional insn. */
10329 if (REG_P (XEXP (x, 1)))
10330 *cost += op2cost;
10331 else if (REG_P (XEXP (x, 2)))
10332 *cost += op1cost;
10333 else
10335 if (speed_p)
10337 if (extra_cost->alu.non_exec_costs_exec)
10338 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10339 else
10340 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10342 else
10343 *cost += op1cost + op2cost;
10346 return true;
10348 case COMPARE:
10349 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10350 *cost = 0;
10351 else
10353 machine_mode op0mode;
10354 /* We'll mostly assume that the cost of a compare is the cost of the
10355 LHS. However, there are some notable exceptions. */
10357 /* Floating point compares are never done as side-effects. */
10358 op0mode = GET_MODE (XEXP (x, 0));
10359 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10360 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10362 *cost = COSTS_N_INSNS (1);
10363 if (speed_p)
10364 *cost += extra_cost->fp[op0mode != SFmode].compare;
10366 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10368 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10369 return true;
10372 return false;
10374 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10376 *cost = LIBCALL_COST (2);
10377 return false;
10380 /* DImode compares normally take two insns. */
10381 if (op0mode == DImode)
10383 *cost = COSTS_N_INSNS (2);
10384 if (speed_p)
10385 *cost += 2 * extra_cost->alu.arith;
10386 return false;
10389 if (op0mode == SImode)
10391 rtx shift_op;
10392 rtx shift_reg;
10394 if (XEXP (x, 1) == const0_rtx
10395 && !(REG_P (XEXP (x, 0))
10396 || (GET_CODE (XEXP (x, 0)) == SUBREG
10397 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10399 *cost = rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10401 /* Multiply operations that set the flags are often
10402 significantly more expensive. */
10403 if (speed_p
10404 && GET_CODE (XEXP (x, 0)) == MULT
10405 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10406 *cost += extra_cost->mult[0].flag_setting;
10408 if (speed_p
10409 && GET_CODE (XEXP (x, 0)) == PLUS
10410 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10411 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10412 0), 1), mode))
10413 *cost += extra_cost->mult[0].flag_setting;
10414 return true;
10417 shift_reg = NULL;
10418 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10419 if (shift_op != NULL)
10421 *cost = COSTS_N_INSNS (1);
10422 if (shift_reg != NULL)
10424 *cost += rtx_cost (shift_reg, ASHIFT, 1, speed_p);
10425 if (speed_p)
10426 *cost += extra_cost->alu.arith_shift_reg;
10428 else if (speed_p)
10429 *cost += extra_cost->alu.arith_shift;
10430 *cost += (rtx_cost (shift_op, ASHIFT, 0, speed_p)
10431 + rtx_cost (XEXP (x, 1), COMPARE, 1, speed_p));
10432 return true;
10435 *cost = COSTS_N_INSNS (1);
10436 if (speed_p)
10437 *cost += extra_cost->alu.arith;
10438 if (CONST_INT_P (XEXP (x, 1))
10439 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10441 *cost += rtx_cost (XEXP (x, 0), COMPARE, 0, speed_p);
10442 return true;
10444 return false;
10447 /* Vector mode? */
10449 *cost = LIBCALL_COST (2);
10450 return false;
10452 return true;
10454 case EQ:
10455 case NE:
10456 case LT:
10457 case LE:
10458 case GT:
10459 case GE:
10460 case LTU:
10461 case LEU:
10462 case GEU:
10463 case GTU:
10464 case ORDERED:
10465 case UNORDERED:
10466 case UNEQ:
10467 case UNLE:
10468 case UNLT:
10469 case UNGE:
10470 case UNGT:
10471 case LTGT:
10472 if (outer_code == SET)
10474 /* Is it a store-flag operation? */
10475 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10476 && XEXP (x, 1) == const0_rtx)
10478 /* Thumb also needs an IT insn. */
10479 *cost = COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10480 return true;
10482 if (XEXP (x, 1) == const0_rtx)
10484 switch (code)
10486 case LT:
10487 /* LSR Rd, Rn, #31. */
10488 *cost = COSTS_N_INSNS (1);
10489 if (speed_p)
10490 *cost += extra_cost->alu.shift;
10491 break;
10493 case EQ:
10494 /* RSBS T1, Rn, #0
10495 ADC Rd, Rn, T1. */
10497 case NE:
10498 /* SUBS T1, Rn, #1
10499 SBC Rd, Rn, T1. */
10500 *cost = COSTS_N_INSNS (2);
10501 break;
10503 case LE:
10504 /* RSBS T1, Rn, Rn, LSR #31
10505 ADC Rd, Rn, T1. */
10506 *cost = COSTS_N_INSNS (2);
10507 if (speed_p)
10508 *cost += extra_cost->alu.arith_shift;
10509 break;
10511 case GT:
10512 /* RSB Rd, Rn, Rn, ASR #1
10513 LSR Rd, Rd, #31. */
10514 *cost = COSTS_N_INSNS (2);
10515 if (speed_p)
10516 *cost += (extra_cost->alu.arith_shift
10517 + extra_cost->alu.shift);
10518 break;
10520 case GE:
10521 /* ASR Rd, Rn, #31
10522 ADD Rd, Rn, #1. */
10523 *cost = COSTS_N_INSNS (2);
10524 if (speed_p)
10525 *cost += extra_cost->alu.shift;
10526 break;
10528 default:
10529 /* Remaining cases are either meaningless or would take
10530 three insns anyway. */
10531 *cost = COSTS_N_INSNS (3);
10532 break;
10534 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10535 return true;
10537 else
10539 *cost = COSTS_N_INSNS (TARGET_THUMB ? 4 : 3);
10540 if (CONST_INT_P (XEXP (x, 1))
10541 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10543 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10544 return true;
10547 return false;
10550 /* Not directly inside a set. If it involves the condition code
10551 register it must be the condition for a branch, cond_exec or
10552 I_T_E operation. Since the comparison is performed elsewhere
10553 this is just the control part which has no additional
10554 cost. */
10555 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10556 && XEXP (x, 1) == const0_rtx)
10558 *cost = 0;
10559 return true;
10561 return false;
10563 case ABS:
10564 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10565 && (mode == SFmode || !TARGET_VFP_SINGLE))
10567 *cost = COSTS_N_INSNS (1);
10568 if (speed_p)
10569 *cost += extra_cost->fp[mode != SFmode].neg;
10571 return false;
10573 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10575 *cost = LIBCALL_COST (1);
10576 return false;
10579 if (mode == SImode)
10581 *cost = COSTS_N_INSNS (1);
10582 if (speed_p)
10583 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10584 return false;
10586 /* Vector mode? */
10587 *cost = LIBCALL_COST (1);
10588 return false;
10590 case SIGN_EXTEND:
10591 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10592 && MEM_P (XEXP (x, 0)))
10594 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10596 if (mode == DImode)
10597 *cost += COSTS_N_INSNS (1);
10599 if (!speed_p)
10600 return true;
10602 if (GET_MODE (XEXP (x, 0)) == SImode)
10603 *cost += extra_cost->ldst.load;
10604 else
10605 *cost += extra_cost->ldst.load_sign_extend;
10607 if (mode == DImode)
10608 *cost += extra_cost->alu.shift;
10610 return true;
10613 /* Widening from less than 32 bits requires an extend operation. */
10614 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10616 /* We have SXTB/SXTH. */
10617 *cost = COSTS_N_INSNS (1);
10618 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10619 if (speed_p)
10620 *cost += extra_cost->alu.extend;
10622 else if (GET_MODE (XEXP (x, 0)) != SImode)
10624 /* Needs two shifts. */
10625 *cost = COSTS_N_INSNS (2);
10626 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10627 if (speed_p)
10628 *cost += 2 * extra_cost->alu.shift;
10631 /* Widening beyond 32 bits requires one more insn. */
10632 if (mode == DImode)
10634 *cost += COSTS_N_INSNS (1);
10635 if (speed_p)
10636 *cost += extra_cost->alu.shift;
10639 return true;
10641 case ZERO_EXTEND:
10642 if ((arm_arch4
10643 || GET_MODE (XEXP (x, 0)) == SImode
10644 || GET_MODE (XEXP (x, 0)) == QImode)
10645 && MEM_P (XEXP (x, 0)))
10647 *cost = rtx_cost (XEXP (x, 0), code, 0, speed_p);
10649 if (mode == DImode)
10650 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10652 return true;
10655 /* Widening from less than 32 bits requires an extend operation. */
10656 if (GET_MODE (XEXP (x, 0)) == QImode)
10658 /* UXTB can be a shorter instruction in Thumb2, but it might
10659 be slower than the AND Rd, Rn, #255 alternative. When
10660 optimizing for speed it should never be slower to use
10661 AND, and we don't really model 16-bit vs 32-bit insns
10662 here. */
10663 *cost = COSTS_N_INSNS (1);
10664 if (speed_p)
10665 *cost += extra_cost->alu.logical;
10667 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10669 /* We have UXTB/UXTH. */
10670 *cost = COSTS_N_INSNS (1);
10671 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10672 if (speed_p)
10673 *cost += extra_cost->alu.extend;
10675 else if (GET_MODE (XEXP (x, 0)) != SImode)
10677 /* Needs two shifts. It's marginally preferable to use
10678 shifts rather than two BIC instructions as the second
10679 shift may merge with a subsequent insn as a shifter
10680 op. */
10681 *cost = COSTS_N_INSNS (2);
10682 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10683 if (speed_p)
10684 *cost += 2 * extra_cost->alu.shift;
10686 else /* GET_MODE (XEXP (x, 0)) == SImode. */
10687 *cost = COSTS_N_INSNS (1);
10689 /* Widening beyond 32 bits requires one more insn. */
10690 if (mode == DImode)
10692 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10695 return true;
10697 case CONST_INT:
10698 *cost = 0;
10699 /* CONST_INT has no mode, so we cannot tell for sure how many
10700 insns are really going to be needed. The best we can do is
10701 look at the value passed. If it fits in SImode, then assume
10702 that's the mode it will be used for. Otherwise assume it
10703 will be used in DImode. */
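/* For example, a value such as 0x7fffffff survives trunc_int_for_mode
   (..., SImode) unchanged and is therefore costed as SImode, whereas a
   64-bit-only value such as (HOST_WIDE_INT) 1 << 32 does not, and is
   costed as DImode (two 32-bit constants) below.  */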
10704 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10705 mode = SImode;
10706 else
10707 mode = DImode;
10709 /* Avoid blowing up in arm_gen_constant (). */
10710 if (!(outer_code == PLUS
10711 || outer_code == AND
10712 || outer_code == IOR
10713 || outer_code == XOR
10714 || outer_code == MINUS))
10715 outer_code = SET;
10717 const_int_cost:
10718 if (mode == SImode)
10720 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10721 INTVAL (x), NULL, NULL,
10722 0, 0));
10723 /* Extra costs? */
10725 else
10727 *cost += COSTS_N_INSNS (arm_gen_constant
10728 (outer_code, SImode, NULL,
10729 trunc_int_for_mode (INTVAL (x), SImode),
10730 NULL, NULL, 0, 0)
10731 + arm_gen_constant (outer_code, SImode, NULL,
10732 INTVAL (x) >> 32, NULL,
10733 NULL, 0, 0));
10734 /* Extra costs? */
10737 return true;
10739 case CONST:
10740 case LABEL_REF:
10741 case SYMBOL_REF:
10742 if (speed_p)
10744 if (arm_arch_thumb2 && !flag_pic)
10745 *cost = COSTS_N_INSNS (2);
10746 else
10747 *cost = COSTS_N_INSNS (1) + extra_cost->ldst.load;
10749 else
10750 *cost = COSTS_N_INSNS (2);
10752 if (flag_pic)
10754 *cost += COSTS_N_INSNS (1);
10755 if (speed_p)
10756 *cost += extra_cost->alu.arith;
10759 return true;
10761 case CONST_FIXED:
10762 *cost = COSTS_N_INSNS (4);
10763 /* Fixme. */
10764 return true;
10766 case CONST_DOUBLE:
10767 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10768 && (mode == SFmode || !TARGET_VFP_SINGLE))
10770 if (vfp3_const_double_rtx (x))
10772 *cost = COSTS_N_INSNS (1);
10773 if (speed_p)
10774 *cost += extra_cost->fp[mode == DFmode].fpconst;
10775 return true;
10778 if (speed_p)
10780 *cost = COSTS_N_INSNS (1);
10781 if (mode == DFmode)
10782 *cost += extra_cost->ldst.loadd;
10783 else
10784 *cost += extra_cost->ldst.loadf;
10786 else
10787 *cost = COSTS_N_INSNS (2 + (mode == DFmode));
10789 return true;
10791 *cost = COSTS_N_INSNS (4);
10792 return true;
10794 case CONST_VECTOR:
10795 /* Fixme. */
10796 if (TARGET_NEON
10797 && TARGET_HARD_FLOAT
10798 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10799 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10800 *cost = COSTS_N_INSNS (1);
10801 else
10802 *cost = COSTS_N_INSNS (4);
10803 return true;
10805 case HIGH:
10806 case LO_SUM:
10807 *cost = COSTS_N_INSNS (1);
10808 /* When optimizing for size, we prefer constant pool entries to
10809 MOVW/MOVT pairs, so bump the cost of these slightly. */
10810 if (!speed_p)
10811 *cost += 1;
10812 return true;
10814 case CLZ:
10815 *cost = COSTS_N_INSNS (1);
10816 if (speed_p)
10817 *cost += extra_cost->alu.clz;
10818 return false;
10820 case SMIN:
10821 if (XEXP (x, 1) == const0_rtx)
10823 *cost = COSTS_N_INSNS (1);
10824 if (speed_p)
10825 *cost += extra_cost->alu.log_shift;
10826 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10827 return true;
10829 /* Fall through. */
10830 case SMAX:
10831 case UMIN:
10832 case UMAX:
10833 *cost = COSTS_N_INSNS (2);
10834 return false;
10836 case TRUNCATE:
10837 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10838 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10839 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10840 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10841 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10842 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10843 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10844 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10845 == ZERO_EXTEND))))
10847 *cost = COSTS_N_INSNS (1);
10848 if (speed_p)
10849 *cost += extra_cost->mult[1].extend;
10850 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), ZERO_EXTEND, 0,
10851 speed_p)
10852 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), ZERO_EXTEND,
10853 0, speed_p));
10854 return true;
10856 *cost = LIBCALL_COST (1);
10857 return false;
10859 case UNSPEC_VOLATILE:
10860 case UNSPEC:
10861 return arm_unspec_cost (x, outer_code, speed_p, cost);
10863 case PC:
10864 /* Reading the PC is like reading any other register. Writing it
10865 is more expensive, but we take that into account elsewhere. */
10866 *cost = 0;
10867 return true;
10869 case ZERO_EXTRACT:
10870 /* TODO: Simple zero_extract of bottom bits using AND. */
10871 /* Fall through. */
10872 case SIGN_EXTRACT:
10873 if (arm_arch6
10874 && mode == SImode
10875 && CONST_INT_P (XEXP (x, 1))
10876 && CONST_INT_P (XEXP (x, 2)))
10878 *cost = COSTS_N_INSNS (1);
10879 if (speed_p)
10880 *cost += extra_cost->alu.bfx;
10881 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10882 return true;
10884 /* Without UBFX/SBFX, need to resort to shift operations. */
10885 *cost = COSTS_N_INSNS (2);
10886 if (speed_p)
10887 *cost += 2 * extra_cost->alu.shift;
10888 *cost += rtx_cost (XEXP (x, 0), ASHIFT, 0, speed_p);
10889 return true;
10891 case FLOAT_EXTEND:
10892 if (TARGET_HARD_FLOAT)
10894 *cost = COSTS_N_INSNS (1);
10895 if (speed_p)
10896 *cost += extra_cost->fp[mode == DFmode].widen;
10897 if (!TARGET_FPU_ARMV8
10898 && GET_MODE (XEXP (x, 0)) == HFmode)
10900 /* Pre v8, widening HF->DF is a two-step process, first
10901 widening to SFmode. */
10902 *cost += COSTS_N_INSNS (1);
10903 if (speed_p)
10904 *cost += extra_cost->fp[0].widen;
10906 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10907 return true;
10910 *cost = LIBCALL_COST (1);
10911 return false;
10913 case FLOAT_TRUNCATE:
10914 if (TARGET_HARD_FLOAT)
10916 *cost = COSTS_N_INSNS (1);
10917 if (speed_p)
10918 *cost += extra_cost->fp[mode == DFmode].narrow;
10919 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10920 return true;
10921 /* Vector modes? */
10923 *cost = LIBCALL_COST (1);
10924 return false;
10926 case FMA:
10927 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10929 rtx op0 = XEXP (x, 0);
10930 rtx op1 = XEXP (x, 1);
10931 rtx op2 = XEXP (x, 2);
10933 *cost = COSTS_N_INSNS (1);
10935 /* vfms or vfnma. */
10936 if (GET_CODE (op0) == NEG)
10937 op0 = XEXP (op0, 0);
10939 /* vfnms or vfnma. */
10940 if (GET_CODE (op2) == NEG)
10941 op2 = XEXP (op2, 0);
10943 *cost += rtx_cost (op0, FMA, 0, speed_p);
10944 *cost += rtx_cost (op1, FMA, 1, speed_p);
10945 *cost += rtx_cost (op2, FMA, 2, speed_p);
10947 if (speed_p)
10948 *cost += extra_cost->fp[mode == DFmode].fma;
10950 return true;
10953 *cost = LIBCALL_COST (3);
10954 return false;
10956 case FIX:
10957 case UNSIGNED_FIX:
10958 if (TARGET_HARD_FLOAT)
10960 if (GET_MODE_CLASS (mode) == MODE_INT)
10962 *cost = COSTS_N_INSNS (1);
10963 if (speed_p)
10964 *cost += extra_cost->fp[GET_MODE (XEXP (x, 0)) == DFmode].toint;
10965 /* Strip off the 'cost' of rounding towards zero. */
10966 if (GET_CODE (XEXP (x, 0)) == FIX)
10967 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), code, 0, speed_p);
10968 else
10969 *cost += rtx_cost (XEXP (x, 0), code, 0, speed_p);
10970 /* ??? Increase the cost to deal with transferring from
10971 FP -> CORE registers? */
10972 return true;
10974 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10975 && TARGET_FPU_ARMV8)
10977 *cost = COSTS_N_INSNS (1);
10978 if (speed_p)
10979 *cost += extra_cost->fp[mode == DFmode].roundint;
10980 return false;
10982 /* Vector costs? */
10984 *cost = LIBCALL_COST (1);
10985 return false;
10987 case FLOAT:
10988 case UNSIGNED_FLOAT:
10989 if (TARGET_HARD_FLOAT)
10991 /* ??? Increase the cost to deal with transferring from CORE
10992 -> FP registers? */
10993 *cost = COSTS_N_INSNS (1);
10994 if (speed_p)
10995 *cost += extra_cost->fp[mode == DFmode].fromint;
10996 return false;
10998 *cost = LIBCALL_COST (1);
10999 return false;
11001 case CALL:
11002 *cost = COSTS_N_INSNS (1);
11003 return true;
11005 case ASM_OPERANDS:
11007 /* Just a guess. Guess number of instructions in the asm
11008 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11009 though (see PR60663). */
11010 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11011 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11013 *cost = COSTS_N_INSNS (asm_length + num_operands);
11014 return true;
11016 default:
11017 if (mode != VOIDmode)
11018 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11019 else
11020 *cost = COSTS_N_INSNS (4); /* Who knows? */
11021 return false;
11025 #undef HANDLE_NARROW_SHIFT_ARITH
11027 /* Top-level RTX cost calculation: dispatch between the old size/per-tune cost functions and the newer table-driven costs. */
11028 static bool
11029 arm_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED,
11030 int *total, bool speed)
11032 bool result;
11034 if (TARGET_OLD_RTX_COSTS
11035 || (!current_tune->insn_extra_cost && !TARGET_NEW_GENERIC_COSTS))
11037 /* Old way. (Deprecated.) */
11038 if (!speed)
11039 result = arm_size_rtx_costs (x, (enum rtx_code) code,
11040 (enum rtx_code) outer_code, total);
11041 else
11042 result = current_tune->rtx_costs (x, (enum rtx_code) code,
11043 (enum rtx_code) outer_code, total,
11044 speed);
11046 else
11048 /* New way. */
11049 if (current_tune->insn_extra_cost)
11050 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11051 (enum rtx_code) outer_code,
11052 current_tune->insn_extra_cost,
11053 total, speed);
11054 /* TARGET_NEW_GENERIC_COSTS && !TARGET_OLD_RTX_COSTS
11055 && current_tune->insn_extra_cost == NULL */
11056 else
11057 result = arm_new_rtx_costs (x, (enum rtx_code) code,
11058 (enum rtx_code) outer_code,
11059 &generic_extra_costs, total, speed);
11062 if (dump_file && (dump_flags & TDF_DETAILS))
11064 print_rtl_single (dump_file, x);
11065 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11066 *total, result ? "final" : "partial");
11068 return result;
11071 /* RTX costs for cores with a slow MUL implementation. Thumb-2 is not
11072 supported on any "slowmul" cores, so it can be ignored. */
11074 static bool
11075 arm_slowmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11076 int *total, bool speed)
11078 machine_mode mode = GET_MODE (x);
11080 if (TARGET_THUMB)
11082 *total = thumb1_rtx_costs (x, code, outer_code);
11083 return true;
11086 switch (code)
11088 case MULT:
11089 if (GET_MODE_CLASS (mode) == MODE_FLOAT
11090 || mode == DImode)
11092 *total = COSTS_N_INSNS (20);
11093 return false;
11096 if (CONST_INT_P (XEXP (x, 1)))
11098 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11099 & (unsigned HOST_WIDE_INT) 0xffffffff);
11100 int cost, const_ok = const_ok_for_arm (i);
11101 int j, booth_unit_size;
11103 /* Tune as appropriate. */
11104 cost = const_ok ? 4 : 8;
11105 booth_unit_size = 2;
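/* Illustration: for a multiplier of 0x55 (which is const_ok), the loop
   below iterates four times before the value shifts down to zero, so
   the total is COSTS_N_INSNS (4 + 4) = COSTS_N_INSNS (8).  */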
11106 for (j = 0; i && j < 32; j += booth_unit_size)
11108 i >>= booth_unit_size;
11109 cost++;
11112 *total = COSTS_N_INSNS (cost);
11113 *total += rtx_cost (XEXP (x, 0), code, 0, speed);
11114 return true;
11117 *total = COSTS_N_INSNS (20);
11118 return false;
11120 default:
11121 return arm_rtx_costs_1 (x, outer_code, total, speed);
11126 /* RTX cost for cores with a fast multiply unit (M variants). */
11128 static bool
11129 arm_fastmul_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11130 int *total, bool speed)
11132 machine_mode mode = GET_MODE (x);
11134 if (TARGET_THUMB1)
11136 *total = thumb1_rtx_costs (x, code, outer_code);
11137 return true;
11140 /* ??? should thumb2 use different costs? */
11141 switch (code)
11143 case MULT:
11144 /* There is no point basing this on the tuning, since it is always the
11145 fast variant if it exists at all. */
11146 if (mode == DImode
11147 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11148 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11149 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11151 *total = COSTS_N_INSNS (2);
11152 return false;
11156 if (mode == DImode)
11158 *total = COSTS_N_INSNS (5);
11159 return false;
11162 if (CONST_INT_P (XEXP (x, 1)))
11164 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
11165 & (unsigned HOST_WIDE_INT) 0xffffffff);
11166 int cost, const_ok = const_ok_for_arm (i);
11167 int j, booth_unit_size;
11169 /* Tune as appropriate. */
11170 cost = const_ok ? 4 : 8;
11171 booth_unit_size = 8;
11172 for (j = 0; i && j < 32; j += booth_unit_size)
11174 i >>= booth_unit_size;
11175 cost++;
11178 *total = COSTS_N_INSNS (cost);
11179 return false;
11182 if (mode == SImode)
11184 *total = COSTS_N_INSNS (4);
11185 return false;
11188 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11190 if (TARGET_HARD_FLOAT
11191 && (mode == SFmode
11192 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11194 *total = COSTS_N_INSNS (1);
11195 return false;
11199 /* Requires a lib call */
11200 *total = COSTS_N_INSNS (20);
11201 return false;
11203 default:
11204 return arm_rtx_costs_1 (x, outer_code, total, speed);
11209 /* RTX cost for XScale CPUs. Thumb-2 is not supported on any xscale cores,
11210 so it can be ignored. */
11212 static bool
11213 arm_xscale_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11214 int *total, bool speed)
11216 machine_mode mode = GET_MODE (x);
11218 if (TARGET_THUMB)
11220 *total = thumb1_rtx_costs (x, code, outer_code);
11221 return true;
11224 switch (code)
11226 case COMPARE:
11227 if (GET_CODE (XEXP (x, 0)) != MULT)
11228 return arm_rtx_costs_1 (x, outer_code, total, speed);
11230 /* A COMPARE of a MULT is slow on XScale; the muls instruction
11231 will stall until the multiplication is complete. */
11232 *total = COSTS_N_INSNS (3);
11233 return false;
11235 case MULT:
11236 /* There is no point basing this on the tuning, since it is always the
11237 fast variant if it exists at all. */
11238 if (mode == DImode
11239 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11240 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11241 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11243 *total = COSTS_N_INSNS (2);
11244 return false;
11248 if (mode == DImode)
11250 *total = COSTS_N_INSNS (5);
11251 return false;
11254 if (CONST_INT_P (XEXP (x, 1)))
11256 /* If operand 1 is a constant we can more accurately
11257 calculate the cost of the multiply. The multiplier can
11258 retire 15 bits on the first cycle and a further 12 on the
11259 second. We do, of course, have to load the constant into
11260 a register first. */
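/* For example, a multiplier of 0x12345678 has bits set above bit 14 and
   above bit 26, so both checks below fire and the cost works out as
   COSTS_N_INSNS (1 + 1 + 1) = COSTS_N_INSNS (3).  */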
11261 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
11262 /* There's a general overhead of one cycle. */
11263 int cost = 1;
11264 unsigned HOST_WIDE_INT masked_const;
11266 if (i & 0x80000000)
11267 i = ~i;
11269 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
11271 masked_const = i & 0xffff8000;
11272 if (masked_const != 0)
11274 cost++;
11275 masked_const = i & 0xf8000000;
11276 if (masked_const != 0)
11277 cost++;
11279 *total = COSTS_N_INSNS (cost);
11280 return false;
11283 if (mode == SImode)
11285 *total = COSTS_N_INSNS (3);
11286 return false;
11289 /* Requires a lib call */
11290 *total = COSTS_N_INSNS (20);
11291 return false;
11293 default:
11294 return arm_rtx_costs_1 (x, outer_code, total, speed);
11299 /* RTX costs for 9e (and later) cores. */
11301 static bool
11302 arm_9e_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer_code,
11303 int *total, bool speed)
11305 machine_mode mode = GET_MODE (x);
11307 if (TARGET_THUMB1)
11309 switch (code)
11311 case MULT:
11312 /* Small multiply: 32 cycles for an integer multiply inst. */
11313 if (arm_arch6m && arm_m_profile_small_mul)
11314 *total = COSTS_N_INSNS (32);
11315 else
11316 *total = COSTS_N_INSNS (3);
11317 return true;
11319 default:
11320 *total = thumb1_rtx_costs (x, code, outer_code);
11321 return true;
11325 switch (code)
11327 case MULT:
11328 /* There is no point basing this on the tuning, since it is always the
11329 fast variant if it exists at all. */
11330 if (mode == DImode
11331 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
11332 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
11333 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
11335 *total = COSTS_N_INSNS (2);
11336 return false;
11340 if (mode == DImode)
11342 *total = COSTS_N_INSNS (5);
11343 return false;
11346 if (mode == SImode)
11348 *total = COSTS_N_INSNS (2);
11349 return false;
11352 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
11354 if (TARGET_HARD_FLOAT
11355 && (mode == SFmode
11356 || (mode == DFmode && !TARGET_VFP_SINGLE)))
11358 *total = COSTS_N_INSNS (1);
11359 return false;
11363 *total = COSTS_N_INSNS (20);
11364 return false;
11366 default:
11367 return arm_rtx_costs_1 (x, outer_code, total, speed);
11370 /* All address computations that can be done are free, but rtx cost returns
11371 the same for practically all of them. So we weight the different types
11372 of address here in the order (most pref first):
11373 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
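/* For illustration, the returns below give: pre/post inc/dec addresses 0,
   reg + constant 2, reg + arithmetic operand (e.g. shifted reg) 3, other
   reg + reg sums 4, a bare register 6, and MEM/LABEL_REF/SYMBOL_REF
   addresses 10.  */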
11374 static inline int
11375 arm_arm_address_cost (rtx x)
11377 enum rtx_code c = GET_CODE (x);
11379 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11380 return 0;
11381 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11382 return 10;
11384 if (c == PLUS)
11386 if (CONST_INT_P (XEXP (x, 1)))
11387 return 2;
11389 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11390 return 3;
11392 return 4;
11395 return 6;
11398 static inline int
11399 arm_thumb_address_cost (rtx x)
11401 enum rtx_code c = GET_CODE (x);
11403 if (c == REG)
11404 return 1;
11405 if (c == PLUS
11406 && REG_P (XEXP (x, 0))
11407 && CONST_INT_P (XEXP (x, 1)))
11408 return 1;
11410 return 2;
11413 static int
11414 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11415 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11417 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11420 /* Adjust cost hook for XScale. */
11421 static bool
11422 xscale_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11424 /* Some true dependencies can have a higher cost depending
11425 on precisely how certain input operands are used. */
11426 if (REG_NOTE_KIND (link) == 0
11427 && recog_memoized (insn) >= 0
11428 && recog_memoized (dep) >= 0)
11430 int shift_opnum = get_attr_shift (insn);
11431 enum attr_type attr_type = get_attr_type (dep);
11433 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11434 operand for INSN. If we have a shifted input operand and the
11435 instruction we depend on is another ALU instruction, then we may
11436 have to account for an additional stall. */
11437 if (shift_opnum != 0
11438 && (attr_type == TYPE_ALU_SHIFT_IMM
11439 || attr_type == TYPE_ALUS_SHIFT_IMM
11440 || attr_type == TYPE_LOGIC_SHIFT_IMM
11441 || attr_type == TYPE_LOGICS_SHIFT_IMM
11442 || attr_type == TYPE_ALU_SHIFT_REG
11443 || attr_type == TYPE_ALUS_SHIFT_REG
11444 || attr_type == TYPE_LOGIC_SHIFT_REG
11445 || attr_type == TYPE_LOGICS_SHIFT_REG
11446 || attr_type == TYPE_MOV_SHIFT
11447 || attr_type == TYPE_MVN_SHIFT
11448 || attr_type == TYPE_MOV_SHIFT_REG
11449 || attr_type == TYPE_MVN_SHIFT_REG))
11451 rtx shifted_operand;
11452 int opno;
11454 /* Get the shifted operand. */
11455 extract_insn (insn);
11456 shifted_operand = recog_data.operand[shift_opnum];
11458 /* Iterate over all the operands in DEP. If we write an operand
11459 that overlaps with SHIFTED_OPERAND, then we have to increase the
11460 cost of this dependency. */
11461 extract_insn (dep);
11462 preprocess_constraints (dep);
11463 for (opno = 0; opno < recog_data.n_operands; opno++)
11465 /* We can ignore strict inputs. */
11466 if (recog_data.operand_type[opno] == OP_IN)
11467 continue;
11469 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11470 shifted_operand))
11472 *cost = 2;
11473 return false;
11478 return true;
11481 /* Adjust cost hook for Cortex A9. */
11482 static bool
11483 cortex_a9_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11485 switch (REG_NOTE_KIND (link))
11487 case REG_DEP_ANTI:
11488 *cost = 0;
11489 return false;
11491 case REG_DEP_TRUE:
11492 case REG_DEP_OUTPUT:
11493 if (recog_memoized (insn) >= 0
11494 && recog_memoized (dep) >= 0)
11496 if (GET_CODE (PATTERN (insn)) == SET)
11498 if (GET_MODE_CLASS
11499 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11500 || GET_MODE_CLASS
11501 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11503 enum attr_type attr_type_insn = get_attr_type (insn);
11504 enum attr_type attr_type_dep = get_attr_type (dep);
11506 /* By default all dependencies of the form
11507 s0 = s0 <op> s1
11508 s0 = s0 <op> s2
11509 have an extra latency of 1 cycle because
11510 of the input and output dependency in this
11511 case. However, this gets modeled as a true
11512 dependency and hence all these checks. */
11513 if (REG_P (SET_DEST (PATTERN (insn)))
11514 && REG_P (SET_DEST (PATTERN (dep)))
11515 && reg_overlap_mentioned_p (SET_DEST (PATTERN (insn)),
11516 SET_DEST (PATTERN (dep))))
11518 /* FMACS is a special case where the dependent
11519 instruction can be issued 3 cycles before
11520 the normal latency in case of an output
11521 dependency. */
11522 if ((attr_type_insn == TYPE_FMACS
11523 || attr_type_insn == TYPE_FMACD)
11524 && (attr_type_dep == TYPE_FMACS
11525 || attr_type_dep == TYPE_FMACD))
11527 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11528 *cost = insn_default_latency (dep) - 3;
11529 else
11530 *cost = insn_default_latency (dep);
11531 return false;
11533 else
11535 if (REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11536 *cost = insn_default_latency (dep) + 1;
11537 else
11538 *cost = insn_default_latency (dep);
11540 return false;
11545 break;
11547 default:
11548 gcc_unreachable ();
11551 return true;
11554 /* Adjust cost hook for FA726TE. */
11555 static bool
11556 fa726te_sched_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int * cost)
11558 /* For FA726TE, a true dependency on CPSR (i.e. a condition-setting insn
11559 followed by a predicated one) has a penalty of 3. */
11560 if (REG_NOTE_KIND (link) == REG_DEP_TRUE
11561 && recog_memoized (insn) >= 0
11562 && recog_memoized (dep) >= 0
11563 && get_attr_conds (dep) == CONDS_SET)
11565 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11566 if (get_attr_conds (insn) == CONDS_USE
11567 && get_attr_type (insn) != TYPE_BRANCH)
11569 *cost = 3;
11570 return false;
11573 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11574 || get_attr_conds (insn) == CONDS_USE)
11576 *cost = 0;
11577 return false;
11581 return true;
11584 /* Implement TARGET_REGISTER_MOVE_COST.
11586 A move between VFP_REGS and GENERAL_REGS is a single insn, but
11587 it is typically more expensive than a single memory access. We set
11588 the cost to less than two memory accesses so that floating
11589 point to integer conversion does not go through memory. */
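/* With arm_memory_move_cost returning 10 per access on TARGET_32BIT, a
   spill/reload round trip would cost 20, so the value 15 used below keeps
   VFP<->core moves cheaper than going through memory.  */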
11592 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11593 reg_class_t from, reg_class_t to)
11595 if (TARGET_32BIT)
11597 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11598 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11599 return 15;
11600 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11601 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11602 return 4;
11603 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11604 return 20;
11605 else
11606 return 2;
11608 else
11610 if (from == HI_REGS || to == HI_REGS)
11611 return 4;
11612 else
11613 return 2;
11617 /* Implement TARGET_MEMORY_MOVE_COST. */
11620 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11621 bool in ATTRIBUTE_UNUSED)
11623 if (TARGET_32BIT)
11624 return 10;
11625 else
11627 if (GET_MODE_SIZE (mode) < 4)
11628 return 8;
11629 else
11630 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
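/* Illustration of the Thumb-1 formula above: an SImode move costs 8 via
   LO_REGS and 16 via any other class, while sub-word modes take the flat
   cost of 8 from the branch above.  */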
11634 /* Vectorizer cost model implementation. */
11636 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11637 static int
11638 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11639 tree vectype,
11640 int misalign ATTRIBUTE_UNUSED)
11642 unsigned elements;
11644 switch (type_of_cost)
11646 case scalar_stmt:
11647 return current_tune->vec_costs->scalar_stmt_cost;
11649 case scalar_load:
11650 return current_tune->vec_costs->scalar_load_cost;
11652 case scalar_store:
11653 return current_tune->vec_costs->scalar_store_cost;
11655 case vector_stmt:
11656 return current_tune->vec_costs->vec_stmt_cost;
11658 case vector_load:
11659 return current_tune->vec_costs->vec_align_load_cost;
11661 case vector_store:
11662 return current_tune->vec_costs->vec_store_cost;
11664 case vec_to_scalar:
11665 return current_tune->vec_costs->vec_to_scalar_cost;
11667 case scalar_to_vec:
11668 return current_tune->vec_costs->scalar_to_vec_cost;
11670 case unaligned_load:
11671 return current_tune->vec_costs->vec_unalign_load_cost;
11673 case unaligned_store:
11674 return current_tune->vec_costs->vec_unalign_store_cost;
11676 case cond_branch_taken:
11677 return current_tune->vec_costs->cond_taken_branch_cost;
11679 case cond_branch_not_taken:
11680 return current_tune->vec_costs->cond_not_taken_branch_cost;
11682 case vec_perm:
11683 case vec_promote_demote:
11684 return current_tune->vec_costs->vec_stmt_cost;
11686 case vec_construct:
11687 elements = TYPE_VECTOR_SUBPARTS (vectype);
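/* E.g. constructing a four-element vector is costed as 4 / 2 + 1 = 3.  */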
11688 return elements / 2 + 1;
11690 default:
11691 gcc_unreachable ();
11695 /* Implement targetm.vectorize.add_stmt_cost. */
11697 static unsigned
11698 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11699 struct _stmt_vec_info *stmt_info, int misalign,
11700 enum vect_cost_model_location where)
11702 unsigned *cost = (unsigned *) data;
11703 unsigned retval = 0;
11705 if (flag_vect_cost_model)
11707 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11708 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11710 /* Statements in an inner loop relative to the loop being
11711 vectorized are weighted more heavily. The value here is
11712 arbitrary and could potentially be improved with analysis. */
11713 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11714 count *= 50; /* FIXME. */
11716 retval = (unsigned) (count * stmt_cost);
11717 cost[where] += retval;
11720 return retval;
11723 /* Return true if and only if this insn can dual-issue only as older. */
11724 static bool
11725 cortexa7_older_only (rtx_insn *insn)
11727 if (recog_memoized (insn) < 0)
11728 return false;
11730 switch (get_attr_type (insn))
11732 case TYPE_ALU_DSP_REG:
11733 case TYPE_ALU_SREG:
11734 case TYPE_ALUS_SREG:
11735 case TYPE_LOGIC_REG:
11736 case TYPE_LOGICS_REG:
11737 case TYPE_ADC_REG:
11738 case TYPE_ADCS_REG:
11739 case TYPE_ADR:
11740 case TYPE_BFM:
11741 case TYPE_REV:
11742 case TYPE_MVN_REG:
11743 case TYPE_SHIFT_IMM:
11744 case TYPE_SHIFT_REG:
11745 case TYPE_LOAD_BYTE:
11746 case TYPE_LOAD1:
11747 case TYPE_STORE1:
11748 case TYPE_FFARITHS:
11749 case TYPE_FADDS:
11750 case TYPE_FFARITHD:
11751 case TYPE_FADDD:
11752 case TYPE_FMOV:
11753 case TYPE_F_CVT:
11754 case TYPE_FCMPS:
11755 case TYPE_FCMPD:
11756 case TYPE_FCONSTS:
11757 case TYPE_FCONSTD:
11758 case TYPE_FMULS:
11759 case TYPE_FMACS:
11760 case TYPE_FMULD:
11761 case TYPE_FMACD:
11762 case TYPE_FDIVS:
11763 case TYPE_FDIVD:
11764 case TYPE_F_MRC:
11765 case TYPE_F_MRRC:
11766 case TYPE_F_FLAG:
11767 case TYPE_F_LOADS:
11768 case TYPE_F_STORES:
11769 return true;
11770 default:
11771 return false;
11775 /* Return true if and only if this insn can dual-issue as younger. */
11776 static bool
11777 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11779 if (recog_memoized (insn) < 0)
11781 if (verbose > 5)
11782 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11783 return false;
11786 switch (get_attr_type (insn))
11788 case TYPE_ALU_IMM:
11789 case TYPE_ALUS_IMM:
11790 case TYPE_LOGIC_IMM:
11791 case TYPE_LOGICS_IMM:
11792 case TYPE_EXTEND:
11793 case TYPE_MVN_IMM:
11794 case TYPE_MOV_IMM:
11795 case TYPE_MOV_REG:
11796 case TYPE_MOV_SHIFT:
11797 case TYPE_MOV_SHIFT_REG:
11798 case TYPE_BRANCH:
11799 case TYPE_CALL:
11800 return true;
11801 default:
11802 return false;
11807 /* Look for an instruction that can dual issue only as an older
11808 instruction, and move it in front of any instructions that can
11809 dual-issue as younger, while preserving the relative order of all
11810 other instructions in the ready list. This is a heuristic to help
11811 dual-issue in later cycles, by postponing issue of more flexible
11812 instructions. This heuristic may affect dual issue opportunities
11813 in the current cycle. */
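/* For example, if the ready list holds a load (which can dual-issue only as
   older, see cortexa7_older_only) behind a mov-immediate (which can issue as
   younger, see cortexa7_younger), the load is hoisted in front of the mov so
   that the mov remains available to pair as the younger insn of a dual-issue
   in a later cycle.  */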
11814 static void
11815 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11816 int *n_readyp, int clock)
11818 int i;
11819 int first_older_only = -1, first_younger = -1;
11821 if (verbose > 5)
11822 fprintf (file,
11823 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11824 clock,
11825 *n_readyp);
11827 /* Traverse the ready list from the head (the instruction to issue
11828 first), looking for the first instruction that can issue as
11829 younger and the first instruction that can dual-issue only as
11830 older. */
11831 for (i = *n_readyp - 1; i >= 0; i--)
11833 rtx_insn *insn = ready[i];
11834 if (cortexa7_older_only (insn))
11836 first_older_only = i;
11837 if (verbose > 5)
11838 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11839 break;
11841 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11842 first_younger = i;
11845 /* Nothing to reorder because either no younger insn found or insn
11846 that can dual-issue only as older appears before any insn that
11847 can dual-issue as younger. */
11848 if (first_younger == -1)
11850 if (verbose > 5)
11851 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11852 return;
11855 /* Nothing to reorder because no older-only insn in the ready list. */
11856 if (first_older_only == -1)
11858 if (verbose > 5)
11859 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11860 return;
11863 /* Move first_older_only insn before first_younger. */
11864 if (verbose > 5)
11865 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11866 INSN_UID(ready [first_older_only]),
11867 INSN_UID(ready [first_younger]));
11868 rtx_insn *first_older_only_insn = ready [first_older_only];
11869 for (i = first_older_only; i < first_younger; i++)
11871 ready[i] = ready[i+1];
11874 ready[i] = first_older_only_insn;
11875 return;
11878 /* Implement TARGET_SCHED_REORDER. */
11879 static int
11880 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11881 int clock)
11883 switch (arm_tune)
11885 case cortexa7:
11886 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11887 break;
11888 default:
11889 /* Do nothing for other cores. */
11890 break;
11893 return arm_issue_rate ();
11896 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11897 It corrects the value of COST based on the relationship between
11898 INSN and DEP through the dependence LINK. It returns the new
11899 value. There is a per-core adjust_cost hook to adjust scheduler costs
11900 and the per-core hook can choose to completely override the generic
11901 adjust_cost function. Only put bits of code into arm_adjust_cost that
11902 are common across all cores. */
11903 static int
11904 arm_adjust_cost (rtx_insn *insn, rtx link, rtx_insn *dep, int cost)
11906 rtx i_pat, d_pat;
11908 /* When generating Thumb-1 code, we want to place flag-setting operations
11909 close to a conditional branch which depends on them, so that we can
11910 omit the comparison. */
11911 if (TARGET_THUMB1
11912 && REG_NOTE_KIND (link) == 0
11913 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11914 && recog_memoized (dep) >= 0
11915 && get_attr_conds (dep) == CONDS_SET)
11916 return 0;
11918 if (current_tune->sched_adjust_cost != NULL)
11920 if (!current_tune->sched_adjust_cost (insn, link, dep, &cost))
11921 return cost;
11924 /* XXX Is this strictly true? */
11925 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
11926 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
11927 return 0;
11929 /* Call insns don't incur a stall, even if they follow a load. */
11930 if (REG_NOTE_KIND (link) == 0
11931 && CALL_P (insn))
11932 return 1;
11934 if ((i_pat = single_set (insn)) != NULL
11935 && MEM_P (SET_SRC (i_pat))
11936 && (d_pat = single_set (dep)) != NULL
11937 && MEM_P (SET_DEST (d_pat)))
11939 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11940 /* This is a load after a store; there is no conflict if the load reads
11941 from a cached area. Assume that loads from the stack, and from the
11942 constant pool are cached, and that others will miss. This is a
11943 hack. */
11945 if ((GET_CODE (src_mem) == SYMBOL_REF
11946 && CONSTANT_POOL_ADDRESS_P (src_mem))
11947 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11948 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11949 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11950 return 1;
11953 return cost;
11957 arm_max_conditional_execute (void)
11959 return max_insns_skipped;
11962 static int
11963 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11965 if (TARGET_32BIT)
11966 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11967 else
11968 return (optimize > 0) ? 2 : 0;
11971 static int
11972 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11974 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11977 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11978 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11979 sequences of non-executed instructions in IT blocks probably take the same
11980 amount of time as executed instructions (and the IT instruction itself takes
11981 space in icache). This function was experimentally determined to give good
11982 results on a popular embedded benchmark. */
11984 static int
11985 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11987 return (TARGET_32BIT && speed_p) ? 1
11988 : arm_default_branch_cost (speed_p, predictable_p);
11991 static int
11992 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11994 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11997 static bool fp_consts_inited = false;
11999 static REAL_VALUE_TYPE value_fp0;
12001 static void
12002 init_fp_table (void)
12004 REAL_VALUE_TYPE r;
12006 r = REAL_VALUE_ATOF ("0", DFmode);
12007 value_fp0 = r;
12008 fp_consts_inited = true;
12011 /* Return TRUE if rtx X is a valid immediate FP constant. */
12013 arm_const_double_rtx (rtx x)
12015 REAL_VALUE_TYPE r;
12017 if (!fp_consts_inited)
12018 init_fp_table ();
12020 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12021 if (REAL_VALUE_MINUS_ZERO (r))
12022 return 0;
12024 if (REAL_VALUES_EQUAL (r, value_fp0))
12025 return 1;
12027 return 0;
12030 /* VFPv3 has a fairly wide range of representable immediates, formed from
12031 "quarter-precision" floating-point values. These can be evaluated using this
12032 formula (with ^ for exponentiation):
12034 (-1)^s * n * 2^-r
12036 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
12037 16 <= n <= 31 and 0 <= r <= 7.
12039 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
12041 - A (most-significant) is the sign bit.
12042 - BCD are the exponent (encoded as r XOR 3).
12043 - EFGH are the mantissa (encoded as n - 16).
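   For example, the value 1.0 has s = 0, n = 16 and r = 4, so it is encoded
   as ABCDEFGH = 0111 0000, i.e. the 8-bit value 0x70.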
12046 /* Return an integer index for a VFPv3 immediate operand X suitable for the
12047 fconst[sd] instruction, or -1 if X isn't suitable. */
12048 static int
12049 vfp3_const_double_index (rtx x)
12051 REAL_VALUE_TYPE r, m;
12052 int sign, exponent;
12053 unsigned HOST_WIDE_INT mantissa, mant_hi;
12054 unsigned HOST_WIDE_INT mask;
12055 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
12056 bool fail;
12058 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
12059 return -1;
12061 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
12063 /* We can't represent these things, so detect them first. */
12064 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
12065 return -1;
12067 /* Extract sign, exponent and mantissa. */
12068 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
12069 r = real_value_abs (&r);
12070 exponent = REAL_EXP (&r);
12071 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
12072 highest (sign) bit, with a fixed binary point at bit point_pos.
12073 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
12074 bits for the mantissa, this may fail (low bits would be lost). */
12075 real_ldexp (&m, &r, point_pos - exponent);
12076 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
12077 mantissa = w.elt (0);
12078 mant_hi = w.elt (1);
12080 /* If there are bits set in the low part of the mantissa, we can't
12081 represent this value. */
12082 if (mantissa != 0)
12083 return -1;
12085 /* Now make it so that mantissa contains the most-significant bits, and move
12086 the point_pos to indicate that the least-significant bits have been
12087 discarded. */
12088 point_pos -= HOST_BITS_PER_WIDE_INT;
12089 mantissa = mant_hi;
12091 /* We can permit four significant bits of mantissa only, plus a high bit
12092 which is always 1. */
12093 mask = ((unsigned HOST_WIDE_INT)1 << (point_pos - 5)) - 1;
12094 if ((mantissa & mask) != 0)
12095 return -1;
12097 /* Now we know the mantissa is in range, chop off the unneeded bits. */
12098 mantissa >>= point_pos - 5;
12100 /* The mantissa may be zero. Disallow that case. (It's possible to load the
12101 floating-point immediate zero with Neon using an integer-zero load, but
12102 that case is handled elsewhere.) */
12103 if (mantissa == 0)
12104 return -1;
12106 gcc_assert (mantissa >= 16 && mantissa <= 31);
12108 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
12109 normalized significands are in the range [1, 2). (Our mantissa is shifted
12110 left 4 places at this point relative to normalized IEEE754 values). GCC
12111 internally uses [0.5, 1) (see real.c), so the exponent returned from
12112 REAL_EXP must be altered. */
12113 exponent = 5 - exponent;
12115 if (exponent < 0 || exponent > 7)
12116 return -1;
12118 /* Sign, mantissa and exponent are now in the correct form to plug into the
12119 formula described in the comment above. */
12120 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
12123 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
12125 vfp3_const_double_rtx (rtx x)
12127 if (!TARGET_VFP3)
12128 return 0;
12130 return vfp3_const_double_index (x) != -1;
12133 /* Recognize immediates which can be used in various Neon instructions. Legal
12134 immediates are described by the following table (for VMVN variants, the
12135 bitwise inverse of the constant shown is recognized. In either case, VMOV
12136 is output and the correct instruction to use for a given constant is chosen
12137 by the assembler). The constant shown is replicated across all elements of
12138 the destination vector.
12140 insn elems variant constant (binary)
12141 ---- ----- ------- -----------------
12142 vmov i32 0 00000000 00000000 00000000 abcdefgh
12143 vmov i32 1 00000000 00000000 abcdefgh 00000000
12144 vmov i32 2 00000000 abcdefgh 00000000 00000000
12145 vmov i32 3 abcdefgh 00000000 00000000 00000000
12146 vmov i16 4 00000000 abcdefgh
12147 vmov i16 5 abcdefgh 00000000
12148 vmvn i32 6 00000000 00000000 00000000 abcdefgh
12149 vmvn i32 7 00000000 00000000 abcdefgh 00000000
12150 vmvn i32 8 00000000 abcdefgh 00000000 00000000
12151 vmvn i32 9 abcdefgh 00000000 00000000 00000000
12152 vmvn i16 10 00000000 abcdefgh
12153 vmvn i16 11 abcdefgh 00000000
12154 vmov i32 12 00000000 00000000 abcdefgh 11111111
12155 vmvn i32 13 00000000 00000000 abcdefgh 11111111
12156 vmov i32 14 00000000 abcdefgh 11111111 11111111
12157 vmvn i32 15 00000000 abcdefgh 11111111 11111111
12158 vmov i8 16 abcdefgh
12159 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
12160 eeeeeeee ffffffff gggggggg hhhhhhhh
12161 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
12162 vmov f32 19 00000000 00000000 00000000 00000000
12164 For case 18, B = !b. Representable values are exactly those accepted by
12165 vfp3_const_double_index, but are output as floating-point numbers rather
12166 than indices.
12168 For case 19, we will change it to vmov.i32 when assembling.
12170 Variants 0-5 (inclusive) may also be used as immediates for the second
12171 operand of VORR/VBIC instructions.
12173 The INVERSE argument causes the bitwise inverse of the given operand to be
12174 recognized instead (used for recognizing legal immediates for the VAND/VORN
12175 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
12176 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
12177 output, rather than the real insns vbic/vorr).
12179 INVERSE makes no difference to the recognition of float vectors.
12181 The return value is the variant of immediate as shown in the above table, or
12182 -1 if the given value doesn't match any of the listed patterns.
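   For example, a V4SI constant with every element equal to 0x0000005a
   matches variant 0 (abcdefgh = 0x5a), while a constant with every element
   equal to 0xffffffa5 (the bitwise inverse of 0x0000005a) matches
   variant 6.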
12184 static int
12185 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
12186 rtx *modconst, int *elementwidth)
12188 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
12189 matches = 1; \
12190 for (i = 0; i < idx; i += (STRIDE)) \
12191 if (!(TEST)) \
12192 matches = 0; \
12193 if (matches) \
12195 immtype = (CLASS); \
12196 elsize = (ELSIZE); \
12197 break; \
12200 unsigned int i, elsize = 0, idx = 0, n_elts;
12201 unsigned int innersize;
12202 unsigned char bytes[16];
12203 int immtype = -1, matches;
12204 unsigned int invmask = inverse ? 0xff : 0;
12205 bool vector = GET_CODE (op) == CONST_VECTOR;
12207 if (vector)
12209 n_elts = CONST_VECTOR_NUNITS (op);
12210 innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12212 else
12214 n_elts = 1;
12215 if (mode == VOIDmode)
12216 mode = DImode;
12217 innersize = GET_MODE_SIZE (mode);
12220 /* Vectors of float constants. */
12221 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12223 rtx el0 = CONST_VECTOR_ELT (op, 0);
12224 REAL_VALUE_TYPE r0;
12226 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12227 return -1;
12229 REAL_VALUE_FROM_CONST_DOUBLE (r0, el0);
12231 for (i = 1; i < n_elts; i++)
12233 rtx elt = CONST_VECTOR_ELT (op, i);
12234 REAL_VALUE_TYPE re;
12236 REAL_VALUE_FROM_CONST_DOUBLE (re, elt);
12238 if (!REAL_VALUES_EQUAL (r0, re))
12239 return -1;
12242 if (modconst)
12243 *modconst = CONST_VECTOR_ELT (op, 0);
12245 if (elementwidth)
12246 *elementwidth = 0;
12248 if (el0 == CONST0_RTX (GET_MODE (el0)))
12249 return 19;
12250 else
12251 return 18;
12254 /* Splat vector constant out into a byte vector. */
12255 for (i = 0; i < n_elts; i++)
12257 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12258 unsigned HOST_WIDE_INT elpart;
12259 unsigned int part, parts;
12261 if (CONST_INT_P (el))
12263 elpart = INTVAL (el);
12264 parts = 1;
12266 else if (CONST_DOUBLE_P (el))
12268 elpart = CONST_DOUBLE_LOW (el);
12269 parts = 2;
12271 else
12272 gcc_unreachable ();
12274 for (part = 0; part < parts; part++)
12276 unsigned int byte;
12277 for (byte = 0; byte < innersize; byte++)
12279 bytes[idx++] = (elpart & 0xff) ^ invmask;
12280 elpart >>= BITS_PER_UNIT;
12282 if (CONST_DOUBLE_P (el))
12283 elpart = CONST_DOUBLE_HIGH (el);
12287 /* Sanity check. */
12288 gcc_assert (idx == GET_MODE_SIZE (mode));
12292 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12293 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12295 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12296 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12298 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12299 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12301 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12302 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12304 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12306 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12308 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12309 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12311 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12312 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12314 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12315 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12317 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12318 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12320 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12322 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12324 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12325 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12327 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12328 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12330 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12331 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12333 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12334 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12336 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12338 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12339 && bytes[i] == bytes[(i + 8) % idx]);
12341 while (0);
12343 if (immtype == -1)
12344 return -1;
12346 if (elementwidth)
12347 *elementwidth = elsize;
12349 if (modconst)
12351 unsigned HOST_WIDE_INT imm = 0;
12353 /* Un-invert bytes of recognized vector, if necessary. */
12354 if (invmask != 0)
12355 for (i = 0; i < idx; i++)
12356 bytes[i] ^= invmask;
12358 if (immtype == 17)
12360 /* FIXME: Broken on 32-bit H_W_I hosts. */
12361 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12363 for (i = 0; i < 8; i++)
12364 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12365 << (i * BITS_PER_UNIT);
12367 *modconst = GEN_INT (imm);
12369 else
12371 unsigned HOST_WIDE_INT imm = 0;
12373 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12374 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12376 *modconst = GEN_INT (imm);
12380 return immtype;
12381 #undef CHECK
12384 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12385 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12386 float elements), and a modified constant (whatever should be output for a
12387 VMOV) in *MODCONST. */
12390 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12391 rtx *modconst, int *elementwidth)
12393 rtx tmpconst;
12394 int tmpwidth;
12395 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12397 if (retval == -1)
12398 return 0;
12400 if (modconst)
12401 *modconst = tmpconst;
12403 if (elementwidth)
12404 *elementwidth = tmpwidth;
12406 return 1;
12409 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12410 the immediate is valid, write a constant suitable for using as an operand
12411 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12412 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12415 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12416 rtx *modconst, int *elementwidth)
12418 rtx tmpconst;
12419 int tmpwidth;
12420 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12422 if (retval < 0 || retval > 5)
12423 return 0;
12425 if (modconst)
12426 *modconst = tmpconst;
12428 if (elementwidth)
12429 *elementwidth = tmpwidth;
12431 return 1;
12434 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12435 the immediate is valid, write a constant suitable for using as an operand
12436 to VSHR/VSHL to *MODCONST and the corresponding element width to
12437 *ELEMENTWIDTH. ISLEFTSHIFT is true for a left shift and false for a right
12438 shift; the two forms have different immediate limitations. */
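/* For example, with V8QImode (8-bit elements) a left-shift immediate must be
   in the range 0-7, while a right-shift immediate must be in the range
   1-8.  */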
12441 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12442 rtx *modconst, int *elementwidth,
12443 bool isleftshift)
12445 unsigned int innersize = GET_MODE_SIZE (GET_MODE_INNER (mode));
12446 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12447 unsigned HOST_WIDE_INT last_elt = 0;
12448 unsigned HOST_WIDE_INT maxshift;
12450 /* Split vector constant out into a byte vector. */
12451 for (i = 0; i < n_elts; i++)
12453 rtx el = CONST_VECTOR_ELT (op, i);
12454 unsigned HOST_WIDE_INT elpart;
12456 if (CONST_INT_P (el))
12457 elpart = INTVAL (el);
12458 else if (CONST_DOUBLE_P (el))
12459 return 0;
12460 else
12461 gcc_unreachable ();
12463 if (i != 0 && elpart != last_elt)
12464 return 0;
12466 last_elt = elpart;
12469 /* Shift less than element size. */
12470 maxshift = innersize * 8;
12472 if (isleftshift)
12474 /* Left shift immediate value can be from 0 to <size>-1. */
12475 if (last_elt >= maxshift)
12476 return 0;
12478 else
12480 /* Right shift immediate value can be from 1 to <size>. */
12481 if (last_elt == 0 || last_elt > maxshift)
12482 return 0;
12485 if (elementwidth)
12486 *elementwidth = innersize * 8;
12488 if (modconst)
12489 *modconst = CONST_VECTOR_ELT (op, 0);
12491 return 1;
12494 /* Return a string suitable for output of Neon immediate logic operation
12495 MNEM. */
12497 char *
12498 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12499 int inverse, int quad)
12501 int width, is_valid;
12502 static char templ[40];
12504 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12506 gcc_assert (is_valid != 0);
12508 if (quad)
12509 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12510 else
12511 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12513 return templ;
12516 /* Return a string suitable for output of Neon immediate shift operation
12517 (VSHR or VSHL) MNEM. */
12519 char *
12520 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12521 machine_mode mode, int quad,
12522 bool isleftshift)
12524 int width, is_valid;
12525 static char templ[40];
12527 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12528 gcc_assert (is_valid != 0);
12530 if (quad)
12531 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12532 else
12533 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12535 return templ;
12538 /* Output a sequence of pairwise operations to implement a reduction.
12539 NOTE: We do "too much work" here, because pairwise operations work on two
12540 registers-worth of operands in one go. Unfortunately I don't think we can
12541 exploit those extra calculations to do the full operation in fewer steps.
12542 Although all vector elements of the result but the first are ignored, we
12543 actually calculate the same result in each of the elements. An alternative
12544 such as initially loading a vector with zero to use as each of the second
12545 operands would use up an additional register and take an extra instruction,
12546 for no particular gain. */
12548 void
12549 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12550 rtx (*reduc) (rtx, rtx, rtx))
12552 machine_mode inner = GET_MODE_INNER (mode);
12553 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (inner);
12554 rtx tmpsum = op1;
12556 for (i = parts / 2; i >= 1; i /= 2)
12558 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12559 emit_insn (reduc (dest, tmpsum, tmpsum));
12560 tmpsum = dest;
12564 /* If VALS is a vector constant that can be loaded into a register
12565 using VDUP, generate instructions to do so and return an RTX to
12566 assign to the register. Otherwise return NULL_RTX. */
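/* For example, the V4SI constant { 5, 5, 5, 5 } can be materialized by
   moving 5 into a core register and broadcasting it with a single vdup.32,
   which is cheaper than loading the whole vector from memory.  */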
12568 static rtx
12569 neon_vdup_constant (rtx vals)
12571 machine_mode mode = GET_MODE (vals);
12572 machine_mode inner_mode = GET_MODE_INNER (mode);
12573 int n_elts = GET_MODE_NUNITS (mode);
12574 bool all_same = true;
12575 rtx x;
12576 int i;
12578 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12579 return NULL_RTX;
12581 for (i = 0; i < n_elts; ++i)
12583 x = XVECEXP (vals, 0, i);
12584 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12585 all_same = false;
12588 if (!all_same)
12589 /* The elements are not all the same. We could handle repeating
12590 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12591 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12592 vdup.i16). */
12593 return NULL_RTX;
12595 /* We can load this constant by using VDUP and a constant in a
12596 single ARM register. This will be cheaper than a vector
12597 load. */
12599 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12600 return gen_rtx_VEC_DUPLICATE (mode, x);
12603 /* Generate code to load VALS, which is a PARALLEL containing only
12604 constants (for vec_init) or CONST_VECTOR, efficiently into a
12605 register. Returns an RTX to copy into the register, or NULL_RTX
12606 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
12609 neon_make_constant (rtx vals)
12611 machine_mode mode = GET_MODE (vals);
12612 rtx target;
12613 rtx const_vec = NULL_RTX;
12614 int n_elts = GET_MODE_NUNITS (mode);
12615 int n_const = 0;
12616 int i;
12618 if (GET_CODE (vals) == CONST_VECTOR)
12619 const_vec = vals;
12620 else if (GET_CODE (vals) == PARALLEL)
12622 /* A CONST_VECTOR must contain only CONST_INTs and
12623 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12624 Only store valid constants in a CONST_VECTOR. */
12625 for (i = 0; i < n_elts; ++i)
12627 rtx x = XVECEXP (vals, 0, i);
12628 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12629 n_const++;
12631 if (n_const == n_elts)
12632 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12634 else
12635 gcc_unreachable ();
12637 if (const_vec != NULL
12638 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12639 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12640 return const_vec;
12641 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12642 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12643 pipeline cycle; creating the constant takes one or two ARM
12644 pipeline cycles. */
12645 return target;
12646 else if (const_vec != NULL_RTX)
12647 /* Load from constant pool. On Cortex-A8 this takes two cycles
12648 (for either double or quad vectors). We cannot take advantage
12649 of single-cycle VLD1 because we need a PC-relative addressing
12650 mode. */
12651 return const_vec;
12652 else
12653 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12654 We cannot construct an initializer. */
12655 return NULL_RTX;
12658 /* Initialize vector TARGET to VALS. */
12660 void
12661 neon_expand_vector_init (rtx target, rtx vals)
12663 machine_mode mode = GET_MODE (target);
12664 machine_mode inner_mode = GET_MODE_INNER (mode);
12665 int n_elts = GET_MODE_NUNITS (mode);
12666 int n_var = 0, one_var = -1;
12667 bool all_same = true;
12668 rtx x, mem;
12669 int i;
12671 for (i = 0; i < n_elts; ++i)
12673 x = XVECEXP (vals, 0, i);
12674 if (!CONSTANT_P (x))
12675 ++n_var, one_var = i;
12677 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12678 all_same = false;
12681 if (n_var == 0)
12683 rtx constant = neon_make_constant (vals);
12684 if (constant != NULL_RTX)
12686 emit_move_insn (target, constant);
12687 return;
12691 /* Splat a single non-constant element if we can. */
12692 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12694 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12695 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
12696 return;
12699 /* One field is non-constant. Load constant then overwrite varying
12700 field. This is more efficient than using the stack. */
12701 if (n_var == 1)
12703 rtx copy = copy_rtx (vals);
12704 rtx index = GEN_INT (one_var);
12706 /* Load constant part of vector, substitute neighboring value for
12707 varying element. */
12708 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12709 neon_expand_vector_init (target, copy);
12711 /* Insert variable. */
12712 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12713 switch (mode)
12715 case V8QImode:
12716 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12717 break;
12718 case V16QImode:
12719 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12720 break;
12721 case V4HImode:
12722 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12723 break;
12724 case V8HImode:
12725 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12726 break;
12727 case V2SImode:
12728 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12729 break;
12730 case V4SImode:
12731 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12732 break;
12733 case V2SFmode:
12734 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12735 break;
12736 case V4SFmode:
12737 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12738 break;
12739 case V2DImode:
12740 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12741 break;
12742 default:
12743 gcc_unreachable ();
12745 return;
12748 /* Construct the vector in memory one field at a time
12749 and load the whole vector. */
12750 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12751 for (i = 0; i < n_elts; i++)
12752 emit_move_insn (adjust_address_nv (mem, inner_mode,
12753 i * GET_MODE_SIZE (inner_mode)),
12754 XVECEXP (vals, 0, i));
12755 emit_move_insn (target, mem);
12758 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12759 ERR if it doesn't. FIXME: NEON bounds checks occur late in compilation, so
12760 reported source locations are bogus. */
12762 static void
12763 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12764 const char *err)
12766 HOST_WIDE_INT lane;
12768 gcc_assert (CONST_INT_P (operand));
12770 lane = INTVAL (operand);
12772 if (lane < low || lane >= high)
12773 error (err);
12776 /* Bounds-check lanes. */
12778 void
12779 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12781 bounds_check (operand, low, high, "lane out of range");
12784 /* Bounds-check constants. */
12786 void
12787 neon_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12789 bounds_check (operand, low, high, "constant out of range");
12792 HOST_WIDE_INT
12793 neon_element_bits (machine_mode mode)
12795 if (mode == DImode)
12796 return GET_MODE_BITSIZE (mode);
12797 else
12798 return GET_MODE_BITSIZE (GET_MODE_INNER (mode));
12802 /* Predicates for `match_operand' and `match_operator'. */
12804 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12805 WB is true if full writeback address modes are allowed and is false
12806 if limited writeback address modes (POST_INC and PRE_DEC) are
12807 allowed. */
12810 arm_coproc_mem_operand (rtx op, bool wb)
12812 rtx ind;
12814 /* Reject eliminable registers. */
12815 if (! (reload_in_progress || reload_completed || lra_in_progress)
12816 && ( reg_mentioned_p (frame_pointer_rtx, op)
12817 || reg_mentioned_p (arg_pointer_rtx, op)
12818 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12819 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12820 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12821 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12822 return FALSE;
12824 /* Constants are converted into offsets from labels. */
12825 if (!MEM_P (op))
12826 return FALSE;
12828 ind = XEXP (op, 0);
12830 if (reload_completed
12831 && (GET_CODE (ind) == LABEL_REF
12832 || (GET_CODE (ind) == CONST
12833 && GET_CODE (XEXP (ind, 0)) == PLUS
12834 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12835 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12836 return TRUE;
12838 /* Match: (mem (reg)). */
12839 if (REG_P (ind))
12840 return arm_address_register_rtx_p (ind, 0);
12842 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12843 acceptable in any case (subject to verification by
12844 arm_address_register_rtx_p). We need WB to be true to accept
12845 PRE_INC and POST_DEC. */
12846 if (GET_CODE (ind) == POST_INC
12847 || GET_CODE (ind) == PRE_DEC
12848 || (wb
12849 && (GET_CODE (ind) == PRE_INC
12850 || GET_CODE (ind) == POST_DEC)))
12851 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12853 if (wb
12854 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12855 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12856 && GET_CODE (XEXP (ind, 1)) == PLUS
12857 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12858 ind = XEXP (ind, 1);
12860 /* Match:
12861 (plus (reg)
12862 (const)). */
12863 if (GET_CODE (ind) == PLUS
12864 && REG_P (XEXP (ind, 0))
12865 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12866 && CONST_INT_P (XEXP (ind, 1))
12867 && INTVAL (XEXP (ind, 1)) > -1024
12868 && INTVAL (XEXP (ind, 1)) < 1024
12869 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12870 return TRUE;
12872 return FALSE;
12875 /* Return TRUE if OP is a memory operand which we can load or store a vector
12876 to/from. TYPE is one of the following values:
12877 0 - Vector load/store (vldr)
12878 1 - Core registers (ldm)
12879 2 - Element/structure loads (vld1)
12882 neon_vector_mem_operand (rtx op, int type, bool strict)
12884 rtx ind;
12886 /* Reject eliminable registers. */
12887 if (! (reload_in_progress || reload_completed)
12888 && ( reg_mentioned_p (frame_pointer_rtx, op)
12889 || reg_mentioned_p (arg_pointer_rtx, op)
12890 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12891 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12892 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12893 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12894 return !strict;
12896 /* Constants are converted into offsets from labels. */
12897 if (!MEM_P (op))
12898 return FALSE;
12900 ind = XEXP (op, 0);
12902 if (reload_completed
12903 && (GET_CODE (ind) == LABEL_REF
12904 || (GET_CODE (ind) == CONST
12905 && GET_CODE (XEXP (ind, 0)) == PLUS
12906 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12907 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12908 return TRUE;
12910 /* Match: (mem (reg)). */
12911 if (REG_P (ind))
12912 return arm_address_register_rtx_p (ind, 0);
12914 /* Allow post-increment with Neon registers. */
12915 if ((type != 1 && GET_CODE (ind) == POST_INC)
12916 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12917 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12919 /* Allow post-increment by register for VLDn */
12920 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12921 && GET_CODE (XEXP (ind, 1)) == PLUS
12922 && REG_P (XEXP (XEXP (ind, 1), 1)))
12923 return true;
12925 /* Match:
12926 (plus (reg)
12927 (const)). */
12928 if (type == 0
12929 && GET_CODE (ind) == PLUS
12930 && REG_P (XEXP (ind, 0))
12931 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12932 && CONST_INT_P (XEXP (ind, 1))
12933 && INTVAL (XEXP (ind, 1)) > -1024
12934 /* For quad modes, we restrict the constant offset to be slightly less
12935 than what the instruction format permits. We have no such constraint
12936 on double mode offsets. (This must match arm_legitimate_index_p.) */
12937 && (INTVAL (XEXP (ind, 1))
12938 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12939 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12940 return TRUE;
12942 return FALSE;
12945 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12946 type. */
12948 neon_struct_mem_operand (rtx op)
12950 rtx ind;
12952 /* Reject eliminable registers. */
12953 if (! (reload_in_progress || reload_completed)
12954 && ( reg_mentioned_p (frame_pointer_rtx, op)
12955 || reg_mentioned_p (arg_pointer_rtx, op)
12956 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12957 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12958 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12959 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12960 return FALSE;
12962 /* Constants are converted into offsets from labels. */
12963 if (!MEM_P (op))
12964 return FALSE;
12966 ind = XEXP (op, 0);
12968 if (reload_completed
12969 && (GET_CODE (ind) == LABEL_REF
12970 || (GET_CODE (ind) == CONST
12971 && GET_CODE (XEXP (ind, 0)) == PLUS
12972 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12973 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12974 return TRUE;
12976 /* Match: (mem (reg)). */
12977 if (REG_P (ind))
12978 return arm_address_register_rtx_p (ind, 0);
12980 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12981 if (GET_CODE (ind) == POST_INC
12982 || GET_CODE (ind) == PRE_DEC)
12983 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12985 return FALSE;
12988 /* Return true if X is a register that will be eliminated later on. */
12990 arm_eliminable_register (rtx x)
12992 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12993 || REGNO (x) == ARG_POINTER_REGNUM
12994 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12995 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12998 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
12999 coprocessor registers. Otherwise return NO_REGS. */
13001 enum reg_class
13002 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
13004 if (mode == HFmode)
13006 if (!TARGET_NEON_FP16)
13007 return GENERAL_REGS;
13008 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
13009 return NO_REGS;
13010 return GENERAL_REGS;
13013 /* The neon move patterns handle all legitimate vector and struct
13014 addresses. */
13015 if (TARGET_NEON
13016 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
13017 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
13018 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
13019 || VALID_NEON_STRUCT_MODE (mode)))
13020 return NO_REGS;
13022 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
13023 return NO_REGS;
13025 return GENERAL_REGS;
13028 /* Values which must be returned in the most-significant end of the return
13029 register. */
13031 static bool
13032 arm_return_in_msb (const_tree valtype)
13034 return (TARGET_AAPCS_BASED
13035 && BYTES_BIG_ENDIAN
13036 && (AGGREGATE_TYPE_P (valtype)
13037 || TREE_CODE (valtype) == COMPLEX_TYPE
13038 || FIXED_POINT_TYPE_P (valtype)));
13041 /* Return TRUE if X references a SYMBOL_REF. */
13043 symbol_mentioned_p (rtx x)
13045 const char * fmt;
13046 int i;
13048 if (GET_CODE (x) == SYMBOL_REF)
13049 return 1;
13051 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
13052 are constant offsets, not symbols. */
13053 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13054 return 0;
13056 fmt = GET_RTX_FORMAT (GET_CODE (x));
13058 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13060 if (fmt[i] == 'E')
13062 int j;
13064 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13065 if (symbol_mentioned_p (XVECEXP (x, i, j)))
13066 return 1;
13068 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
13069 return 1;
13072 return 0;
13075 /* Return TRUE if X references a LABEL_REF. */
13077 label_mentioned_p (rtx x)
13079 const char * fmt;
13080 int i;
13082 if (GET_CODE (x) == LABEL_REF)
13083 return 1;
13085 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
13086 instruction, but they are constant offsets, not symbols. */
13087 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
13088 return 0;
13090 fmt = GET_RTX_FORMAT (GET_CODE (x));
13091 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
13093 if (fmt[i] == 'E')
13095 int j;
13097 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
13098 if (label_mentioned_p (XVECEXP (x, i, j)))
13099 return 1;
13101 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
13102 return 1;
13105 return 0;
13109 tls_mentioned_p (rtx x)
13111 switch (GET_CODE (x))
13113 case CONST:
13114 return tls_mentioned_p (XEXP (x, 0));
13116 case UNSPEC:
13117 if (XINT (x, 1) == UNSPEC_TLS)
13118 return 1;
13120 default:
13121 return 0;
13125 /* Must not copy any rtx that uses a pc-relative address. */
13127 static bool
13128 arm_cannot_copy_insn_p (rtx_insn *insn)
13130 /* The tls call insn cannot be copied, as it is paired with a data
13131 word. */
13132 if (recog_memoized (insn) == CODE_FOR_tlscall)
13133 return true;
13135 subrtx_iterator::array_type array;
13136 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
13138 const_rtx x = *iter;
13139 if (GET_CODE (x) == UNSPEC
13140 && (XINT (x, 1) == UNSPEC_PIC_BASE
13141 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
13142 return true;
13144 return false;
13147 enum rtx_code
13148 minmax_code (rtx x)
13150 enum rtx_code code = GET_CODE (x);
13152 switch (code)
13154 case SMAX:
13155 return GE;
13156 case SMIN:
13157 return LE;
13158 case UMIN:
13159 return LEU;
13160 case UMAX:
13161 return GEU;
13162 default:
13163 gcc_unreachable ();
13167 /* Match pair of min/max operators that can be implemented via usat/ssat. */
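/* For example, clamping to [0, 255] (an smax/smin pair with bounds 0 and 255)
   matches usat with a mask of 8, while clamping to [-128, 127] matches ssat
   with a mask of 8.  */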
13169 bool
13170 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
13171 int *mask, bool *signed_sat)
13173 /* The high bound must be a power of two minus one. */
13174 int log = exact_log2 (INTVAL (hi_bound) + 1);
13175 if (log == -1)
13176 return false;
13178 /* The low bound is either zero (for usat) or one less than the
13179 negation of the high bound (for ssat). */
13180 if (INTVAL (lo_bound) == 0)
13182 if (mask)
13183 *mask = log;
13184 if (signed_sat)
13185 *signed_sat = false;
13187 return true;
13190 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
13192 if (mask)
13193 *mask = log + 1;
13194 if (signed_sat)
13195 *signed_sat = true;
13197 return true;
13200 return false;
13203 /* Return 1 if memory locations are adjacent. */
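/* For example, references such as [r3, #4] and [r3, #8] use the same base
   register with offsets that differ by exactly 4, which is the basic
   property checked here (subject to the additional per-core restrictions
   in the body below).  */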
13205 adjacent_mem_locations (rtx a, rtx b)
13207 /* We don't guarantee to preserve the order of these memory refs. */
13208 if (volatile_refs_p (a) || volatile_refs_p (b))
13209 return 0;
13211 if ((REG_P (XEXP (a, 0))
13212 || (GET_CODE (XEXP (a, 0)) == PLUS
13213 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13214 && (REG_P (XEXP (b, 0))
13215 || (GET_CODE (XEXP (b, 0)) == PLUS
13216 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13218 HOST_WIDE_INT val0 = 0, val1 = 0;
13219 rtx reg0, reg1;
13220 int val_diff;
13222 if (GET_CODE (XEXP (a, 0)) == PLUS)
13224 reg0 = XEXP (XEXP (a, 0), 0);
13225 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13227 else
13228 reg0 = XEXP (a, 0);
13230 if (GET_CODE (XEXP (b, 0)) == PLUS)
13232 reg1 = XEXP (XEXP (b, 0), 0);
13233 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13235 else
13236 reg1 = XEXP (b, 0);
13238 /* Don't accept any offset that will require multiple
13239 instructions to handle, since this would cause the
13240 arith_adjacentmem pattern to output an overlong sequence. */
13241 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13242 return 0;
13244 /* Don't allow an eliminable register: register elimination can make
13245 the offset too large. */
13246 if (arm_eliminable_register (reg0))
13247 return 0;
13249 val_diff = val1 - val0;
13251 if (arm_ld_sched)
13253 /* If the target has load delay slots, then there's no benefit
13254 to using an ldm instruction unless the offset is zero and
13255 we are optimizing for size. */
13256 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13257 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13258 && (val_diff == 4 || val_diff == -4));
13261 return ((REGNO (reg0) == REGNO (reg1))
13262 && (val_diff == 4 || val_diff == -4));
13265 return 0;
13268 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13269 for load operations, false for store operations. CONSECUTIVE is true
13270 if the register numbers in the operation must be consecutive in the register
13271 bank. RETURN_PC is true if the value is to be loaded into the PC.
13272 The pattern we are trying to match for load is:
13273 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13274 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13277 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13279 where
13280 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13281 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13282 3. If consecutive is TRUE, then for kth register being loaded,
13283 REGNO (R_dk) = REGNO (R_d0) + k.
13284 The pattern for store is similar. */
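/* For example, a two-register pop such as "pop {r4, r5}" corresponds roughly
   to a PARALLEL whose first element is the write-back
   (set sp (plus sp (const_int 8))), followed by (set r4 (mem sp)) and
   (set r5 (mem (plus sp (const_int 4)))).  */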
13285 bool
13286 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13287 bool consecutive, bool return_pc)
13289 HOST_WIDE_INT count = XVECLEN (op, 0);
13290 rtx reg, mem, addr;
13291 unsigned regno;
13292 unsigned first_regno;
13293 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13294 rtx elt;
13295 bool addr_reg_in_reglist = false;
13296 bool update = false;
13297 int reg_increment;
13298 int offset_adj;
13299 int regs_per_val;
13301 /* If not in SImode, then registers must be consecutive
13302 (e.g., VLDM instructions for DFmode). */
13303 gcc_assert ((mode == SImode) || consecutive);
13304 /* Setting return_pc for stores is illegal. */
13305 gcc_assert (!return_pc || load);
13307 /* Set up the increments and the regs per val based on the mode. */
13308 reg_increment = GET_MODE_SIZE (mode);
13309 regs_per_val = reg_increment / 4;
13310 offset_adj = return_pc ? 1 : 0;
13312 if (count <= 1
13313 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13314 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13315 return false;
13317 /* Check if this is a write-back. */
13318 elt = XVECEXP (op, 0, offset_adj);
13319 if (GET_CODE (SET_SRC (elt)) == PLUS)
13321 i++;
13322 base = 1;
13323 update = true;
13325 /* The offset adjustment must be the number of registers being
13326 popped times the size of a single register. */
13327 if (!REG_P (SET_DEST (elt))
13328 || !REG_P (XEXP (SET_SRC (elt), 0))
13329 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13330 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13331 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13332 ((count - 1 - offset_adj) * reg_increment))
13333 return false;
13336 i = i + offset_adj;
13337 base = base + offset_adj;
13338 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13339 success depends on the type: VLDM can do just one reg,
13340 LDM must do at least two. */
13341 if ((count <= i) && (mode == SImode))
13342 return false;
13344 elt = XVECEXP (op, 0, i - 1);
13345 if (GET_CODE (elt) != SET)
13346 return false;
13348 if (load)
13350 reg = SET_DEST (elt);
13351 mem = SET_SRC (elt);
13353 else
13355 reg = SET_SRC (elt);
13356 mem = SET_DEST (elt);
13359 if (!REG_P (reg) || !MEM_P (mem))
13360 return false;
13362 regno = REGNO (reg);
13363 first_regno = regno;
13364 addr = XEXP (mem, 0);
13365 if (GET_CODE (addr) == PLUS)
13367 if (!CONST_INT_P (XEXP (addr, 1)))
13368 return false;
13370 offset = INTVAL (XEXP (addr, 1));
13371 addr = XEXP (addr, 0);
13374 if (!REG_P (addr))
13375 return false;
13377 /* Don't allow SP to be loaded unless it is also the base register. It
13378 guarantees that SP is reset correctly when an LDM instruction
13379 is interrupted. Otherwise, we might end up with a corrupt stack. */
13380 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13381 return false;
13383 for (; i < count; i++)
13385 elt = XVECEXP (op, 0, i);
13386 if (GET_CODE (elt) != SET)
13387 return false;
13389 if (load)
13391 reg = SET_DEST (elt);
13392 mem = SET_SRC (elt);
13394 else
13396 reg = SET_SRC (elt);
13397 mem = SET_DEST (elt);
13400 if (!REG_P (reg)
13401 || GET_MODE (reg) != mode
13402 || REGNO (reg) <= regno
13403 || (consecutive
13404 && (REGNO (reg) !=
13405 (unsigned int) (first_regno + regs_per_val * (i - base))))
13406 /* Don't allow SP to be loaded unless it is also the base register. It
13407 guarantees that SP is reset correctly when an LDM instruction
13408 is interrupted. Otherwise, we might end up with a corrupt stack. */
13409 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13410 || !MEM_P (mem)
13411 || GET_MODE (mem) != mode
13412 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13413 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13414 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13415 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13416 offset + (i - base) * reg_increment))
13417 && (!REG_P (XEXP (mem, 0))
13418 || offset + (i - base) * reg_increment != 0)))
13419 return false;
13421 regno = REGNO (reg);
13422 if (regno == REGNO (addr))
13423 addr_reg_in_reglist = true;
13426 if (load)
13428 if (update && addr_reg_in_reglist)
13429 return false;
13431 /* For Thumb-1, the address register is always modified - either by write-back
13432 or by explicit load. If the pattern does not describe an update,
13433 then the address register must be in the list of loaded registers. */
13434 if (TARGET_THUMB1)
13435 return update || addr_reg_in_reglist;
13438 return true;
13441 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13442 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13443 instruction. ADD_OFFSET is nonzero if the base address register needs
13444 to be modified with an add instruction before we can use it. */
13446 static bool
13447 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13448 int nops, HOST_WIDE_INT add_offset)
13450 /* For ARM8, ARM9 and StrongARM, 2 ldr instructions are faster than an ldm
13451 if the offset isn't small enough. The reason 2 ldrs are faster
13452 is because these ARMs are able to do more than one cache access
13453 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13454 whilst the ARM8 has a double bandwidth cache. This means that
13455 these cores can do both an instruction fetch and a data fetch in
13456 a single cycle, so the trick of calculating the address into a
13457 scratch register (one of the result regs) and then doing a load
13458 multiple actually becomes slower (and no smaller in code size).
13459 That is the transformation
13461 ldr rd1, [rbase + offset]
13462 ldr rd2, [rbase + offset + 4]
13466 add rd1, rbase, offset
13467 ldmia rd1, {rd1, rd2}
13469 produces worse code -- '3 cycles + any stalls on rd2' instead of
13470 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13471 access per cycle, the first sequence could never complete in less
13472 than 6 cycles, whereas the ldm sequence would only take 5 and
13473 would make better use of sequential accesses if not hitting the
13474 cache.
13476 We cheat here and test 'arm_ld_sched' which we currently know to
13477 only be true for the ARM8, ARM9 and StrongARM. If this ever
13478 changes, then the test below needs to be reworked. */
13479 if (nops == 2 && arm_ld_sched && add_offset != 0)
13480 return false;
13482 /* XScale has load-store double instructions, but they have stricter
13483 alignment requirements than load-store multiple, so we cannot
13484 use them.
13486 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13487 the pipeline until completion.
13489 NREGS CYCLES
13490 1 3
13491 2 4
13492 3 5
13493 4 6
13495 An ldr instruction takes 1-3 cycles, but does not block the
13496 pipeline.
13498 NREGS CYCLES
13499 1 1-3
13500 2 2-6
13501 3 3-9
13502 4 4-12
13504 Best case ldr will always win. However, the more ldr instructions
13505 we issue, the less likely we are to be able to schedule them well.
13506 Using ldr instructions also increases code size.
13508 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13509 for counts of 3 or 4 regs. */
13510 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13511 return false;
13512 return true;
13515 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13516 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13517 an array ORDER which describes the sequence to use when accessing the
13518 offsets that produces an ascending order. In this sequence, each
13519 offset must be larger by exactly 4 than the previous one. ORDER[0]
13520 must have been filled in with the lowest offset by the caller.
13521 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13522 we use to verify that ORDER produces an ascending order of registers.
13523 Return true if it was possible to construct such an order, false if
13524 not. */
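/* For example, with UNSORTED_OFFSETS = {8, 4, 12, 0} the caller sets
   ORDER[0] = 3 (the index of offset 0), and this function completes it to
   ORDER = {3, 1, 0, 2}, i.e. offsets 0, 4, 8, 12.  */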
13526 static bool
13527 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13528 int *unsorted_regs)
13530 int i;
13531 for (i = 1; i < nops; i++)
13533 int j;
13535 order[i] = order[i - 1];
13536 for (j = 0; j < nops; j++)
13537 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13539 /* We must find exactly one offset that is higher than the
13540 previous one by 4. */
13541 if (order[i] != order[i - 1])
13542 return false;
13543 order[i] = j;
13545 if (order[i] == order[i - 1])
13546 return false;
13547 /* The register numbers must be ascending. */
13548 if (unsorted_regs != NULL
13549 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13550 return false;
13552 return true;
13555 /* Used to determine in a peephole whether a sequence of load
13556 instructions can be changed into a load-multiple instruction.
13557 NOPS is the number of separate load instructions we are examining. The
13558 first NOPS entries in OPERANDS are the destination registers, the
13559 next NOPS entries are memory operands. If this function is
13560 successful, *BASE is set to the common base register of the memory
13561 accesses; *LOAD_OFFSET is set to the first memory location's offset
13562 from that base register.
13563 REGS is an array filled in with the destination register numbers.
13564 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13565 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13566 the sequence of registers in REGS matches the loads from ascending memory
13567 locations, and the function verifies that the register numbers are
13568 themselves ascending. If CHECK_REGS is false, the register numbers
13569 are stored in the order they are found in the operands. */
13570 static int
13571 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13572 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13574 int unsorted_regs[MAX_LDM_STM_OPS];
13575 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13576 int order[MAX_LDM_STM_OPS];
13577 rtx base_reg_rtx = NULL;
13578 int base_reg = -1;
13579 int i, ldm_case;
13581 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13582 easily extended if required. */
13583 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13585 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13587 /* Loop over the operands and check that the memory references are
13588 suitable (i.e. immediate offsets from the same base register). At
13589 the same time, extract the target register, and the memory
13590 offsets. */
13591 for (i = 0; i < nops; i++)
13593 rtx reg;
13594 rtx offset;
13596 /* Convert a subreg of a mem into the mem itself. */
13597 if (GET_CODE (operands[nops + i]) == SUBREG)
13598 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13600 gcc_assert (MEM_P (operands[nops + i]));
13602 /* Don't reorder volatile memory references; it doesn't seem worth
13603 looking for the case where the order is ok anyway. */
13604 if (MEM_VOLATILE_P (operands[nops + i]))
13605 return 0;
13607 offset = const0_rtx;
13609 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13610 || (GET_CODE (reg) == SUBREG
13611 && REG_P (reg = SUBREG_REG (reg))))
13612 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13613 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13614 || (GET_CODE (reg) == SUBREG
13615 && REG_P (reg = SUBREG_REG (reg))))
13616 && (CONST_INT_P (offset
13617 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13619 if (i == 0)
13621 base_reg = REGNO (reg);
13622 base_reg_rtx = reg;
13623 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13624 return 0;
13626 else if (base_reg != (int) REGNO (reg))
13627 /* Not addressed from the same base register. */
13628 return 0;
13630 unsorted_regs[i] = (REG_P (operands[i])
13631 ? REGNO (operands[i])
13632 : REGNO (SUBREG_REG (operands[i])));
13634 /* If it isn't an integer register, or if it overwrites the
13635 base register but isn't the last insn in the list, then
13636 we can't do this. */
13637 if (unsorted_regs[i] < 0
13638 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13639 || unsorted_regs[i] > 14
13640 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13641 return 0;
13643 /* Don't allow SP to be loaded unless it is also the base
13644 register. It guarantees that SP is reset correctly when
13645 an LDM instruction is interrupted. Otherwise, we might
13646 end up with a corrupt stack. */
13647 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13648 return 0;
13650 unsorted_offsets[i] = INTVAL (offset);
13651 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13652 order[0] = i;
13654 else
13655 /* Not a suitable memory address. */
13656 return 0;
13659 /* All the useful information has now been extracted from the
13660 operands into unsorted_regs and unsorted_offsets; additionally,
13661 order[0] has been set to the lowest offset in the list. Sort
13662 the offsets into order, verifying that they are adjacent, and
13663 check that the register numbers are ascending. */
13664 if (!compute_offset_order (nops, unsorted_offsets, order,
13665 check_regs ? unsorted_regs : NULL))
13666 return 0;
13668 if (saved_order)
13669 memcpy (saved_order, order, sizeof order);
13671 if (base)
13673 *base = base_reg;
13675 for (i = 0; i < nops; i++)
13676 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13678 *load_offset = unsorted_offsets[order[0]];
13681 if (TARGET_THUMB1
13682 && !peep2_reg_dead_p (nops, base_reg_rtx))
13683 return 0;
13685 if (unsorted_offsets[order[0]] == 0)
13686 ldm_case = 1; /* ldmia */
13687 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13688 ldm_case = 2; /* ldmib */
13689 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13690 ldm_case = 3; /* ldmda */
13691 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13692 ldm_case = 4; /* ldmdb */
13693 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13694 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13695 ldm_case = 5;
13696 else
13697 return 0;
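/* To recap the cases above: a run of offsets starting at 0 maps directly
   onto LDMIA (case 1); starting at 4, LDMIB (case 2, ARM only); ending at
   0, LDMDA (case 3, ARM only); ending at -4, LDMDB (case 4, ARM/Thumb-2).
   Case 5 means the base register must first be adjusted by an add/sub of
   the (valid immediate) offset before an LDMIA can be used.  */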
13699 if (!multiple_operation_profitable_p (false, nops,
13700 ldm_case == 5
13701 ? unsorted_offsets[order[0]] : 0))
13702 return 0;
13704 return ldm_case;
13707 /* Used to determine in a peephole whether a sequence of store instructions can
13708 be changed into a store-multiple instruction.
13709 NOPS is the number of separate store instructions we are examining.
13710 NOPS_TOTAL is the total number of instructions recognized by the peephole
13711 pattern.
13712 The first NOPS entries in OPERANDS are the source registers, the next
13713 NOPS entries are memory operands. If this function is successful, *BASE is
13714 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13715 to the first memory location's offset from that base register. REGS is an
13716 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13717 likewise filled with the corresponding rtx's.
13718 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13719 numbers to an ascending order of stores.
13720 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13721 from ascending memory locations, and the function verifies that the register
13722 numbers are themselves ascending. If CHECK_REGS is false, the register
13723 numbers are stored in the order they are found in the operands. */
13724 static int
13725 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13726 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13727 HOST_WIDE_INT *load_offset, bool check_regs)
13729 int unsorted_regs[MAX_LDM_STM_OPS];
13730 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13731 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13732 int order[MAX_LDM_STM_OPS];
13733 int base_reg = -1;
13734 rtx base_reg_rtx = NULL;
13735 int i, stm_case;
13737 /* Write back of base register is currently only supported for Thumb 1. */
13738 int base_writeback = TARGET_THUMB1;
13740 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13741 easily extended if required. */
13742 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13744 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13746 /* Loop over the operands and check that the memory references are
13747 suitable (i.e. immediate offsets from the same base register). At
13748 the same time, extract the target register, and the memory
13749 offsets. */
13750 for (i = 0; i < nops; i++)
13752 rtx reg;
13753 rtx offset;
13755 /* Convert a subreg of a mem into the mem itself. */
13756 if (GET_CODE (operands[nops + i]) == SUBREG)
13757 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13759 gcc_assert (MEM_P (operands[nops + i]));
13761 /* Don't reorder volatile memory references; it doesn't seem worth
13762 looking for the case where the order is ok anyway. */
13763 if (MEM_VOLATILE_P (operands[nops + i]))
13764 return 0;
13766 offset = const0_rtx;
13768 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13769 || (GET_CODE (reg) == SUBREG
13770 && REG_P (reg = SUBREG_REG (reg))))
13771 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13772 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13773 || (GET_CODE (reg) == SUBREG
13774 && REG_P (reg = SUBREG_REG (reg))))
13775 && (CONST_INT_P (offset
13776 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13778 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13779 ? operands[i] : SUBREG_REG (operands[i]));
13780 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13782 if (i == 0)
13784 base_reg = REGNO (reg);
13785 base_reg_rtx = reg;
13786 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13787 return 0;
13789 else if (base_reg != (int) REGNO (reg))
13790 /* Not addressed from the same base register. */
13791 return 0;
13793 /* If it isn't an integer register, then we can't do this. */
13794 if (unsorted_regs[i] < 0
13795 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13796 /* The effects are unpredictable if the base register is
13797 both updated and stored. */
13798 || (base_writeback && unsorted_regs[i] == base_reg)
13799 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13800 || unsorted_regs[i] > 14)
13801 return 0;
13803 unsorted_offsets[i] = INTVAL (offset);
13804 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13805 order[0] = i;
13807 else
13808 /* Not a suitable memory address. */
13809 return 0;
13812 /* All the useful information has now been extracted from the
13813 operands into unsorted_regs and unsorted_offsets; additionally,
13814 order[0] has been set to the lowest offset in the list. Sort
13815 the offsets into order, verifying that they are adjacent, and
13816 check that the register numbers are ascending. */
13817 if (!compute_offset_order (nops, unsorted_offsets, order,
13818 check_regs ? unsorted_regs : NULL))
13819 return 0;
13821 if (saved_order)
13822 memcpy (saved_order, order, sizeof order);
13824 if (base)
13826 *base = base_reg;
13828 for (i = 0; i < nops; i++)
13830 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13831 if (reg_rtxs)
13832 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13835 *load_offset = unsorted_offsets[order[0]];
13838 if (TARGET_THUMB1
13839 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13840 return 0;
13842 if (unsorted_offsets[order[0]] == 0)
13843 stm_case = 1; /* stmia */
13844 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13845 stm_case = 2; /* stmib */
13846 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13847 stm_case = 3; /* stmda */
13848 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13849 stm_case = 4; /* stmdb */
13850 else
13851 return 0;
13853 if (!multiple_operation_profitable_p (false, nops, 0))
13854 return 0;
13856 return stm_case;
13859 /* Routines for use in generating RTL. */
13861 /* Generate a load-multiple instruction. COUNT is the number of loads in
13862 the instruction; REGS and MEMS are arrays containing the operands.
13863 BASEREG is the base register to be used in addressing the memory operands.
13864 WBACK_OFFSET is nonzero if the instruction should update the base
13865 register. */
13867 static rtx
13868 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13869 HOST_WIDE_INT wback_offset)
13871 int i = 0, j;
13872 rtx result;
13874 if (!multiple_operation_profitable_p (false, count, 0))
13876 rtx seq;
13878 start_sequence ();
13880 for (i = 0; i < count; i++)
13881 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13883 if (wback_offset != 0)
13884 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13886 seq = get_insns ();
13887 end_sequence ();
13889 return seq;
13892 result = gen_rtx_PARALLEL (VOIDmode,
13893 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13894 if (wback_offset != 0)
13896 XVECEXP (result, 0, 0)
13897 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13898 i = 1;
13899 count++;
13902 for (j = 0; i < count; i++, j++)
13903 XVECEXP (result, 0, i)
13904 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13906 return result;
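/* As a sketch, with COUNT == 2, REGS == {4, 5} and WBACK_OFFSET == 8 the
   result is roughly
     (parallel [(set basereg (plus basereg (const_int 8)))
                (set (reg:SI 4) (mem:SI ...))
                (set (reg:SI 5) (mem:SI ...))])
   (register numbers illustrative), which is the shape expected by the
   load-multiple patterns.  */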
13909 /* Generate a store-multiple instruction. COUNT is the number of stores in
13910 the instruction; REGS and MEMS are arrays containing the operands.
13911 BASEREG is the base register to be used in addressing the memory operands.
13912 WBACK_OFFSET is nonzero if the instruction should update the base
13913 register. */
13915 static rtx
13916 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13917 HOST_WIDE_INT wback_offset)
13919 int i = 0, j;
13920 rtx result;
13922 if (GET_CODE (basereg) == PLUS)
13923 basereg = XEXP (basereg, 0);
13925 if (!multiple_operation_profitable_p (false, count, 0))
13927 rtx seq;
13929 start_sequence ();
13931 for (i = 0; i < count; i++)
13932 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13934 if (wback_offset != 0)
13935 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13937 seq = get_insns ();
13938 end_sequence ();
13940 return seq;
13943 result = gen_rtx_PARALLEL (VOIDmode,
13944 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13945 if (wback_offset != 0)
13947 XVECEXP (result, 0, 0)
13948 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13949 i = 1;
13950 count++;
13953 for (j = 0; i < count; i++, j++)
13954 XVECEXP (result, 0, i)
13955 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13957 return result;
13960 /* Generate either a load-multiple or a store-multiple instruction. This
13961 function can be used in situations where we can start with a single MEM
13962 rtx and adjust its address upwards.
13963 COUNT is the number of operations in the instruction, not counting a
13964 possible update of the base register. REGS is an array containing the
13965 register operands.
13966 BASEREG is the base register to be used in addressing the memory operands,
13967 which are constructed from BASEMEM.
13968 WRITE_BACK specifies whether the generated instruction should include an
13969 update of the base register.
13970 OFFSETP is used to pass an offset to and from this function; this offset
13971 is not used when constructing the address (instead BASEMEM should have an
13972 appropriate offset in its address), it is used only for setting
13973 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
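/* For example, a caller copying four words from BASEMEM passes COUNT == 4;
   the loop below then builds MEMS[0..3] at byte offsets 0, 4, 8 and 12 from
   BASEREG, and with WRITE_BACK the base register is advanced by 16.  */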
13975 static rtx
13976 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13977 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13979 rtx mems[MAX_LDM_STM_OPS];
13980 HOST_WIDE_INT offset = *offsetp;
13981 int i;
13983 gcc_assert (count <= MAX_LDM_STM_OPS);
13985 if (GET_CODE (basereg) == PLUS)
13986 basereg = XEXP (basereg, 0);
13988 for (i = 0; i < count; i++)
13990 rtx addr = plus_constant (Pmode, basereg, i * 4);
13991 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13992 offset += 4;
13995 if (write_back)
13996 *offsetp = offset;
13998 if (is_load)
13999 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
14000 write_back ? 4 * count : 0);
14001 else
14002 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
14003 write_back ? 4 * count : 0);
14006 rtx
14007 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
14008 rtx basemem, HOST_WIDE_INT *offsetp)
14010 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
14011 offsetp);
14014 rtx
14015 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
14016 rtx basemem, HOST_WIDE_INT *offsetp)
14018 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
14019 offsetp);
14022 /* Called from a peephole2 expander to turn a sequence of loads into an
14023 LDM instruction. OPERANDS are the operands found by the peephole matcher;
14024 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
14025 is true if we can reorder the registers because the loaded values are
14026 subsequently used commutatively.
14027 Returns true iff we could generate a new instruction. */
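/* As an illustrative sketch (register numbers arbitrary), a matched pair
     ldr r1, [r3, #4]
     ldr r0, [r3]
   has adjacent offsets from the same base register, so with SORT_REGS it
   is rewritten below as a single ldmia r3, {r0, r1}.  */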
14029 bool
14030 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
14032 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14033 rtx mems[MAX_LDM_STM_OPS];
14034 int i, j, base_reg;
14035 rtx base_reg_rtx;
14036 HOST_WIDE_INT offset;
14037 int write_back = FALSE;
14038 int ldm_case;
14039 rtx addr;
14041 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
14042 &base_reg, &offset, !sort_regs);
14044 if (ldm_case == 0)
14045 return false;
14047 if (sort_regs)
14048 for (i = 0; i < nops - 1; i++)
14049 for (j = i + 1; j < nops; j++)
14050 if (regs[i] > regs[j])
14052 int t = regs[i];
14053 regs[i] = regs[j];
14054 regs[j] = t;
14056 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14058 if (TARGET_THUMB1)
14060 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
14061 gcc_assert (ldm_case == 1 || ldm_case == 5);
14062 write_back = TRUE;
14065 if (ldm_case == 5)
14067 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
14068 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
14069 offset = 0;
14070 if (!TARGET_THUMB1)
14072 base_reg = regs[0];
14073 base_reg_rtx = newbase;
14077 for (i = 0; i < nops; i++)
14079 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14080 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14081 SImode, addr, 0);
14083 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
14084 write_back ? offset + i * 4 : 0));
14085 return true;
14088 /* Called from a peephole2 expander to turn a sequence of stores into an
14089 STM instruction. OPERANDS are the operands found by the peephole matcher;
14090 NOPS indicates how many separate stores we are trying to combine.
14091 Returns true iff we could generate a new instruction. */
14093 bool
14094 gen_stm_seq (rtx *operands, int nops)
14096 int i;
14097 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14098 rtx mems[MAX_LDM_STM_OPS];
14099 int base_reg;
14100 rtx base_reg_rtx;
14101 HOST_WIDE_INT offset;
14102 int write_back = FALSE;
14103 int stm_case;
14104 rtx addr;
14105 bool base_reg_dies;
14107 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
14108 mem_order, &base_reg, &offset, true);
14110 if (stm_case == 0)
14111 return false;
14113 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14115 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
14116 if (TARGET_THUMB1)
14118 gcc_assert (base_reg_dies);
14119 write_back = TRUE;
14122 if (stm_case == 5)
14124 gcc_assert (base_reg_dies);
14125 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14126 offset = 0;
14129 addr = plus_constant (Pmode, base_reg_rtx, offset);
14131 for (i = 0; i < nops; i++)
14133 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14134 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14135 SImode, addr, 0);
14137 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
14138 write_back ? offset + i * 4 : 0));
14139 return true;
14142 /* Called from a peephole2 expander to turn a sequence of stores that are
14143 preceded by constant loads into an STM instruction. OPERANDS are the
14144 operands found by the peephole matcher; NOPS indicates how many
14145 separate stores we are trying to combine; there are 2 * NOPS
14146 instructions in the peephole.
14147 Returns true iff we could generate a new instruction. */
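/* As an illustrative sketch (register numbers arbitrary), a matched
   sequence such as
     mov r0, #1 ; str r0, [r4]
     mov r0, #2 ; str r0, [r4, #4]
   reuses r0, so the code below picks a free register for one of the
   constants, emits the two constant moves, and then a single STM with
   the registers in ascending order.  */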
14149 bool
14150 gen_const_stm_seq (rtx *operands, int nops)
14152 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
14153 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
14154 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
14155 rtx mems[MAX_LDM_STM_OPS];
14156 int base_reg;
14157 rtx base_reg_rtx;
14158 HOST_WIDE_INT offset;
14159 int write_back = FALSE;
14160 int stm_case;
14161 rtx addr;
14162 bool base_reg_dies;
14163 int i, j;
14164 HARD_REG_SET allocated;
14166 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
14167 mem_order, &base_reg, &offset, false);
14169 if (stm_case == 0)
14170 return false;
14172 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
14174 /* If the same register is used more than once, try to find a free
14175 register. */
14176 CLEAR_HARD_REG_SET (allocated);
14177 for (i = 0; i < nops; i++)
14179 for (j = i + 1; j < nops; j++)
14180 if (regs[i] == regs[j])
14182 rtx t = peep2_find_free_register (0, nops * 2,
14183 TARGET_THUMB1 ? "l" : "r",
14184 SImode, &allocated);
14185 if (t == NULL_RTX)
14186 return false;
14187 reg_rtxs[i] = t;
14188 regs[i] = REGNO (t);
14192 /* Compute an ordering that maps the register numbers to an ascending
14193 sequence. */
14194 reg_order[0] = 0;
14195 for (i = 0; i < nops; i++)
14196 if (regs[i] < regs[reg_order[0]])
14197 reg_order[0] = i;
14199 for (i = 1; i < nops; i++)
14201 int this_order = reg_order[i - 1];
14202 for (j = 0; j < nops; j++)
14203 if (regs[j] > regs[reg_order[i - 1]]
14204 && (this_order == reg_order[i - 1]
14205 || regs[j] < regs[this_order]))
14206 this_order = j;
14207 reg_order[i] = this_order;
14210 /* Ensure that registers that must be live after the instruction end
14211 up with the correct value. */
14212 for (i = 0; i < nops; i++)
14214 int this_order = reg_order[i];
14215 if ((this_order != mem_order[i]
14216 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14217 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14218 return false;
14221 /* Load the constants. */
14222 for (i = 0; i < nops; i++)
14224 rtx op = operands[2 * nops + mem_order[i]];
14225 sorted_regs[i] = regs[reg_order[i]];
14226 emit_move_insn (reg_rtxs[reg_order[i]], op);
14229 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14231 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14232 if (TARGET_THUMB1)
14234 gcc_assert (base_reg_dies);
14235 write_back = TRUE;
14238 if (stm_case == 5)
14240 gcc_assert (base_reg_dies);
14241 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14242 offset = 0;
14245 addr = plus_constant (Pmode, base_reg_rtx, offset);
14247 for (i = 0; i < nops; i++)
14249 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14250 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14251 SImode, addr, 0);
14253 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14254 write_back ? offset + i * 4 : 0));
14255 return true;
14258 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14259 unaligned copies on processors which support unaligned semantics for those
14260 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14261 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14262 An interleave factor of 1 (the minimum) will perform no interleaving.
14263 Load/store multiple are used for aligned addresses where possible. */
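/* For example, an 11-byte copy with INTERLEAVE_FACTOR == 1 becomes two
   word loads/stores, one halfword load/store and one byte load/store;
   with a factor of 2 and an aligned source, each pair of words is
   fetched with a single load-multiple before being stored.  */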
14265 static void
14266 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14267 HOST_WIDE_INT length,
14268 unsigned int interleave_factor)
14270 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14271 int *regnos = XALLOCAVEC (int, interleave_factor);
14272 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14273 HOST_WIDE_INT i, j;
14274 HOST_WIDE_INT remaining = length, words;
14275 rtx halfword_tmp = NULL, byte_tmp = NULL;
14276 rtx dst, src;
14277 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14278 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14279 HOST_WIDE_INT srcoffset, dstoffset;
14280 HOST_WIDE_INT src_autoinc, dst_autoinc;
14281 rtx mem, addr;
14283 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
14285 /* Use hard registers if we have aligned source or destination so we can use
14286 load/store multiple with contiguous registers. */
14287 if (dst_aligned || src_aligned)
14288 for (i = 0; i < interleave_factor; i++)
14289 regs[i] = gen_rtx_REG (SImode, i);
14290 else
14291 for (i = 0; i < interleave_factor; i++)
14292 regs[i] = gen_reg_rtx (SImode);
14294 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14295 src = copy_addr_to_reg (XEXP (srcbase, 0));
14297 srcoffset = dstoffset = 0;
14299 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14300 For copying the last bytes we want to subtract this offset again. */
14301 src_autoinc = dst_autoinc = 0;
14303 for (i = 0; i < interleave_factor; i++)
14304 regnos[i] = i;
14306 /* Copy BLOCK_SIZE_BYTES chunks. */
14308 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14310 /* Load words. */
14311 if (src_aligned && interleave_factor > 1)
14313 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14314 TRUE, srcbase, &srcoffset));
14315 src_autoinc += UNITS_PER_WORD * interleave_factor;
14317 else
14319 for (j = 0; j < interleave_factor; j++)
14321 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14322 - src_autoinc));
14323 mem = adjust_automodify_address (srcbase, SImode, addr,
14324 srcoffset + j * UNITS_PER_WORD);
14325 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14327 srcoffset += block_size_bytes;
14330 /* Store words. */
14331 if (dst_aligned && interleave_factor > 1)
14333 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14334 TRUE, dstbase, &dstoffset));
14335 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14337 else
14339 for (j = 0; j < interleave_factor; j++)
14341 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14342 - dst_autoinc));
14343 mem = adjust_automodify_address (dstbase, SImode, addr,
14344 dstoffset + j * UNITS_PER_WORD);
14345 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14347 dstoffset += block_size_bytes;
14350 remaining -= block_size_bytes;
14353 /* Copy any whole words left (note these aren't interleaved with any
14354 subsequent halfword/byte load/stores in the interests of simplicity). */
14356 words = remaining / UNITS_PER_WORD;
14358 gcc_assert (words < interleave_factor);
14360 if (src_aligned && words > 1)
14362 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14363 &srcoffset));
14364 src_autoinc += UNITS_PER_WORD * words;
14366 else
14368 for (j = 0; j < words; j++)
14370 addr = plus_constant (Pmode, src,
14371 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14372 mem = adjust_automodify_address (srcbase, SImode, addr,
14373 srcoffset + j * UNITS_PER_WORD);
14374 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14376 srcoffset += words * UNITS_PER_WORD;
14379 if (dst_aligned && words > 1)
14381 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14382 &dstoffset));
14383 dst_autoinc += words * UNITS_PER_WORD;
14385 else
14387 for (j = 0; j < words; j++)
14389 addr = plus_constant (Pmode, dst,
14390 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14391 mem = adjust_automodify_address (dstbase, SImode, addr,
14392 dstoffset + j * UNITS_PER_WORD);
14393 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14395 dstoffset += words * UNITS_PER_WORD;
14398 remaining -= words * UNITS_PER_WORD;
14400 gcc_assert (remaining < 4);
14402 /* Copy a halfword if necessary. */
14404 if (remaining >= 2)
14406 halfword_tmp = gen_reg_rtx (SImode);
14408 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14409 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14410 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14412 /* Either write out immediately, or delay until we've loaded the last
14413 byte, depending on interleave factor. */
14414 if (interleave_factor == 1)
14416 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14417 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14418 emit_insn (gen_unaligned_storehi (mem,
14419 gen_lowpart (HImode, halfword_tmp)));
14420 halfword_tmp = NULL;
14421 dstoffset += 2;
14424 remaining -= 2;
14425 srcoffset += 2;
14428 gcc_assert (remaining < 2);
14430 /* Copy last byte. */
14432 if ((remaining & 1) != 0)
14434 byte_tmp = gen_reg_rtx (SImode);
14436 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14437 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14438 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14440 if (interleave_factor == 1)
14442 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14443 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14444 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14445 byte_tmp = NULL;
14446 dstoffset++;
14449 remaining--;
14450 srcoffset++;
14453 /* Store last halfword if we haven't done so already. */
14455 if (halfword_tmp)
14457 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14458 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14459 emit_insn (gen_unaligned_storehi (mem,
14460 gen_lowpart (HImode, halfword_tmp)));
14461 dstoffset += 2;
14464 /* Likewise for last byte. */
14466 if (byte_tmp)
14468 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14469 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14470 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14471 dstoffset++;
14474 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14477 /* From mips_adjust_block_mem:
14479 Helper function for doing a loop-based block operation on memory
14480 reference MEM. Each iteration of the loop will operate on LENGTH
14481 bytes of MEM.
14483 Create a new base register for use within the loop and point it to
14484 the start of MEM. Create a new memory reference that uses this
14485 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14487 static void
14488 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14489 rtx *loop_mem)
14491 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14493 /* Although the new mem does not refer to a known location,
14494 it does keep up to LENGTH bytes of alignment. */
14495 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14496 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14499 /* From mips_block_move_loop:
14501 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14502 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14503 the memory regions do not overlap. */
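/* For instance, LENGTH == 100 with BYTES_PER_ITER == 16 runs the loop
   body six times (96 bytes) and then falls through to a straight copy
   of the remaining 4 bytes.  */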
14505 static void
14506 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14507 unsigned int interleave_factor,
14508 HOST_WIDE_INT bytes_per_iter)
14510 rtx src_reg, dest_reg, final_src, test;
14511 HOST_WIDE_INT leftover;
14513 leftover = length % bytes_per_iter;
14514 length -= leftover;
14516 /* Create registers and memory references for use within the loop. */
14517 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14518 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14520 /* Calculate the value that SRC_REG should have after the last iteration of
14521 the loop. */
14522 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14523 0, 0, OPTAB_WIDEN);
14525 /* Emit the start of the loop. */
14526 rtx_code_label *label = gen_label_rtx ();
14527 emit_label (label);
14529 /* Emit the loop body. */
14530 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14531 interleave_factor);
14533 /* Move on to the next block. */
14534 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14535 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14537 /* Emit the loop condition. */
14538 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14539 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14541 /* Mop up any left-over bytes. */
14542 if (leftover)
14543 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14546 /* Emit a block move when either the source or destination is unaligned (not
14547 aligned to a four-byte boundary). This may need further tuning depending on
14548 core type, optimize_size setting, etc. */
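/* For example, when optimizing for size with neither buffer word-aligned,
   a 20-byte copy uses the loop with 4 bytes per iteration, whereas without
   optimize_size the same copy falls under the 32-byte threshold and is
   expanded straight with an interleave factor of 4.  */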
14550 static int
14551 arm_movmemqi_unaligned (rtx *operands)
14553 HOST_WIDE_INT length = INTVAL (operands[2]);
14555 if (optimize_size)
14557 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14558 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14559 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14560 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14561 or dst_aligned though: allow more interleaving in those cases since the
14562 resulting code can be smaller. */
14563 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14564 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14566 if (length > 12)
14567 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14568 interleave_factor, bytes_per_iter);
14569 else
14570 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14571 interleave_factor);
14573 else
14575 /* Note that the loop created by arm_block_move_unaligned_loop may be
14576 subject to loop unrolling, which makes tuning this condition a little
14577 redundant. */
14578 if (length > 32)
14579 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14580 else
14581 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14584 return 1;
14587 int
14588 arm_gen_movmemqi (rtx *operands)
14590 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14591 HOST_WIDE_INT srcoffset, dstoffset;
14592 int i;
14593 rtx src, dst, srcbase, dstbase;
14594 rtx part_bytes_reg = NULL;
14595 rtx mem;
14597 if (!CONST_INT_P (operands[2])
14598 || !CONST_INT_P (operands[3])
14599 || INTVAL (operands[2]) > 64)
14600 return 0;
14602 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14603 return arm_movmemqi_unaligned (operands);
14605 if (INTVAL (operands[3]) & 3)
14606 return 0;
14608 dstbase = operands[0];
14609 srcbase = operands[1];
14611 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14612 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14614 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14615 out_words_to_go = INTVAL (operands[2]) / 4;
14616 last_bytes = INTVAL (operands[2]) & 3;
14617 dstoffset = srcoffset = 0;
14619 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14620 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14622 for (i = 0; in_words_to_go >= 2; i+=4)
14624 if (in_words_to_go > 4)
14625 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14626 TRUE, srcbase, &srcoffset));
14627 else
14628 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14629 src, FALSE, srcbase,
14630 &srcoffset));
14632 if (out_words_to_go)
14634 if (out_words_to_go > 4)
14635 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14636 TRUE, dstbase, &dstoffset));
14637 else if (out_words_to_go != 1)
14638 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14639 out_words_to_go, dst,
14640 (last_bytes == 0
14641 ? FALSE : TRUE),
14642 dstbase, &dstoffset));
14643 else
14645 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14646 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14647 if (last_bytes != 0)
14649 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14650 dstoffset += 4;
14655 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14656 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14659 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14660 if (out_words_to_go)
14662 rtx sreg;
14664 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14665 sreg = copy_to_reg (mem);
14667 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14668 emit_move_insn (mem, sreg);
14669 in_words_to_go--;
14671 gcc_assert (!in_words_to_go); /* Sanity check */
14674 if (in_words_to_go)
14676 gcc_assert (in_words_to_go > 0);
14678 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14679 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14682 gcc_assert (!last_bytes || part_bytes_reg);
14684 if (BYTES_BIG_ENDIAN && last_bytes)
14686 rtx tmp = gen_reg_rtx (SImode);
14688 /* The bytes we want are in the top end of the word. */
14689 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14690 GEN_INT (8 * (4 - last_bytes))));
14691 part_bytes_reg = tmp;
14693 while (last_bytes)
14695 mem = adjust_automodify_address (dstbase, QImode,
14696 plus_constant (Pmode, dst,
14697 last_bytes - 1),
14698 dstoffset + last_bytes - 1);
14699 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14701 if (--last_bytes)
14703 tmp = gen_reg_rtx (SImode);
14704 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14705 part_bytes_reg = tmp;
14710 else
14712 if (last_bytes > 1)
14714 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14715 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14716 last_bytes -= 2;
14717 if (last_bytes)
14719 rtx tmp = gen_reg_rtx (SImode);
14720 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14721 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14722 part_bytes_reg = tmp;
14723 dstoffset += 2;
14727 if (last_bytes)
14729 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14730 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14734 return 1;
14737 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14738 by mode size. */
14739 inline static rtx
14740 next_consecutive_mem (rtx mem)
14742 machine_mode mode = GET_MODE (mem);
14743 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14744 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14746 return adjust_automodify_address (mem, mode, addr, offset);
14749 /* Copy using LDRD/STRD instructions whenever possible.
14750 Returns true upon success. */
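/* As a sketch, a 14-byte copy between word-aligned buffers is expanded
   below into one DImode move (normally an ldrd/strd pair), one SImode
   move, and one HImode move for the trailing two bytes.  */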
14751 bool
14752 gen_movmem_ldrd_strd (rtx *operands)
14754 unsigned HOST_WIDE_INT len;
14755 HOST_WIDE_INT align;
14756 rtx src, dst, base;
14757 rtx reg0;
14758 bool src_aligned, dst_aligned;
14759 bool src_volatile, dst_volatile;
14761 gcc_assert (CONST_INT_P (operands[2]));
14762 gcc_assert (CONST_INT_P (operands[3]));
14764 len = UINTVAL (operands[2]);
14765 if (len > 64)
14766 return false;
14768 /* Maximum alignment we can assume for both src and dst buffers. */
14769 align = INTVAL (operands[3]);
14771 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14772 return false;
14774 /* Place src and dst addresses in registers
14775 and update the corresponding mem rtx. */
14776 dst = operands[0];
14777 dst_volatile = MEM_VOLATILE_P (dst);
14778 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14779 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14780 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14782 src = operands[1];
14783 src_volatile = MEM_VOLATILE_P (src);
14784 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14785 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14786 src = adjust_automodify_address (src, VOIDmode, base, 0);
14788 if (!unaligned_access && !(src_aligned && dst_aligned))
14789 return false;
14791 if (src_volatile || dst_volatile)
14792 return false;
14794 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14795 if (!(dst_aligned || src_aligned))
14796 return arm_gen_movmemqi (operands);
14798 src = adjust_address (src, DImode, 0);
14799 dst = adjust_address (dst, DImode, 0);
14800 while (len >= 8)
14802 len -= 8;
14803 reg0 = gen_reg_rtx (DImode);
14804 if (src_aligned)
14805 emit_move_insn (reg0, src);
14806 else
14807 emit_insn (gen_unaligned_loaddi (reg0, src));
14809 if (dst_aligned)
14810 emit_move_insn (dst, reg0);
14811 else
14812 emit_insn (gen_unaligned_storedi (dst, reg0));
14814 src = next_consecutive_mem (src);
14815 dst = next_consecutive_mem (dst);
14818 gcc_assert (len < 8);
14819 if (len >= 4)
14821 /* More than a word but less than a double-word to copy. Copy a word. */
14822 reg0 = gen_reg_rtx (SImode);
14823 src = adjust_address (src, SImode, 0);
14824 dst = adjust_address (dst, SImode, 0);
14825 if (src_aligned)
14826 emit_move_insn (reg0, src);
14827 else
14828 emit_insn (gen_unaligned_loadsi (reg0, src));
14830 if (dst_aligned)
14831 emit_move_insn (dst, reg0);
14832 else
14833 emit_insn (gen_unaligned_storesi (dst, reg0));
14835 src = next_consecutive_mem (src);
14836 dst = next_consecutive_mem (dst);
14837 len -= 4;
14840 if (len == 0)
14841 return true;
14843 /* Copy the remaining bytes. */
14844 if (len >= 2)
14846 dst = adjust_address (dst, HImode, 0);
14847 src = adjust_address (src, HImode, 0);
14848 reg0 = gen_reg_rtx (SImode);
14849 if (src_aligned)
14850 emit_insn (gen_zero_extendhisi2 (reg0, src));
14851 else
14852 emit_insn (gen_unaligned_loadhiu (reg0, src));
14854 if (dst_aligned)
14855 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14856 else
14857 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14859 src = next_consecutive_mem (src);
14860 dst = next_consecutive_mem (dst);
14861 if (len == 2)
14862 return true;
14865 dst = adjust_address (dst, QImode, 0);
14866 src = adjust_address (src, QImode, 0);
14867 reg0 = gen_reg_rtx (QImode);
14868 emit_move_insn (reg0, src);
14869 emit_move_insn (dst, reg0);
14870 return true;
14873 /* Select a dominance comparison mode if possible for a test of the general
14874 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14875 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14876 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14877 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14878 In all cases OP will be either EQ or NE, but we don't need to know which
14879 here. If we are unable to support a dominance comparison we return
14880 CC mode. This will then fail to match for the RTL expressions that
14881 generate this call. */
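/* For example, for (x < y) || (x <= y) with COND_OR == DOM_CC_X_OR_Y the
   first comparison dominates the second (LT implies LE), so CC_DLEmode is
   returned; for (x < y) && (x > y) neither condition dominates the other
   and CCmode is returned, so the calling pattern fails to match.  */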
14882 machine_mode
14883 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14885 enum rtx_code cond1, cond2;
14886 int swapped = 0;
14888 /* Currently we will probably get the wrong result if the individual
14889 comparisons are not simple. This also ensures that it is safe to
14890 reverse a comparison if necessary. */
14891 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14892 != CCmode)
14893 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14894 != CCmode))
14895 return CCmode;
14897 /* The if_then_else variant of this tests the second condition if the
14898 first passes, but is true if the first fails. Reverse the first
14899 condition to get a true "inclusive-or" expression. */
14900 if (cond_or == DOM_CC_NX_OR_Y)
14901 cond1 = reverse_condition (cond1);
14903 /* If the comparisons are not equal, and one doesn't dominate the other,
14904 then we can't do this. */
14905 if (cond1 != cond2
14906 && !comparison_dominates_p (cond1, cond2)
14907 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14908 return CCmode;
14910 if (swapped)
14911 std::swap (cond1, cond2);
14913 switch (cond1)
14915 case EQ:
14916 if (cond_or == DOM_CC_X_AND_Y)
14917 return CC_DEQmode;
14919 switch (cond2)
14921 case EQ: return CC_DEQmode;
14922 case LE: return CC_DLEmode;
14923 case LEU: return CC_DLEUmode;
14924 case GE: return CC_DGEmode;
14925 case GEU: return CC_DGEUmode;
14926 default: gcc_unreachable ();
14929 case LT:
14930 if (cond_or == DOM_CC_X_AND_Y)
14931 return CC_DLTmode;
14933 switch (cond2)
14935 case LT:
14936 return CC_DLTmode;
14937 case LE:
14938 return CC_DLEmode;
14939 case NE:
14940 return CC_DNEmode;
14941 default:
14942 gcc_unreachable ();
14945 case GT:
14946 if (cond_or == DOM_CC_X_AND_Y)
14947 return CC_DGTmode;
14949 switch (cond2)
14951 case GT:
14952 return CC_DGTmode;
14953 case GE:
14954 return CC_DGEmode;
14955 case NE:
14956 return CC_DNEmode;
14957 default:
14958 gcc_unreachable ();
14961 case LTU:
14962 if (cond_or == DOM_CC_X_AND_Y)
14963 return CC_DLTUmode;
14965 switch (cond2)
14967 case LTU:
14968 return CC_DLTUmode;
14969 case LEU:
14970 return CC_DLEUmode;
14971 case NE:
14972 return CC_DNEmode;
14973 default:
14974 gcc_unreachable ();
14977 case GTU:
14978 if (cond_or == DOM_CC_X_AND_Y)
14979 return CC_DGTUmode;
14981 switch (cond2)
14983 case GTU:
14984 return CC_DGTUmode;
14985 case GEU:
14986 return CC_DGEUmode;
14987 case NE:
14988 return CC_DNEmode;
14989 default:
14990 gcc_unreachable ();
14993 /* The remaining cases only occur when both comparisons are the
14994 same. */
14995 case NE:
14996 gcc_assert (cond1 == cond2);
14997 return CC_DNEmode;
14999 case LE:
15000 gcc_assert (cond1 == cond2);
15001 return CC_DLEmode;
15003 case GE:
15004 gcc_assert (cond1 == cond2);
15005 return CC_DGEmode;
15007 case LEU:
15008 gcc_assert (cond1 == cond2);
15009 return CC_DLEUmode;
15011 case GEU:
15012 gcc_assert (cond1 == cond2);
15013 return CC_DGEUmode;
15015 default:
15016 gcc_unreachable ();
15020 machine_mode
15021 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
15023 /* All floating point compares return CCFP if it is an equality
15024 comparison, and CCFPE otherwise. */
15025 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
15027 switch (op)
15029 case EQ:
15030 case NE:
15031 case UNORDERED:
15032 case ORDERED:
15033 case UNLT:
15034 case UNLE:
15035 case UNGT:
15036 case UNGE:
15037 case UNEQ:
15038 case LTGT:
15039 return CCFPmode;
15041 case LT:
15042 case LE:
15043 case GT:
15044 case GE:
15045 return CCFPEmode;
15047 default:
15048 gcc_unreachable ();
15052 /* A compare with a shifted operand. Because of canonicalization, the
15053 comparison will have to be swapped when we emit the assembler. */
15054 if (GET_MODE (y) == SImode
15055 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15056 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15057 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
15058 || GET_CODE (x) == ROTATERT))
15059 return CC_SWPmode;
15061 /* This operation is performed swapped, but since we only rely on the Z
15062 flag we don't need an additional mode. */
15063 if (GET_MODE (y) == SImode
15064 && (REG_P (y) || (GET_CODE (y) == SUBREG))
15065 && GET_CODE (x) == NEG
15066 && (op == EQ || op == NE))
15067 return CC_Zmode;
15069 /* This is a special case that is used by combine to allow a
15070 comparison of a shifted byte load to be split into a zero-extend
15071 followed by a comparison of the shifted integer (only valid for
15072 equalities and unsigned inequalities). */
15073 if (GET_MODE (x) == SImode
15074 && GET_CODE (x) == ASHIFT
15075 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
15076 && GET_CODE (XEXP (x, 0)) == SUBREG
15077 && MEM_P (SUBREG_REG (XEXP (x, 0)))
15078 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
15079 && (op == EQ || op == NE
15080 || op == GEU || op == GTU || op == LTU || op == LEU)
15081 && CONST_INT_P (y))
15082 return CC_Zmode;
15084 /* A construct for a conditional compare: if the false arm contains
15085 0, then both conditions must be true; otherwise either condition
15086 must be true. Not all conditions are possible, so CCmode is
15087 returned if it can't be done. */
15088 if (GET_CODE (x) == IF_THEN_ELSE
15089 && (XEXP (x, 2) == const0_rtx
15090 || XEXP (x, 2) == const1_rtx)
15091 && COMPARISON_P (XEXP (x, 0))
15092 && COMPARISON_P (XEXP (x, 1)))
15093 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15094 INTVAL (XEXP (x, 2)));
15096 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
15097 if (GET_CODE (x) == AND
15098 && (op == EQ || op == NE)
15099 && COMPARISON_P (XEXP (x, 0))
15100 && COMPARISON_P (XEXP (x, 1)))
15101 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15102 DOM_CC_X_AND_Y);
15104 if (GET_CODE (x) == IOR
15105 && (op == EQ || op == NE)
15106 && COMPARISON_P (XEXP (x, 0))
15107 && COMPARISON_P (XEXP (x, 1)))
15108 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
15109 DOM_CC_X_OR_Y);
15111 /* An operation (on Thumb) where we want to test for a single bit.
15112 This is done by shifting that bit up into the top bit of a
15113 scratch register; we can then branch on the sign bit. */
15114 if (TARGET_THUMB1
15115 && GET_MODE (x) == SImode
15116 && (op == EQ || op == NE)
15117 && GET_CODE (x) == ZERO_EXTRACT
15118 && XEXP (x, 1) == const1_rtx)
15119 return CC_Nmode;
15121 /* For an operation that sets the condition codes as a side-effect, the
15122 V flag is not set correctly, so we can only use comparisons where
15123 this doesn't matter. (For LT and GE we can use "mi" and "pl"
15124 instead.) */
15125 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
15126 if (GET_MODE (x) == SImode
15127 && y == const0_rtx
15128 && (op == EQ || op == NE || op == LT || op == GE)
15129 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
15130 || GET_CODE (x) == AND || GET_CODE (x) == IOR
15131 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
15132 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
15133 || GET_CODE (x) == LSHIFTRT
15134 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
15135 || GET_CODE (x) == ROTATERT
15136 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
15137 return CC_NOOVmode;
15139 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
15140 return CC_Zmode;
15142 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
15143 && GET_CODE (x) == PLUS
15144 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
15145 return CC_Cmode;
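/* For instance, the unsigned overflow check "if (a + b < a)" compares the
   PLUS against one of its own operands with LTU; only the carry flag is
   needed, hence CC_Cmode above.  */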
15147 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
15149 switch (op)
15151 case EQ:
15152 case NE:
15153 /* A DImode comparison against zero can be implemented by
15154 or'ing the two halves together. */
15155 if (y == const0_rtx)
15156 return CC_Zmode;
15158 /* We can do an equality test in three Thumb instructions. */
15159 if (!TARGET_32BIT)
15160 return CC_Zmode;
15162 /* FALLTHROUGH */
15164 case LTU:
15165 case LEU:
15166 case GTU:
15167 case GEU:
15168 /* DImode unsigned comparisons can be implemented by cmp +
15169 cmpeq without a scratch register. Not worth doing in
15170 Thumb-2. */
15171 if (TARGET_32BIT)
15172 return CC_CZmode;
15174 /* FALLTHROUGH */
15176 case LT:
15177 case LE:
15178 case GT:
15179 case GE:
15180 /* DImode signed and unsigned comparisons can be implemented
15181 by cmp + sbcs with a scratch register, but that does not
15182 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15183 gcc_assert (op != EQ && op != NE);
15184 return CC_NCVmode;
15186 default:
15187 gcc_unreachable ();
15191 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15192 return GET_MODE (x);
15194 return CCmode;
15197 /* X and Y are two things to compare using CODE. Emit the compare insn and
15198 return the rtx for register 0 in the proper mode. FP means this is a
15199 floating point compare: I don't think that it is needed on the arm. */
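/* Before reload on a Thumb-1 target, for example, a DImode equality test
   of two registers is lowered below to an XOR of the operands followed by
   a comparison of the result against zero in CC_Zmode (clobbering a
   scratch); ARM and Thumb-2 use a plain CC_CZmode compare instead.  */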
15200 rtx
15201 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15203 machine_mode mode;
15204 rtx cc_reg;
15205 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15207 /* We might have X as a constant, Y as a register because of the predicates
15208 used for cmpdi. If so, force X to a register here. */
15209 if (dimode_comparison && !REG_P (x))
15210 x = force_reg (DImode, x);
15212 mode = SELECT_CC_MODE (code, x, y);
15213 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15215 if (dimode_comparison
15216 && mode != CC_CZmode)
15218 rtx clobber, set;
15220 /* To compare two non-zero values for equality, XOR them and
15221 then compare against zero. Not used for ARM mode; there
15222 CC_CZmode is cheaper. */
15223 if (mode == CC_Zmode && y != const0_rtx)
15225 gcc_assert (!reload_completed);
15226 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15227 y = const0_rtx;
15230 /* A scratch register is required. */
15231 if (reload_completed)
15232 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15233 else
15234 scratch = gen_rtx_SCRATCH (SImode);
15236 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15237 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
15238 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15240 else
15241 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15243 return cc_reg;
15246 /* Generate a sequence of insns that will generate the correct return
15247 address mask depending on the physical architecture that the program
15248 is running on. */
15249 rtx
15250 arm_gen_return_addr_mask (void)
15252 rtx reg = gen_reg_rtx (Pmode);
15254 emit_insn (gen_return_addr_mask (reg));
15255 return reg;
15258 void
15259 arm_reload_in_hi (rtx *operands)
15261 rtx ref = operands[1];
15262 rtx base, scratch;
15263 HOST_WIDE_INT offset = 0;
15265 if (GET_CODE (ref) == SUBREG)
15267 offset = SUBREG_BYTE (ref);
15268 ref = SUBREG_REG (ref);
15271 if (REG_P (ref))
15273 /* We have a pseudo which has been spilt onto the stack; there
15274 are two cases here: the first where there is a simple
15275 stack-slot replacement and a second where the stack-slot is
15276 out of range, or is used as a subreg. */
15277 if (reg_equiv_mem (REGNO (ref)))
15279 ref = reg_equiv_mem (REGNO (ref));
15280 base = find_replacement (&XEXP (ref, 0));
15282 else
15283 /* The slot is out of range, or was dressed up in a SUBREG. */
15284 base = reg_equiv_address (REGNO (ref));
15286 else
15287 base = find_replacement (&XEXP (ref, 0));
15289 /* Handle the case where the address is too complex to be offset by 1. */
15290 if (GET_CODE (base) == MINUS
15291 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15293 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15295 emit_set_insn (base_plus, base);
15296 base = base_plus;
15298 else if (GET_CODE (base) == PLUS)
15300 /* The addend must be CONST_INT, or we would have dealt with it above. */
15301 HOST_WIDE_INT hi, lo;
15303 offset += INTVAL (XEXP (base, 1));
15304 base = XEXP (base, 0);
15306 /* Rework the address into a legal sequence of insns. */
15307 /* Valid range for lo is -4095 -> 4095 */
15308 lo = (offset >= 0
15309 ? (offset & 0xfff)
15310 : -((-offset) & 0xfff));
15312 /* Corner case: if lo is the max offset then we would be out of range
15313 once we have added the additional 1 below, so bump the msb into the
15314 pre-loading insn(s). */
15315 if (lo == 4095)
15316 lo &= 0x7ff;
15318 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15319 ^ (HOST_WIDE_INT) 0x80000000)
15320 - (HOST_WIDE_INT) 0x80000000);
15322 gcc_assert (hi + lo == offset);
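/* Working an example: OFFSET == 0x1234 splits into lo == 0x234 and
   hi == 0x1000, so the hi part is added into the base first and the two
   byte loads then use offsets 0x234 and 0x235.  For the corner case
   OFFSET == 4095, lo is reduced to 0x7ff so that lo + 1 stays in range.  */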
15324 if (hi != 0)
15326 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15328 /* Get the base address; addsi3 knows how to handle constants
15329 that require more than one insn. */
15330 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15331 base = base_plus;
15332 offset = lo;
15336 /* Operands[2] may overlap operands[0] (though it won't overlap
15337 operands[1]); that's why we asked for a DImode reg -- so we can
15338 use the half that does not overlap. */
15339 if (REGNO (operands[2]) == REGNO (operands[0]))
15340 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15341 else
15342 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15344 emit_insn (gen_zero_extendqisi2 (scratch,
15345 gen_rtx_MEM (QImode,
15346 plus_constant (Pmode, base,
15347 offset))));
15348 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15349 gen_rtx_MEM (QImode,
15350 plus_constant (Pmode, base,
15351 offset + 1))));
15352 if (!BYTES_BIG_ENDIAN)
15353 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15354 gen_rtx_IOR (SImode,
15355 gen_rtx_ASHIFT
15356 (SImode,
15357 gen_rtx_SUBREG (SImode, operands[0], 0),
15358 GEN_INT (8)),
15359 scratch));
15360 else
15361 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15362 gen_rtx_IOR (SImode,
15363 gen_rtx_ASHIFT (SImode, scratch,
15364 GEN_INT (8)),
15365 gen_rtx_SUBREG (SImode, operands[0], 0)));
15368 /* Handle storing a half-word to memory during reload by synthesizing it as two
15369 byte stores. Take care not to clobber the input values until after we
15370 have moved them somewhere safe. This code assumes that if the DImode
15371 scratch in operands[2] overlaps either the input value or output address
15372 in some way, then that value must die in this insn (we absolutely need
15373 two scratch registers for some corner cases). */
15374 void
15375 arm_reload_out_hi (rtx *operands)
15377 rtx ref = operands[0];
15378 rtx outval = operands[1];
15379 rtx base, scratch;
15380 HOST_WIDE_INT offset = 0;
15382 if (GET_CODE (ref) == SUBREG)
15384 offset = SUBREG_BYTE (ref);
15385 ref = SUBREG_REG (ref);
15388 if (REG_P (ref))
15390 /* We have a pseudo which has been spilt onto the stack; there
15391 are two cases here: the first where there is a simple
15392 stack-slot replacement and a second where the stack-slot is
15393 out of range, or is used as a subreg. */
15394 if (reg_equiv_mem (REGNO (ref)))
15396 ref = reg_equiv_mem (REGNO (ref));
15397 base = find_replacement (&XEXP (ref, 0));
15399 else
15400 /* The slot is out of range, or was dressed up in a SUBREG. */
15401 base = reg_equiv_address (REGNO (ref));
15403 else
15404 base = find_replacement (&XEXP (ref, 0));
15406 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15408 /* Handle the case where the address is too complex to be offset by 1. */
15409 if (GET_CODE (base) == MINUS
15410 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15412 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15414 /* Be careful not to destroy OUTVAL. */
15415 if (reg_overlap_mentioned_p (base_plus, outval))
15417 /* Updating base_plus might destroy outval, see if we can
15418 swap the scratch and base_plus. */
15419 if (!reg_overlap_mentioned_p (scratch, outval))
15420 std::swap (scratch, base_plus);
15421 else
15423 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15425 /* Be conservative and copy OUTVAL into the scratch now,
15426 this should only be necessary if outval is a subreg
15427 of something larger than a word. */
15428 /* XXX Might this clobber base? I can't see how it can,
15429 since scratch is known to overlap with OUTVAL, and
15430 must be wider than a word. */
15431 emit_insn (gen_movhi (scratch_hi, outval));
15432 outval = scratch_hi;
15436 emit_set_insn (base_plus, base);
15437 base = base_plus;
15439 else if (GET_CODE (base) == PLUS)
15441 /* The addend must be CONST_INT, or we would have dealt with it above. */
15442 HOST_WIDE_INT hi, lo;
15444 offset += INTVAL (XEXP (base, 1));
15445 base = XEXP (base, 0);
15447 /* Rework the address into a legal sequence of insns. */
15448 /* Valid range for lo is -4095 -> 4095 */
15449 lo = (offset >= 0
15450 ? (offset & 0xfff)
15451 : -((-offset) & 0xfff));
15453 /* Corner case: if lo is the max offset then we would be out of range
15454 once we have added the additional 1 below, so bump the msb into the
15455 pre-loading insn(s). */
15456 if (lo == 4095)
15457 lo &= 0x7ff;
15459 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15460 ^ (HOST_WIDE_INT) 0x80000000)
15461 - (HOST_WIDE_INT) 0x80000000);
15463 gcc_assert (hi + lo == offset);
15465 if (hi != 0)
15467 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15469 /* Be careful not to destroy OUTVAL. */
15470 if (reg_overlap_mentioned_p (base_plus, outval))
15472 /* Updating base_plus might destroy outval, see if we
15473 can swap the scratch and base_plus. */
15474 if (!reg_overlap_mentioned_p (scratch, outval))
15475 std::swap (scratch, base_plus);
15476 else
15478 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15480 /* Be conservative and copy outval into scratch now,
15481 this should only be necessary if outval is a
15482 subreg of something larger than a word. */
15483 /* XXX Might this clobber base? I can't see how it
15484 can, since scratch is known to overlap with
15485 outval. */
15486 emit_insn (gen_movhi (scratch_hi, outval));
15487 outval = scratch_hi;
15491 /* Get the base address; addsi3 knows how to handle constants
15492 that require more than one insn. */
15493 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15494 base = base_plus;
15495 offset = lo;
15499 if (BYTES_BIG_ENDIAN)
15501 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15502 plus_constant (Pmode, base,
15503 offset + 1)),
15504 gen_lowpart (QImode, outval)));
15505 emit_insn (gen_lshrsi3 (scratch,
15506 gen_rtx_SUBREG (SImode, outval, 0),
15507 GEN_INT (8)));
15508 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15509 offset)),
15510 gen_lowpart (QImode, scratch)));
15512 else
15514 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15515 offset)),
15516 gen_lowpart (QImode, outval)));
15517 emit_insn (gen_lshrsi3 (scratch,
15518 gen_rtx_SUBREG (SImode, outval, 0),
15519 GEN_INT (8)));
15520 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15521 plus_constant (Pmode, base,
15522 offset + 1)),
15523 gen_lowpart (QImode, scratch)));
15527 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15528 (padded to the size of a word) should be passed in a register. */
15530 static bool
15531 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15533 if (TARGET_AAPCS_BASED)
15534 return must_pass_in_stack_var_size (mode, type);
15535 else
15536 return must_pass_in_stack_var_size_or_pad (mode, type);
15540 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
15541 Return true if an argument passed on the stack should be padded upwards,
15542 i.e. if the least-significant byte has useful data.
15543 For legacy APCS ABIs we use the default. For AAPCS based ABIs small
15544 aggregate types are placed in the lowest memory address. */
15546 bool
15547 arm_pad_arg_upward (machine_mode mode ATTRIBUTE_UNUSED, const_tree type)
15549 if (!TARGET_AAPCS_BASED)
15550 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
15552 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15553 return false;
15555 return true;
15559 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15560 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15561 register has useful data, and return the opposite if the most
15562 significant byte does. */
15564 bool
15565 arm_pad_reg_upward (machine_mode mode,
15566 tree type, int first ATTRIBUTE_UNUSED)
15568 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15570 /* For AAPCS, small aggregates, small fixed-point types,
15571 and small complex types are always padded upwards. */
15572 if (type)
15574 if ((AGGREGATE_TYPE_P (type)
15575 || TREE_CODE (type) == COMPLEX_TYPE
15576 || FIXED_POINT_TYPE_P (type))
15577 && int_size_in_bytes (type) <= 4)
15578 return true;
15580 else
15582 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15583 && GET_MODE_SIZE (mode) <= 4)
15584 return true;
15588 /* Otherwise, use default padding. */
15589 return !BYTES_BIG_ENDIAN;
15592 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15593 assuming that the address in the base register is word aligned. */
15594 bool
15595 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15597 HOST_WIDE_INT max_offset;
15599 /* Offset must be a multiple of 4 in Thumb mode. */
15600 if (TARGET_THUMB2 && ((offset & 3) != 0))
15601 return false;
15603 if (TARGET_THUMB2)
15604 max_offset = 1020;
15605 else if (TARGET_ARM)
15606 max_offset = 255;
15607 else
15608 return false;
15610 return ((offset <= max_offset) && (offset >= -max_offset));
15613 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15614 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15615 Assumes that the address in the base register RN is word aligned. Pattern
15616 guarantees that both memory accesses use the same base register,
15617 the offsets are constants within the range, and the gap between the offsets is 4.
15618 If preload complete then check that registers are legal. WBACK indicates whether
15619 address is updated. LOAD indicates whether memory access is load or store. */
15620 bool
15621 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15622 bool wback, bool load)
15624 unsigned int t, t2, n;
15626 if (!reload_completed)
15627 return true;
15629 if (!offset_ok_for_ldrd_strd (offset))
15630 return false;
15632 t = REGNO (rt);
15633 t2 = REGNO (rt2);
15634 n = REGNO (rn);
15636 if ((TARGET_THUMB2)
15637 && ((wback && (n == t || n == t2))
15638 || (t == SP_REGNUM)
15639 || (t == PC_REGNUM)
15640 || (t2 == SP_REGNUM)
15641 || (t2 == PC_REGNUM)
15642 || (!load && (n == PC_REGNUM))
15643 || (load && (t == t2))
15644 /* Triggers Cortex-M3 LDRD errata. */
15645 || (!wback && load && fix_cm3_ldrd && (n == t))))
15646 return false;
15648 if ((TARGET_ARM)
15649 && ((wback && (n == t || n == t2))
15650 || (t2 == PC_REGNUM)
15651 || (t % 2 != 0) /* First destination register is not even. */
15652 || (t2 != t + 1)
15653 /* PC can be used as base register (for offset addressing only),
15654 but it is depricated. */
15655 || (n == PC_REGNUM)))
15656 return false;
15658 return true;
15661 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15662 operand MEM's address contains an immediate offset from the base
15663 register and has no side effects, in which case it sets BASE and
15664 OFFSET accordingly. */
15665 static bool
15666 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15668 rtx addr;
15670 gcc_assert (base != NULL && offset != NULL);
15672 /* TODO: Handle more general memory operand patterns, such as
15673 PRE_DEC and PRE_INC. */
15675 if (side_effects_p (mem))
15676 return false;
15678 /* Can't deal with subregs. */
15679 if (GET_CODE (mem) == SUBREG)
15680 return false;
15682 gcc_assert (MEM_P (mem));
15684 *offset = const0_rtx;
15686 addr = XEXP (mem, 0);
15688 /* If addr isn't valid for DImode, then we can't handle it. */
15689 if (!arm_legitimate_address_p (DImode, addr,
15690 reload_in_progress || reload_completed))
15691 return false;
15693 if (REG_P (addr))
15695 *base = addr;
15696 return true;
15698 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15700 *base = XEXP (addr, 0);
15701 *offset = XEXP (addr, 1);
15702 return (REG_P (*base) && CONST_INT_P (*offset));
15705 return false;
15708 /* Called from a peephole2 to replace two word-size accesses with a
15709 single LDRD/STRD instruction. Returns true iff we can generate a
15710 new instruction sequence. That is, both accesses use the same base
15711 register and the gap between constant offsets is 4. This function
15712 may reorder its operands to match ldrd/strd RTL templates.
15713 OPERANDS are the operands found by the peephole matcher;
15714 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15715 corresponding memory operands. LOAD indicaates whether the access
15716 is load or store. CONST_STORE indicates a store of constant
15717 integer values held in OPERANDS[4,5] and assumes that the pattern
15718 is of length 4 insn, for the purpose of checking dead registers.
15719 COMMUTE indicates that register operands may be reordered. */
15720 bool
15721 gen_operands_ldrd_strd (rtx *operands, bool load,
15722 bool const_store, bool commute)
15724 int nops = 2;
15725 HOST_WIDE_INT offsets[2], offset;
15726 rtx base = NULL_RTX;
15727 rtx cur_base, cur_offset, tmp;
15728 int i, gap;
15729 HARD_REG_SET regset;
15731 gcc_assert (!const_store || !load);
15732 /* Check that the memory references are immediate offsets from the
15733 same base register. Extract the base register, the destination
15734 registers, and the corresponding memory offsets. */
15735 for (i = 0; i < nops; i++)
15737 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15738 return false;
15740 if (i == 0)
15741 base = cur_base;
15742 else if (REGNO (base) != REGNO (cur_base))
15743 return false;
15745 offsets[i] = INTVAL (cur_offset);
15746 if (GET_CODE (operands[i]) == SUBREG)
15748 tmp = SUBREG_REG (operands[i]);
15749 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15750 operands[i] = tmp;
15754 /* Make sure there is no dependency between the individual loads. */
15755 if (load && REGNO (operands[0]) == REGNO (base))
15756 return false; /* RAW */
15758 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15759 return false; /* WAW */
15761 /* If the same input register is used in both stores
15762 when storing different constants, try to find a free register.
15763 For example, the code
15764 mov r0, 0
15765 str r0, [r2]
15766 mov r0, 1
15767 str r0, [r2, #4]
15768 can be transformed into
15769 mov r1, 0
15770 strd r1, r0, [r2]
15771 in Thumb mode assuming that r1 is free. */
15772 if (const_store
15773 && REGNO (operands[0]) == REGNO (operands[1])
15774 && INTVAL (operands[4]) != INTVAL (operands[5]))
15776 if (TARGET_THUMB2)
15778 CLEAR_HARD_REG_SET (regset);
15779 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15780 if (tmp == NULL_RTX)
15781 return false;
15783 /* Use the new register in the first load to ensure that
15784 if the original input register is not dead after peephole,
15785 then it will have the correct constant value. */
15786 operands[0] = tmp;
15788 else if (TARGET_ARM)
15790 return false;
15791 int regno = REGNO (operands[0]);
15792 if (!peep2_reg_dead_p (4, operands[0]))
15794 /* When the input register is even and is not dead after the
15795 pattern, it has to hold the second constant but we cannot
15796 form a legal STRD in ARM mode with this register as the second
15797 register. */
15798 if (regno % 2 == 0)
15799 return false;
15801 /* Is regno-1 free? */
15802 SET_HARD_REG_SET (regset);
15803 CLEAR_HARD_REG_BIT(regset, regno - 1);
15804 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15805 if (tmp == NULL_RTX)
15806 return false;
15808 operands[0] = tmp;
15810 else
15812 /* Find a DImode register. */
15813 CLEAR_HARD_REG_SET (regset);
15814 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15815 if (tmp != NULL_RTX)
15817 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15818 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15820 else
15822 /* Can we use the input register to form a DI register? */
15823 SET_HARD_REG_SET (regset);
15824 CLEAR_HARD_REG_BIT(regset,
15825 regno % 2 == 0 ? regno + 1 : regno - 1);
15826 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15827 if (tmp == NULL_RTX)
15828 return false;
15829 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15833 gcc_assert (operands[0] != NULL_RTX);
15834 gcc_assert (operands[1] != NULL_RTX);
15835 gcc_assert (REGNO (operands[0]) % 2 == 0);
15836 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15840 /* Make sure the instructions are ordered with lower memory access first. */
15841 if (offsets[0] > offsets[1])
15843 gap = offsets[0] - offsets[1];
15844 offset = offsets[1];
15846 /* Swap the instructions such that lower memory is accessed first. */
15847 std::swap (operands[0], operands[1]);
15848 std::swap (operands[2], operands[3]);
15849 if (const_store)
15850 std::swap (operands[4], operands[5]);
15852 else
15854 gap = offsets[1] - offsets[0];
15855 offset = offsets[0];
15858 /* Make sure accesses are to consecutive memory locations. */
15859 if (gap != 4)
15860 return false;
15862 /* Make sure we generate legal instructions. */
15863 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15864 false, load))
15865 return true;
15867 /* In Thumb state, where registers are almost unconstrained, there
15868 is little hope to fix it. */
15869 if (TARGET_THUMB2)
15870 return false;
15872 if (load && commute)
15874 /* Try reordering registers. */
15875 std::swap (operands[0], operands[1]);
15876 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15877 false, load))
15878 return true;
15881 if (const_store)
15883 /* If input registers are dead after this pattern, they can be
15884 reordered or replaced by other registers that are free in the
15885 current pattern. */
15886 if (!peep2_reg_dead_p (4, operands[0])
15887 || !peep2_reg_dead_p (4, operands[1]))
15888 return false;
15890 /* Try to reorder the input registers. */
15891 /* For example, the code
15892 mov r0, 0
15893 mov r1, 1
15894 str r1, [r2]
15895 str r0, [r2, #4]
15896 can be transformed into
15897 mov r1, 0
15898 mov r0, 1
15899 strd r0, [r2]
15901 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15902 false, false))
15904 std::swap (operands[0], operands[1]);
15905 return true;
15908 /* Try to find a free DI register. */
15909 CLEAR_HARD_REG_SET (regset);
15910 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15911 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15912 while (true)
15914 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15915 if (tmp == NULL_RTX)
15916 return false;
15918 /* DREG must be an even-numbered register in DImode.
15919 Split it into SI registers. */
15920 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15921 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15922 gcc_assert (operands[0] != NULL_RTX);
15923 gcc_assert (operands[1] != NULL_RTX);
15924 gcc_assert (REGNO (operands[0]) % 2 == 0);
15925 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15927 return (operands_ok_ldrd_strd (operands[0], operands[1],
15928 base, offset,
15929 false, load));
15933 return false;
15939 /* Print a symbolic form of X to the debug file, F. */
15940 static void
15941 arm_print_value (FILE *f, rtx x)
15943 switch (GET_CODE (x))
15945 case CONST_INT:
15946 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15947 return;
15949 case CONST_DOUBLE:
15950 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15951 return;
15953 case CONST_VECTOR:
15955 int i;
15957 fprintf (f, "<");
15958 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15960 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15961 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15962 fputc (',', f);
15964 fprintf (f, ">");
15966 return;
15968 case CONST_STRING:
15969 fprintf (f, "\"%s\"", XSTR (x, 0));
15970 return;
15972 case SYMBOL_REF:
15973 fprintf (f, "`%s'", XSTR (x, 0));
15974 return;
15976 case LABEL_REF:
15977 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15978 return;
15980 case CONST:
15981 arm_print_value (f, XEXP (x, 0));
15982 return;
15984 case PLUS:
15985 arm_print_value (f, XEXP (x, 0));
15986 fprintf (f, "+");
15987 arm_print_value (f, XEXP (x, 1));
15988 return;
15990 case PC:
15991 fprintf (f, "pc");
15992 return;
15994 default:
15995 fprintf (f, "????");
15996 return;
16000 /* Routines for manipulation of the constant pool. */
16002 /* Arm instructions cannot load a large constant directly into a
16003 register; they have to come from a pc relative load. The constant
16004 must therefore be placed in the addressable range of the pc
16005 relative load. Depending on the precise pc relative load
16006 instruction the range is somewhere between 256 bytes and 4k. This
16007 means that we often have to dump a constant inside a function, and
16008 generate code to branch around it.
16010 It is important to minimize this, since the branches will slow
16011 things down and make the code larger.
16013 Normally we can hide the table after an existing unconditional
16014 branch so that there is no interruption of the flow, but in the
16015 worst case the code looks like this:
16017 ldr rn, L1
16019 b L2
16020 align
16021 L1: .long value
16025 ldr rn, L3
16027 b L4
16028 align
16029 L3: .long value
16033 We fix this by performing a scan after scheduling, which notices
16034 which instructions need to have their operands fetched from the
16035 constant table and builds the table.
16037 The algorithm starts by building a table of all the constants that
16038 need fixing up and all the natural barriers in the function (places
16039 where a constant table can be dropped without breaking the flow).
16040 For each fixup we note how far the pc-relative replacement will be
16041 able to reach and the offset of the instruction into the function.
16043 Having built the table we then group the fixes together to form
16044 tables that are as large as possible (subject to addressing
16045 constraints) and emit each table of constants after the last
16046 barrier that is within range of all the instructions in the group.
16047 If a group does not contain a barrier, then we forcibly create one
16048 by inserting a jump instruction into the flow. Once the table has
16049 been inserted, the insns are then modified to reference the
16050 relevant entry in the pool.
16052 Possible enhancements to the algorithm (not implemented) are:
16054 1) For some processors and object formats, there may be benefit in
16055 aligning the pools to the start of cache lines; this alignment
16056 would need to be taken into account when calculating addressability
16057 of a pool. */
16059 /* These typedefs are located at the start of this file, so that
16060 they can be used in the prototypes there. This comment is to
16061 remind readers of that fact so that the following structures
16062 can be understood more easily.
16064 typedef struct minipool_node Mnode;
16065 typedef struct minipool_fixup Mfix; */
16067 struct minipool_node
16069 /* Doubly linked chain of entries. */
16070 Mnode * next;
16071 Mnode * prev;
16072 /* The maximum offset into the code that this entry can be placed. While
16073 pushing fixes for forward references, all entries are sorted in order
16074 of increasing max_address. */
16075 HOST_WIDE_INT max_address;
16076 /* Similarly for an entry inserted for a backwards ref. */
16077 HOST_WIDE_INT min_address;
16078 /* The number of fixes referencing this entry. This can become zero
16079 if we "unpush" an entry. In this case we ignore the entry when we
16080 come to emit the code. */
16081 int refcount;
16082 /* The offset from the start of the minipool. */
16083 HOST_WIDE_INT offset;
16084 /* The value in table. */
16085 rtx value;
16086 /* The mode of value. */
16087 machine_mode mode;
16088 /* The size of the value. With iWMMXt enabled
16089 sizes > 4 also imply an alignment of 8-bytes. */
16090 int fix_size;
16093 struct minipool_fixup
16095 Mfix * next;
16096 rtx_insn * insn;
16097 HOST_WIDE_INT address;
16098 rtx * loc;
16099 machine_mode mode;
16100 int fix_size;
16101 rtx value;
16102 Mnode * minipool;
16103 HOST_WIDE_INT forwards;
16104 HOST_WIDE_INT backwards;
16107 /* Fixes less than a word need padding out to a word boundary. */
16108 #define MINIPOOL_FIX_SIZE(mode) \
16109 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
16111 static Mnode * minipool_vector_head;
16112 static Mnode * minipool_vector_tail;
16113 static rtx_code_label *minipool_vector_label;
16114 static int minipool_pad;
16116 /* The linked list of all minipool fixes required for this function. */
16117 Mfix * minipool_fix_head;
16118 Mfix * minipool_fix_tail;
16119 /* The fix entry for the current minipool, once it has been placed. */
16120 Mfix * minipool_barrier;
16122 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16123 #define JUMP_TABLES_IN_TEXT_SECTION 0
16124 #endif
16126 static HOST_WIDE_INT
16127 get_jump_table_size (rtx_jump_table_data *insn)
16129 /* ADDR_VECs only take room if read-only data does into the text
16130 section. */
16131 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16133 rtx body = PATTERN (insn);
16134 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16135 HOST_WIDE_INT size;
16136 HOST_WIDE_INT modesize;
16138 modesize = GET_MODE_SIZE (GET_MODE (body));
16139 size = modesize * XVECLEN (body, elt);
16140 switch (modesize)
16142 case 1:
16143 /* Round up size of TBB table to a halfword boundary. */
16144 size = (size + 1) & ~(HOST_WIDE_INT)1;
16145 break;
16146 case 2:
16147 /* No padding necessary for TBH. */
16148 break;
16149 case 4:
16150 /* Add two bytes for alignment on Thumb. */
16151 if (TARGET_THUMB)
16152 size += 2;
16153 break;
16154 default:
16155 gcc_unreachable ();
16157 return size;
16160 return 0;
16163 /* Return the maximum amount of padding that will be inserted before
16164 label LABEL. */
16166 static HOST_WIDE_INT
16167 get_label_padding (rtx label)
16169 HOST_WIDE_INT align, min_insn_size;
16171 align = 1 << label_to_alignment (label);
16172 min_insn_size = TARGET_THUMB ? 2 : 4;
16173 return align > min_insn_size ? align - min_insn_size : 0;
16176 /* Move a minipool fix MP from its current location to before MAX_MP.
16177 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16178 constraints may need updating. */
16179 static Mnode *
16180 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16181 HOST_WIDE_INT max_address)
16183 /* The code below assumes these are different. */
16184 gcc_assert (mp != max_mp);
16186 if (max_mp == NULL)
16188 if (max_address < mp->max_address)
16189 mp->max_address = max_address;
16191 else
16193 if (max_address > max_mp->max_address - mp->fix_size)
16194 mp->max_address = max_mp->max_address - mp->fix_size;
16195 else
16196 mp->max_address = max_address;
16198 /* Unlink MP from its current position. Since max_mp is non-null,
16199 mp->prev must be non-null. */
16200 mp->prev->next = mp->next;
16201 if (mp->next != NULL)
16202 mp->next->prev = mp->prev;
16203 else
16204 minipool_vector_tail = mp->prev;
16206 /* Re-insert it before MAX_MP. */
16207 mp->next = max_mp;
16208 mp->prev = max_mp->prev;
16209 max_mp->prev = mp;
16211 if (mp->prev != NULL)
16212 mp->prev->next = mp;
16213 else
16214 minipool_vector_head = mp;
16217 /* Save the new entry. */
16218 max_mp = mp;
16220 /* Scan over the preceding entries and adjust their addresses as
16221 required. */
16222 while (mp->prev != NULL
16223 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16225 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16226 mp = mp->prev;
16229 return max_mp;
16232 /* Add a constant to the minipool for a forward reference. Returns the
16233 node added or NULL if the constant will not fit in this pool. */
16234 static Mnode *
16235 add_minipool_forward_ref (Mfix *fix)
16237 /* If set, max_mp is the first pool_entry that has a lower
16238 constraint than the one we are trying to add. */
16239 Mnode * max_mp = NULL;
16240 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16241 Mnode * mp;
16243 /* If the minipool starts before the end of FIX->INSN then this FIX
16244 can not be placed into the current pool. Furthermore, adding the
16245 new constant pool entry may cause the pool to start FIX_SIZE bytes
16246 earlier. */
16247 if (minipool_vector_head &&
16248 (fix->address + get_attr_length (fix->insn)
16249 >= minipool_vector_head->max_address - fix->fix_size))
16250 return NULL;
16252 /* Scan the pool to see if a constant with the same value has
16253 already been added. While we are doing this, also note the
16254 location where we must insert the constant if it doesn't already
16255 exist. */
16256 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16258 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16259 && fix->mode == mp->mode
16260 && (!LABEL_P (fix->value)
16261 || (CODE_LABEL_NUMBER (fix->value)
16262 == CODE_LABEL_NUMBER (mp->value)))
16263 && rtx_equal_p (fix->value, mp->value))
16265 /* More than one fix references this entry. */
16266 mp->refcount++;
16267 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16270 /* Note the insertion point if necessary. */
16271 if (max_mp == NULL
16272 && mp->max_address > max_address)
16273 max_mp = mp;
16275 /* If we are inserting an 8-bytes aligned quantity and
16276 we have not already found an insertion point, then
16277 make sure that all such 8-byte aligned quantities are
16278 placed at the start of the pool. */
16279 if (ARM_DOUBLEWORD_ALIGN
16280 && max_mp == NULL
16281 && fix->fix_size >= 8
16282 && mp->fix_size < 8)
16284 max_mp = mp;
16285 max_address = mp->max_address;
16289 /* The value is not currently in the minipool, so we need to create
16290 a new entry for it. If MAX_MP is NULL, the entry will be put on
16291 the end of the list since the placement is less constrained than
16292 any existing entry. Otherwise, we insert the new fix before
16293 MAX_MP and, if necessary, adjust the constraints on the other
16294 entries. */
16295 mp = XNEW (Mnode);
16296 mp->fix_size = fix->fix_size;
16297 mp->mode = fix->mode;
16298 mp->value = fix->value;
16299 mp->refcount = 1;
16300 /* Not yet required for a backwards ref. */
16301 mp->min_address = -65536;
16303 if (max_mp == NULL)
16305 mp->max_address = max_address;
16306 mp->next = NULL;
16307 mp->prev = minipool_vector_tail;
16309 if (mp->prev == NULL)
16311 minipool_vector_head = mp;
16312 minipool_vector_label = gen_label_rtx ();
16314 else
16315 mp->prev->next = mp;
16317 minipool_vector_tail = mp;
16319 else
16321 if (max_address > max_mp->max_address - mp->fix_size)
16322 mp->max_address = max_mp->max_address - mp->fix_size;
16323 else
16324 mp->max_address = max_address;
16326 mp->next = max_mp;
16327 mp->prev = max_mp->prev;
16328 max_mp->prev = mp;
16329 if (mp->prev != NULL)
16330 mp->prev->next = mp;
16331 else
16332 minipool_vector_head = mp;
16335 /* Save the new entry. */
16336 max_mp = mp;
16338 /* Scan over the preceding entries and adjust their addresses as
16339 required. */
16340 while (mp->prev != NULL
16341 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16343 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16344 mp = mp->prev;
16347 return max_mp;
16350 static Mnode *
16351 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16352 HOST_WIDE_INT min_address)
16354 HOST_WIDE_INT offset;
16356 /* The code below assumes these are different. */
16357 gcc_assert (mp != min_mp);
16359 if (min_mp == NULL)
16361 if (min_address > mp->min_address)
16362 mp->min_address = min_address;
16364 else
16366 /* We will adjust this below if it is too loose. */
16367 mp->min_address = min_address;
16369 /* Unlink MP from its current position. Since min_mp is non-null,
16370 mp->next must be non-null. */
16371 mp->next->prev = mp->prev;
16372 if (mp->prev != NULL)
16373 mp->prev->next = mp->next;
16374 else
16375 minipool_vector_head = mp->next;
16377 /* Reinsert it after MIN_MP. */
16378 mp->prev = min_mp;
16379 mp->next = min_mp->next;
16380 min_mp->next = mp;
16381 if (mp->next != NULL)
16382 mp->next->prev = mp;
16383 else
16384 minipool_vector_tail = mp;
16387 min_mp = mp;
16389 offset = 0;
16390 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16392 mp->offset = offset;
16393 if (mp->refcount > 0)
16394 offset += mp->fix_size;
16396 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16397 mp->next->min_address = mp->min_address + mp->fix_size;
16400 return min_mp;
16403 /* Add a constant to the minipool for a backward reference. Returns the
16404 node added or NULL if the constant will not fit in this pool.
16406 Note that the code for insertion for a backwards reference can be
16407 somewhat confusing because the calculated offsets for each fix do
16408 not take into account the size of the pool (which is still under
16409 construction. */
16410 static Mnode *
16411 add_minipool_backward_ref (Mfix *fix)
16413 /* If set, min_mp is the last pool_entry that has a lower constraint
16414 than the one we are trying to add. */
16415 Mnode *min_mp = NULL;
16416 /* This can be negative, since it is only a constraint. */
16417 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16418 Mnode *mp;
16420 /* If we can't reach the current pool from this insn, or if we can't
16421 insert this entry at the end of the pool without pushing other
16422 fixes out of range, then we don't try. This ensures that we
16423 can't fail later on. */
16424 if (min_address >= minipool_barrier->address
16425 || (minipool_vector_tail->min_address + fix->fix_size
16426 >= minipool_barrier->address))
16427 return NULL;
16429 /* Scan the pool to see if a constant with the same value has
16430 already been added. While we are doing this, also note the
16431 location where we must insert the constant if it doesn't already
16432 exist. */
16433 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16435 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16436 && fix->mode == mp->mode
16437 && (!LABEL_P (fix->value)
16438 || (CODE_LABEL_NUMBER (fix->value)
16439 == CODE_LABEL_NUMBER (mp->value)))
16440 && rtx_equal_p (fix->value, mp->value)
16441 /* Check that there is enough slack to move this entry to the
16442 end of the table (this is conservative). */
16443 && (mp->max_address
16444 > (minipool_barrier->address
16445 + minipool_vector_tail->offset
16446 + minipool_vector_tail->fix_size)))
16448 mp->refcount++;
16449 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16452 if (min_mp != NULL)
16453 mp->min_address += fix->fix_size;
16454 else
16456 /* Note the insertion point if necessary. */
16457 if (mp->min_address < min_address)
16459 /* For now, we do not allow the insertion of 8-byte alignment
16460 requiring nodes anywhere but at the start of the pool. */
16461 if (ARM_DOUBLEWORD_ALIGN
16462 && fix->fix_size >= 8 && mp->fix_size < 8)
16463 return NULL;
16464 else
16465 min_mp = mp;
16467 else if (mp->max_address
16468 < minipool_barrier->address + mp->offset + fix->fix_size)
16470 /* Inserting before this entry would push the fix beyond
16471 its maximum address (which can happen if we have
16472 re-located a forwards fix); force the new fix to come
16473 after it. */
16474 if (ARM_DOUBLEWORD_ALIGN
16475 && fix->fix_size >= 8 && mp->fix_size < 8)
16476 return NULL;
16477 else
16479 min_mp = mp;
16480 min_address = mp->min_address + fix->fix_size;
16483 /* Do not insert a non-8-byte aligned quantity before 8-byte
16484 aligned quantities. */
16485 else if (ARM_DOUBLEWORD_ALIGN
16486 && fix->fix_size < 8
16487 && mp->fix_size >= 8)
16489 min_mp = mp;
16490 min_address = mp->min_address + fix->fix_size;
16495 /* We need to create a new entry. */
16496 mp = XNEW (Mnode);
16497 mp->fix_size = fix->fix_size;
16498 mp->mode = fix->mode;
16499 mp->value = fix->value;
16500 mp->refcount = 1;
16501 mp->max_address = minipool_barrier->address + 65536;
16503 mp->min_address = min_address;
16505 if (min_mp == NULL)
16507 mp->prev = NULL;
16508 mp->next = minipool_vector_head;
16510 if (mp->next == NULL)
16512 minipool_vector_tail = mp;
16513 minipool_vector_label = gen_label_rtx ();
16515 else
16516 mp->next->prev = mp;
16518 minipool_vector_head = mp;
16520 else
16522 mp->next = min_mp->next;
16523 mp->prev = min_mp;
16524 min_mp->next = mp;
16526 if (mp->next != NULL)
16527 mp->next->prev = mp;
16528 else
16529 minipool_vector_tail = mp;
16532 /* Save the new entry. */
16533 min_mp = mp;
16535 if (mp->prev)
16536 mp = mp->prev;
16537 else
16538 mp->offset = 0;
16540 /* Scan over the following entries and adjust their offsets. */
16541 while (mp->next != NULL)
16543 if (mp->next->min_address < mp->min_address + mp->fix_size)
16544 mp->next->min_address = mp->min_address + mp->fix_size;
16546 if (mp->refcount)
16547 mp->next->offset = mp->offset + mp->fix_size;
16548 else
16549 mp->next->offset = mp->offset;
16551 mp = mp->next;
16554 return min_mp;
16557 static void
16558 assign_minipool_offsets (Mfix *barrier)
16560 HOST_WIDE_INT offset = 0;
16561 Mnode *mp;
16563 minipool_barrier = barrier;
16565 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16567 mp->offset = offset;
16569 if (mp->refcount > 0)
16570 offset += mp->fix_size;
16574 /* Output the literal table */
16575 static void
16576 dump_minipool (rtx_insn *scan)
16578 Mnode * mp;
16579 Mnode * nmp;
16580 int align64 = 0;
16582 if (ARM_DOUBLEWORD_ALIGN)
16583 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16584 if (mp->refcount > 0 && mp->fix_size >= 8)
16586 align64 = 1;
16587 break;
16590 if (dump_file)
16591 fprintf (dump_file,
16592 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16593 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16595 scan = emit_label_after (gen_label_rtx (), scan);
16596 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16597 scan = emit_label_after (minipool_vector_label, scan);
16599 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16601 if (mp->refcount > 0)
16603 if (dump_file)
16605 fprintf (dump_file,
16606 ";; Offset %u, min %ld, max %ld ",
16607 (unsigned) mp->offset, (unsigned long) mp->min_address,
16608 (unsigned long) mp->max_address);
16609 arm_print_value (dump_file, mp->value);
16610 fputc ('\n', dump_file);
16613 switch (GET_MODE_SIZE (mp->mode))
16615 #ifdef HAVE_consttable_1
16616 case 1:
16617 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
16618 break;
16620 #endif
16621 #ifdef HAVE_consttable_2
16622 case 2:
16623 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
16624 break;
16626 #endif
16627 #ifdef HAVE_consttable_4
16628 case 4:
16629 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
16630 break;
16632 #endif
16633 #ifdef HAVE_consttable_8
16634 case 8:
16635 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
16636 break;
16638 #endif
16639 #ifdef HAVE_consttable_16
16640 case 16:
16641 scan = emit_insn_after (gen_consttable_16 (mp->value), scan);
16642 break;
16644 #endif
16645 default:
16646 gcc_unreachable ();
16650 nmp = mp->next;
16651 free (mp);
16654 minipool_vector_head = minipool_vector_tail = NULL;
16655 scan = emit_insn_after (gen_consttable_end (), scan);
16656 scan = emit_barrier_after (scan);
16659 /* Return the cost of forcibly inserting a barrier after INSN. */
16660 static int
16661 arm_barrier_cost (rtx_insn *insn)
16663 /* Basing the location of the pool on the loop depth is preferable,
16664 but at the moment, the basic block information seems to be
16665 corrupt by this stage of the compilation. */
16666 int base_cost = 50;
16667 rtx_insn *next = next_nonnote_insn (insn);
16669 if (next != NULL && LABEL_P (next))
16670 base_cost -= 20;
16672 switch (GET_CODE (insn))
16674 case CODE_LABEL:
16675 /* It will always be better to place the table before the label, rather
16676 than after it. */
16677 return 50;
16679 case INSN:
16680 case CALL_INSN:
16681 return base_cost;
16683 case JUMP_INSN:
16684 return base_cost - 10;
16686 default:
16687 return base_cost + 10;
16691 /* Find the best place in the insn stream in the range
16692 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16693 Create the barrier by inserting a jump and add a new fix entry for
16694 it. */
16695 static Mfix *
16696 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16698 HOST_WIDE_INT count = 0;
16699 rtx_barrier *barrier;
16700 rtx_insn *from = fix->insn;
16701 /* The instruction after which we will insert the jump. */
16702 rtx_insn *selected = NULL;
16703 int selected_cost;
16704 /* The address at which the jump instruction will be placed. */
16705 HOST_WIDE_INT selected_address;
16706 Mfix * new_fix;
16707 HOST_WIDE_INT max_count = max_address - fix->address;
16708 rtx_code_label *label = gen_label_rtx ();
16710 selected_cost = arm_barrier_cost (from);
16711 selected_address = fix->address;
16713 while (from && count < max_count)
16715 rtx_jump_table_data *tmp;
16716 int new_cost;
16718 /* This code shouldn't have been called if there was a natural barrier
16719 within range. */
16720 gcc_assert (!BARRIER_P (from));
16722 /* Count the length of this insn. This must stay in sync with the
16723 code that pushes minipool fixes. */
16724 if (LABEL_P (from))
16725 count += get_label_padding (from);
16726 else
16727 count += get_attr_length (from);
16729 /* If there is a jump table, add its length. */
16730 if (tablejump_p (from, NULL, &tmp))
16732 count += get_jump_table_size (tmp);
16734 /* Jump tables aren't in a basic block, so base the cost on
16735 the dispatch insn. If we select this location, we will
16736 still put the pool after the table. */
16737 new_cost = arm_barrier_cost (from);
16739 if (count < max_count
16740 && (!selected || new_cost <= selected_cost))
16742 selected = tmp;
16743 selected_cost = new_cost;
16744 selected_address = fix->address + count;
16747 /* Continue after the dispatch table. */
16748 from = NEXT_INSN (tmp);
16749 continue;
16752 new_cost = arm_barrier_cost (from);
16754 if (count < max_count
16755 && (!selected || new_cost <= selected_cost))
16757 selected = from;
16758 selected_cost = new_cost;
16759 selected_address = fix->address + count;
16762 from = NEXT_INSN (from);
16765 /* Make sure that we found a place to insert the jump. */
16766 gcc_assert (selected);
16768 /* Make sure we do not split a call and its corresponding
16769 CALL_ARG_LOCATION note. */
16770 if (CALL_P (selected))
16772 rtx_insn *next = NEXT_INSN (selected);
16773 if (next && NOTE_P (next)
16774 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16775 selected = next;
16778 /* Create a new JUMP_INSN that branches around a barrier. */
16779 from = emit_jump_insn_after (gen_jump (label), selected);
16780 JUMP_LABEL (from) = label;
16781 barrier = emit_barrier_after (from);
16782 emit_label_after (label, barrier);
16784 /* Create a minipool barrier entry for the new barrier. */
16785 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16786 new_fix->insn = barrier;
16787 new_fix->address = selected_address;
16788 new_fix->next = fix->next;
16789 fix->next = new_fix;
16791 return new_fix;
16794 /* Record that there is a natural barrier in the insn stream at
16795 ADDRESS. */
16796 static void
16797 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16799 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16801 fix->insn = insn;
16802 fix->address = address;
16804 fix->next = NULL;
16805 if (minipool_fix_head != NULL)
16806 minipool_fix_tail->next = fix;
16807 else
16808 minipool_fix_head = fix;
16810 minipool_fix_tail = fix;
16813 /* Record INSN, which will need fixing up to load a value from the
16814 minipool. ADDRESS is the offset of the insn since the start of the
16815 function; LOC is a pointer to the part of the insn which requires
16816 fixing; VALUE is the constant that must be loaded, which is of type
16817 MODE. */
16818 static void
16819 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16820 machine_mode mode, rtx value)
16822 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16824 fix->insn = insn;
16825 fix->address = address;
16826 fix->loc = loc;
16827 fix->mode = mode;
16828 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16829 fix->value = value;
16830 fix->forwards = get_attr_pool_range (insn);
16831 fix->backwards = get_attr_neg_pool_range (insn);
16832 fix->minipool = NULL;
16834 /* If an insn doesn't have a range defined for it, then it isn't
16835 expecting to be reworked by this code. Better to stop now than
16836 to generate duff assembly code. */
16837 gcc_assert (fix->forwards || fix->backwards);
16839 /* If an entry requires 8-byte alignment then assume all constant pools
16840 require 4 bytes of padding. Trying to do this later on a per-pool
16841 basis is awkward because existing pool entries have to be modified. */
16842 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16843 minipool_pad = 4;
16845 if (dump_file)
16847 fprintf (dump_file,
16848 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16849 GET_MODE_NAME (mode),
16850 INSN_UID (insn), (unsigned long) address,
16851 -1 * (long)fix->backwards, (long)fix->forwards);
16852 arm_print_value (dump_file, fix->value);
16853 fprintf (dump_file, "\n");
16856 /* Add it to the chain of fixes. */
16857 fix->next = NULL;
16859 if (minipool_fix_head != NULL)
16860 minipool_fix_tail->next = fix;
16861 else
16862 minipool_fix_head = fix;
16864 minipool_fix_tail = fix;
16867 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16868 Returns the number of insns needed, or 99 if we always want to synthesize
16869 the value. */
16871 arm_max_const_double_inline_cost ()
16873 /* Let the value get synthesized to avoid the use of literal pools. */
16874 if (arm_disable_literal_pool)
16875 return 99;
16877 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16880 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16881 Returns the number of insns needed, or 99 if we don't know how to
16882 do it. */
16884 arm_const_double_inline_cost (rtx val)
16886 rtx lowpart, highpart;
16887 machine_mode mode;
16889 mode = GET_MODE (val);
16891 if (mode == VOIDmode)
16892 mode = DImode;
16894 gcc_assert (GET_MODE_SIZE (mode) == 8);
16896 lowpart = gen_lowpart (SImode, val);
16897 highpart = gen_highpart_mode (SImode, mode, val);
16899 gcc_assert (CONST_INT_P (lowpart));
16900 gcc_assert (CONST_INT_P (highpart));
16902 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16903 NULL_RTX, NULL_RTX, 0, 0)
16904 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16905 NULL_RTX, NULL_RTX, 0, 0));
16908 /* Cost of loading a SImode constant. */
16909 static inline int
16910 arm_const_inline_cost (enum rtx_code code, rtx val)
16912 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16913 NULL_RTX, NULL_RTX, 1, 0);
16916 /* Return true if it is worthwhile to split a 64-bit constant into two
16917 32-bit operations. This is the case if optimizing for size, or
16918 if we have load delay slots, or if one 32-bit part can be done with
16919 a single data operation. */
16920 bool
16921 arm_const_double_by_parts (rtx val)
16923 machine_mode mode = GET_MODE (val);
16924 rtx part;
16926 if (optimize_size || arm_ld_sched)
16927 return true;
16929 if (mode == VOIDmode)
16930 mode = DImode;
16932 part = gen_highpart_mode (SImode, mode, val);
16934 gcc_assert (CONST_INT_P (part));
16936 if (const_ok_for_arm (INTVAL (part))
16937 || const_ok_for_arm (~INTVAL (part)))
16938 return true;
16940 part = gen_lowpart (SImode, val);
16942 gcc_assert (CONST_INT_P (part));
16944 if (const_ok_for_arm (INTVAL (part))
16945 || const_ok_for_arm (~INTVAL (part)))
16946 return true;
16948 return false;
16951 /* Return true if it is possible to inline both the high and low parts
16952 of a 64-bit constant into 32-bit data processing instructions. */
16953 bool
16954 arm_const_double_by_immediates (rtx val)
16956 machine_mode mode = GET_MODE (val);
16957 rtx part;
16959 if (mode == VOIDmode)
16960 mode = DImode;
16962 part = gen_highpart_mode (SImode, mode, val);
16964 gcc_assert (CONST_INT_P (part));
16966 if (!const_ok_for_arm (INTVAL (part)))
16967 return false;
16969 part = gen_lowpart (SImode, val);
16971 gcc_assert (CONST_INT_P (part));
16973 if (!const_ok_for_arm (INTVAL (part)))
16974 return false;
16976 return true;
16979 /* Scan INSN and note any of its operands that need fixing.
16980 If DO_PUSHES is false we do not actually push any of the fixups
16981 needed. */
16982 static void
16983 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16985 int opno;
16987 extract_constrain_insn (insn);
16989 if (recog_data.n_alternatives == 0)
16990 return;
16992 /* Fill in recog_op_alt with information about the constraints of
16993 this insn. */
16994 preprocess_constraints (insn);
16996 const operand_alternative *op_alt = which_op_alt ();
16997 for (opno = 0; opno < recog_data.n_operands; opno++)
16999 /* Things we need to fix can only occur in inputs. */
17000 if (recog_data.operand_type[opno] != OP_IN)
17001 continue;
17003 /* If this alternative is a memory reference, then any mention
17004 of constants in this alternative is really to fool reload
17005 into allowing us to accept one there. We need to fix them up
17006 now so that we output the right code. */
17007 if (op_alt[opno].memory_ok)
17009 rtx op = recog_data.operand[opno];
17011 if (CONSTANT_P (op))
17013 if (do_pushes)
17014 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
17015 recog_data.operand_mode[opno], op);
17017 else if (MEM_P (op)
17018 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
17019 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
17021 if (do_pushes)
17023 rtx cop = avoid_constant_pool_reference (op);
17025 /* Casting the address of something to a mode narrower
17026 than a word can cause avoid_constant_pool_reference()
17027 to return the pool reference itself. That's no good to
17028 us here. Lets just hope that we can use the
17029 constant pool value directly. */
17030 if (op == cop)
17031 cop = get_pool_constant (XEXP (op, 0));
17033 push_minipool_fix (insn, address,
17034 recog_data.operand_loc[opno],
17035 recog_data.operand_mode[opno], cop);
17042 return;
17045 /* Rewrite move insn into subtract of 0 if the condition codes will
17046 be useful in next conditional jump insn. */
17048 static void
17049 thumb1_reorg (void)
17051 basic_block bb;
17053 FOR_EACH_BB_FN (bb, cfun)
17055 rtx dest, src;
17056 rtx pat, op0, set = NULL;
17057 rtx_insn *prev, *insn = BB_END (bb);
17058 bool insn_clobbered = false;
17060 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17061 insn = PREV_INSN (insn);
17063 /* Find the last cbranchsi4_insn in basic block BB. */
17064 if (insn == BB_HEAD (bb)
17065 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17066 continue;
17068 /* Get the register with which we are comparing. */
17069 pat = PATTERN (insn);
17070 op0 = XEXP (XEXP (SET_SRC (pat), 0), 0);
17072 /* Find the first flag setting insn before INSN in basic block BB. */
17073 gcc_assert (insn != BB_HEAD (bb));
17074 for (prev = PREV_INSN (insn);
17075 (!insn_clobbered
17076 && prev != BB_HEAD (bb)
17077 && (NOTE_P (prev)
17078 || DEBUG_INSN_P (prev)
17079 || ((set = single_set (prev)) != NULL
17080 && get_attr_conds (prev) == CONDS_NOCOND)));
17081 prev = PREV_INSN (prev))
17083 if (reg_set_p (op0, prev))
17084 insn_clobbered = true;
17087 /* Skip if op0 is clobbered by insn other than prev. */
17088 if (insn_clobbered)
17089 continue;
17091 if (!set)
17092 continue;
17094 dest = SET_DEST (set);
17095 src = SET_SRC (set);
17096 if (!low_register_operand (dest, SImode)
17097 || !low_register_operand (src, SImode))
17098 continue;
17100 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17101 in INSN. Both src and dest of the move insn are checked. */
17102 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17104 dest = copy_rtx (dest);
17105 src = copy_rtx (src);
17106 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17107 PATTERN (prev) = gen_rtx_SET (dest, src);
17108 INSN_CODE (prev) = -1;
17109 /* Set test register in INSN to dest. */
17110 XEXP (XEXP (SET_SRC (pat), 0), 0) = copy_rtx (dest);
17111 INSN_CODE (insn) = -1;
17116 /* Convert instructions to their cc-clobbering variant if possible, since
17117 that allows us to use smaller encodings. */
17119 static void
17120 thumb2_reorg (void)
17122 basic_block bb;
17123 regset_head live;
17125 INIT_REG_SET (&live);
17127 /* We are freeing block_for_insn in the toplev to keep compatibility
17128 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17129 compute_bb_for_insn ();
17130 df_analyze ();
17132 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17134 FOR_EACH_BB_FN (bb, cfun)
17136 if ((current_tune->disparage_flag_setting_t16_encodings
17137 == tune_params::DISPARAGE_FLAGS_ALL)
17138 && optimize_bb_for_speed_p (bb))
17139 continue;
17141 rtx_insn *insn;
17142 Convert_Action action = SKIP;
17143 Convert_Action action_for_partial_flag_setting
17144 = ((current_tune->disparage_flag_setting_t16_encodings
17145 != tune_params::DISPARAGE_FLAGS_NEITHER)
17146 && optimize_bb_for_speed_p (bb))
17147 ? SKIP : CONV;
17149 COPY_REG_SET (&live, DF_LR_OUT (bb));
17150 df_simulate_initialize_backwards (bb, &live);
17151 FOR_BB_INSNS_REVERSE (bb, insn)
17153 if (NONJUMP_INSN_P (insn)
17154 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17155 && GET_CODE (PATTERN (insn)) == SET)
17157 action = SKIP;
17158 rtx pat = PATTERN (insn);
17159 rtx dst = XEXP (pat, 0);
17160 rtx src = XEXP (pat, 1);
17161 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17163 if (UNARY_P (src) || BINARY_P (src))
17164 op0 = XEXP (src, 0);
17166 if (BINARY_P (src))
17167 op1 = XEXP (src, 1);
17169 if (low_register_operand (dst, SImode))
17171 switch (GET_CODE (src))
17173 case PLUS:
17174 /* Adding two registers and storing the result
17175 in the first source is already a 16-bit
17176 operation. */
17177 if (rtx_equal_p (dst, op0)
17178 && register_operand (op1, SImode))
17179 break;
17181 if (low_register_operand (op0, SImode))
17183 /* ADDS <Rd>,<Rn>,<Rm> */
17184 if (low_register_operand (op1, SImode))
17185 action = CONV;
17186 /* ADDS <Rdn>,#<imm8> */
17187 /* SUBS <Rdn>,#<imm8> */
17188 else if (rtx_equal_p (dst, op0)
17189 && CONST_INT_P (op1)
17190 && IN_RANGE (INTVAL (op1), -255, 255))
17191 action = CONV;
17192 /* ADDS <Rd>,<Rn>,#<imm3> */
17193 /* SUBS <Rd>,<Rn>,#<imm3> */
17194 else if (CONST_INT_P (op1)
17195 && IN_RANGE (INTVAL (op1), -7, 7))
17196 action = CONV;
17198 /* ADCS <Rd>, <Rn> */
17199 else if (GET_CODE (XEXP (src, 0)) == PLUS
17200 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17201 && low_register_operand (XEXP (XEXP (src, 0), 1),
17202 SImode)
17203 && COMPARISON_P (op1)
17204 && cc_register (XEXP (op1, 0), VOIDmode)
17205 && maybe_get_arm_condition_code (op1) == ARM_CS
17206 && XEXP (op1, 1) == const0_rtx)
17207 action = CONV;
17208 break;
17210 case MINUS:
17211 /* RSBS <Rd>,<Rn>,#0
17212 Not handled here: see NEG below. */
17213 /* SUBS <Rd>,<Rn>,#<imm3>
17214 SUBS <Rdn>,#<imm8>
17215 Not handled here: see PLUS above. */
17216 /* SUBS <Rd>,<Rn>,<Rm> */
17217 if (low_register_operand (op0, SImode)
17218 && low_register_operand (op1, SImode))
17219 action = CONV;
17220 break;
17222 case MULT:
17223 /* MULS <Rdm>,<Rn>,<Rdm>
17224 As an exception to the rule, this is only used
17225 when optimizing for size since MULS is slow on all
17226 known implementations. We do not even want to use
17227 MULS in cold code, if optimizing for speed, so we
17228 test the global flag here. */
17229 if (!optimize_size)
17230 break;
17231 /* else fall through. */
17232 case AND:
17233 case IOR:
17234 case XOR:
17235 /* ANDS <Rdn>,<Rm> */
17236 if (rtx_equal_p (dst, op0)
17237 && low_register_operand (op1, SImode))
17238 action = action_for_partial_flag_setting;
17239 else if (rtx_equal_p (dst, op1)
17240 && low_register_operand (op0, SImode))
17241 action = action_for_partial_flag_setting == SKIP
17242 ? SKIP : SWAP_CONV;
17243 break;
17245 case ASHIFTRT:
17246 case ASHIFT:
17247 case LSHIFTRT:
17248 /* ASRS <Rdn>,<Rm> */
17249 /* LSRS <Rdn>,<Rm> */
17250 /* LSLS <Rdn>,<Rm> */
17251 if (rtx_equal_p (dst, op0)
17252 && low_register_operand (op1, SImode))
17253 action = action_for_partial_flag_setting;
17254 /* ASRS <Rd>,<Rm>,#<imm5> */
17255 /* LSRS <Rd>,<Rm>,#<imm5> */
17256 /* LSLS <Rd>,<Rm>,#<imm5> */
17257 else if (low_register_operand (op0, SImode)
17258 && CONST_INT_P (op1)
17259 && IN_RANGE (INTVAL (op1), 0, 31))
17260 action = action_for_partial_flag_setting;
17261 break;
17263 case ROTATERT:
17264 /* RORS <Rdn>,<Rm> */
17265 if (rtx_equal_p (dst, op0)
17266 && low_register_operand (op1, SImode))
17267 action = action_for_partial_flag_setting;
17268 break;
17270 case NOT:
17271 /* MVNS <Rd>,<Rm> */
17272 if (low_register_operand (op0, SImode))
17273 action = action_for_partial_flag_setting;
17274 break;
17276 case NEG:
17277 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17278 if (low_register_operand (op0, SImode))
17279 action = CONV;
17280 break;
17282 case CONST_INT:
17283 /* MOVS <Rd>,#<imm8> */
17284 if (CONST_INT_P (src)
17285 && IN_RANGE (INTVAL (src), 0, 255))
17286 action = action_for_partial_flag_setting;
17287 break;
17289 case REG:
17290 /* MOVS and MOV<c> with registers have different
17291 encodings, so are not relevant here. */
17292 break;
17294 default:
17295 break;
17299 if (action != SKIP)
17301 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17302 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17303 rtvec vec;
17305 if (action == SWAP_CONV)
17307 src = copy_rtx (src);
17308 XEXP (src, 0) = op1;
17309 XEXP (src, 1) = op0;
17310 pat = gen_rtx_SET (dst, src);
17311 vec = gen_rtvec (2, pat, clobber);
17313 else /* action == CONV */
17314 vec = gen_rtvec (2, pat, clobber);
17316 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17317 INSN_CODE (insn) = -1;
17321 if (NONDEBUG_INSN_P (insn))
17322 df_simulate_one_insn_backwards (bb, insn, &live);
17326 CLEAR_REG_SET (&live);
17329 /* Gcc puts the pool in the wrong place for ARM, since we can only
17330 load addresses a limited distance around the pc. We do some
17331 special munging to move the constant pool values to the correct
17332 point in the code. */
17333 static void
17334 arm_reorg (void)
17336 rtx_insn *insn;
17337 HOST_WIDE_INT address = 0;
17338 Mfix * fix;
17340 if (TARGET_THUMB1)
17341 thumb1_reorg ();
17342 else if (TARGET_THUMB2)
17343 thumb2_reorg ();
17345 /* Ensure all insns that must be split have been split at this point.
17346 Otherwise, the pool placement code below may compute incorrect
17347 insn lengths. Note that when optimizing, all insns have already
17348 been split at this point. */
17349 if (!optimize)
17350 split_all_insns_noflow ();
17352 minipool_fix_head = minipool_fix_tail = NULL;
17354 /* The first insn must always be a note, or the code below won't
17355 scan it properly. */
17356 insn = get_insns ();
17357 gcc_assert (NOTE_P (insn));
17358 minipool_pad = 0;
17360 /* Scan all the insns and record the operands that will need fixing. */
17361 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17363 if (BARRIER_P (insn))
17364 push_minipool_barrier (insn, address);
17365 else if (INSN_P (insn))
17367 rtx_jump_table_data *table;
17369 note_invalid_constants (insn, address, true);
17370 address += get_attr_length (insn);
17372 /* If the insn is a vector jump, add the size of the table
17373 and skip the table. */
17374 if (tablejump_p (insn, NULL, &table))
17376 address += get_jump_table_size (table);
17377 insn = table;
17380 else if (LABEL_P (insn))
17381 /* Add the worst-case padding due to alignment. We don't add
17382 the _current_ padding because the minipool insertions
17383 themselves might change it. */
17384 address += get_label_padding (insn);
17387 fix = minipool_fix_head;
17389 /* Now scan the fixups and perform the required changes. */
17390 while (fix)
17392 Mfix * ftmp;
17393 Mfix * fdel;
17394 Mfix * last_added_fix;
17395 Mfix * last_barrier = NULL;
17396 Mfix * this_fix;
17398 /* Skip any further barriers before the next fix. */
17399 while (fix && BARRIER_P (fix->insn))
17400 fix = fix->next;
17402 /* No more fixes. */
17403 if (fix == NULL)
17404 break;
17406 last_added_fix = NULL;
17408 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17410 if (BARRIER_P (ftmp->insn))
17412 if (ftmp->address >= minipool_vector_head->max_address)
17413 break;
17415 last_barrier = ftmp;
17417 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17418 break;
17420 last_added_fix = ftmp; /* Keep track of the last fix added. */
17423 /* If we found a barrier, drop back to that; any fixes that we
17424 could have reached but come after the barrier will now go in
17425 the next mini-pool. */
17426 if (last_barrier != NULL)
17428 /* Reduce the refcount for those fixes that won't go into this
17429 pool after all. */
17430 for (fdel = last_barrier->next;
17431 fdel && fdel != ftmp;
17432 fdel = fdel->next)
17434 fdel->minipool->refcount--;
17435 fdel->minipool = NULL;
17438 ftmp = last_barrier;
17440 else
17442 /* ftmp is first fix that we can't fit into this pool and
17443 there no natural barriers that we could use. Insert a
17444 new barrier in the code somewhere between the previous
17445 fix and this one, and arrange to jump around it. */
17446 HOST_WIDE_INT max_address;
17448 /* The last item on the list of fixes must be a barrier, so
17449 we can never run off the end of the list of fixes without
17450 last_barrier being set. */
17451 gcc_assert (ftmp);
17453 max_address = minipool_vector_head->max_address;
17454 /* Check that there isn't another fix that is in range that
17455 we couldn't fit into this pool because the pool was
17456 already too large: we need to put the pool before such an
17457 instruction. The pool itself may come just after the
17458 fix because create_fix_barrier also allows space for a
17459 jump instruction. */
17460 if (ftmp->address < max_address)
17461 max_address = ftmp->address + 1;
17463 last_barrier = create_fix_barrier (last_added_fix, max_address);
17466 assign_minipool_offsets (last_barrier);
17468 while (ftmp)
17470 if (!BARRIER_P (ftmp->insn)
17471 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17472 == NULL))
17473 break;
17475 ftmp = ftmp->next;
17478 /* Scan over the fixes we have identified for this pool, fixing them
17479 up and adding the constants to the pool itself. */
17480 for (this_fix = fix; this_fix && ftmp != this_fix;
17481 this_fix = this_fix->next)
17482 if (!BARRIER_P (this_fix->insn))
17484 rtx addr
17485 = plus_constant (Pmode,
17486 gen_rtx_LABEL_REF (VOIDmode,
17487 minipool_vector_label),
17488 this_fix->minipool->offset);
17489 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17492 dump_minipool (last_barrier->insn);
17493 fix = ftmp;
17496 /* From now on we must synthesize any constants that we can't handle
17497 directly. This can happen if the RTL gets split during final
17498 instruction generation. */
17499 cfun->machine->after_arm_reorg = 1;
17501 /* Free the minipool memory. */
17502 obstack_free (&minipool_obstack, minipool_startobj);
17505 /* Routines to output assembly language. */
17507	/* Return the string representation of the passed-in real value.  */
17508 static const char *
17509 fp_const_from_val (REAL_VALUE_TYPE *r)
17511 if (!fp_consts_inited)
17512 init_fp_table ();
17514 gcc_assert (REAL_VALUES_EQUAL (*r, value_fp0));
17515 return "0";
17518	/* OPERANDS[0] is the entire list of insns that constitute the pop,
17519	   OPERANDS[1] is the base register, RETURN_PC is true iff the return insn
17520	   is in the list, and UPDATE is true iff the list contains an explicit
17521	   update of the base register.  */
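/* An illustrative sketch (not exhaustive): for a pop of {r4, r5, r6} with SP
   as the base register and UPDATE set, the code below emits "pop {r4, r5, r6}"
   under unified syntax and "ldmfd sp!, {r4, r5, r6}" otherwise.  */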
17522 void
17523 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17524 bool update)
17526 int i;
17527 char pattern[100];
17528 int offset;
17529 const char *conditional;
17530 int num_saves = XVECLEN (operands[0], 0);
17531 unsigned int regno;
17532 unsigned int regno_base = REGNO (operands[1]);
17534 offset = 0;
17535 offset += update ? 1 : 0;
17536 offset += return_pc ? 1 : 0;
17538 /* Is the base register in the list? */
17539 for (i = offset; i < num_saves; i++)
17541 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17542 /* If SP is in the list, then the base register must be SP. */
17543 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17544	      /* If the base register is in the list, there must be no explicit update.  */
17545 if (regno == regno_base)
17546 gcc_assert (!update);
17549 conditional = reverse ? "%?%D0" : "%?%d0";
17550 if ((regno_base == SP_REGNUM) && TARGET_UNIFIED_ASM)
17552	      /* Output pop (not ldmfd) because it has a shorter encoding.  */
17553 gcc_assert (update);
17554 sprintf (pattern, "pop%s\t{", conditional);
17556 else
17558 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17559		 It's just a convention; their semantics are identical.  */
17560 if (regno_base == SP_REGNUM)
17561 sprintf (pattern, "ldm%sfd\t", conditional);
17562 else if (TARGET_UNIFIED_ASM)
17563 sprintf (pattern, "ldmia%s\t", conditional);
17564 else
17565 sprintf (pattern, "ldm%sia\t", conditional);
17567 strcat (pattern, reg_names[regno_base]);
17568 if (update)
17569 strcat (pattern, "!, {");
17570 else
17571 strcat (pattern, ", {");
17574 /* Output the first destination register. */
17575 strcat (pattern,
17576 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17578 /* Output the rest of the destination registers. */
17579 for (i = offset + 1; i < num_saves; i++)
17581 strcat (pattern, ", ");
17582 strcat (pattern,
17583 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17586 strcat (pattern, "}");
17588 if (IS_INTERRUPT (arm_current_func_type ()) && return_pc)
17589 strcat (pattern, "^");
17591 output_asm_insn (pattern, &cond);
17595 /* Output the assembly for a store multiple. */
17597 const char *
17598 vfp_output_vstmd (rtx * operands)
17600 char pattern[100];
17601 int p;
17602 int base;
17603 int i;
17604 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17605 ? XEXP (operands[0], 0)
17606 : XEXP (XEXP (operands[0], 0), 0);
17607 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17609 if (push_p)
17610 strcpy (pattern, "vpush%?.64\t{%P1");
17611 else
17612 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17614 p = strlen (pattern);
17616 gcc_assert (REG_P (operands[1]));
17618 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17619 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17621 p += sprintf (&pattern[p], ", d%d", base + i);
17623 strcpy (&pattern[p], "}");
17625 output_asm_insn (pattern, operands);
17626 return "";
17630	/* Emit RTL to save a block of VFP register pairs to the stack.  Returns the
17631 number of bytes pushed. */
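/* A hedged example: vfp_emit_fstmd (FIRST_VFP_REGNUM, 4) pushes d0-d3 with a
   single store-multiple, attaches the corresponding CFI note, and returns 32
   (4 registers * 8 bytes).  The ARM10 VFPr1 workaround below only changes
   this when exactly two register pairs would be stored.  */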
17633 static int
17634 vfp_emit_fstmd (int base_reg, int count)
17636 rtx par;
17637 rtx dwarf;
17638 rtx tmp, reg;
17639 int i;
17641 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17642 register pairs are stored by a store multiple insn. We avoid this
17643 by pushing an extra pair. */
17644 if (count == 2 && !arm_arch6)
17646 if (base_reg == LAST_VFP_REGNUM - 3)
17647 base_reg -= 2;
17648 count++;
17651 /* FSTMD may not store more than 16 doubleword registers at once. Split
17652 larger stores into multiple parts (up to a maximum of two, in
17653 practice). */
17654 if (count > 16)
17656 int saved;
17657 /* NOTE: base_reg is an internal register number, so each D register
17658 counts as 2. */
17659 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17660 saved += vfp_emit_fstmd (base_reg, 16);
17661 return saved;
17664 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17665 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17667 reg = gen_rtx_REG (DFmode, base_reg);
17668 base_reg += 2;
17670 XVECEXP (par, 0, 0)
17671 = gen_rtx_SET (gen_frame_mem
17672 (BLKmode,
17673 gen_rtx_PRE_MODIFY (Pmode,
17674 stack_pointer_rtx,
17675 plus_constant
17676 (Pmode, stack_pointer_rtx,
17677 - (count * 8)))
17679 gen_rtx_UNSPEC (BLKmode,
17680 gen_rtvec (1, reg),
17681 UNSPEC_PUSH_MULT));
17683 tmp = gen_rtx_SET (stack_pointer_rtx,
17684 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17685 RTX_FRAME_RELATED_P (tmp) = 1;
17686 XVECEXP (dwarf, 0, 0) = tmp;
17688 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
17689 RTX_FRAME_RELATED_P (tmp) = 1;
17690 XVECEXP (dwarf, 0, 1) = tmp;
17692 for (i = 1; i < count; i++)
17694 reg = gen_rtx_REG (DFmode, base_reg);
17695 base_reg += 2;
17696 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17698 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
17699 plus_constant (Pmode,
17700 stack_pointer_rtx,
17701 i * 8)),
17702 reg);
17703 RTX_FRAME_RELATED_P (tmp) = 1;
17704 XVECEXP (dwarf, 0, i + 1) = tmp;
17707 par = emit_insn (par);
17708 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17709 RTX_FRAME_RELATED_P (par) = 1;
17711 return count * 8;
17714 /* Emit a call instruction with pattern PAT. ADDR is the address of
17715 the call target. */
17717 void
17718 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17720 rtx insn;
17722 insn = emit_call_insn (pat);
17724 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17725 If the call might use such an entry, add a use of the PIC register
17726 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17727 if (TARGET_VXWORKS_RTP
17728 && flag_pic
17729 && !sibcall
17730 && GET_CODE (addr) == SYMBOL_REF
17731 && (SYMBOL_REF_DECL (addr)
17732 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17733 : !SYMBOL_REF_LOCAL_P (addr)))
17735 require_pic_register ();
17736 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17739 if (TARGET_AAPCS_BASED)
17741 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17742 linker. We need to add an IP clobber to allow setting
17743 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17744 is not needed since it's a fixed register. */
17745 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17746 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17750 /* Output a 'call' insn. */
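/* A sketch of the emitted sequence (pre-ARMv5 only, per the assert below): a
   call through r2 becomes "mov lr, pc" followed by "mov pc, r2", or by
   "bx r2" when interworking or ARMv4T is in use.  */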
17751 const char *
17752 output_call (rtx *operands)
17754 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17756 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17757 if (REGNO (operands[0]) == LR_REGNUM)
17759 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17760 output_asm_insn ("mov%?\t%0, %|lr", operands);
17763 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17765 if (TARGET_INTERWORK || arm_arch4t)
17766 output_asm_insn ("bx%?\t%0", operands);
17767 else
17768 output_asm_insn ("mov%?\t%|pc, %0", operands);
17770 return "";
17773 /* Output a 'call' insn that is a reference in memory. This is
17774	   disabled for ARMv5, where we prefer to use blx instead, because
17775	   otherwise there is a significant performance overhead.  */
17776 const char *
17777 output_call_mem (rtx *operands)
17779 gcc_assert (!arm_arch5);
17780 if (TARGET_INTERWORK)
17782 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17783 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17784 output_asm_insn ("bx%?\t%|ip", operands);
17786 else if (regno_use_in (LR_REGNUM, operands[0]))
17788 /* LR is used in the memory address. We load the address in the
17789 first instruction. It's safe to use IP as the target of the
17790 load since the call will kill it anyway. */
17791 output_asm_insn ("ldr%?\t%|ip, %0", operands);
17792 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17793 if (arm_arch4t)
17794 output_asm_insn ("bx%?\t%|ip", operands);
17795 else
17796 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
17798 else
17800 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17801 output_asm_insn ("ldr%?\t%|pc, %0", operands);
17804 return "";
17808 /* Output a move from arm registers to arm registers of a long double
17809 OPERANDS[0] is the destination.
17810 OPERANDS[1] is the source. */
17811 const char *
17812 output_mov_long_double_arm_from_arm (rtx *operands)
17814 /* We have to be careful here because the two might overlap. */
17815 int dest_start = REGNO (operands[0]);
17816 int src_start = REGNO (operands[1]);
17817 rtx ops[2];
17818 int i;
17820 if (dest_start < src_start)
17822 for (i = 0; i < 3; i++)
17824 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17825 ops[1] = gen_rtx_REG (SImode, src_start + i);
17826 output_asm_insn ("mov%?\t%0, %1", ops);
17829 else
17831 for (i = 2; i >= 0; i--)
17833 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17834 ops[1] = gen_rtx_REG (SImode, src_start + i);
17835 output_asm_insn ("mov%?\t%0, %1", ops);
17839 return "";
17842 void
17843 arm_emit_movpair (rtx dest, rtx src)
17845 /* If the src is an immediate, simplify it. */
17846 if (CONST_INT_P (src))
17848 HOST_WIDE_INT val = INTVAL (src);
17849 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17850 if ((val >> 16) & 0x0000ffff)
17851 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17852 GEN_INT (16)),
17853 GEN_INT ((val >> 16) & 0x0000ffff));
17854 return;
17856 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17857 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17860 /* Output a move between double words. It must be REG<-MEM
17861 or MEM<-REG. */
17862 const char *
17863 output_move_double (rtx *operands, bool emit, int *count)
17865 enum rtx_code code0 = GET_CODE (operands[0]);
17866 enum rtx_code code1 = GET_CODE (operands[1]);
17867 rtx otherops[3];
17868 if (count)
17869 *count = 1;
17871 /* The only case when this might happen is when
17872 you are looking at the length of a DImode instruction
17873 that has an invalid constant in it. */
17874 if (code0 == REG && code1 != MEM)
17876 gcc_assert (!emit);
17877 *count = 2;
17878 return "";
17881 if (code0 == REG)
17883 unsigned int reg0 = REGNO (operands[0]);
17885 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17887 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17889 switch (GET_CODE (XEXP (operands[1], 0)))
17891 case REG:
17893 if (emit)
17895 if (TARGET_LDRD
17896 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17897 output_asm_insn ("ldr%(d%)\t%0, [%m1]", operands);
17898 else
17899 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
17901 break;
17903 case PRE_INC:
17904 gcc_assert (TARGET_LDRD);
17905 if (emit)
17906 output_asm_insn ("ldr%(d%)\t%0, [%m1, #8]!", operands);
17907 break;
17909 case PRE_DEC:
17910 if (emit)
17912 if (TARGET_LDRD)
17913 output_asm_insn ("ldr%(d%)\t%0, [%m1, #-8]!", operands);
17914 else
17915 output_asm_insn ("ldm%(db%)\t%m1!, %M0", operands);
17917 break;
17919 case POST_INC:
17920 if (emit)
17922 if (TARGET_LDRD)
17923 output_asm_insn ("ldr%(d%)\t%0, [%m1], #8", operands);
17924 else
17925 output_asm_insn ("ldm%(ia%)\t%m1!, %M0", operands);
17927 break;
17929 case POST_DEC:
17930 gcc_assert (TARGET_LDRD);
17931 if (emit)
17932 output_asm_insn ("ldr%(d%)\t%0, [%m1], #-8", operands);
17933 break;
17935 case PRE_MODIFY:
17936 case POST_MODIFY:
17937	  /* Autoincrement addressing modes should never have overlapping
17938 base and destination registers, and overlapping index registers
17939 are already prohibited, so this doesn't need to worry about
17940 fix_cm3_ldrd. */
17941 otherops[0] = operands[0];
17942 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
17943 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
17945 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
17947 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
17949 /* Registers overlap so split out the increment. */
17950 if (emit)
17952 output_asm_insn ("add%?\t%1, %1, %2", otherops);
17953 output_asm_insn ("ldr%(d%)\t%0, [%1] @split", otherops);
17955 if (count)
17956 *count = 2;
17958 else
17960 /* Use a single insn if we can.
17961 FIXME: IWMMXT allows offsets larger than ldrd can
17962 handle, fix these up with a pair of ldr. */
17963 if (TARGET_THUMB2
17964 || !CONST_INT_P (otherops[2])
17965 || (INTVAL (otherops[2]) > -256
17966 && INTVAL (otherops[2]) < 256))
17968 if (emit)
17969 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]!", otherops);
17971 else
17973 if (emit)
17975 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
17976 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
17978 if (count)
17979 *count = 2;
17984 else
17986 /* Use a single insn if we can.
17987 FIXME: IWMMXT allows offsets larger than ldrd can handle,
17988 fix these up with a pair of ldr. */
17989 if (TARGET_THUMB2
17990 || !CONST_INT_P (otherops[2])
17991 || (INTVAL (otherops[2]) > -256
17992 && INTVAL (otherops[2]) < 256))
17994 if (emit)
17995 output_asm_insn ("ldr%(d%)\t%0, [%1], %2", otherops);
17997 else
17999 if (emit)
18001 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18002 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18004 if (count)
18005 *count = 2;
18008 break;
18010 case LABEL_REF:
18011 case CONST:
18012	  /* We might be able to use ldrd %0, %1 here.  However, the range is
18013 different to ldr/adr, and it is broken on some ARMv7-M
18014 implementations. */
18015 /* Use the second register of the pair to avoid problematic
18016 overlap. */
18017 otherops[1] = operands[1];
18018 if (emit)
18019 output_asm_insn ("adr%?\t%0, %1", otherops);
18020 operands[1] = otherops[0];
18021 if (emit)
18023 if (TARGET_LDRD)
18024 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18025 else
18026 output_asm_insn ("ldm%(ia%)\t%1, %M0", operands);
18029 if (count)
18030 *count = 2;
18031 break;
18033 /* ??? This needs checking for thumb2. */
18034 default:
18035 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18036 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18038 otherops[0] = operands[0];
18039 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18040 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18042 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18044 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18046 switch ((int) INTVAL (otherops[2]))
18048 case -8:
18049 if (emit)
18050 output_asm_insn ("ldm%(db%)\t%1, %M0", otherops);
18051 return "";
18052 case -4:
18053 if (TARGET_THUMB2)
18054 break;
18055 if (emit)
18056 output_asm_insn ("ldm%(da%)\t%1, %M0", otherops);
18057 return "";
18058 case 4:
18059 if (TARGET_THUMB2)
18060 break;
18061 if (emit)
18062 output_asm_insn ("ldm%(ib%)\t%1, %M0", otherops);
18063 return "";
18066 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18067 operands[1] = otherops[0];
18068 if (TARGET_LDRD
18069 && (REG_P (otherops[2])
18070 || TARGET_THUMB2
18071 || (CONST_INT_P (otherops[2])
18072 && INTVAL (otherops[2]) > -256
18073 && INTVAL (otherops[2]) < 256)))
18075 if (reg_overlap_mentioned_p (operands[0],
18076 otherops[2]))
18078 /* Swap base and index registers over to
18079 avoid a conflict. */
18080 std::swap (otherops[1], otherops[2]);
18082 /* If both registers conflict, it will usually
18083 have been fixed by a splitter. */
18084 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18085 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18087 if (emit)
18089 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18090 output_asm_insn ("ldr%(d%)\t%0, [%1]", operands);
18092 if (count)
18093 *count = 2;
18095 else
18097 otherops[0] = operands[0];
18098 if (emit)
18099 output_asm_insn ("ldr%(d%)\t%0, [%1, %2]", otherops);
18101 return "";
18104 if (CONST_INT_P (otherops[2]))
18106 if (emit)
18108 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18109 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18110 else
18111 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18114 else
18116 if (emit)
18117 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18120 else
18122 if (emit)
18123 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18126 if (count)
18127 *count = 2;
18129 if (TARGET_LDRD)
18130 return "ldr%(d%)\t%0, [%1]";
18132 return "ldm%(ia%)\t%1, %M0";
18134 else
18136 otherops[1] = adjust_address (operands[1], SImode, 4);
18137 /* Take care of overlapping base/data reg. */
18138 if (reg_mentioned_p (operands[0], operands[1]))
18140 if (emit)
18142 output_asm_insn ("ldr%?\t%0, %1", otherops);
18143 output_asm_insn ("ldr%?\t%0, %1", operands);
18145 if (count)
18146 *count = 2;
18149 else
18151 if (emit)
18153 output_asm_insn ("ldr%?\t%0, %1", operands);
18154 output_asm_insn ("ldr%?\t%0, %1", otherops);
18156 if (count)
18157 *count = 2;
18162 else
18164 /* Constraints should ensure this. */
18165 gcc_assert (code0 == MEM && code1 == REG);
18166 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18167 || (TARGET_ARM && TARGET_LDRD));
18169 switch (GET_CODE (XEXP (operands[0], 0)))
18171 case REG:
18172 if (emit)
18174 if (TARGET_LDRD)
18175 output_asm_insn ("str%(d%)\t%1, [%m0]", operands);
18176 else
18177 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18179 break;
18181 case PRE_INC:
18182 gcc_assert (TARGET_LDRD);
18183 if (emit)
18184 output_asm_insn ("str%(d%)\t%1, [%m0, #8]!", operands);
18185 break;
18187 case PRE_DEC:
18188 if (emit)
18190 if (TARGET_LDRD)
18191 output_asm_insn ("str%(d%)\t%1, [%m0, #-8]!", operands);
18192 else
18193 output_asm_insn ("stm%(db%)\t%m0!, %M1", operands);
18195 break;
18197 case POST_INC:
18198 if (emit)
18200 if (TARGET_LDRD)
18201 output_asm_insn ("str%(d%)\t%1, [%m0], #8", operands);
18202 else
18203 output_asm_insn ("stm%(ia%)\t%m0!, %M1", operands);
18205 break;
18207 case POST_DEC:
18208 gcc_assert (TARGET_LDRD);
18209 if (emit)
18210 output_asm_insn ("str%(d%)\t%1, [%m0], #-8", operands);
18211 break;
18213 case PRE_MODIFY:
18214 case POST_MODIFY:
18215 otherops[0] = operands[1];
18216 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18217 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18219 /* IWMMXT allows offsets larger than ldrd can handle,
18220 fix these up with a pair of ldr. */
18221 if (!TARGET_THUMB2
18222 && CONST_INT_P (otherops[2])
18223 && (INTVAL(otherops[2]) <= -256
18224 || INTVAL(otherops[2]) >= 256))
18226 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18228 if (emit)
18230 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18231 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18233 if (count)
18234 *count = 2;
18236 else
18238 if (emit)
18240 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18241 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18243 if (count)
18244 *count = 2;
18247 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18249 if (emit)
18250 output_asm_insn ("str%(d%)\t%0, [%1, %2]!", otherops);
18252 else
18254 if (emit)
18255 output_asm_insn ("str%(d%)\t%0, [%1], %2", otherops);
18257 break;
18259 case PLUS:
18260 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18261 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18263 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18265 case -8:
18266 if (emit)
18267 output_asm_insn ("stm%(db%)\t%m0, %M1", operands);
18268 return "";
18270 case -4:
18271 if (TARGET_THUMB2)
18272 break;
18273 if (emit)
18274 output_asm_insn ("stm%(da%)\t%m0, %M1", operands);
18275 return "";
18277 case 4:
18278 if (TARGET_THUMB2)
18279 break;
18280 if (emit)
18281 output_asm_insn ("stm%(ib%)\t%m0, %M1", operands);
18282 return "";
18285 if (TARGET_LDRD
18286 && (REG_P (otherops[2])
18287 || TARGET_THUMB2
18288 || (CONST_INT_P (otherops[2])
18289 && INTVAL (otherops[2]) > -256
18290 && INTVAL (otherops[2]) < 256)))
18292 otherops[0] = operands[1];
18293 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18294 if (emit)
18295 output_asm_insn ("str%(d%)\t%0, [%1, %2]", otherops);
18296 return "";
18298 /* Fall through */
18300 default:
18301 otherops[0] = adjust_address (operands[0], SImode, 4);
18302 otherops[1] = operands[1];
18303 if (emit)
18305 output_asm_insn ("str%?\t%1, %0", operands);
18306 output_asm_insn ("str%?\t%H1, %0", otherops);
18308 if (count)
18309 *count = 2;
18313 return "";
18316 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18317 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18319 const char *
18320 output_move_quad (rtx *operands)
18322 if (REG_P (operands[0]))
18324 /* Load, or reg->reg move. */
18326 if (MEM_P (operands[1]))
18328 switch (GET_CODE (XEXP (operands[1], 0)))
18330 case REG:
18331 output_asm_insn ("ldm%(ia%)\t%m1, %M0", operands);
18332 break;
18334 case LABEL_REF:
18335 case CONST:
18336 output_asm_insn ("adr%?\t%0, %1", operands);
18337 output_asm_insn ("ldm%(ia%)\t%0, %M0", operands);
18338 break;
18340 default:
18341 gcc_unreachable ();
18344 else
18346 rtx ops[2];
18347 int dest, src, i;
18349 gcc_assert (REG_P (operands[1]));
18351 dest = REGNO (operands[0]);
18352 src = REGNO (operands[1]);
18354 /* This seems pretty dumb, but hopefully GCC won't try to do it
18355 very often. */
18356 if (dest < src)
18357 for (i = 0; i < 4; i++)
18359 ops[0] = gen_rtx_REG (SImode, dest + i);
18360 ops[1] = gen_rtx_REG (SImode, src + i);
18361 output_asm_insn ("mov%?\t%0, %1", ops);
18363 else
18364 for (i = 3; i >= 0; i--)
18366 ops[0] = gen_rtx_REG (SImode, dest + i);
18367 ops[1] = gen_rtx_REG (SImode, src + i);
18368 output_asm_insn ("mov%?\t%0, %1", ops);
18372 else
18374 gcc_assert (MEM_P (operands[0]));
18375 gcc_assert (REG_P (operands[1]));
18376 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18378 switch (GET_CODE (XEXP (operands[0], 0)))
18380 case REG:
18381 output_asm_insn ("stm%(ia%)\t%m0, %M1", operands);
18382 break;
18384 default:
18385 gcc_unreachable ();
18389 return "";
18392 /* Output a VFP load or store instruction. */
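/* For instance (a sketch, not exhaustive): a DFmode load of d8 from [r4]
   goes through the default template below and emits "vldr.64 d8, [r4]",
   while a POST_INC store emits a "vstmia" with writeback on the base
   register.  */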
18394 const char *
18395 output_move_vfp (rtx *operands)
18397 rtx reg, mem, addr, ops[2];
18398 int load = REG_P (operands[0]);
18399 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18400 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18401 const char *templ;
18402 char buff[50];
18403 machine_mode mode;
18405 reg = operands[!load];
18406 mem = operands[load];
18408 mode = GET_MODE (reg);
18410 gcc_assert (REG_P (reg));
18411 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18412 gcc_assert (mode == SFmode
18413 || mode == DFmode
18414 || mode == SImode
18415 || mode == DImode
18416 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18417 gcc_assert (MEM_P (mem));
18419 addr = XEXP (mem, 0);
18421 switch (GET_CODE (addr))
18423 case PRE_DEC:
18424 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18425 ops[0] = XEXP (addr, 0);
18426 ops[1] = reg;
18427 break;
18429 case POST_INC:
18430 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18431 ops[0] = XEXP (addr, 0);
18432 ops[1] = reg;
18433 break;
18435 default:
18436 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18437 ops[0] = reg;
18438 ops[1] = mem;
18439 break;
18442 sprintf (buff, templ,
18443 load ? "ld" : "st",
18444 dp ? "64" : "32",
18445 dp ? "P" : "",
18446 integer_p ? "\t%@ int" : "");
18447 output_asm_insn (buff, ops);
18449 return "";
18452 /* Output a Neon double-word or quad-word load or store, or a load
18453 or store for larger structure modes.
18455 WARNING: The ordering of elements is weird in big-endian mode,
18456 because the EABI requires that vectors stored in memory appear
18457	   as though they were stored by a VSTM instruction.
18458 GCC RTL defines element ordering based on in-memory order.
18459 This can be different from the architectural ordering of elements
18460 within a NEON register. The intrinsics defined in arm_neon.h use the
18461 NEON register element ordering, not the GCC RTL element ordering.
18463	   For example, the in-memory ordering of a big-endian quadword
18464 vector with 16-bit elements when stored from register pair {d0,d1}
18465 will be (lowest address first, d0[N] is NEON register element N):
18467 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18469 When necessary, quadword registers (dN, dN+1) are moved to ARM
18470 registers from rN in the order:
18472 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18474 So that STM/LDM can be used on vectors in ARM registers, and the
18475 same memory layout will result as if VSTM/VLDM were used.
18477 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18478 possible, which allows use of appropriate alignment tags.
18479 Note that the choice of "64" is independent of the actual vector
18480 element size; this size simply ensures that the behavior is
18481 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18483 Due to limitations of those instructions, use of VST1.64/VLD1.64
18484 is not possible if:
18485 - the address contains PRE_DEC, or
18486 - the mode refers to more than 4 double-word registers
18488 In those cases, it would be possible to replace VSTM/VLDM by a
18489 sequence of instructions; this is not currently implemented since
18490 this is not certain to actually improve performance. */
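/* Hedged examples of the cases handled below: a quad-word (two D-register)
   move with a plain register address uses the VST1.64/VLD1.64 form with a
   register list, while an XImode (eight D-register) access falls back to
   vstmia/vldmia.  */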
18492 const char *
18493 output_move_neon (rtx *operands)
18495 rtx reg, mem, addr, ops[2];
18496 int regno, nregs, load = REG_P (operands[0]);
18497 const char *templ;
18498 char buff[50];
18499 machine_mode mode;
18501 reg = operands[!load];
18502 mem = operands[load];
18504 mode = GET_MODE (reg);
18506 gcc_assert (REG_P (reg));
18507 regno = REGNO (reg);
18508 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18509 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18510 || NEON_REGNO_OK_FOR_QUAD (regno));
18511 gcc_assert (VALID_NEON_DREG_MODE (mode)
18512 || VALID_NEON_QREG_MODE (mode)
18513 || VALID_NEON_STRUCT_MODE (mode));
18514 gcc_assert (MEM_P (mem));
18516 addr = XEXP (mem, 0);
18518 /* Strip off const from addresses like (const (plus (...))). */
18519 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18520 addr = XEXP (addr, 0);
18522 switch (GET_CODE (addr))
18524 case POST_INC:
18525 /* We have to use vldm / vstm for too-large modes. */
18526 if (nregs > 4)
18528 templ = "v%smia%%?\t%%0!, %%h1";
18529 ops[0] = XEXP (addr, 0);
18531 else
18533 templ = "v%s1.64\t%%h1, %%A0";
18534 ops[0] = mem;
18536 ops[1] = reg;
18537 break;
18539 case PRE_DEC:
18540 /* We have to use vldm / vstm in this case, since there is no
18541 pre-decrement form of the vld1 / vst1 instructions. */
18542 templ = "v%smdb%%?\t%%0!, %%h1";
18543 ops[0] = XEXP (addr, 0);
18544 ops[1] = reg;
18545 break;
18547 case POST_MODIFY:
18548 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18549 gcc_unreachable ();
18551 case REG:
18552 /* We have to use vldm / vstm for too-large modes. */
18553 if (nregs > 1)
18555 if (nregs > 4)
18556 templ = "v%smia%%?\t%%m0, %%h1";
18557 else
18558 templ = "v%s1.64\t%%h1, %%A0";
18560 ops[0] = mem;
18561 ops[1] = reg;
18562 break;
18564 /* Fall through. */
18565 case LABEL_REF:
18566 case PLUS:
18568 int i;
18569 int overlap = -1;
18570 for (i = 0; i < nregs; i++)
18572 /* We're only using DImode here because it's a convenient size. */
18573 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18574 ops[1] = adjust_address (mem, DImode, 8 * i);
18575 if (reg_overlap_mentioned_p (ops[0], mem))
18577 gcc_assert (overlap == -1);
18578 overlap = i;
18580 else
18582 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18583 output_asm_insn (buff, ops);
18586 if (overlap != -1)
18588 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18589 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18590 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18591 output_asm_insn (buff, ops);
18594 return "";
18597 default:
18598 gcc_unreachable ();
18601 sprintf (buff, templ, load ? "ld" : "st");
18602 output_asm_insn (buff, ops);
18604 return "";
18607 /* Compute and return the length of neon_mov<mode>, where <mode> is
18608 one of VSTRUCT modes: EI, OI, CI or XI. */
18610 arm_attr_length_move_neon (rtx_insn *insn)
18612 rtx reg, mem, addr;
18613 int load;
18614 machine_mode mode;
18616 extract_insn_cached (insn);
18618 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18620 mode = GET_MODE (recog_data.operand[0]);
18621 switch (mode)
18623 case EImode:
18624 case OImode:
18625 return 8;
18626 case CImode:
18627 return 12;
18628 case XImode:
18629 return 16;
18630 default:
18631 gcc_unreachable ();
18635 load = REG_P (recog_data.operand[0]);
18636 reg = recog_data.operand[!load];
18637 mem = recog_data.operand[load];
18639 gcc_assert (MEM_P (mem));
18641 mode = GET_MODE (reg);
18642 addr = XEXP (mem, 0);
18644 /* Strip off const from addresses like (const (plus (...))). */
18645 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18646 addr = XEXP (addr, 0);
18648 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18650 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18651 return insns * 4;
18653 else
18654 return 4;
18657 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18658 return zero. */
18661 arm_address_offset_is_imm (rtx_insn *insn)
18663 rtx mem, addr;
18665 extract_insn_cached (insn);
18667 if (REG_P (recog_data.operand[0]))
18668 return 0;
18670 mem = recog_data.operand[0];
18672 gcc_assert (MEM_P (mem));
18674 addr = XEXP (mem, 0);
18676 if (REG_P (addr)
18677 || (GET_CODE (addr) == PLUS
18678 && REG_P (XEXP (addr, 0))
18679 && CONST_INT_P (XEXP (addr, 1))))
18680 return 1;
18681 else
18682 return 0;
18685 /* Output an ADD r, s, #n where n may be too big for one instruction.
18686	   If N is zero and the two registers are the same, output nothing.  */
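/* For instance: with operands r0, r1 and #257 -- a value that cannot be
   encoded as a single ARM immediate -- this emits "add r0, r1, #1" followed
   by "add r0, r0, #256" (a sketch; the splitting itself is done by
   output_multi_immediate below).  */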
18687 const char *
18688 output_add_immediate (rtx *operands)
18690 HOST_WIDE_INT n = INTVAL (operands[2]);
18692 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18694 if (n < 0)
18695 output_multi_immediate (operands,
18696 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18697 -n);
18698 else
18699 output_multi_immediate (operands,
18700 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18704 return "";
18707 /* Output a multiple immediate operation.
18708 OPERANDS is the vector of operands referred to in the output patterns.
18709 INSTR1 is the output pattern to use for the first constant.
18710 INSTR2 is the output pattern to use for subsequent constants.
18711 IMMED_OP is the index of the constant slot in OPERANDS.
18712 N is the constant value. */
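/* A worked sketch of the chunking loop below: the constant 0x00ff00ff is
   consumed in two 8-bit pieces, 0xff and 0xff0000, each a valid rotated
   immediate, so INSTR1 is emitted once and INSTR2 once.  */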
18713 static const char *
18714 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18715 int immed_op, HOST_WIDE_INT n)
18717 #if HOST_BITS_PER_WIDE_INT > 32
18718 n &= 0xffffffff;
18719 #endif
18721 if (n == 0)
18723 /* Quick and easy output. */
18724 operands[immed_op] = const0_rtx;
18725 output_asm_insn (instr1, operands);
18727 else
18729 int i;
18730 const char * instr = instr1;
18732 /* Note that n is never zero here (which would give no output). */
18733 for (i = 0; i < 32; i += 2)
18735 if (n & (3 << i))
18737 operands[immed_op] = GEN_INT (n & (255 << i));
18738 output_asm_insn (instr, operands);
18739 instr = instr2;
18740 i += 6;
18745 return "";
18748 /* Return the name of a shifter operation. */
18749 static const char *
18750 arm_shift_nmem(enum rtx_code code)
18752 switch (code)
18754 case ASHIFT:
18755 return ARM_LSL_NAME;
18757 case ASHIFTRT:
18758 return "asr";
18760 case LSHIFTRT:
18761 return "lsr";
18763 case ROTATERT:
18764 return "ror";
18766 default:
18767 abort();
18771 /* Return the appropriate ARM instruction for the operation code.
18772 The returned result should not be overwritten. OP is the rtx of the
18773 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18774 was shifted. */
18775 const char *
18776 arithmetic_instr (rtx op, int shift_first_arg)
18778 switch (GET_CODE (op))
18780 case PLUS:
18781 return "add";
18783 case MINUS:
18784 return shift_first_arg ? "rsb" : "sub";
18786 case IOR:
18787 return "orr";
18789 case XOR:
18790 return "eor";
18792 case AND:
18793 return "and";
18795 case ASHIFT:
18796 case ASHIFTRT:
18797 case LSHIFTRT:
18798 case ROTATERT:
18799 return arm_shift_nmem(GET_CODE(op));
18801 default:
18802 gcc_unreachable ();
18806 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18807 for the operation code. The returned result should not be overwritten.
18808 OP is the rtx code of the shift.
18809	   On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18810	   constant amount if the shift is by a constant.  */
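/* A few hedged examples of the mapping below: (ashiftrt:SI (reg) (const_int 3))
   yields "asr" with *AMOUNTP set to 3; (mult:SI (reg) (const_int 8)) is
   treated as a shift and yields ARM_LSL_NAME with *AMOUNTP set to 3; a shift
   by a register yields the mnemonic with *AMOUNTP set to -1.  */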
18811 static const char *
18812 shift_op (rtx op, HOST_WIDE_INT *amountp)
18814 const char * mnem;
18815 enum rtx_code code = GET_CODE (op);
18817 switch (code)
18819 case ROTATE:
18820 if (!CONST_INT_P (XEXP (op, 1)))
18822 output_operand_lossage ("invalid shift operand");
18823 return NULL;
18826 code = ROTATERT;
18827 *amountp = 32 - INTVAL (XEXP (op, 1));
18828 mnem = "ror";
18829 break;
18831 case ASHIFT:
18832 case ASHIFTRT:
18833 case LSHIFTRT:
18834 case ROTATERT:
18835 mnem = arm_shift_nmem(code);
18836 if (CONST_INT_P (XEXP (op, 1)))
18838 *amountp = INTVAL (XEXP (op, 1));
18840 else if (REG_P (XEXP (op, 1)))
18842 *amountp = -1;
18843 return mnem;
18845 else
18847 output_operand_lossage ("invalid shift operand");
18848 return NULL;
18850 break;
18852 case MULT:
18853 /* We never have to worry about the amount being other than a
18854 power of 2, since this case can never be reloaded from a reg. */
18855 if (!CONST_INT_P (XEXP (op, 1)))
18857 output_operand_lossage ("invalid shift operand");
18858 return NULL;
18861 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18863 /* Amount must be a power of two. */
18864 if (*amountp & (*amountp - 1))
18866 output_operand_lossage ("invalid shift operand");
18867 return NULL;
18870 *amountp = int_log2 (*amountp);
18871 return ARM_LSL_NAME;
18873 default:
18874 output_operand_lossage ("invalid shift operand");
18875 return NULL;
18878 /* This is not 100% correct, but follows from the desire to merge
18879 multiplication by a power of 2 with the recognizer for a
18880	     shift. >=32 is not a valid shift for "lsl", so we must try to
18881 output a shift that produces the correct arithmetical result.
18882 Using lsr #32 is identical except for the fact that the carry bit
18883 is not set correctly if we set the flags; but we never use the
18884 carry bit from such an operation, so we can ignore that. */
18885 if (code == ROTATERT)
18886 /* Rotate is just modulo 32. */
18887 *amountp &= 31;
18888 else if (*amountp != (*amountp & 31))
18890 if (code == ASHIFT)
18891 mnem = "lsr";
18892 *amountp = 32;
18895 /* Shifts of 0 are no-ops. */
18896 if (*amountp == 0)
18897 return NULL;
18899 return mnem;
18902	/* Obtain the shift amount from POWER, which must be a power of two.  */
18904 static HOST_WIDE_INT
18905 int_log2 (HOST_WIDE_INT power)
18907 HOST_WIDE_INT shift = 0;
18909 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
18911 gcc_assert (shift <= 31);
18912 shift++;
18915 return shift;
18918 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18919 because /bin/as is horribly restrictive. The judgement about
18920 whether or not each character is 'printable' (and can be output as
18921 is) or not (and must be printed with an octal escape) must be made
18922 with reference to the *host* character set -- the situation is
18923 similar to that discussed in the comments above pp_c_char in
18924 c-pretty-print.c. */
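/* For example (a sketch): the three bytes 'a', '"', '\n' are emitted as
   .ascii "a\"\012" -- the quote is backslash-escaped and the non-printable
   newline becomes a three-digit octal escape.  */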
18926 #define MAX_ASCII_LEN 51
18928 void
18929 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18931 int i;
18932 int len_so_far = 0;
18934 fputs ("\t.ascii\t\"", stream);
18936 for (i = 0; i < len; i++)
18938 int c = p[i];
18940 if (len_so_far >= MAX_ASCII_LEN)
18942 fputs ("\"\n\t.ascii\t\"", stream);
18943 len_so_far = 0;
18946 if (ISPRINT (c))
18948 if (c == '\\' || c == '\"')
18950 putc ('\\', stream);
18951 len_so_far++;
18953 putc (c, stream);
18954 len_so_far++;
18956 else
18958 fprintf (stream, "\\%03o", c);
18959 len_so_far += 4;
18963 fputs ("\"\n", stream);
18966	/* Return whether a register is callee saved.  This is necessary because, when
18967	   optimizing for size on Thumb-1 targets, high registers are marked as caller
18968	   saved despite being callee saved, in order to avoid using them.  */
18969 #define callee_saved_reg_p(reg) \
18970 (!call_used_regs[reg] \
18971 || (TARGET_THUMB1 && optimize_size \
18972 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
18974 /* Compute the register save mask for registers 0 through 12
18975 inclusive. This code is used by arm_compute_save_reg_mask. */
18977 static unsigned long
18978 arm_compute_save_reg0_reg12_mask (void)
18980 unsigned long func_type = arm_current_func_type ();
18981 unsigned long save_reg_mask = 0;
18982 unsigned int reg;
18984 if (IS_INTERRUPT (func_type))
18986 unsigned int max_reg;
18987 /* Interrupt functions must not corrupt any registers,
18988 even call clobbered ones. If this is a leaf function
18989 we can just examine the registers used by the RTL, but
18990 otherwise we have to assume that whatever function is
18991 called might clobber anything, and so we have to save
18992 all the call-clobbered registers as well. */
18993 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
18994 /* FIQ handlers have registers r8 - r12 banked, so
18995		   we only need to check r0 - r7.  Normal ISRs only
18996 bank r14 and r15, so we must check up to r12.
18997 r13 is the stack pointer which is always preserved,
18998 so we do not need to consider it here. */
18999 max_reg = 7;
19000 else
19001 max_reg = 12;
19003 for (reg = 0; reg <= max_reg; reg++)
19004 if (df_regs_ever_live_p (reg)
19005 || (! crtl->is_leaf && call_used_regs[reg]))
19006 save_reg_mask |= (1 << reg);
19008 /* Also save the pic base register if necessary. */
19009 if (flag_pic
19010 && !TARGET_SINGLE_PIC_BASE
19011 && arm_pic_register != INVALID_REGNUM
19012 && crtl->uses_pic_offset_table)
19013 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19015 else if (IS_VOLATILE(func_type))
19017 /* For noreturn functions we historically omitted register saves
19018		 altogether.  However, this really messes up debugging.  As a
19019		 compromise, save just the frame pointers.  Combined with the link
19020		 register saved elsewhere, this should be sufficient to get
19021 a backtrace. */
19022 if (frame_pointer_needed)
19023 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19024 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19025 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19026 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19027 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19029 else
19031 /* In the normal case we only need to save those registers
19032 which are call saved and which are used by this function. */
19033 for (reg = 0; reg <= 11; reg++)
19034 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19035 save_reg_mask |= (1 << reg);
19037 /* Handle the frame pointer as a special case. */
19038 if (frame_pointer_needed)
19039 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19041 /* If we aren't loading the PIC register,
19042 don't stack it even though it may be live. */
19043 if (flag_pic
19044 && !TARGET_SINGLE_PIC_BASE
19045 && arm_pic_register != INVALID_REGNUM
19046 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19047 || crtl->uses_pic_offset_table))
19048 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19050 /* The prologue will copy SP into R0, so save it. */
19051 if (IS_STACKALIGN (func_type))
19052 save_reg_mask |= 1;
19055 /* Save registers so the exception handler can modify them. */
19056 if (crtl->calls_eh_return)
19058 unsigned int i;
19060 for (i = 0; ; i++)
19062 reg = EH_RETURN_DATA_REGNO (i);
19063 if (reg == INVALID_REGNUM)
19064 break;
19065 save_reg_mask |= 1 << reg;
19069 return save_reg_mask;
19072 /* Return true if r3 is live at the start of the function. */
19074 static bool
19075 arm_r3_live_at_start_p (void)
19077 /* Just look at cfg info, which is still close enough to correct at this
19078 point. This gives false positives for broken functions that might use
19079 uninitialized data that happens to be allocated in r3, but who cares? */
19080 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19083 /* Compute the number of bytes used to store the static chain register on the
19084 stack, above the stack frame. We need to know this accurately to get the
19085 alignment of the rest of the stack frame correct. */
19087 static int
19088 arm_compute_static_chain_stack_bytes (void)
19090 /* See the defining assertion in arm_expand_prologue. */
19091 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM
19092 && IS_NESTED (arm_current_func_type ())
19093 && arm_r3_live_at_start_p ()
19094 && crtl->args.pretend_args_size == 0)
19095 return 4;
19097 return 0;
19100 /* Compute a bit mask of which registers need to be
19101 saved on the stack for the current function.
19102 This is used by arm_get_frame_offsets, which may add extra registers. */
19104 static unsigned long
19105 arm_compute_save_reg_mask (void)
19107 unsigned int save_reg_mask = 0;
19108 unsigned long func_type = arm_current_func_type ();
19109 unsigned int reg;
19111 if (IS_NAKED (func_type))
19112 /* This should never really happen. */
19113 return 0;
19115 /* If we are creating a stack frame, then we must save the frame pointer,
19116 IP (which will hold the old stack pointer), LR and the PC. */
19117 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19118 save_reg_mask |=
19119 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19120 | (1 << IP_REGNUM)
19121 | (1 << LR_REGNUM)
19122 | (1 << PC_REGNUM);
19124 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19126 /* Decide if we need to save the link register.
19127 Interrupt routines have their own banked link register,
19128 so they never need to save it.
19129 Otherwise if we do not use the link register we do not need to save
19130 it. If we are pushing other registers onto the stack however, we
19131 can save an instruction in the epilogue by pushing the link register
19132 now and then popping it back into the PC. This incurs extra memory
19133 accesses though, so we only do it when optimizing for size, and only
19134 if we know that we will not need a fancy return sequence. */
19135 if (df_regs_ever_live_p (LR_REGNUM)
19136 || (save_reg_mask
19137 && optimize_size
19138 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19139 && !crtl->tail_call_emit
19140 && !crtl->calls_eh_return))
19141 save_reg_mask |= 1 << LR_REGNUM;
19143 if (cfun->machine->lr_save_eliminated)
19144 save_reg_mask &= ~ (1 << LR_REGNUM);
19146 if (TARGET_REALLY_IWMMXT
19147 && ((bit_count (save_reg_mask)
19148 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19149 arm_compute_static_chain_stack_bytes())
19150 ) % 2) != 0)
19152 /* The total number of registers that are going to be pushed
19153 onto the stack is odd. We need to ensure that the stack
19154 is 64-bit aligned before we start to save iWMMXt registers,
19155 and also before we start to create locals. (A local variable
19156 might be a double or long long which we will load/store using
19157 an iWMMXt instruction). Therefore we need to push another
19158 ARM register, so that the stack will be 64-bit aligned. We
19159		 try to avoid using the arg registers (r0 - r3) as they might be
19160 used to pass values in a tail call. */
19161 for (reg = 4; reg <= 12; reg++)
19162 if ((save_reg_mask & (1 << reg)) == 0)
19163 break;
19165 if (reg <= 12)
19166 save_reg_mask |= (1 << reg);
19167 else
19169 cfun->machine->sibcall_blocked = 1;
19170 save_reg_mask |= (1 << 3);
19174	  /* We may need to push an additional register for use in initializing the
19175 PIC base register. */
19176 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19177 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19179 reg = thumb_find_work_register (1 << 4);
19180 if (!call_used_regs[reg])
19181 save_reg_mask |= (1 << reg);
19184 return save_reg_mask;
19188 /* Compute a bit mask of which registers need to be
19189 saved on the stack for the current function. */
19190 static unsigned long
19191 thumb1_compute_save_reg_mask (void)
19193 unsigned long mask;
19194 unsigned reg;
19196 mask = 0;
19197 for (reg = 0; reg < 12; reg ++)
19198 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19199 mask |= 1 << reg;
19201 if (flag_pic
19202 && !TARGET_SINGLE_PIC_BASE
19203 && arm_pic_register != INVALID_REGNUM
19204 && crtl->uses_pic_offset_table)
19205 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19207 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19208 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19209 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19211 /* LR will also be pushed if any lo regs are pushed. */
19212 if (mask & 0xff || thumb_force_lr_save ())
19213 mask |= (1 << LR_REGNUM);
19215 /* Make sure we have a low work register if we need one.
19216 We will need one if we are going to push a high register,
19217 but we are not currently intending to push a low register. */
19218 if ((mask & 0xff) == 0
19219 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19221 /* Use thumb_find_work_register to choose which register
19222 we will use. If the register is live then we will
19223 have to push it. Use LAST_LO_REGNUM as our fallback
19224 choice for the register to select. */
19225 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19226 /* Make sure the register returned by thumb_find_work_register is
19227 not part of the return value. */
19228 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19229 reg = LAST_LO_REGNUM;
19231 if (callee_saved_reg_p (reg))
19232 mask |= 1 << reg;
19235 /* The 504 below is 8 bytes less than 512 because there are two possible
19236	     alignment words.  We can't tell here if they will be present or not, so we
19237 have to play it safe and assume that they are. */
19238 if ((CALLER_INTERWORKING_SLOT_SIZE +
19239 ROUND_UP_WORD (get_frame_size ()) +
19240 crtl->outgoing_args_size) >= 504)
19242 /* This is the same as the code in thumb1_expand_prologue() which
19243 determines which register to use for stack decrement. */
19244 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19245 if (mask & (1 << reg))
19246 break;
19248 if (reg > LAST_LO_REGNUM)
19250 /* Make sure we have a register available for stack decrement. */
19251 mask |= 1 << LAST_LO_REGNUM;
19255 return mask;
19259 /* Return the number of bytes required to save VFP registers. */
19260 static int
19261 arm_get_vfp_saved_size (void)
19263 unsigned int regno;
19264 int count;
19265 int saved;
19267 saved = 0;
19268 /* Space for saved VFP registers. */
19269 if (TARGET_HARD_FLOAT && TARGET_VFP)
19271 count = 0;
19272 for (regno = FIRST_VFP_REGNUM;
19273 regno < LAST_VFP_REGNUM;
19274 regno += 2)
19276 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19277 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19279 if (count > 0)
19281 /* Workaround ARM10 VFPr1 bug. */
19282 if (count == 2 && !arm_arch6)
19283 count++;
19284 saved += count * 8;
19286 count = 0;
19288 else
19289 count++;
19291 if (count > 0)
19293 if (count == 2 && !arm_arch6)
19294 count++;
19295 saved += count * 8;
19298 return saved;
19302 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19303	   everything bar the final return instruction.  If SIMPLE_RETURN is true,
19304	   then do not output the epilogue, because it has already been emitted in RTL.  */
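/* Illustrative only: for a typical function whose prologue pushed {r4, fp, lr}
   and which has no special exit requirements, the code below emits
   "pop {r4, fp, pc}" under unified syntax ("ldmfd sp!, {r4, fp, pc}"
   otherwise), and no separate return instruction is needed.  */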
19305 const char *
19306 output_return_instruction (rtx operand, bool really_return, bool reverse,
19307 bool simple_return)
19309 char conditional[10];
19310 char instr[100];
19311 unsigned reg;
19312 unsigned long live_regs_mask;
19313 unsigned long func_type;
19314 arm_stack_offsets *offsets;
19316 func_type = arm_current_func_type ();
19318 if (IS_NAKED (func_type))
19319 return "";
19321 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19323 /* If this function was declared non-returning, and we have
19324 found a tail call, then we have to trust that the called
19325 function won't return. */
19326 if (really_return)
19328 rtx ops[2];
19330 /* Otherwise, trap an attempted return by aborting. */
19331 ops[0] = operand;
19332 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19333 : "abort");
19334 assemble_external_libcall (ops[1]);
19335 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19338 return "";
19341 gcc_assert (!cfun->calls_alloca || really_return);
19343 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19345 cfun->machine->return_used_this_function = 1;
19347 offsets = arm_get_frame_offsets ();
19348 live_regs_mask = offsets->saved_regs_mask;
19350 if (!simple_return && live_regs_mask)
19352 const char * return_reg;
19354 /* If we do not have any special requirements for function exit
19355 (e.g. interworking) then we can load the return address
19356 directly into the PC. Otherwise we must load it into LR. */
19357 if (really_return
19358 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19359 return_reg = reg_names[PC_REGNUM];
19360 else
19361 return_reg = reg_names[LR_REGNUM];
19363 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19365 /* There are three possible reasons for the IP register
19366 being saved. 1) a stack frame was created, in which case
19367 IP contains the old stack pointer, or 2) an ISR routine
19368 corrupted it, or 3) it was saved to align the stack on
19369 iWMMXt. In case 1, restore IP into SP, otherwise just
19370 restore IP. */
19371 if (frame_pointer_needed)
19373 live_regs_mask &= ~ (1 << IP_REGNUM);
19374 live_regs_mask |= (1 << SP_REGNUM);
19376 else
19377 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19380 /* On some ARM architectures it is faster to use LDR rather than
19381 LDM to load a single register. On other architectures, the
19382 cost is the same. In 26 bit mode, or for exception handlers,
19383 we have to use LDM to load the PC so that the CPSR is also
19384 restored. */
19385 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19386 if (live_regs_mask == (1U << reg))
19387 break;
19389 if (reg <= LAST_ARM_REGNUM
19390 && (reg != LR_REGNUM
19391 || ! really_return
19392 || ! IS_INTERRUPT (func_type)))
19394 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19395 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19397 else
19399 char *p;
19400 int first = 1;
19402 /* Generate the load multiple instruction to restore the
19403 registers. Note we can get here, even if
19404 frame_pointer_needed is true, but only if sp already
19405 points to the base of the saved core registers. */
19406 if (live_regs_mask & (1 << SP_REGNUM))
19408 unsigned HOST_WIDE_INT stack_adjust;
19410 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19411 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19413 if (stack_adjust && arm_arch5 && TARGET_ARM)
19414 if (TARGET_UNIFIED_ASM)
19415 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19416 else
19417 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
19418 else
19420 /* If we can't use ldmib (SA110 bug),
19421 then try to pop r3 instead. */
19422 if (stack_adjust)
19423 live_regs_mask |= 1 << 3;
19425 if (TARGET_UNIFIED_ASM)
19426 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19427 else
19428 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
19431 else
19432 if (TARGET_UNIFIED_ASM)
19433 sprintf (instr, "pop%s\t{", conditional);
19434 else
19435 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
19437 p = instr + strlen (instr);
19439 for (reg = 0; reg <= SP_REGNUM; reg++)
19440 if (live_regs_mask & (1 << reg))
19442 int l = strlen (reg_names[reg]);
19444 if (first)
19445 first = 0;
19446 else
19448 memcpy (p, ", ", 2);
19449 p += 2;
19452 memcpy (p, "%|", 2);
19453 memcpy (p + 2, reg_names[reg], l);
19454 p += l + 2;
19457 if (live_regs_mask & (1 << LR_REGNUM))
19459 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19460 /* If returning from an interrupt, restore the CPSR. */
19461 if (IS_INTERRUPT (func_type))
19462 strcat (p, "^");
19464 else
19465 strcpy (p, "}");
19468 output_asm_insn (instr, & operand);
19470 /* See if we need to generate an extra instruction to
19471 perform the actual function return. */
19472 if (really_return
19473 && func_type != ARM_FT_INTERWORKED
19474 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19476 /* The return has already been handled
19477 by loading the LR into the PC. */
19478 return "";
19482 if (really_return)
19484 switch ((int) ARM_FUNC_TYPE (func_type))
19486 case ARM_FT_ISR:
19487 case ARM_FT_FIQ:
19488 /* ??? This is wrong for unified assembly syntax. */
19489 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19490 break;
19492 case ARM_FT_INTERWORKED:
19493 sprintf (instr, "bx%s\t%%|lr", conditional);
19494 break;
19496 case ARM_FT_EXCEPTION:
19497 /* ??? This is wrong for unified assembly syntax. */
19498 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19499 break;
19501 default:
19502 /* Use bx if it's available. */
19503 if (arm_arch5 || arm_arch4t)
19504 sprintf (instr, "bx%s\t%%|lr", conditional);
19505 else
19506 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19507 break;
19510 output_asm_insn (instr, & operand);
19513 return "";
19516 /* Write the function name into the code section, directly preceding
19517 the function prologue.
19519 Code will be output similar to this:
19521 .ascii "arm_poke_function_name", 0
19522 .align
19524 .word 0xff000000 + (t1 - t0)
19525 arm_poke_function_name
19526 mov ip, sp
19527 stmfd sp!, {fp, ip, lr, pc}
19528 sub fp, ip, #4
19530 When performing a stack backtrace, code can inspect the value
19531 of 'pc' stored at 'fp' + 0. If the trace function then looks
19532 at location pc - 12 and the top 8 bits are set, then we know
19533 that there is a function name embedded immediately preceding this
19534	   location, and that it has length ((pc[-3]) & 0x00ffffff).
19536 We assume that pc is declared as a pointer to an unsigned long.
19538 It is of no benefit to output the function name if we are assembling
19539 a leaf function. These function types will not contain a stack
19540 backtrace structure, therefore it is not possible to determine the
19541 function name. */
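/* A rough sketch (an assumption-laden illustration, not part of GCC) of how a
   backtracer might recover the name, where PC is the saved program counter
   fetched from fp + 0, viewed as a pointer to unsigned long:

     unsigned long marker = pc[-3];
     if ((marker & 0xff000000) == 0xff000000)
       {
         unsigned long len = marker & 0x00ffffff;
         const char *name = (const char *) (pc - 3) - len;
       }

   Here LEN is the word-aligned length of the string, including its
   terminating NUL and any padding.  */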
19542 void
19543 arm_poke_function_name (FILE *stream, const char *name)
19545 unsigned long alignlength;
19546 unsigned long length;
19547 rtx x;
19549 length = strlen (name) + 1;
19550 alignlength = ROUND_UP_WORD (length);
19552 ASM_OUTPUT_ASCII (stream, name, length);
19553 ASM_OUTPUT_ALIGN (stream, 2);
19554 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19555 assemble_aligned_integer (UNITS_PER_WORD, x);
19558 /* Place some comments into the assembler stream
19559 describing the current function. */
19560 static void
19561 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
19563 unsigned long func_type;
19565 /* ??? Do we want to print some of the below anyway? */
19566 if (TARGET_THUMB1)
19567 return;
19569 /* Sanity check. */
19570 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19572 func_type = arm_current_func_type ();
19574 switch ((int) ARM_FUNC_TYPE (func_type))
19576 default:
19577 case ARM_FT_NORMAL:
19578 break;
19579 case ARM_FT_INTERWORKED:
19580 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19581 break;
19582 case ARM_FT_ISR:
19583 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19584 break;
19585 case ARM_FT_FIQ:
19586 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19587 break;
19588 case ARM_FT_EXCEPTION:
19589 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19590 break;
19593 if (IS_NAKED (func_type))
19594 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19596 if (IS_VOLATILE (func_type))
19597 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19599 if (IS_NESTED (func_type))
19600 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19601 if (IS_STACKALIGN (func_type))
19602 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19604 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19605 crtl->args.size,
19606 crtl->args.pretend_args_size, frame_size);
19608 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19609 frame_pointer_needed,
19610 cfun->machine->uses_anonymous_args);
19612 if (cfun->machine->lr_save_eliminated)
19613 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19615 if (crtl->calls_eh_return)
19616 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19620 static void
19621 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
19622 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
19624 arm_stack_offsets *offsets;
19626 if (TARGET_THUMB1)
19628 int regno;
19630 /* Emit any call-via-reg trampolines that are needed for v4t support
19631 of call_reg and call_value_reg type insns. */
19632 for (regno = 0; regno < LR_REGNUM; regno++)
19634 rtx label = cfun->machine->call_via[regno];
19636 if (label != NULL)
19638 switch_to_section (function_section (current_function_decl));
19639 targetm.asm_out.internal_label (asm_out_file, "L",
19640 CODE_LABEL_NUMBER (label));
19641 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19645 /* ??? Probably not safe to set this here, since it assumes that a
19646 function will be emitted as assembly immediately after we generate
19647 RTL for it. This does not happen for inline functions. */
19648 cfun->machine->return_used_this_function = 0;
19650 else /* TARGET_32BIT */
19652 /* We need to take into account any stack-frame rounding. */
19653 offsets = arm_get_frame_offsets ();
19655 gcc_assert (!use_return_insn (FALSE, NULL)
19656 || (cfun->machine->return_used_this_function != 0)
19657 || offsets->saved_regs == offsets->outgoing_args
19658 || frame_pointer_needed);
19662 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19663 STR and STRD. If an even number of registers is being pushed, an
19664 STRD pattern is created for each register pair. If an
19665 odd number of registers is pushed, emit an initial STR followed by
19666 as many STRD instructions as are needed. This works best when the
19667 stack is initially 64-bit aligned (the normal case), since it
19668 ensures that each STRD is also 64-bit aligned. */
19669 static void
19670 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19672 int num_regs = 0;
19673 int i;
19674 int regno;
19675 rtx par = NULL_RTX;
19676 rtx dwarf = NULL_RTX;
19677 rtx tmp;
19678 bool first = true;
19680 num_regs = bit_count (saved_regs_mask);
19682 /* Must be at least one register to save, and can't save SP or PC. */
19683 gcc_assert (num_regs > 0 && num_regs <= 14);
19684 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19685 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19687 /* Create sequence for DWARF info. All the frame-related data for
19688 debugging is held in this wrapper. */
19689 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19691 /* Describe the stack adjustment. */
19692 tmp = gen_rtx_SET (stack_pointer_rtx,
19693 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19694 RTX_FRAME_RELATED_P (tmp) = 1;
19695 XVECEXP (dwarf, 0, 0) = tmp;
19697 /* Find the first register. */
19698 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19701 i = 0;
19703 /* If there's an odd number of registers to push, start off by
19704 pushing a single register. This ensures that subsequent strd
19705 operations are dword aligned (assuming that SP was originally
19706 64-bit aligned). */
19707 if ((num_regs & 1) != 0)
19709 rtx reg, mem, insn;
19711 reg = gen_rtx_REG (SImode, regno);
19712 if (num_regs == 1)
19713 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19714 stack_pointer_rtx));
19715 else
19716 mem = gen_frame_mem (Pmode,
19717 gen_rtx_PRE_MODIFY
19718 (Pmode, stack_pointer_rtx,
19719 plus_constant (Pmode, stack_pointer_rtx,
19720 -4 * num_regs)));
19722 tmp = gen_rtx_SET (mem, reg);
19723 RTX_FRAME_RELATED_P (tmp) = 1;
19724 insn = emit_insn (tmp);
19725 RTX_FRAME_RELATED_P (insn) = 1;
19726 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19727 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
19728 RTX_FRAME_RELATED_P (tmp) = 1;
19729 i++;
19730 regno++;
19731 XVECEXP (dwarf, 0, i) = tmp;
19732 first = false;
19735 while (i < num_regs)
19736 if (saved_regs_mask & (1 << regno))
19738 rtx reg1, reg2, mem1, mem2;
19739 rtx tmp0, tmp1, tmp2;
19740 int regno2;
19742 /* Find the register to pair with this one. */
19743 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19744 regno2++)
19747 reg1 = gen_rtx_REG (SImode, regno);
19748 reg2 = gen_rtx_REG (SImode, regno2);
19750 if (first)
19752 rtx insn;
19754 first = false;
19755 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19756 stack_pointer_rtx,
19757 -4 * num_regs));
19758 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19759 stack_pointer_rtx,
19760 -4 * (num_regs - 1)));
19761 tmp0 = gen_rtx_SET (stack_pointer_rtx,
19762 plus_constant (Pmode, stack_pointer_rtx,
19763 -4 * (num_regs)));
19764 tmp1 = gen_rtx_SET (mem1, reg1);
19765 tmp2 = gen_rtx_SET (mem2, reg2);
19766 RTX_FRAME_RELATED_P (tmp0) = 1;
19767 RTX_FRAME_RELATED_P (tmp1) = 1;
19768 RTX_FRAME_RELATED_P (tmp2) = 1;
19769 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19770 XVECEXP (par, 0, 0) = tmp0;
19771 XVECEXP (par, 0, 1) = tmp1;
19772 XVECEXP (par, 0, 2) = tmp2;
19773 insn = emit_insn (par);
19774 RTX_FRAME_RELATED_P (insn) = 1;
19775 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19777 else
19779 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19780 stack_pointer_rtx,
19781 4 * i));
19782 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19783 stack_pointer_rtx,
19784 4 * (i + 1)));
19785 tmp1 = gen_rtx_SET (mem1, reg1);
19786 tmp2 = gen_rtx_SET (mem2, reg2);
19787 RTX_FRAME_RELATED_P (tmp1) = 1;
19788 RTX_FRAME_RELATED_P (tmp2) = 1;
19789 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19790 XVECEXP (par, 0, 0) = tmp1;
19791 XVECEXP (par, 0, 1) = tmp2;
19792 emit_insn (par);
19795 /* Create unwind information. This is an approximation. */
19796 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
19797 plus_constant (Pmode,
19798 stack_pointer_rtx,
19799 4 * i)),
19800 reg1);
19801 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
19802 plus_constant (Pmode,
19803 stack_pointer_rtx,
19804 4 * (i + 1))),
19805 reg2);
19807 RTX_FRAME_RELATED_P (tmp1) = 1;
19808 RTX_FRAME_RELATED_P (tmp2) = 1;
19809 XVECEXP (dwarf, 0, i + 1) = tmp1;
19810 XVECEXP (dwarf, 0, i + 2) = tmp2;
19811 i += 2;
19812 regno = regno2 + 1;
19814 else
19815 regno++;
19817 return;
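As a hedged illustration of the pairing strategy above (plain C, not GCC internals), the sketch below prints which stores a given save mask would produce: one initial STR when the count is odd, then STRD pairs. The register numbering and the alignment rationale follow the comment preceding the function; everything else is illustrative.

#include <stdio.h>

/* Stand-in for GCC's bit_count.  */
static int
count_bits (unsigned long mask)
{
  int n = 0;
  for (; mask; mask >>= 1)
    n += mask & 1;
  return n;
}

/* Print the store sequence the strategy above would use for MASK.  */
void
show_strd_push (unsigned long mask)
{
  int regno = 0;
  int remaining = count_bits (mask);
  int r1;

  if (remaining & 1)
    {
      /* Odd count: one initial STR keeps the later STRDs 64-bit aligned.  */
      while (!(mask & (1UL << regno)))
        regno++;
      printf ("str  r%d\n", regno++);
      remaining--;
    }
  while (remaining > 0)
    {
      /* Everything else goes out as doubleword pairs.  */
      while (!(mask & (1UL << regno)))
        regno++;
      r1 = regno++;
      while (!(mask & (1UL << regno)))
        regno++;
      printf ("strd r%d, r%d\n", r1, regno++);
      remaining -= 2;
    }
}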
19820 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19821 whenever possible, otherwise it emits single-word stores. The first store
19822 also allocates stack space for all saved registers, using writeback with
19823 post-addressing mode. All other stores use offset addressing. If no STRD
19824 can be emitted, this function emits a sequence of single-word stores,
19825 and not an STM as before, because single-word stores provide more freedom
19826 for scheduling and can be turned into an STM by peephole optimizations. */
19827 static void
19828 arm_emit_strd_push (unsigned long saved_regs_mask)
19830 int num_regs = 0;
19831 int i, j, dwarf_index = 0;
19832 int offset = 0;
19833 rtx dwarf = NULL_RTX;
19834 rtx insn = NULL_RTX;
19835 rtx tmp, mem;
19837 /* TODO: More efficient code can be emitted by changing the
19838 layout, e.g., first push all pairs that can use STRD to keep the
19839 stack aligned, and then push all other registers. */
19840 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19841 if (saved_regs_mask & (1 << i))
19842 num_regs++;
19844 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19845 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19846 gcc_assert (num_regs > 0);
19848 /* Create sequence for DWARF info. */
19849 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19851 /* For dwarf info, we generate explicit stack update. */
19852 tmp = gen_rtx_SET (stack_pointer_rtx,
19853 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19854 RTX_FRAME_RELATED_P (tmp) = 1;
19855 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19857 /* Save registers. */
19858 offset = - 4 * num_regs;
19859 j = 0;
19860 while (j <= LAST_ARM_REGNUM)
19861 if (saved_regs_mask & (1 << j))
19863 if ((j % 2 == 0)
19864 && (saved_regs_mask & (1 << (j + 1))))
19866 /* Current register and previous register form register pair for
19867 which STRD can be generated. */
19868 if (offset < 0)
19870 /* Allocate stack space for all saved registers. */
19871 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19872 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19873 mem = gen_frame_mem (DImode, tmp);
19874 offset = 0;
19876 else if (offset > 0)
19877 mem = gen_frame_mem (DImode,
19878 plus_constant (Pmode,
19879 stack_pointer_rtx,
19880 offset));
19881 else
19882 mem = gen_frame_mem (DImode, stack_pointer_rtx);
19884 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
19885 RTX_FRAME_RELATED_P (tmp) = 1;
19886 tmp = emit_insn (tmp);
19888 /* Record the first store insn. */
19889 if (dwarf_index == 1)
19890 insn = tmp;
19892 /* Generate dwarf info. */
19893 mem = gen_frame_mem (SImode,
19894 plus_constant (Pmode,
19895 stack_pointer_rtx,
19896 offset));
19897 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
19898 RTX_FRAME_RELATED_P (tmp) = 1;
19899 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19901 mem = gen_frame_mem (SImode,
19902 plus_constant (Pmode,
19903 stack_pointer_rtx,
19904 offset + 4));
19905 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
19906 RTX_FRAME_RELATED_P (tmp) = 1;
19907 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19909 offset += 8;
19910 j += 2;
19912 else
19914 /* Emit a single word store. */
19915 if (offset < 0)
19917 /* Allocate stack space for all saved registers. */
19918 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
19919 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
19920 mem = gen_frame_mem (SImode, tmp);
19921 offset = 0;
19923 else if (offset > 0)
19924 mem = gen_frame_mem (SImode,
19925 plus_constant (Pmode,
19926 stack_pointer_rtx,
19927 offset));
19928 else
19929 mem = gen_frame_mem (SImode, stack_pointer_rtx);
19931 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
19932 RTX_FRAME_RELATED_P (tmp) = 1;
19933 tmp = emit_insn (tmp);
19935 /* Record the first store insn. */
19936 if (dwarf_index == 1)
19937 insn = tmp;
19939 /* Generate dwarf info. */
19940 mem = gen_frame_mem (SImode,
19941 plus_constant (Pmode,
19942 stack_pointer_rtx,
19943 offset));
19944 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
19945 RTX_FRAME_RELATED_P (tmp) = 1;
19946 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
19948 offset += 4;
19949 j += 1;
19952 else
19953 j++;
19955 /* Attach dwarf info to the first insn we generate. */
19956 gcc_assert (insn != NULL_RTX);
19957 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19958 RTX_FRAME_RELATED_P (insn) = 1;
19961 /* Generate and emit an insn that we will recognize as a push_multi.
19962 Unfortunately, since this insn does not reflect very well the actual
19963 semantics of the operation, we need to annotate the insn for the benefit
19964 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
19965 MASK for registers that should be annotated for DWARF2 frame unwind
19966 information. */
19967 static rtx
19968 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
19970 int num_regs = 0;
19971 int num_dwarf_regs = 0;
19972 int i, j;
19973 rtx par;
19974 rtx dwarf;
19975 int dwarf_par_index;
19976 rtx tmp, reg;
19978 /* We don't record the PC in the dwarf frame information. */
19979 dwarf_regs_mask &= ~(1 << PC_REGNUM);
19981 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19983 if (mask & (1 << i))
19984 num_regs++;
19985 if (dwarf_regs_mask & (1 << i))
19986 num_dwarf_regs++;
19989 gcc_assert (num_regs && num_regs <= 16);
19990 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
19992 /* For the body of the insn we are going to generate an UNSPEC in
19993 parallel with several USEs. This allows the insn to be recognized
19994 by the push_multi pattern in the arm.md file.
19996 The body of the insn looks something like this:
19998 (parallel [
19999 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20000 (const_int:SI <num>)))
20001 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20002 (use (reg:SI XX))
20003 (use (reg:SI YY))
20007 For the frame note however, we try to be more explicit and actually
20008 show each register being stored into the stack frame, plus a (single)
20009 decrement of the stack pointer. We do it this way in order to be
20010 friendly to the stack unwinding code, which only wants to see a single
20011 stack decrement per instruction. The RTL we generate for the note looks
20012 something like this:
20014 (sequence [
20015 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20016 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20017 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20018 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20022 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20023 instead we'd have a parallel expression detailing all
20024 the stores to the various memory addresses so that debug
20025 information is more up-to-date. Remember however while writing
20026 this to take care of the constraints with the push instruction.
20028 Note also that this has to be taken care of for the VFP registers.
20030 For more see PR43399. */
20032 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20033 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20034 dwarf_par_index = 1;
20036 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20038 if (mask & (1 << i))
20040 reg = gen_rtx_REG (SImode, i);
20042 XVECEXP (par, 0, 0)
20043 = gen_rtx_SET (gen_frame_mem
20044 (BLKmode,
20045 gen_rtx_PRE_MODIFY (Pmode,
20046 stack_pointer_rtx,
20047 plus_constant
20048 (Pmode, stack_pointer_rtx,
20049 -4 * num_regs))
20051 gen_rtx_UNSPEC (BLKmode,
20052 gen_rtvec (1, reg),
20053 UNSPEC_PUSH_MULT));
20055 if (dwarf_regs_mask & (1 << i))
20057 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20058 reg);
20059 RTX_FRAME_RELATED_P (tmp) = 1;
20060 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20063 break;
20067 for (j = 1, i++; j < num_regs; i++)
20069 if (mask & (1 << i))
20071 reg = gen_rtx_REG (SImode, i);
20073 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20075 if (dwarf_regs_mask & (1 << i))
20077 tmp
20078 = gen_rtx_SET (gen_frame_mem
20079 (SImode,
20080 plus_constant (Pmode, stack_pointer_rtx,
20081 4 * j)),
20082 reg);
20083 RTX_FRAME_RELATED_P (tmp) = 1;
20084 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20087 j++;
20091 par = emit_insn (par);
20093 tmp = gen_rtx_SET (stack_pointer_rtx,
20094 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20095 RTX_FRAME_RELATED_P (tmp) = 1;
20096 XVECEXP (dwarf, 0, 0) = tmp;
20098 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20100 return par;
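A hedged way to visualize the two views described in the comment above: the instruction itself is a single multi-register push, while the attached note records one SP decrement plus one per-register store. The sketch below is plain illustrative C, not GCC code; register numbering and formatting are only for display.

#include <stdio.h>

/* Print the unwind-note view of "push {<mask>}": one stack decrement, then
   one store per saved register at increasing offsets from the new SP.  */
void
show_push_note (unsigned long mask)
{
  int i, slot = 0, num_regs = 0;

  for (i = 0; i < 16; i++)
    if (mask & (1UL << i))
      num_regs++;

  printf ("sp <- sp - %d\n", 4 * num_regs);
  for (i = 0; i < 16; i++)
    if (mask & (1UL << i))
      printf ("[sp + %2d] <- r%d\n", 4 * slot++, i);
}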
20103 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20104 SIZE is the offset to be adjusted.
20105 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20106 static void
20107 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20109 rtx dwarf;
20111 RTX_FRAME_RELATED_P (insn) = 1;
20112 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20113 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20116 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20117 SAVED_REGS_MASK shows which registers need to be restored.
20119 Unfortunately, since this insn does not reflect very well the actual
20120 semantics of the operation, we need to annotate the insn for the benefit
20121 of DWARF2 frame unwind information. */
20122 static void
20123 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20125 int num_regs = 0;
20126 int i, j;
20127 rtx par;
20128 rtx dwarf = NULL_RTX;
20129 rtx tmp, reg;
20130 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20131 int offset_adj;
20132 int emit_update;
20134 offset_adj = return_in_pc ? 1 : 0;
20135 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20136 if (saved_regs_mask & (1 << i))
20137 num_regs++;
20139 gcc_assert (num_regs && num_regs <= 16);
20141 /* If SP is in reglist, then we don't emit SP update insn. */
20142 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20144 /* The parallel needs to hold num_regs SETs
20145 and one SET for the stack update. */
20146 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20148 if (return_in_pc)
20149 XVECEXP (par, 0, 0) = ret_rtx;
20151 if (emit_update)
20153 /* Increment the stack pointer, based on there being
20154 num_regs 4-byte registers to restore. */
20155 tmp = gen_rtx_SET (stack_pointer_rtx,
20156 plus_constant (Pmode,
20157 stack_pointer_rtx,
20158 4 * num_regs));
20159 RTX_FRAME_RELATED_P (tmp) = 1;
20160 XVECEXP (par, 0, offset_adj) = tmp;
20163 /* Now restore every reg, which may include PC. */
20164 for (j = 0, i = 0; j < num_regs; i++)
20165 if (saved_regs_mask & (1 << i))
20167 reg = gen_rtx_REG (SImode, i);
20168 if ((num_regs == 1) && emit_update && !return_in_pc)
20170 /* Emit single load with writeback. */
20171 tmp = gen_frame_mem (SImode,
20172 gen_rtx_POST_INC (Pmode,
20173 stack_pointer_rtx));
20174 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20175 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20176 return;
20179 tmp = gen_rtx_SET (reg,
20180 gen_frame_mem
20181 (SImode,
20182 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20183 RTX_FRAME_RELATED_P (tmp) = 1;
20184 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20186 /* We need to maintain a sequence for DWARF info too. As dwarf info
20187 should not have PC, skip PC. */
20188 if (i != PC_REGNUM)
20189 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20191 j++;
20194 if (return_in_pc)
20195 par = emit_jump_insn (par);
20196 else
20197 par = emit_insn (par);
20199 REG_NOTES (par) = dwarf;
20200 if (!return_in_pc)
20201 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20202 stack_pointer_rtx, stack_pointer_rtx);
20205 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20206 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20208 Unfortunately, since this insn does not reflect very well the actual
20209 semantics of the operation, we need to annotate the insn for the benefit
20210 of DWARF2 frame unwind information. */
20211 static void
20212 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20214 int i, j;
20215 rtx par;
20216 rtx dwarf = NULL_RTX;
20217 rtx tmp, reg;
20219 gcc_assert (num_regs && num_regs <= 32);
20221 /* Workaround ARM10 VFPr1 bug. */
20222 if (num_regs == 2 && !arm_arch6)
20224 if (first_reg == 15)
20225 first_reg--;
20227 num_regs++;
20230 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20231 there could be up to 32 D-registers to restore.
20232 If there are more than 16 D-registers, make two recursive calls,
20233 each of which emits one pop_multi instruction. */
20234 if (num_regs > 16)
20236 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20237 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20238 return;
20241 /* The parallel needs to hold num_regs SETs
20242 and one SET for the stack update. */
20243 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20245 /* Increment the stack pointer, based on there being
20246 num_regs 8-byte registers to restore. */
20247 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20248 RTX_FRAME_RELATED_P (tmp) = 1;
20249 XVECEXP (par, 0, 0) = tmp;
20251 /* Now show every reg that will be restored, using a SET for each. */
20252 for (j = 0, i=first_reg; j < num_regs; i += 2)
20254 reg = gen_rtx_REG (DFmode, i);
20256 tmp = gen_rtx_SET (reg,
20257 gen_frame_mem
20258 (DFmode,
20259 plus_constant (Pmode, base_reg, 8 * j)));
20260 RTX_FRAME_RELATED_P (tmp) = 1;
20261 XVECEXP (par, 0, j + 1) = tmp;
20263 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20265 j++;
20268 par = emit_insn (par);
20269 REG_NOTES (par) = dwarf;
20271 /* Make sure the CFA doesn't leave with IP_REGNUM, to allow unwinding from FP. */
20272 if (TARGET_VFP && REGNO (base_reg) == IP_REGNUM)
20274 RTX_FRAME_RELATED_P (par) = 1;
20275 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20277 else
20278 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20279 base_reg, base_reg);
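A hedged sketch of the 16-register split used above, in plain C with illustrative D-register numbering: more than 16 D-registers are restored as two pop_multi chunks, each of at most 16 registers.

#include <stdio.h>

/* Illustrative only: restore NUM_DREGS D-registers starting at FIRST_DREG,
   at most 16 per pop_multi, splitting recursively as the code above does.  */
void
show_vfp_pop (int first_dreg, int num_dregs)
{
  if (num_dregs > 16)
    {
      show_vfp_pop (first_dreg, 16);
      show_vfp_pop (first_dreg + 16, num_dregs - 16);
      return;
    }
  printf ("vldm sp!, {d%d-d%d}\n", first_dreg, first_dreg + num_dregs - 1);
}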
20282 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20283 even number of registers is being popped, multiple LDRD patterns are created
20284 for all register pairs. If an odd number of registers is popped, the last
20285 register is loaded using an LDR pattern. */
20286 static void
20287 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20289 int num_regs = 0;
20290 int i, j;
20291 rtx par = NULL_RTX;
20292 rtx dwarf = NULL_RTX;
20293 rtx tmp, reg, tmp1;
20294 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20296 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20297 if (saved_regs_mask & (1 << i))
20298 num_regs++;
20300 gcc_assert (num_regs && num_regs <= 16);
20302 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20303 to be popped. So, if num_regs is even, now it will become odd,
20304 and we can generate pop with PC. If num_regs is odd, it will be
20305 even now, and ldr with return can be generated for PC. */
20306 if (return_in_pc)
20307 num_regs--;
20309 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20311 /* Var j iterates over all the registers to gather all the registers in
20312 saved_regs_mask. Var i gives index of saved registers in stack frame.
20313 A PARALLEL RTX of register-pair is created here, so that pattern for
20314 LDRD can be matched. As PC is always last register to be popped, and
20315 we have already decremented num_regs if PC, we don't have to worry
20316 about PC in this loop. */
20317 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20318 if (saved_regs_mask & (1 << j))
20320 /* Create RTX for memory load. */
20321 reg = gen_rtx_REG (SImode, j);
20322 tmp = gen_rtx_SET (reg,
20323 gen_frame_mem (SImode,
20324 plus_constant (Pmode,
20325 stack_pointer_rtx, 4 * i)));
20326 RTX_FRAME_RELATED_P (tmp) = 1;
20328 if (i % 2 == 0)
20330 /* When saved-register index (i) is even, the RTX to be emitted is
20331 yet to be created. Hence create it first. The LDRD pattern we
20332 are generating is :
20333 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20334 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20335 where target registers need not be consecutive. */
20336 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20337 dwarf = NULL_RTX;
20340 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20341 added as 0th element and if i is odd, reg_i is added as 1st element
20342 of LDRD pattern shown above. */
20343 XVECEXP (par, 0, (i % 2)) = tmp;
20344 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20346 if ((i % 2) == 1)
20348 /* When saved-register index (i) is odd, RTXs for both the registers
20349 to be loaded are generated in above given LDRD pattern, and the
20350 pattern can be emitted now. */
20351 par = emit_insn (par);
20352 REG_NOTES (par) = dwarf;
20353 RTX_FRAME_RELATED_P (par) = 1;
20356 i++;
20359 /* If the number of registers popped is odd and return_in_pc is false, or the
20360 number of registers is even and return_in_pc is true, the last register is
20361 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20362 then LDR with post increment. */
20364 /* Increment the stack pointer, based on there being
20365 num_regs 4-byte registers to restore. */
20366 tmp = gen_rtx_SET (stack_pointer_rtx,
20367 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20368 RTX_FRAME_RELATED_P (tmp) = 1;
20369 tmp = emit_insn (tmp);
20370 if (!return_in_pc)
20372 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20373 stack_pointer_rtx, stack_pointer_rtx);
20376 dwarf = NULL_RTX;
20378 if (((num_regs % 2) == 1 && !return_in_pc)
20379 || ((num_regs % 2) == 0 && return_in_pc))
20381 /* Scan for the single register to be popped. Skip until the saved
20382 register is found. */
20383 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20385 /* Gen LDR with post increment here. */
20386 tmp1 = gen_rtx_MEM (SImode,
20387 gen_rtx_POST_INC (SImode,
20388 stack_pointer_rtx));
20389 set_mem_alias_set (tmp1, get_frame_alias_set ());
20391 reg = gen_rtx_REG (SImode, j);
20392 tmp = gen_rtx_SET (reg, tmp1);
20393 RTX_FRAME_RELATED_P (tmp) = 1;
20394 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20396 if (return_in_pc)
20398 /* If return_in_pc, j must be PC_REGNUM. */
20399 gcc_assert (j == PC_REGNUM);
20400 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20401 XVECEXP (par, 0, 0) = ret_rtx;
20402 XVECEXP (par, 0, 1) = tmp;
20403 par = emit_jump_insn (par);
20405 else
20407 par = emit_insn (tmp);
20408 REG_NOTES (par) = dwarf;
20409 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20410 stack_pointer_rtx, stack_pointer_rtx);
20414 else if ((num_regs % 2) == 1 && return_in_pc)
20416 /* There are 2 registers to be popped. So, generate the pattern
20417 pop_multiple_with_stack_update_and_return to pop in PC. */
20418 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20421 return;
20424 /* LDRD in ARM mode needs consecutive registers as operands. This function
20425 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20426 offset addressing and then generates one separate stack update. This provides
20427 more scheduling freedom, compared to writeback on every load. However,
20428 if the function returns using load into PC directly
20429 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20430 before the last load. TODO: Add a peephole optimization to recognize
20431 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20432 peephole optimization to merge the load at stack-offset zero
20433 with the stack update instruction using load with writeback
20434 in post-index addressing mode. */
20435 static void
20436 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20438 int j = 0;
20439 int offset = 0;
20440 rtx par = NULL_RTX;
20441 rtx dwarf = NULL_RTX;
20442 rtx tmp, mem;
20444 /* Restore saved registers. */
20445 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20446 j = 0;
20447 while (j <= LAST_ARM_REGNUM)
20448 if (saved_regs_mask & (1 << j))
20450 if ((j % 2) == 0
20451 && (saved_regs_mask & (1 << (j + 1)))
20452 && (j + 1) != PC_REGNUM)
20454 /* Current register and next register form register pair for which
20455 LDRD can be generated. PC is always the last register popped, and
20456 we handle it separately. */
20457 if (offset > 0)
20458 mem = gen_frame_mem (DImode,
20459 plus_constant (Pmode,
20460 stack_pointer_rtx,
20461 offset));
20462 else
20463 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20465 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20466 tmp = emit_insn (tmp);
20467 RTX_FRAME_RELATED_P (tmp) = 1;
20469 /* Generate dwarf info. */
20471 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20472 gen_rtx_REG (SImode, j),
20473 NULL_RTX);
20474 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20475 gen_rtx_REG (SImode, j + 1),
20476 dwarf);
20478 REG_NOTES (tmp) = dwarf;
20480 offset += 8;
20481 j += 2;
20483 else if (j != PC_REGNUM)
20485 /* Emit a single word load. */
20486 if (offset > 0)
20487 mem = gen_frame_mem (SImode,
20488 plus_constant (Pmode,
20489 stack_pointer_rtx,
20490 offset));
20491 else
20492 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20494 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20495 tmp = emit_insn (tmp);
20496 RTX_FRAME_RELATED_P (tmp) = 1;
20498 /* Generate dwarf info. */
20499 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20500 gen_rtx_REG (SImode, j),
20501 NULL_RTX);
20503 offset += 4;
20504 j += 1;
20506 else /* j == PC_REGNUM */
20507 j++;
20509 else
20510 j++;
20512 /* Update the stack. */
20513 if (offset > 0)
20515 tmp = gen_rtx_SET (stack_pointer_rtx,
20516 plus_constant (Pmode,
20517 stack_pointer_rtx,
20518 offset));
20519 tmp = emit_insn (tmp);
20520 arm_add_cfa_adjust_cfa_note (tmp, offset,
20521 stack_pointer_rtx, stack_pointer_rtx);
20522 offset = 0;
20525 if (saved_regs_mask & (1 << PC_REGNUM))
20527 /* Only PC is to be popped. */
20528 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20529 XVECEXP (par, 0, 0) = ret_rtx;
20530 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20531 gen_frame_mem (SImode,
20532 gen_rtx_POST_INC (SImode,
20533 stack_pointer_rtx)));
20534 RTX_FRAME_RELATED_P (tmp) = 1;
20535 XVECEXP (par, 0, 1) = tmp;
20536 par = emit_jump_insn (par);
20538 /* Generate dwarf info. */
20539 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20540 gen_rtx_REG (SImode, PC_REGNUM),
20541 NULL_RTX);
20542 REG_NOTES (par) = dwarf;
20543 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20544 stack_pointer_rtx, stack_pointer_rtx);
20548 /* Calculate the size of the return value that is passed in registers. */
20549 static unsigned
20550 arm_size_return_regs (void)
20552 machine_mode mode;
20554 if (crtl->return_rtx != 0)
20555 mode = GET_MODE (crtl->return_rtx);
20556 else
20557 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20559 return GET_MODE_SIZE (mode);
20562 /* Return true if the current function needs to save/restore LR. */
20563 static bool
20564 thumb_force_lr_save (void)
20566 return !cfun->machine->lr_save_eliminated
20567 && (!leaf_function_p ()
20568 || thumb_far_jump_used_p ()
20569 || df_regs_ever_live_p (LR_REGNUM));
20572 /* We do not know if r3 will be available because
20573 there is an indirect tail call happening in this
20574 particular case. */
20575 static bool
20576 is_indirect_tailcall_p (rtx call)
20578 rtx pat = PATTERN (call);
20580 /* Indirect tail call. */
20581 pat = XVECEXP (pat, 0, 0);
20582 if (GET_CODE (pat) == SET)
20583 pat = SET_SRC (pat);
20585 pat = XEXP (XEXP (pat, 0), 0);
20586 return REG_P (pat);
20589 /* Return true if r3 is used by any of the tail call insns in the
20590 current function. */
20591 static bool
20592 any_sibcall_could_use_r3 (void)
20594 edge_iterator ei;
20595 edge e;
20597 if (!crtl->tail_call_emit)
20598 return false;
20599 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20600 if (e->flags & EDGE_SIBCALL)
20602 rtx call = BB_END (e->src);
20603 if (!CALL_P (call))
20604 call = prev_nonnote_nondebug_insn (call);
20605 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20606 if (find_regno_fusage (call, USE, 3)
20607 || is_indirect_tailcall_p (call))
20608 return true;
20610 return false;
20614 /* Compute the distance from register FROM to register TO.
20615 These can be the arg pointer (26), the soft frame pointer (25),
20616 the stack pointer (13) or the hard frame pointer (11).
20617 In thumb mode r7 is used as the soft frame pointer, if needed.
20618 Typical stack layout looks like this:
20620 old stack pointer -> | |
20621 ----
20622 | | \
20623 | | saved arguments for
20624 | | vararg functions
20625 | | /
20627 hard FP & arg pointer -> | | \
20628 | | stack
20629 | | frame
20630 | | /
20632 | | \
20633 | | call saved
20634 | | registers
20635 soft frame pointer -> | | /
20637 | | \
20638 | | local
20639 | | variables
20640 locals base pointer -> | | /
20642 | | \
20643 | | outgoing
20644 | | arguments
20645 current stack pointer -> | | /
20648 For a given function some or all of these stack components
20649 may not be needed, giving rise to the possibility of
20650 eliminating some of the registers.
20652 The values returned by this function must reflect the behavior
20653 of arm_expand_prologue() and arm_compute_save_reg_mask().
20655 The sign of the number returned reflects the direction of stack
20656 growth, so the values are positive for all eliminations except
20657 from the soft frame pointer to the hard frame pointer.
20659 SFP may point just inside the local variables block to ensure correct
20660 alignment. */
20663 /* Calculate stack offsets. These are used to calculate register elimination
20664 offsets and in prologue/epilogue code. Also calculates which registers
20665 should be saved. */
20667 static arm_stack_offsets *
20668 arm_get_frame_offsets (void)
20670 struct arm_stack_offsets *offsets;
20671 unsigned long func_type;
20672 int leaf;
20673 int saved;
20674 int core_saved;
20675 HOST_WIDE_INT frame_size;
20676 int i;
20678 offsets = &cfun->machine->stack_offsets;
20680 /* We need to know if we are a leaf function. Unfortunately, it
20681 is possible to be called after start_sequence has been called,
20682 which causes get_insns to return the insns for the sequence,
20683 not the function, which will cause leaf_function_p to return
20684 the incorrect result.
20686 Fortunately, we do not need to know about leaf functions once reload has completed, and the
20687 frame size cannot be changed after that time, so we can safely
20688 use the cached value. */
20690 if (reload_completed)
20691 return offsets;
20693 /* Initially this is the size of the local variables. It will be translated
20694 into an offset once we have determined the size of preceding data. */
20695 frame_size = ROUND_UP_WORD (get_frame_size ());
20697 leaf = leaf_function_p ();
20699 /* Space for variadic functions. */
20700 offsets->saved_args = crtl->args.pretend_args_size;
20702 /* In Thumb mode this is incorrect, but never used. */
20703 offsets->frame
20704 = (offsets->saved_args
20705 + arm_compute_static_chain_stack_bytes ()
20706 + (frame_pointer_needed ? 4 : 0));
20708 if (TARGET_32BIT)
20710 unsigned int regno;
20712 offsets->saved_regs_mask = arm_compute_save_reg_mask ();
20713 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20714 saved = core_saved;
20716 /* We know that SP will be doubleword aligned on entry, and we must
20717 preserve that condition at any subroutine call. We also require the
20718 soft frame pointer to be doubleword aligned. */
20720 if (TARGET_REALLY_IWMMXT)
20722 /* Check for the call-saved iWMMXt registers. */
20723 for (regno = FIRST_IWMMXT_REGNUM;
20724 regno <= LAST_IWMMXT_REGNUM;
20725 regno++)
20726 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20727 saved += 8;
20730 func_type = arm_current_func_type ();
20731 /* Space for saved VFP registers. */
20732 if (! IS_VOLATILE (func_type)
20733 && TARGET_HARD_FLOAT && TARGET_VFP)
20734 saved += arm_get_vfp_saved_size ();
20736 else /* TARGET_THUMB1 */
20738 offsets->saved_regs_mask = thumb1_compute_save_reg_mask ();
20739 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20740 saved = core_saved;
20741 if (TARGET_BACKTRACE)
20742 saved += 16;
20745 /* Saved registers include the stack frame. */
20746 offsets->saved_regs
20747 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20748 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20750 /* A leaf function does not need any stack alignment if it has nothing
20751 on the stack. */
20752 if (leaf && frame_size == 0
20753 /* However if it calls alloca(), we have a dynamically allocated
20754 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20755 && ! cfun->calls_alloca)
20757 offsets->outgoing_args = offsets->soft_frame;
20758 offsets->locals_base = offsets->soft_frame;
20759 return offsets;
20762 /* Ensure SFP has the correct alignment. */
20763 if (ARM_DOUBLEWORD_ALIGN
20764 && (offsets->soft_frame & 7))
20766 offsets->soft_frame += 4;
20767 /* Try to align stack by pushing an extra reg. Don't bother doing this
20768 when there is a stack frame as the alignment will be rolled into
20769 the normal stack adjustment. */
20770 if (frame_size + crtl->outgoing_args_size == 0)
20772 int reg = -1;
20774 /* Register r3 is caller-saved. Normally it does not need to be
20775 saved on entry by the prologue. However if we choose to save
20776 it for padding then we may confuse the compiler into thinking
20777 a prologue sequence is required when in fact it is not. This
20778 will occur when shrink-wrapping if r3 is used as a scratch
20779 register and there are no other callee-saved writes.
20781 This situation can be avoided when other callee-saved registers
20782 are available and r3 is not mandatory if we choose a callee-saved
20783 register for padding. */
20784 bool prefer_callee_reg_p = false;
20786 /* If it is safe to use r3, then do so. This sometimes
20787 generates better code on Thumb-2 by avoiding the need to
20788 use 32-bit push/pop instructions. */
20789 if (! any_sibcall_could_use_r3 ()
20790 && arm_size_return_regs () <= 12
20791 && (offsets->saved_regs_mask & (1 << 3)) == 0
20792 && (TARGET_THUMB2
20793 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20795 reg = 3;
20796 if (!TARGET_THUMB2)
20797 prefer_callee_reg_p = true;
20799 if (reg == -1
20800 || prefer_callee_reg_p)
20802 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20804 /* Avoid fixed registers; they may be changed at
20805 arbitrary times so it's unsafe to restore them
20806 during the epilogue. */
20807 if (!fixed_regs[i]
20808 && (offsets->saved_regs_mask & (1 << i)) == 0)
20810 reg = i;
20811 break;
20816 if (reg != -1)
20818 offsets->saved_regs += 4;
20819 offsets->saved_regs_mask |= (1 << reg);
20824 offsets->locals_base = offsets->soft_frame + frame_size;
20825 offsets->outgoing_args = (offsets->locals_base
20826 + crtl->outgoing_args_size);
20828 if (ARM_DOUBLEWORD_ALIGN)
20830 /* Ensure SP remains doubleword aligned. */
20831 if (offsets->outgoing_args & 7)
20832 offsets->outgoing_args += 4;
20833 gcc_assert (!(offsets->outgoing_args & 7));
20836 return offsets;
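A hedged, self-contained walk-through of the offset arithmetic above, using illustrative input values; the variable names mirror arm_stack_offsets fields, but this is not GCC code and it omits the extra-register padding trick described earlier.

#include <stdio.h>

#define ROUND_UP_WORD(x) (((x) + 3) & ~3)

int
main (void)
{
  /* Illustrative inputs.  */
  int pretend_args = 0;                  /* crtl->args.pretend_args_size */
  int static_chain = 0;                  /* arm_compute_static_chain_stack_bytes () */
  int saved = 5 * 4;                     /* e.g. {r4, r5, r6, r7, lr} */
  int frame_size = ROUND_UP_WORD (18);   /* local variables, word aligned */
  int outgoing = 8;                      /* crtl->outgoing_args_size */
  int saved_args, saved_regs, soft_frame, locals_base, outgoing_args;

  saved_args = pretend_args;
  saved_regs = saved_args + static_chain + saved;
  soft_frame = saved_regs;               /* plus CALLER_INTERWORKING_SLOT_SIZE */
  if (soft_frame & 7)                    /* keep the soft frame pointer 8-byte aligned */
    soft_frame += 4;
  locals_base = soft_frame + frame_size;
  outgoing_args = locals_base + outgoing;
  if (outgoing_args & 7)                 /* keep SP 8-byte aligned at call sites */
    outgoing_args += 4;

  printf ("saved_regs=%d soft_frame=%d locals_base=%d outgoing_args=%d\n",
          saved_regs, soft_frame, locals_base, outgoing_args);
  return 0;
}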
20840 /* Calculate the relative offsets for the different stack pointers. Positive
20841 offsets are in the direction of stack growth. */
20843 HOST_WIDE_INT
20844 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20846 arm_stack_offsets *offsets;
20848 offsets = arm_get_frame_offsets ();
20850 /* OK, now we have enough information to compute the distances.
20851 There must be an entry in these switch tables for each pair
20852 of registers in ELIMINABLE_REGS, even if some of the entries
20853 seem to be redundant or useless. */
20854 switch (from)
20856 case ARG_POINTER_REGNUM:
20857 switch (to)
20859 case THUMB_HARD_FRAME_POINTER_REGNUM:
20860 return 0;
20862 case FRAME_POINTER_REGNUM:
20863 /* This is the reverse of the soft frame pointer
20864 to hard frame pointer elimination below. */
20865 return offsets->soft_frame - offsets->saved_args;
20867 case ARM_HARD_FRAME_POINTER_REGNUM:
20868 /* This is only non-zero in the case where the static chain register
20869 is stored above the frame. */
20870 return offsets->frame - offsets->saved_args - 4;
20872 case STACK_POINTER_REGNUM:
20873 /* If nothing has been pushed on the stack at all
20874 then this will return -4. This *is* correct! */
20875 return offsets->outgoing_args - (offsets->saved_args + 4);
20877 default:
20878 gcc_unreachable ();
20880 gcc_unreachable ();
20882 case FRAME_POINTER_REGNUM:
20883 switch (to)
20885 case THUMB_HARD_FRAME_POINTER_REGNUM:
20886 return 0;
20888 case ARM_HARD_FRAME_POINTER_REGNUM:
20889 /* The hard frame pointer points to the top entry in the
20890 stack frame. The soft frame pointer to the bottom entry
20891 in the stack frame. If there is no stack frame at all,
20892 then they are identical. */
20894 return offsets->frame - offsets->soft_frame;
20896 case STACK_POINTER_REGNUM:
20897 return offsets->outgoing_args - offsets->soft_frame;
20899 default:
20900 gcc_unreachable ();
20902 gcc_unreachable ();
20904 default:
20905 /* You cannot eliminate from the stack pointer.
20906 In theory you could eliminate from the hard frame
20907 pointer to the stack pointer, but this will never
20908 happen, since if a stack frame is not needed the
20909 hard frame pointer will never be used. */
20910 gcc_unreachable ();
20914 /* Given FROM and TO register numbers, say whether this elimination is
20915 allowed. Frame pointer elimination is automatically handled.
20917 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
20918 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
20919 pointer, we must eliminate FRAME_POINTER_REGNUM into
20920 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
20921 ARG_POINTER_REGNUM. */
20923 bool
20924 arm_can_eliminate (const int from, const int to)
20926 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
20927 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
20928 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
20929 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
20930 true);
20933 /* Emit RTL to save coprocessor registers on function entry. Returns the
20934 number of bytes pushed. */
20936 static int
20937 arm_save_coproc_regs(void)
20939 int saved_size = 0;
20940 unsigned reg;
20941 unsigned start_reg;
20942 rtx insn;
20944 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
20945 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
20947 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
20948 insn = gen_rtx_MEM (V2SImode, insn);
20949 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
20950 RTX_FRAME_RELATED_P (insn) = 1;
20951 saved_size += 8;
20954 if (TARGET_HARD_FLOAT && TARGET_VFP)
20956 start_reg = FIRST_VFP_REGNUM;
20958 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
20960 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
20961 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
20963 if (start_reg != reg)
20964 saved_size += vfp_emit_fstmd (start_reg,
20965 (reg - start_reg) / 2);
20966 start_reg = reg + 2;
20969 if (start_reg != reg)
20970 saved_size += vfp_emit_fstmd (start_reg,
20971 (reg - start_reg) / 2);
20973 return saved_size;
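The VFP loop above batches saves by flushing a store-multiple for each maximal run of live register pairs. A hedged, plain-C sketch of that run-flushing idiom (not GCC code; the LIVE array and the printed mnemonic are only illustrative):

#include <stdio.h>

/* Illustrative only: one store-multiple per maximal run of live D-registers,
   mirroring the start_reg/flush loop above.  LIVE has one entry per D-reg.  */
void
show_vfp_save (const int live[16])
{
  int d, start = 0;

  for (d = 0; d <= 16; d++)
    if (d == 16 || !live[d])
      {
        if (start != d)
          printf ("vstmdb sp!, {d%d-d%d}\n", start, d - 1);
        start = d + 1;
      }
}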
20977 /* Set the Thumb frame pointer from the stack pointer. */
20979 static void
20980 thumb_set_frame_pointer (arm_stack_offsets *offsets)
20982 HOST_WIDE_INT amount;
20983 rtx insn, dwarf;
20985 amount = offsets->outgoing_args - offsets->locals_base;
20986 if (amount < 1024)
20987 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20988 stack_pointer_rtx, GEN_INT (amount)));
20989 else
20991 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
20992 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
20993 expects the first two operands to be the same. */
20994 if (TARGET_THUMB2)
20996 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
20997 stack_pointer_rtx,
20998 hard_frame_pointer_rtx));
21000 else
21002 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21003 hard_frame_pointer_rtx,
21004 stack_pointer_rtx));
21006 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21007 plus_constant (Pmode, stack_pointer_rtx, amount));
21008 RTX_FRAME_RELATED_P (dwarf) = 1;
21009 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21012 RTX_FRAME_RELATED_P (insn) = 1;
21015 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21016 function. */
21017 void
21018 arm_expand_prologue (void)
21020 rtx amount;
21021 rtx insn;
21022 rtx ip_rtx;
21023 unsigned long live_regs_mask;
21024 unsigned long func_type;
21025 int fp_offset = 0;
21026 int saved_pretend_args = 0;
21027 int saved_regs = 0;
21028 unsigned HOST_WIDE_INT args_to_push;
21029 arm_stack_offsets *offsets;
21031 func_type = arm_current_func_type ();
21033 /* Naked functions don't have prologues. */
21034 if (IS_NAKED (func_type))
21035 return;
21037 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21038 args_to_push = crtl->args.pretend_args_size;
21040 /* Compute which register we will have to save onto the stack. */
21041 offsets = arm_get_frame_offsets ();
21042 live_regs_mask = offsets->saved_regs_mask;
21044 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21046 if (IS_STACKALIGN (func_type))
21048 rtx r0, r1;
21050 /* Handle a word-aligned stack pointer. We generate the following:
21052 mov r0, sp
21053 bic r1, r0, #7
21054 mov sp, r1
21055 <save and restore r0 in normal prologue/epilogue>
21056 mov sp, r0
21057 bx lr
21059 The unwinder doesn't need to know about the stack realignment.
21060 Just tell it we saved SP in r0. */
21061 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21063 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21064 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21066 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21067 RTX_FRAME_RELATED_P (insn) = 1;
21068 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21070 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21072 /* ??? The CFA changes here, which may cause GDB to conclude that it
21073 has entered a different function. That said, the unwind info is
21074 correct, individually, before and after this instruction because
21075 we've described the save of SP, which will override the default
21076 handling of SP as restoring from the CFA. */
21077 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21080 /* For APCS frames, if IP register is clobbered
21081 when creating frame, save that register in a special
21082 way. */
21083 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21085 if (IS_INTERRUPT (func_type))
21087 /* Interrupt functions must not corrupt any registers.
21088 Creating a frame pointer however, corrupts the IP
21089 register, so we must push it first. */
21090 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21092 /* Do not set RTX_FRAME_RELATED_P on this insn.
21093 The dwarf stack unwinding code only wants to see one
21094 stack decrement per function, and this is not it. If
21095 this instruction is labeled as being part of the frame
21096 creation sequence then dwarf2out_frame_debug_expr will
21097 die when it encounters the assignment of IP to FP
21098 later on, since the use of SP here establishes SP as
21099 the CFA register and not IP.
21101 Anyway this instruction is not really part of the stack
21102 frame creation although it is part of the prologue. */
21104 else if (IS_NESTED (func_type))
21106 /* The static chain register is the same as the IP register
21107 used as a scratch register during stack frame creation.
21108 To get around this need to find somewhere to store IP
21109 whilst the frame is being created. We try the following
21110 places in order:
21112 1. The last argument register r3 if it is available.
21113 2. A slot on the stack above the frame if there are no
21114 arguments to push onto the stack.
21115 3. Register r3 again, after pushing the argument registers
21116 onto the stack, if this is a varargs function.
21117 4. The last slot on the stack created for the arguments to
21118 push, if this isn't a varargs function.
21120 Note - we only need to tell the dwarf2 backend about the SP
21121 adjustment in the second variant; the static chain register
21122 doesn't need to be unwound, as it doesn't contain a value
21123 inherited from the caller. */
21125 if (!arm_r3_live_at_start_p ())
21126 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21127 else if (args_to_push == 0)
21129 rtx addr, dwarf;
21131 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21132 saved_regs += 4;
21134 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21135 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21136 fp_offset = 4;
21138 /* Just tell the dwarf backend that we adjusted SP. */
21139 dwarf = gen_rtx_SET (stack_pointer_rtx,
21140 plus_constant (Pmode, stack_pointer_rtx,
21141 -fp_offset));
21142 RTX_FRAME_RELATED_P (insn) = 1;
21143 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21145 else
21147 /* Store the args on the stack. */
21148 if (cfun->machine->uses_anonymous_args)
21150 insn
21151 = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21152 (0xf0 >> (args_to_push / 4)) & 0xf);
21153 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21154 saved_pretend_args = 1;
21156 else
21158 rtx addr, dwarf;
21160 if (args_to_push == 4)
21161 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21162 else
21163 addr
21164 = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21165 plus_constant (Pmode,
21166 stack_pointer_rtx,
21167 -args_to_push));
21169 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21171 /* Just tell the dwarf backend that we adjusted SP. */
21172 dwarf
21173 = gen_rtx_SET (stack_pointer_rtx,
21174 plus_constant (Pmode, stack_pointer_rtx,
21175 -args_to_push));
21176 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21179 RTX_FRAME_RELATED_P (insn) = 1;
21180 fp_offset = args_to_push;
21181 args_to_push = 0;
21185 insn = emit_set_insn (ip_rtx,
21186 plus_constant (Pmode, stack_pointer_rtx,
21187 fp_offset));
21188 RTX_FRAME_RELATED_P (insn) = 1;
21191 if (args_to_push)
21193 /* Push the argument registers, or reserve space for them. */
21194 if (cfun->machine->uses_anonymous_args)
21195 insn = emit_multi_reg_push
21196 ((0xf0 >> (args_to_push / 4)) & 0xf,
21197 (0xf0 >> (args_to_push / 4)) & 0xf);
21198 else
21199 insn = emit_insn
21200 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21201 GEN_INT (- args_to_push)));
21202 RTX_FRAME_RELATED_P (insn) = 1;
21205 /* If this is an interrupt service routine, and the link register
21206 is going to be pushed, and we're not generating an extra
21207 push of IP (needed when a frame is needed and the frame layout is APCS),
21208 subtracting four from LR now will mean that the function return
21209 can be done with a single instruction. */
21210 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21211 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21212 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21213 && TARGET_ARM)
21215 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21217 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21220 if (live_regs_mask)
21222 unsigned long dwarf_regs_mask = live_regs_mask;
21224 saved_regs += bit_count (live_regs_mask) * 4;
21225 if (optimize_size && !frame_pointer_needed
21226 && saved_regs == offsets->saved_regs - offsets->saved_args)
21228 /* If no coprocessor registers are being pushed and we don't have
21229 to worry about a frame pointer then push extra registers to
21230 create the stack frame. This is done in a way that does not
21231 alter the frame layout, so is independent of the epilogue. */
21232 int n;
21233 int frame;
21234 n = 0;
21235 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21236 n++;
21237 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21238 if (frame && n * 4 >= frame)
21240 n = frame / 4;
21241 live_regs_mask |= (1 << n) - 1;
21242 saved_regs += frame;
21246 if (TARGET_LDRD
21247 && current_tune->prefer_ldrd_strd
21248 && !optimize_function_for_size_p (cfun))
21250 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21251 if (TARGET_THUMB2)
21252 thumb2_emit_strd_push (live_regs_mask);
21253 else if (TARGET_ARM
21254 && !TARGET_APCS_FRAME
21255 && !IS_INTERRUPT (func_type))
21256 arm_emit_strd_push (live_regs_mask);
21257 else
21259 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21260 RTX_FRAME_RELATED_P (insn) = 1;
21263 else
21265 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21266 RTX_FRAME_RELATED_P (insn) = 1;
21270 if (! IS_VOLATILE (func_type))
21271 saved_regs += arm_save_coproc_regs ();
21273 if (frame_pointer_needed && TARGET_ARM)
21275 /* Create the new frame pointer. */
21276 if (TARGET_APCS_FRAME)
21278 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21279 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21280 RTX_FRAME_RELATED_P (insn) = 1;
21282 if (IS_NESTED (func_type))
21284 /* Recover the static chain register. */
21285 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21286 insn = gen_rtx_REG (SImode, 3);
21287 else
21289 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21290 insn = gen_frame_mem (SImode, insn);
21292 emit_set_insn (ip_rtx, insn);
21293 /* Add a USE to stop propagate_one_insn() from barfing. */
21294 emit_insn (gen_force_register_use (ip_rtx));
21297 else
21299 insn = GEN_INT (saved_regs - 4);
21300 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21301 stack_pointer_rtx, insn));
21302 RTX_FRAME_RELATED_P (insn) = 1;
21306 if (flag_stack_usage_info)
21307 current_function_static_stack_size
21308 = offsets->outgoing_args - offsets->saved_args;
21310 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21312 /* This add can produce multiple insns for a large constant, so we
21313 need to get tricky. */
21314 rtx_insn *last = get_last_insn ();
21316 amount = GEN_INT (offsets->saved_args + saved_regs
21317 - offsets->outgoing_args);
21319 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21320 amount));
21321 do
21323 last = last ? NEXT_INSN (last) : get_insns ();
21324 RTX_FRAME_RELATED_P (last) = 1;
21326 while (last != insn);
21328 /* If the frame pointer is needed, emit a special barrier that
21329 will prevent the scheduler from moving stores to the frame
21330 before the stack adjustment. */
21331 if (frame_pointer_needed)
21332 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
21333 hard_frame_pointer_rtx));
21337 if (frame_pointer_needed && TARGET_THUMB2)
21338 thumb_set_frame_pointer (offsets);
21340 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21342 unsigned long mask;
21344 mask = live_regs_mask;
21345 mask &= THUMB2_WORK_REGS;
21346 if (!IS_NESTED (func_type))
21347 mask |= (1 << IP_REGNUM);
21348 arm_load_pic_register (mask);
21351 /* If we are profiling, make sure no instructions are scheduled before
21352 the call to mcount. Similarly if the user has requested no
21353 scheduling in the prolog. Similarly if we want non-call exceptions
21354 using the EABI unwinder, to prevent faulting instructions from being
21355 swapped with a stack adjustment. */
21356 if (crtl->profile || !TARGET_SCHED_PROLOG
21357 || (arm_except_unwind_info (&global_options) == UI_TARGET
21358 && cfun->can_throw_non_call_exceptions))
21359 emit_insn (gen_blockage ());
21361 /* If the link register is being kept alive, with the return address in it,
21362 then make sure that it does not get reused by the ce2 pass. */
21363 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21364 cfun->machine->lr_save_eliminated = 1;
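Hedged source-level illustration (not part of arm.c): a naked function takes the IS_NAKED early return near the top of the prologue expansion above, so the compiler emits no prologue or epilogue and the body must supply its own entry and exit sequence.

/* Illustration only; behaviour beyond "no compiler-generated prologue or
   epilogue" is up to the user-supplied assembly.  */
void __attribute__ ((naked)) bare_entry (void)
{
  __asm__ volatile ("bx lr");
}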
21367 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21368 static void
21369 arm_print_condition (FILE *stream)
21371 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21373 /* Branch conversion is not implemented for Thumb-2. */
21374 if (TARGET_THUMB)
21376 output_operand_lossage ("predicated Thumb instruction");
21377 return;
21379 if (current_insn_predicate != NULL)
21381 output_operand_lossage
21382 ("predicated instruction in conditional sequence");
21383 return;
21386 fputs (arm_condition_codes[arm_current_cc], stream);
21388 else if (current_insn_predicate)
21390 enum arm_cond_code code;
21392 if (TARGET_THUMB1)
21394 output_operand_lossage ("predicated Thumb instruction");
21395 return;
21398 code = get_arm_condition_code (current_insn_predicate);
21399 fputs (arm_condition_codes[code], stream);
21404 /* Globally reserved letters: acln
21405 Punctuation letters currently used: @_|?().!#
21406 Lower case letters currently used: bcdefhimpqtvwxyz
21407 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21408 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21410 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21412 If CODE is 'd', then the X is a condition operand and the instruction
21413 should only be executed if the condition is true.
21414 If CODE is 'D', then the X is a condition operand and the instruction
21415 should only be executed if the condition is false: however, if the mode
21416 of the comparison is CCFPEmode, then always execute the instruction -- we
21417 do this because in these circumstances !GE does not necessarily imply LT;
21418 in these cases the instruction pattern will take care to make sure that
21419 an instruction containing %d will follow, thereby undoing the effects of
21420 doing this instruction unconditionally.
21421 If CODE is 'N' then X is a floating point operand that must be negated
21422 before output.
21423 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21424 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
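/* A few worked examples of the codes above (illustrative only, assuming ARM
   state and the usual register names): "%d" applied to a (ge ...) comparison
   prints "ge" and "%D" prints the inverse "lt"; "%B" applied to
   (const_int 3) prints "-4" (the bitwise inverse); "%M" applied to r4 in
   DImode prints "{r4-r5}".  */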
21425 static void
21426 arm_print_operand (FILE *stream, rtx x, int code)
21428 switch (code)
21430 case '@':
21431 fputs (ASM_COMMENT_START, stream);
21432 return;
21434 case '_':
21435 fputs (user_label_prefix, stream);
21436 return;
21438 case '|':
21439 fputs (REGISTER_PREFIX, stream);
21440 return;
21442 case '?':
21443 arm_print_condition (stream);
21444 return;
21446 case '(':
21447 /* Nothing in unified syntax, otherwise the current condition code. */
21448 if (!TARGET_UNIFIED_ASM)
21449 arm_print_condition (stream);
21450 break;
21452 case ')':
21453 /* The current condition code in unified syntax, otherwise nothing. */
21454 if (TARGET_UNIFIED_ASM)
21455 arm_print_condition (stream);
21456 break;
21458 case '.':
21459 /* The current condition code for a condition code setting instruction.
21460 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21461 if (TARGET_UNIFIED_ASM)
21463 fputc('s', stream);
21464 arm_print_condition (stream);
21466 else
21468 arm_print_condition (stream);
21469 fputc('s', stream);
21471 return;
21473 case '!':
21474 /* If the instruction is conditionally executed then print
21475 the current condition code, otherwise print 's'. */
21476 gcc_assert (TARGET_THUMB2 && TARGET_UNIFIED_ASM);
21477 if (current_insn_predicate)
21478 arm_print_condition (stream);
21479 else
21480 fputc('s', stream);
21481 break;
21483 /* %# is a "break" sequence. It doesn't output anything, but is used to
21484 separate e.g. operand numbers from following text, if that text consists
21485 of further digits which we don't want to be part of the operand
21486 number. */
21487 case '#':
21488 return;
21490 case 'N':
21492 REAL_VALUE_TYPE r;
21493 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
21494 r = real_value_negate (&r);
21495 fprintf (stream, "%s", fp_const_from_val (&r));
21497 return;
21499 /* An integer or symbol address without a preceding # sign. */
21500 case 'c':
21501 switch (GET_CODE (x))
21503 case CONST_INT:
21504 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21505 break;
21507 case SYMBOL_REF:
21508 output_addr_const (stream, x);
21509 break;
21511 case CONST:
21512 if (GET_CODE (XEXP (x, 0)) == PLUS
21513 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21515 output_addr_const (stream, x);
21516 break;
21518 /* Fall through. */
21520 default:
21521 output_operand_lossage ("Unsupported operand for code '%c'", code);
21523 return;
21525 /* An integer that we want to print in HEX. */
21526 case 'x':
21527 switch (GET_CODE (x))
21529 case CONST_INT:
21530 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21531 break;
21533 default:
21534 output_operand_lossage ("Unsupported operand for code '%c'", code);
21536 return;
21538 case 'B':
21539 if (CONST_INT_P (x))
21541 HOST_WIDE_INT val;
21542 val = ARM_SIGN_EXTEND (~INTVAL (x));
21543 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21545 else
21547 putc ('~', stream);
21548 output_addr_const (stream, x);
21550 return;
21552 case 'b':
21553 /* Print the log2 of a CONST_INT. */
21555 HOST_WIDE_INT val;
21557 if (!CONST_INT_P (x)
21558 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21559 output_operand_lossage ("Unsupported operand for code '%c'", code);
21560 else
21561 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21563 return;
21565 case 'L':
21566 /* The low 16 bits of an immediate constant. */
21567 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21568 return;
21570 case 'i':
21571 fprintf (stream, "%s", arithmetic_instr (x, 1));
21572 return;
21574 case 'I':
21575 fprintf (stream, "%s", arithmetic_instr (x, 0));
21576 return;
21578 case 'S':
21580 HOST_WIDE_INT val;
21581 const char *shift;
21583 shift = shift_op (x, &val);
21585 if (shift)
21587 fprintf (stream, ", %s ", shift);
21588 if (val == -1)
21589 arm_print_operand (stream, XEXP (x, 1), 0);
21590 else
21591 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21594 return;
21596 /* An explanation of the 'Q', 'R' and 'H' register operands:
21598 In a pair of registers containing a DI or DF value the 'Q'
21599 operand returns the register number of the register containing
21600 the least significant part of the value. The 'R' operand returns
21601 the register number of the register containing the most
21602 significant part of the value.
21604 The 'H' operand returns the higher of the two register numbers.
21605    On a target where WORDS_BIG_ENDIAN is true the 'H' operand is the
21606    same as the 'Q' operand, since the most significant part of the
21607    value is held in the lower-numbered register.  The reverse is true
21608 on systems where WORDS_BIG_ENDIAN is false.
21610 The purpose of these operands is to distinguish between cases
21611 where the endian-ness of the values is important (for example
21612 when they are added together), and cases where the endian-ness
21613 is irrelevant, but the order of register operations is important.
21614 For example when loading a value from memory into a register
21615 pair, the endian-ness does not matter. Provided that the value
21616 from the lower memory address is put into the lower numbered
21617 register, and the value from the higher address is put into the
21618 higher numbered register, the load will work regardless of whether
21619 the value being loaded is big-wordian or little-wordian. The
21620 order of the two register loads can matter however, if the address
21621 of the memory location is actually held in one of the registers
21622 being overwritten by the load.
21624    The 'Q' and 'R' operand codes are also available for 64-bit
21625    constants.  */
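/* For example, for a DImode value held in the pair r0/r1: on a little-endian
   target 'Q' prints r0 (the word holding the least significant part), 'R'
   prints r1 and 'H' prints r1; on a big-endian target 'Q' prints r1, 'R'
   prints r0 and 'H' still prints r1.  */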
21626 case 'Q':
21627 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21629 rtx part = gen_lowpart (SImode, x);
21630 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21631 return;
21634 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21636 output_operand_lossage ("invalid operand for code '%c'", code);
21637 return;
21640 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
21641 return;
21643 case 'R':
21644 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
21646 machine_mode mode = GET_MODE (x);
21647 rtx part;
21649 if (mode == VOIDmode)
21650 mode = DImode;
21651 part = gen_highpart_mode (SImode, mode, x);
21652 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
21653 return;
21656 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21658 output_operand_lossage ("invalid operand for code '%c'", code);
21659 return;
21662 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
21663 return;
21665 case 'H':
21666 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21668 output_operand_lossage ("invalid operand for code '%c'", code);
21669 return;
21672 asm_fprintf (stream, "%r", REGNO (x) + 1);
21673 return;
21675 case 'J':
21676 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21678 output_operand_lossage ("invalid operand for code '%c'", code);
21679 return;
21682 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
21683 return;
21685 case 'K':
21686 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
21688 output_operand_lossage ("invalid operand for code '%c'", code);
21689 return;
21692 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
21693 return;
21695 case 'm':
21696 asm_fprintf (stream, "%r",
21697 REG_P (XEXP (x, 0))
21698 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
21699 return;
21701 case 'M':
21702 asm_fprintf (stream, "{%r-%r}",
21703 REGNO (x),
21704 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
21705 return;
21707 /* Like 'M', but writing doubleword vector registers, for use by Neon
21708 insns. */
21709 case 'h':
21711 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
21712 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
21713 if (numregs == 1)
21714 asm_fprintf (stream, "{d%d}", regno);
21715 else
21716 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
21718 return;
21720 case 'd':
21721 /* CONST_TRUE_RTX means always -- that's the default. */
21722 if (x == const_true_rtx)
21723 return;
21725 if (!COMPARISON_P (x))
21727 output_operand_lossage ("invalid operand for code '%c'", code);
21728 return;
21731 fputs (arm_condition_codes[get_arm_condition_code (x)],
21732 stream);
21733 return;
21735 case 'D':
21736 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
21737 want to do that. */
21738 if (x == const_true_rtx)
21740 output_operand_lossage ("instruction never executed");
21741 return;
21743 if (!COMPARISON_P (x))
21745 output_operand_lossage ("invalid operand for code '%c'", code);
21746 return;
21749 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
21750 (get_arm_condition_code (x))],
21751 stream);
21752 return;
21754 case 's':
21755 case 'V':
21756 case 'W':
21757 case 'X':
21758 case 'Y':
21759 case 'Z':
21760 /* Former Maverick support, removed after GCC-4.7. */
21761 output_operand_lossage ("obsolete Maverick format code '%c'", code);
21762 return;
21764 case 'U':
21765 if (!REG_P (x)
21766 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
21767 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
21768 /* Bad value for wCG register number. */
21770 output_operand_lossage ("invalid operand for code '%c'", code);
21771 return;
21774 else
21775 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
21776 return;
21778 /* Print an iWMMXt control register name. */
21779 case 'w':
21780 if (!CONST_INT_P (x)
21781 || INTVAL (x) < 0
21782 || INTVAL (x) >= 16)
21783 /* Bad value for wC register number. */
21785 output_operand_lossage ("invalid operand for code '%c'", code);
21786 return;
21789 else
21791 static const char * wc_reg_names [16] =
21793 "wCID", "wCon", "wCSSF", "wCASF",
21794 "wC4", "wC5", "wC6", "wC7",
21795 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
21796 "wC12", "wC13", "wC14", "wC15"
21799 fputs (wc_reg_names [INTVAL (x)], stream);
21801 return;
21803 /* Print the high single-precision register of a VFP double-precision
21804 register. */
21805 case 'p':
21807 machine_mode mode = GET_MODE (x);
21808 int regno;
21810 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
21812 output_operand_lossage ("invalid operand for code '%c'", code);
21813 return;
21816 regno = REGNO (x);
21817 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
21819 output_operand_lossage ("invalid operand for code '%c'", code);
21820 return;
21823 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
21825 return;
21827 /* Print a VFP/Neon double precision or quad precision register name. */
21828 case 'P':
21829 case 'q':
21831 machine_mode mode = GET_MODE (x);
21832 int is_quad = (code == 'q');
21833 int regno;
21835 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
21837 output_operand_lossage ("invalid operand for code '%c'", code);
21838 return;
21841 if (!REG_P (x)
21842 || !IS_VFP_REGNUM (REGNO (x)))
21844 output_operand_lossage ("invalid operand for code '%c'", code);
21845 return;
21848 regno = REGNO (x);
21849 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
21850 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
21852 output_operand_lossage ("invalid operand for code '%c'", code);
21853 return;
21856 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
21857 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
21859 return;
21861 /* These two codes print the low/high doubleword register of a Neon quad
21862 register, respectively. For pair-structure types, can also print
21863 low/high quadword registers. */
21864 case 'e':
21865 case 'f':
21867 machine_mode mode = GET_MODE (x);
21868 int regno;
21870 if ((GET_MODE_SIZE (mode) != 16
21871 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
21873 output_operand_lossage ("invalid operand for code '%c'", code);
21874 return;
21877 regno = REGNO (x);
21878 if (!NEON_REGNO_OK_FOR_QUAD (regno))
21880 output_operand_lossage ("invalid operand for code '%c'", code);
21881 return;
21884 if (GET_MODE_SIZE (mode) == 16)
21885 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
21886 + (code == 'f' ? 1 : 0));
21887 else
21888 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
21889 + (code == 'f' ? 1 : 0));
21891 return;
21893 /* Print a VFPv3 floating-point constant, represented as an integer
21894 index. */
21895 case 'G':
21897 int index = vfp3_const_double_index (x);
21898 gcc_assert (index != -1);
21899 fprintf (stream, "%d", index);
21901 return;
21903 /* Print bits representing opcode features for Neon.
21905 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
21906 and polynomials as unsigned.
21908 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
21910 Bit 2 is 1 for rounding functions, 0 otherwise. */
21912 /* Identify the type as 's', 'u', 'p' or 'f'. */
21913 case 'T':
21915 HOST_WIDE_INT bits = INTVAL (x);
21916 fputc ("uspf"[bits & 3], stream);
21918 return;
21920 /* Likewise, but signed and unsigned integers are both 'i'. */
21921 case 'F':
21923 HOST_WIDE_INT bits = INTVAL (x);
21924 fputc ("iipf"[bits & 3], stream);
21926 return;
21928 /* As for 'T', but emit 'u' instead of 'p'. */
21929 case 't':
21931 HOST_WIDE_INT bits = INTVAL (x);
21932 fputc ("usuf"[bits & 3], stream);
21934 return;
21936 /* Bit 2: rounding (vs none). */
21937 case 'O':
21939 HOST_WIDE_INT bits = INTVAL (x);
21940 fputs ((bits & 4) != 0 ? "r" : "", stream);
21942 return;
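/* For example, with an operand of (const_int 5) (signed, rounding): '%T'
   prints 's', '%F' prints 'i', '%t' prints 's' and '%O' prints 'r'.  With
   (const_int 2) (polynomial): '%T' prints 'p', '%F' prints 'p', '%t' prints
   'u' and '%O' prints nothing.  */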
21944 /* Memory operand for vld1/vst1 instruction. */
21945 case 'A':
21947 rtx addr;
21948 bool postinc = FALSE;
21949 rtx postinc_reg = NULL;
21950 unsigned align, memsize, align_bits;
21952 gcc_assert (MEM_P (x));
21953 addr = XEXP (x, 0);
21954 if (GET_CODE (addr) == POST_INC)
21956 postinc = 1;
21957 addr = XEXP (addr, 0);
21959 if (GET_CODE (addr) == POST_MODIFY)
21961 postinc_reg = XEXP( XEXP (addr, 1), 1);
21962 addr = XEXP (addr, 0);
21964 asm_fprintf (stream, "[%r", REGNO (addr));
21966 /* We know the alignment of this access, so we can emit a hint in the
21967 instruction (for some alignments) as an aid to the memory subsystem
21968 of the target. */
21969 align = MEM_ALIGN (x) >> 3;
21970 memsize = MEM_SIZE (x);
21972 /* Only certain alignment specifiers are supported by the hardware. */
21973 if (memsize == 32 && (align % 32) == 0)
21974 align_bits = 256;
21975 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
21976 align_bits = 128;
21977 else if (memsize >= 8 && (align % 8) == 0)
21978 align_bits = 64;
21979 else
21980 align_bits = 0;
21982 if (align_bits != 0)
21983 asm_fprintf (stream, ":%d", align_bits);
21985 asm_fprintf (stream, "]");
21987 if (postinc)
21988 fputs("!", stream);
21989 if (postinc_reg)
21990 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
21992 return;
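/* For example, a 16-byte vld1/vst1 access through r0 that is known to be
   128-bit aligned is printed as "[r0:128]"; with a POST_INC address this
   becomes "[r0:128]!", and with a POST_MODIFY by r2 it becomes
   "[r0:128], r2".  */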
21994 case 'C':
21996 rtx addr;
21998 gcc_assert (MEM_P (x));
21999 addr = XEXP (x, 0);
22000 gcc_assert (REG_P (addr));
22001 asm_fprintf (stream, "[%r]", REGNO (addr));
22003 return;
22005 /* Translate an S register number into a D register number and element index. */
22006 case 'y':
22008 machine_mode mode = GET_MODE (x);
22009 int regno;
22011 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22013 output_operand_lossage ("invalid operand for code '%c'", code);
22014 return;
22017 regno = REGNO (x);
22018 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22020 output_operand_lossage ("invalid operand for code '%c'", code);
22021 return;
22024 regno = regno - FIRST_VFP_REGNUM;
22025 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22027 return;
22029 case 'v':
22030 gcc_assert (CONST_DOUBLE_P (x));
22031 int result;
22032 result = vfp3_const_double_for_fract_bits (x);
22033 if (result == 0)
22034 result = vfp3_const_double_for_bits (x);
22035 fprintf (stream, "#%d", result);
22036 return;
22038 /* Register specifier for vld1.16/vst1.16. Translate the S register
22039 number into a D register number and element index. */
22040 case 'z':
22042 machine_mode mode = GET_MODE (x);
22043 int regno;
22045 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22047 output_operand_lossage ("invalid operand for code '%c'", code);
22048 return;
22051 regno = REGNO (x);
22052 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22054 output_operand_lossage ("invalid operand for code '%c'", code);
22055 return;
22058 regno = regno - FIRST_VFP_REGNUM;
22059 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22061 return;
22063 default:
22064 if (x == 0)
22066 output_operand_lossage ("missing operand");
22067 return;
22070 switch (GET_CODE (x))
22072 case REG:
22073 asm_fprintf (stream, "%r", REGNO (x));
22074 break;
22076 case MEM:
22077 output_memory_reference_mode = GET_MODE (x);
22078 output_address (XEXP (x, 0));
22079 break;
22081 case CONST_DOUBLE:
22083 char fpstr[20];
22084 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22085 sizeof (fpstr), 0, 1);
22086 fprintf (stream, "#%s", fpstr);
22088 break;
22090 default:
22091 gcc_assert (GET_CODE (x) != NEG);
22092 fputc ('#', stream);
22093 if (GET_CODE (x) == HIGH)
22095 fputs (":lower16:", stream);
22096 x = XEXP (x, 0);
22099 output_addr_const (stream, x);
22100 break;
22105 /* Target hook for printing a memory address. */
22106 static void
22107 arm_print_operand_address (FILE *stream, rtx x)
22109 if (TARGET_32BIT)
22111 int is_minus = GET_CODE (x) == MINUS;
22113 if (REG_P (x))
22114 asm_fprintf (stream, "[%r]", REGNO (x));
22115 else if (GET_CODE (x) == PLUS || is_minus)
22117 rtx base = XEXP (x, 0);
22118 rtx index = XEXP (x, 1);
22119 HOST_WIDE_INT offset = 0;
22120 if (!REG_P (base)
22121 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22123      /* Ensure that BASE is a register (one of them
22124         must be) and that the SP is not used as an
22125         index register.  */
22126 std::swap (base, index);
22128 switch (GET_CODE (index))
22130 case CONST_INT:
22131 offset = INTVAL (index);
22132 if (is_minus)
22133 offset = -offset;
22134 asm_fprintf (stream, "[%r, #%wd]",
22135 REGNO (base), offset);
22136 break;
22138 case REG:
22139 asm_fprintf (stream, "[%r, %s%r]",
22140 REGNO (base), is_minus ? "-" : "",
22141 REGNO (index));
22142 break;
22144 case MULT:
22145 case ASHIFTRT:
22146 case LSHIFTRT:
22147 case ASHIFT:
22148 case ROTATERT:
22150 asm_fprintf (stream, "[%r, %s%r",
22151 REGNO (base), is_minus ? "-" : "",
22152 REGNO (XEXP (index, 0)));
22153 arm_print_operand (stream, index, 'S');
22154 fputs ("]", stream);
22155 break;
22158 default:
22159 gcc_unreachable ();
22162 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22163 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22165 extern machine_mode output_memory_reference_mode;
22167 gcc_assert (REG_P (XEXP (x, 0)));
22169 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22170 asm_fprintf (stream, "[%r, #%s%d]!",
22171 REGNO (XEXP (x, 0)),
22172 GET_CODE (x) == PRE_DEC ? "-" : "",
22173 GET_MODE_SIZE (output_memory_reference_mode));
22174 else
22175 asm_fprintf (stream, "[%r], #%s%d",
22176 REGNO (XEXP (x, 0)),
22177 GET_CODE (x) == POST_DEC ? "-" : "",
22178 GET_MODE_SIZE (output_memory_reference_mode));
22180 else if (GET_CODE (x) == PRE_MODIFY)
22182 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22183 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22184 asm_fprintf (stream, "#%wd]!",
22185 INTVAL (XEXP (XEXP (x, 1), 1)));
22186 else
22187 asm_fprintf (stream, "%r]!",
22188 REGNO (XEXP (XEXP (x, 1), 1)));
22190 else if (GET_CODE (x) == POST_MODIFY)
22192 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22193 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22194 asm_fprintf (stream, "#%wd",
22195 INTVAL (XEXP (XEXP (x, 1), 1)));
22196 else
22197 asm_fprintf (stream, "%r",
22198 REGNO (XEXP (XEXP (x, 1), 1)));
22200 else output_addr_const (stream, x);
22202 else
22204 if (REG_P (x))
22205 asm_fprintf (stream, "[%r]", REGNO (x));
22206 else if (GET_CODE (x) == POST_INC)
22207 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22208 else if (GET_CODE (x) == PLUS)
22210 gcc_assert (REG_P (XEXP (x, 0)));
22211 if (CONST_INT_P (XEXP (x, 1)))
22212 asm_fprintf (stream, "[%r, #%wd]",
22213 REGNO (XEXP (x, 0)),
22214 INTVAL (XEXP (x, 1)));
22215 else
22216 asm_fprintf (stream, "[%r, %r]",
22217 REGNO (XEXP (x, 0)),
22218 REGNO (XEXP (x, 1)));
22220 else
22221 output_addr_const (stream, x);
22225 /* Target hook for indicating whether a punctuation character for
22226 TARGET_PRINT_OPERAND is valid. */
22227 static bool
22228 arm_print_operand_punct_valid_p (unsigned char code)
22230 return (code == '@' || code == '|' || code == '.'
22231 || code == '(' || code == ')' || code == '#'
22232 || (TARGET_32BIT && (code == '?'))
22233 || (TARGET_THUMB2 && (code == '!'))
22234 || (TARGET_THUMB && (code == '_')));
22237 /* Target hook for assembling integer objects. The ARM version needs to
22238 handle word-sized values specially. */
22239 static bool
22240 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22242 machine_mode mode;
22244 if (size == UNITS_PER_WORD && aligned_p)
22246 fputs ("\t.word\t", asm_out_file);
22247 output_addr_const (asm_out_file, x);
22249 /* Mark symbols as position independent. We only do this in the
22250 .text segment, not in the .data segment. */
22251 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22252 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22254 /* See legitimize_pic_address for an explanation of the
22255 TARGET_VXWORKS_RTP check. */
22256 if (!arm_pic_data_is_text_relative
22257 || (GET_CODE (x) == SYMBOL_REF && !SYMBOL_REF_LOCAL_P (x)))
22258 fputs ("(GOT)", asm_out_file);
22259 else
22260 fputs ("(GOTOFF)", asm_out_file);
22262 fputc ('\n', asm_out_file);
22263 return true;
22266 mode = GET_MODE (x);
22268 if (arm_vector_mode_supported_p (mode))
22270 int i, units;
22272 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22274 units = CONST_VECTOR_NUNITS (x);
22275 size = GET_MODE_SIZE (GET_MODE_INNER (mode));
22277 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22278 for (i = 0; i < units; i++)
22280 rtx elt = CONST_VECTOR_ELT (x, i);
22281 assemble_integer
22282 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22284 else
22285 for (i = 0; i < units; i++)
22287 rtx elt = CONST_VECTOR_ELT (x, i);
22288 REAL_VALUE_TYPE rval;
22290 REAL_VALUE_FROM_CONST_DOUBLE (rval, elt);
22292 assemble_real
22293 (rval, GET_MODE_INNER (mode),
22294 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22297 return true;
22300 return default_assemble_integer (x, size, aligned_p);
22303 static void
22304 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22306 section *s;
22308 if (!TARGET_AAPCS_BASED)
22310 (is_ctor ?
22311 default_named_section_asm_out_constructor
22312 : default_named_section_asm_out_destructor) (symbol, priority);
22313 return;
22316 /* Put these in the .init_array section, using a special relocation. */
22317 if (priority != DEFAULT_INIT_PRIORITY)
22319 char buf[18];
22320 sprintf (buf, "%s.%.5u",
22321 is_ctor ? ".init_array" : ".fini_array",
22322 priority);
22323 s = get_section (buf, SECTION_WRITE, NULL_TREE);
22325 else if (is_ctor)
22326 s = ctors_section;
22327 else
22328 s = dtors_section;
22330 switch_to_section (s);
22331 assemble_align (POINTER_SIZE);
22332 fputs ("\t.word\t", asm_out_file);
22333 output_addr_const (asm_out_file, symbol);
22334 fputs ("(target1)\n", asm_out_file);
22337 /* Add a function to the list of static constructors. */
22339 static void
22340 arm_elf_asm_constructor (rtx symbol, int priority)
22342 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22345 /* Add a function to the list of static destructors. */
22347 static void
22348 arm_elf_asm_destructor (rtx symbol, int priority)
22350 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22353 /* A finite state machine takes care of noticing whether or not instructions
22354 can be conditionally executed, and thus decrease execution time and code
22355 size by deleting branch instructions. The fsm is controlled by
22356 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22358 /* The states of the fsm controlling condition codes are:
22359 0: normal, do nothing special
22360 1: make ASM_OUTPUT_OPCODE not output this instruction
22361 2: make ASM_OUTPUT_OPCODE not output this instruction
22362 3: make instructions conditional
22363 4: make instructions conditional
22365 State transitions (state->state by whom under condition):
22366 0 -> 1 final_prescan_insn if the `target' is a label
22367 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22368 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22369 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22370 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22371 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22372 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22373 (the target insn is arm_target_insn).
22375 If the jump clobbers the conditions then we use states 2 and 4.
22377 A similar thing can be done with conditional return insns.
22379 XXX In case the `target' is an unconditional branch, this conditionalising
22380 of the instructions always reduces code size, but not always execution
22381 time. But then, I want to reduce the code size to somewhere near what
22382 /bin/cc produces. */
22384 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22385 instructions. When a COND_EXEC instruction is seen the subsequent
22386 instructions are scanned so that multiple conditional instructions can be
22387 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22388 specify the length and true/false mask for the IT block. These will be
22389 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
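/* As a rough illustration of the transformation the fsm performs, a sequence
   such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   can be output in ARM state as

	cmp	r0, #0
	addne	r1, r1, #1

   eliminating the branch by conditionalising the skipped instruction.  */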
22391 /* Returns the index of the ARM condition code string in
22392 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22393 COMPARISON should be an rtx like `(eq (...) (...))'. */
22395 enum arm_cond_code
22396 maybe_get_arm_condition_code (rtx comparison)
22398 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22399 enum arm_cond_code code;
22400 enum rtx_code comp_code = GET_CODE (comparison);
22402 if (GET_MODE_CLASS (mode) != MODE_CC)
22403 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22404 XEXP (comparison, 1));
22406 switch (mode)
22408 case CC_DNEmode: code = ARM_NE; goto dominance;
22409 case CC_DEQmode: code = ARM_EQ; goto dominance;
22410 case CC_DGEmode: code = ARM_GE; goto dominance;
22411 case CC_DGTmode: code = ARM_GT; goto dominance;
22412 case CC_DLEmode: code = ARM_LE; goto dominance;
22413 case CC_DLTmode: code = ARM_LT; goto dominance;
22414 case CC_DGEUmode: code = ARM_CS; goto dominance;
22415 case CC_DGTUmode: code = ARM_HI; goto dominance;
22416 case CC_DLEUmode: code = ARM_LS; goto dominance;
22417 case CC_DLTUmode: code = ARM_CC;
22419 dominance:
22420 if (comp_code == EQ)
22421 return ARM_INVERSE_CONDITION_CODE (code);
22422 if (comp_code == NE)
22423 return code;
22424 return ARM_NV;
22426 case CC_NOOVmode:
22427 switch (comp_code)
22429 case NE: return ARM_NE;
22430 case EQ: return ARM_EQ;
22431 case GE: return ARM_PL;
22432 case LT: return ARM_MI;
22433 default: return ARM_NV;
22436 case CC_Zmode:
22437 switch (comp_code)
22439 case NE: return ARM_NE;
22440 case EQ: return ARM_EQ;
22441 default: return ARM_NV;
22444 case CC_Nmode:
22445 switch (comp_code)
22447 case NE: return ARM_MI;
22448 case EQ: return ARM_PL;
22449 default: return ARM_NV;
22452 case CCFPEmode:
22453 case CCFPmode:
22454 /* We can handle all cases except UNEQ and LTGT. */
22455 switch (comp_code)
22457 case GE: return ARM_GE;
22458 case GT: return ARM_GT;
22459 case LE: return ARM_LS;
22460 case LT: return ARM_MI;
22461 case NE: return ARM_NE;
22462 case EQ: return ARM_EQ;
22463 case ORDERED: return ARM_VC;
22464 case UNORDERED: return ARM_VS;
22465 case UNLT: return ARM_LT;
22466 case UNLE: return ARM_LE;
22467 case UNGT: return ARM_HI;
22468 case UNGE: return ARM_PL;
22469 /* UNEQ and LTGT do not have a representation. */
22470 case UNEQ: /* Fall through. */
22471 case LTGT: /* Fall through. */
22472 default: return ARM_NV;
22475 case CC_SWPmode:
22476 switch (comp_code)
22478 case NE: return ARM_NE;
22479 case EQ: return ARM_EQ;
22480 case GE: return ARM_LE;
22481 case GT: return ARM_LT;
22482 case LE: return ARM_GE;
22483 case LT: return ARM_GT;
22484 case GEU: return ARM_LS;
22485 case GTU: return ARM_CC;
22486 case LEU: return ARM_CS;
22487 case LTU: return ARM_HI;
22488 default: return ARM_NV;
22491 case CC_Cmode:
22492 switch (comp_code)
22494 case LTU: return ARM_CS;
22495 case GEU: return ARM_CC;
22496 default: return ARM_NV;
22499 case CC_CZmode:
22500 switch (comp_code)
22502 case NE: return ARM_NE;
22503 case EQ: return ARM_EQ;
22504 case GEU: return ARM_CS;
22505 case GTU: return ARM_HI;
22506 case LEU: return ARM_LS;
22507 case LTU: return ARM_CC;
22508 default: return ARM_NV;
22511 case CC_NCVmode:
22512 switch (comp_code)
22514 case GE: return ARM_GE;
22515 case LT: return ARM_LT;
22516 case GEU: return ARM_CS;
22517 case LTU: return ARM_CC;
22518 default: return ARM_NV;
22521 case CCmode:
22522 switch (comp_code)
22524 case NE: return ARM_NE;
22525 case EQ: return ARM_EQ;
22526 case GE: return ARM_GE;
22527 case GT: return ARM_GT;
22528 case LE: return ARM_LE;
22529 case LT: return ARM_LT;
22530 case GEU: return ARM_CS;
22531 case GTU: return ARM_HI;
22532 case LEU: return ARM_LS;
22533 case LTU: return ARM_CC;
22534 default: return ARM_NV;
22537 default: gcc_unreachable ();
22541 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22542 static enum arm_cond_code
22543 get_arm_condition_code (rtx comparison)
22545 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22546 gcc_assert (code != ARM_NV);
22547 return code;
22550 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22551 instructions. */
22552 void
22553 thumb2_final_prescan_insn (rtx_insn *insn)
22555 rtx_insn *first_insn = insn;
22556 rtx body = PATTERN (insn);
22557 rtx predicate;
22558 enum arm_cond_code code;
22559 int n;
22560 int mask;
22561 int max;
22563   /* The max_insns_skipped tuning parameter was already taken into account
22564      by the cost model of the ifcvt pass when it generated the COND_EXEC
22565      insns.  At this stage just emit IT blocks as large as we can; it does
22566      not make sense to split them.  */
22567 max = MAX_INSN_PER_IT_BLOCK;
22569 /* Remove the previous insn from the count of insns to be output. */
22570 if (arm_condexec_count)
22571 arm_condexec_count--;
22573 /* Nothing to do if we are already inside a conditional block. */
22574 if (arm_condexec_count)
22575 return;
22577 if (GET_CODE (body) != COND_EXEC)
22578 return;
22580 /* Conditional jumps are implemented directly. */
22581 if (JUMP_P (insn))
22582 return;
22584 predicate = COND_EXEC_TEST (body);
22585 arm_current_cc = get_arm_condition_code (predicate);
22587 n = get_attr_ce_count (insn);
22588 arm_condexec_count = 1;
22589 arm_condexec_mask = (1 << n) - 1;
22590 arm_condexec_masklen = n;
22591 /* See if subsequent instructions can be combined into the same block. */
22592 for (;;)
22594 insn = next_nonnote_insn (insn);
22596 /* Jumping into the middle of an IT block is illegal, so a label or
22597 barrier terminates the block. */
22598 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
22599 break;
22601 body = PATTERN (insn);
22602 /* USE and CLOBBER aren't really insns, so just skip them. */
22603 if (GET_CODE (body) == USE
22604 || GET_CODE (body) == CLOBBER)
22605 continue;
22607 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
22608 if (GET_CODE (body) != COND_EXEC)
22609 break;
22610 /* Maximum number of conditionally executed instructions in a block. */
22611 n = get_attr_ce_count (insn);
22612 if (arm_condexec_masklen + n > max)
22613 break;
22615 predicate = COND_EXEC_TEST (body);
22616 code = get_arm_condition_code (predicate);
22617 mask = (1 << n) - 1;
22618 if (arm_current_cc == code)
22619 arm_condexec_mask |= (mask << arm_condexec_masklen);
22620 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
22621 break;
22623 arm_condexec_count++;
22624 arm_condexec_masklen += n;
22626 /* A jump must be the last instruction in a conditional block. */
22627 if (JUMP_P (insn))
22628 break;
22630 /* Restore recog_data (getting the attributes of other insns can
22631 destroy this array, but final.c assumes that it remains intact
22632 across this call). */
22633 extract_constrain_insn_cached (first_insn);
22636 void
22637 arm_final_prescan_insn (rtx_insn *insn)
22639 /* BODY will hold the body of INSN. */
22640 rtx body = PATTERN (insn);
22642 /* This will be 1 if trying to repeat the trick, and things need to be
22643 reversed if it appears to fail. */
22644 int reverse = 0;
22646 /* If we start with a return insn, we only succeed if we find another one. */
22647 int seeking_return = 0;
22648 enum rtx_code return_code = UNKNOWN;
22650 /* START_INSN will hold the insn from where we start looking. This is the
22651 first insn after the following code_label if REVERSE is true. */
22652 rtx_insn *start_insn = insn;
22654 /* If in state 4, check if the target branch is reached, in order to
22655 change back to state 0. */
22656 if (arm_ccfsm_state == 4)
22658 if (insn == arm_target_insn)
22660 arm_target_insn = NULL;
22661 arm_ccfsm_state = 0;
22663 return;
22666 /* If in state 3, it is possible to repeat the trick, if this insn is an
22667 unconditional branch to a label, and immediately following this branch
22668 is the previous target label which is only used once, and the label this
22669 branch jumps to is not too far off. */
22670 if (arm_ccfsm_state == 3)
22672 if (simplejump_p (insn))
22674 start_insn = next_nonnote_insn (start_insn);
22675 if (BARRIER_P (start_insn))
22677 /* XXX Isn't this always a barrier? */
22678 start_insn = next_nonnote_insn (start_insn);
22680 if (LABEL_P (start_insn)
22681 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22682 && LABEL_NUSES (start_insn) == 1)
22683 reverse = TRUE;
22684 else
22685 return;
22687 else if (ANY_RETURN_P (body))
22689 start_insn = next_nonnote_insn (start_insn);
22690 if (BARRIER_P (start_insn))
22691 start_insn = next_nonnote_insn (start_insn);
22692 if (LABEL_P (start_insn)
22693 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
22694 && LABEL_NUSES (start_insn) == 1)
22696 reverse = TRUE;
22697 seeking_return = 1;
22698 return_code = GET_CODE (body);
22700 else
22701 return;
22703 else
22704 return;
22707 gcc_assert (!arm_ccfsm_state || reverse);
22708 if (!JUMP_P (insn))
22709 return;
22711   /* This jump might be paralleled with a clobber of the condition codes;
22712      the jump should always come first.  */
22713 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
22714 body = XVECEXP (body, 0, 0);
22716 if (reverse
22717 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
22718 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
22720 int insns_skipped;
22721 int fail = FALSE, succeed = FALSE;
22722 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
22723 int then_not_else = TRUE;
22724 rtx_insn *this_insn = start_insn;
22725 rtx label = 0;
22727 /* Register the insn jumped to. */
22728 if (reverse)
22730 if (!seeking_return)
22731 label = XEXP (SET_SRC (body), 0);
22733 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
22734 label = XEXP (XEXP (SET_SRC (body), 1), 0);
22735 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
22737 label = XEXP (XEXP (SET_SRC (body), 2), 0);
22738 then_not_else = FALSE;
22740 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
22742 seeking_return = 1;
22743 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
22745 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
22747 seeking_return = 1;
22748 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
22749 then_not_else = FALSE;
22751 else
22752 gcc_unreachable ();
22754 /* See how many insns this branch skips, and what kind of insns. If all
22755 insns are okay, and the label or unconditional branch to the same
22756 label is not too far away, succeed. */
22757 for (insns_skipped = 0;
22758 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
22760 rtx scanbody;
22762 this_insn = next_nonnote_insn (this_insn);
22763 if (!this_insn)
22764 break;
22766 switch (GET_CODE (this_insn))
22768 case CODE_LABEL:
22769 /* Succeed if it is the target label, otherwise fail since
22770 control falls in from somewhere else. */
22771 if (this_insn == label)
22773 arm_ccfsm_state = 1;
22774 succeed = TRUE;
22776 else
22777 fail = TRUE;
22778 break;
22780 case BARRIER:
22781 /* Succeed if the following insn is the target label.
22782 Otherwise fail.
22783 If return insns are used then the last insn in a function
22784 will be a barrier. */
22785 this_insn = next_nonnote_insn (this_insn);
22786 if (this_insn && this_insn == label)
22788 arm_ccfsm_state = 1;
22789 succeed = TRUE;
22791 else
22792 fail = TRUE;
22793 break;
22795 case CALL_INSN:
22796 /* The AAPCS says that conditional calls should not be
22797 used since they make interworking inefficient (the
22798 linker can't transform BL<cond> into BLX). That's
22799 only a problem if the machine has BLX. */
22800 if (arm_arch5)
22802 fail = TRUE;
22803 break;
22806 /* Succeed if the following insn is the target label, or
22807 if the following two insns are a barrier and the
22808 target label. */
22809 this_insn = next_nonnote_insn (this_insn);
22810 if (this_insn && BARRIER_P (this_insn))
22811 this_insn = next_nonnote_insn (this_insn);
22813 if (this_insn && this_insn == label
22814 && insns_skipped < max_insns_skipped)
22816 arm_ccfsm_state = 1;
22817 succeed = TRUE;
22819 else
22820 fail = TRUE;
22821 break;
22823 case JUMP_INSN:
22824 /* If this is an unconditional branch to the same label, succeed.
22825 If it is to another label, do nothing. If it is conditional,
22826 fail. */
22827 /* XXX Probably, the tests for SET and the PC are
22828 unnecessary. */
22830 scanbody = PATTERN (this_insn);
22831 if (GET_CODE (scanbody) == SET
22832 && GET_CODE (SET_DEST (scanbody)) == PC)
22834 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
22835 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
22837 arm_ccfsm_state = 2;
22838 succeed = TRUE;
22840 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
22841 fail = TRUE;
22843 /* Fail if a conditional return is undesirable (e.g. on a
22844 StrongARM), but still allow this if optimizing for size. */
22845 else if (GET_CODE (scanbody) == return_code
22846 && !use_return_insn (TRUE, NULL)
22847 && !optimize_size)
22848 fail = TRUE;
22849 else if (GET_CODE (scanbody) == return_code)
22851 arm_ccfsm_state = 2;
22852 succeed = TRUE;
22854 else if (GET_CODE (scanbody) == PARALLEL)
22856 switch (get_attr_conds (this_insn))
22858 case CONDS_NOCOND:
22859 break;
22860 default:
22861 fail = TRUE;
22862 break;
22865 else
22866 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
22868 break;
22870 case INSN:
22871 /* Instructions using or affecting the condition codes make it
22872 fail. */
22873 scanbody = PATTERN (this_insn);
22874 if (!(GET_CODE (scanbody) == SET
22875 || GET_CODE (scanbody) == PARALLEL)
22876 || get_attr_conds (this_insn) != CONDS_NOCOND)
22877 fail = TRUE;
22878 break;
22880 default:
22881 break;
22884 if (succeed)
22886 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
22887 arm_target_label = CODE_LABEL_NUMBER (label);
22888 else
22890 gcc_assert (seeking_return || arm_ccfsm_state == 2);
22892 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
22894 this_insn = next_nonnote_insn (this_insn);
22895 gcc_assert (!this_insn
22896 || (!BARRIER_P (this_insn)
22897 && !LABEL_P (this_insn)));
22899 if (!this_insn)
22901           /* Oh dear!  We ran off the end...  Give up.  */
22902 extract_constrain_insn_cached (insn);
22903 arm_ccfsm_state = 0;
22904 arm_target_insn = NULL;
22905 return;
22907 arm_target_insn = this_insn;
22910 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
22911 what it was. */
22912 if (!reverse)
22913 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
22915 if (reverse || then_not_else)
22916 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
22919 /* Restore recog_data (getting the attributes of other insns can
22920 destroy this array, but final.c assumes that it remains intact
22921      across this call).  */
22922 extract_constrain_insn_cached (insn);
22926 /* Output IT instructions. */
22927 void
22928 thumb2_asm_output_opcode (FILE * stream)
22930 char buff[5];
22931 int n;
22933 if (arm_condexec_mask)
22935 for (n = 0; n < arm_condexec_masklen; n++)
22936 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
22937 buff[n] = 0;
22938 asm_fprintf(stream, "i%s\t%s\n\t", buff,
22939 arm_condition_codes[arm_current_cc]);
22940 arm_condexec_mask = 0;
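/* For example, for a block of three conditional insns where the first two
   use the base condition (say EQ) and the third uses the inverse, the mask
   bits are 0b011 and the code above emits "itte eq" before the first insn;
   the insns themselves are then printed with eq, eq and ne suffixes.  */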
22944 /* Returns true if REGNO is a valid register
22945 for holding a quantity of type MODE. */
22947 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
22949 if (GET_MODE_CLASS (mode) == MODE_CC)
22950 return (regno == CC_REGNUM
22951 || (TARGET_HARD_FLOAT && TARGET_VFP
22952 && regno == VFPCC_REGNUM));
22954 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
22955 return false;
22957 if (TARGET_THUMB1)
22958 /* For the Thumb we only allow values bigger than SImode in
22959 registers 0 - 6, so that there is always a second low
22960 register available to hold the upper part of the value.
22961      We probably ought to ensure that the register is the
22962 start of an even numbered register pair. */
22963 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
22965 if (TARGET_HARD_FLOAT && TARGET_VFP
22966 && IS_VFP_REGNUM (regno))
22968 if (mode == SFmode || mode == SImode)
22969 return VFP_REGNO_OK_FOR_SINGLE (regno);
22971 if (mode == DFmode)
22972 return VFP_REGNO_OK_FOR_DOUBLE (regno);
22974 /* VFP registers can hold HFmode values, but there is no point in
22975 putting them there unless we have hardware conversion insns. */
22976 if (mode == HFmode)
22977 return TARGET_FP16 && VFP_REGNO_OK_FOR_SINGLE (regno);
22979 if (TARGET_NEON)
22980 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
22981 || (VALID_NEON_QREG_MODE (mode)
22982 && NEON_REGNO_OK_FOR_QUAD (regno))
22983 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
22984 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
22985 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
22986 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
22987 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
22989 return FALSE;
22992 if (TARGET_REALLY_IWMMXT)
22994 if (IS_IWMMXT_GR_REGNUM (regno))
22995 return mode == SImode;
22997 if (IS_IWMMXT_REGNUM (regno))
22998 return VALID_IWMMXT_REG_MODE (mode);
23001 /* We allow almost any value to be stored in the general registers.
23002 Restrict doubleword quantities to even register pairs in ARM state
23003 so that we can use ldrd. Do not allow very large Neon structure
23004 opaque modes in general registers; they would use too many. */
23005 if (regno <= LAST_ARM_REGNUM)
23007 if (ARM_NUM_REGS (mode) > 4)
23008 return FALSE;
23010 if (TARGET_THUMB2)
23011 return TRUE;
23013 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23016 if (regno == FRAME_POINTER_REGNUM
23017 || regno == ARG_POINTER_REGNUM)
23018 /* We only allow integers in the fake hard registers. */
23019 return GET_MODE_CLASS (mode) == MODE_INT;
23021 return FALSE;
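/* For example, on a target with TARGET_LDRD set, a DImode value in ARM state
   may only start in an even-numbered core register (so that ldrd/strd can be
   used), whereas in Thumb-2 state any core register pair is accepted.  */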
23024 /* Implement MODES_TIEABLE_P. */
23026 bool
23027 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23029 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23030 return true;
23032 /* We specifically want to allow elements of "structure" modes to
23033 be tieable to the structure. This more general condition allows
23034 other rarer situations too. */
23035 if (TARGET_NEON
23036 && (VALID_NEON_DREG_MODE (mode1)
23037 || VALID_NEON_QREG_MODE (mode1)
23038 || VALID_NEON_STRUCT_MODE (mode1))
23039 && (VALID_NEON_DREG_MODE (mode2)
23040 || VALID_NEON_QREG_MODE (mode2)
23041 || VALID_NEON_STRUCT_MODE (mode2)))
23042 return true;
23044 return false;
23047 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23048 not used in arm mode. */
23050 enum reg_class
23051 arm_regno_class (int regno)
23053 if (regno == PC_REGNUM)
23054 return NO_REGS;
23056 if (TARGET_THUMB1)
23058 if (regno == STACK_POINTER_REGNUM)
23059 return STACK_REG;
23060 if (regno == CC_REGNUM)
23061 return CC_REG;
23062 if (regno < 8)
23063 return LO_REGS;
23064 return HI_REGS;
23067 if (TARGET_THUMB2 && regno < 8)
23068 return LO_REGS;
23070 if ( regno <= LAST_ARM_REGNUM
23071 || regno == FRAME_POINTER_REGNUM
23072 || regno == ARG_POINTER_REGNUM)
23073 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23075 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23076 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23078 if (IS_VFP_REGNUM (regno))
23080 if (regno <= D7_VFP_REGNUM)
23081 return VFP_D0_D7_REGS;
23082 else if (regno <= LAST_LO_VFP_REGNUM)
23083 return VFP_LO_REGS;
23084 else
23085 return VFP_HI_REGS;
23088 if (IS_IWMMXT_REGNUM (regno))
23089 return IWMMXT_REGS;
23091 if (IS_IWMMXT_GR_REGNUM (regno))
23092 return IWMMXT_GR_REGS;
23094 return NO_REGS;
23097 /* Handle a special case when computing the offset
23098 of an argument from the frame pointer. */
23100 arm_debugger_arg_offset (int value, rtx addr)
23102 rtx_insn *insn;
23104 /* We are only interested if dbxout_parms() failed to compute the offset. */
23105 if (value != 0)
23106 return 0;
23108 /* We can only cope with the case where the address is held in a register. */
23109 if (!REG_P (addr))
23110 return 0;
23112 /* If we are using the frame pointer to point at the argument, then
23113 an offset of 0 is correct. */
23114 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23115 return 0;
23117 /* If we are using the stack pointer to point at the
23118 argument, then an offset of 0 is correct. */
23119 /* ??? Check this is consistent with thumb2 frame layout. */
23120 if ((TARGET_THUMB || !frame_pointer_needed)
23121 && REGNO (addr) == SP_REGNUM)
23122 return 0;
23124 /* Oh dear. The argument is pointed to by a register rather
23125 than being held in a register, or being stored at a known
23126 offset from the frame pointer. Since GDB only understands
23127 those two kinds of argument we must translate the address
23128 held in the register into an offset from the frame pointer.
23129 We do this by searching through the insns for the function
23130 looking to see where this register gets its value. If the
23131 register is initialized from the frame pointer plus an offset
23132 then we are in luck and we can continue, otherwise we give up.
23134 This code is exercised by producing debugging information
23135 for a function with arguments like this:
23137 double func (double a, double b, int c, double d) {return d;}
23139 Without this code the stab for parameter 'd' will be set to
23140 an offset of 0 from the frame pointer, rather than 8. */
23142 /* The if() statement says:
23144 If the insn is a normal instruction
23145 and if the insn is setting the value in a register
23146 and if the register being set is the register holding the address of the argument
23147      and if the address is computed by an addition
23148 that involves adding to a register
23149 which is the frame pointer
23150 a constant integer
23152 then... */
23154 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23156 if ( NONJUMP_INSN_P (insn)
23157 && GET_CODE (PATTERN (insn)) == SET
23158 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23159 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23160 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23161 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23162 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23165 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23167 break;
23171 if (value == 0)
23173 debug_rtx (addr);
23174 warning (0, "unable to compute real location of stacked parameter");
23175 value = 8; /* XXX magic hack */
23178 return value;
23181 /* Implement TARGET_INVALID_PARAMETER_TYPE. */
23183 static const char *
23184 arm_invalid_parameter_type (const_tree t)
23186 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23187 return N_("function parameters cannot have __fp16 type");
23188 return NULL;
23191 /* Implement TARGET_INVALID_RETURN_TYPE.  */
23193 static const char *
23194 arm_invalid_return_type (const_tree t)
23196 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23197 return N_("functions cannot return __fp16 type");
23198 return NULL;
23201 /* Implement TARGET_PROMOTED_TYPE. */
23203 static tree
23204 arm_promoted_type (const_tree t)
23206 if (SCALAR_FLOAT_TYPE_P (t) && TYPE_PRECISION (t) == 16)
23207 return float_type_node;
23208 return NULL_TREE;
23211 /* Implement TARGET_CONVERT_TO_TYPE.
23212 Specifically, this hook implements the peculiarity of the ARM
23213    half-precision floating-point C semantics that requires conversions
23214    between __fp16 and double to go through an intermediate conversion to float.  */
23216 static tree
23217 arm_convert_to_type (tree type, tree expr)
23219 tree fromtype = TREE_TYPE (expr);
23220 if (!SCALAR_FLOAT_TYPE_P (fromtype) || !SCALAR_FLOAT_TYPE_P (type))
23221 return NULL_TREE;
23222 if ((TYPE_PRECISION (fromtype) == 16 && TYPE_PRECISION (type) > 32)
23223 || (TYPE_PRECISION (type) == 16 && TYPE_PRECISION (fromtype) > 32))
23224 return convert (type, convert (float_type_node, expr));
23225 return NULL_TREE;
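/* A sketch of the effect at the source level (illustrative only):

	__fp16 h;
	double d = h;	   is converted as   d = (double) (float) h;
	h = d;		   is converted as   h = (__fp16) (float) d;

   Conversions between __fp16 and float themselves are left to the normal
   optabs machinery.  */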
23228 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23229 This simply adds HFmode as a supported mode; even though we don't
23230 implement arithmetic on this type directly, it's supported by
23231 optabs conversions, much the way the double-word arithmetic is
23232 special-cased in the default hook. */
23234 static bool
23235 arm_scalar_mode_supported_p (machine_mode mode)
23237 if (mode == HFmode)
23238 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23239 else if (ALL_FIXED_POINT_MODE_P (mode))
23240 return true;
23241 else
23242 return default_scalar_mode_supported_p (mode);
23245 /* Emit code to reinterpret one Neon type as another, without altering bits. */
23246 void
23247 neon_reinterpret (rtx dest, rtx src)
23249 emit_move_insn (dest, gen_lowpart (GET_MODE (dest), src));
23252 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23253 not to early-clobber SRC registers in the process.
23255 We assume that the operands described by SRC and DEST represent a
23256 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23257 number of components into which the copy has been decomposed. */
23258 void
23259 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23261 unsigned int i;
23263 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23264 || REGNO (operands[0]) < REGNO (operands[1]))
23266 for (i = 0; i < count; i++)
23268 operands[2 * i] = dest[i];
23269 operands[2 * i + 1] = src[i];
23272 else
23274 for (i = 0; i < count; i++)
23276 operands[2 * i] = dest[count - i - 1];
23277 operands[2 * i + 1] = src[count - i - 1];
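/* For example, when a two-register copy of q0/q1 into q1/q2 is decomposed,
   the destinations overlap the sources and REGNO (dest) > REGNO (src), so
   the pieces are emitted in reverse order (q2 <- q1 first, then q1 <- q0)
   to avoid clobbering q1 before it has been read.  */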
23282 /* Split operands into moves from op[1] + op[2] into op[0]. */
23284 void
23285 neon_split_vcombine (rtx operands[3])
23287 unsigned int dest = REGNO (operands[0]);
23288 unsigned int src1 = REGNO (operands[1]);
23289 unsigned int src2 = REGNO (operands[2]);
23290 machine_mode halfmode = GET_MODE (operands[1]);
23291 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
23292 rtx destlo, desthi;
23294 if (src1 == dest && src2 == dest + halfregs)
23296 /* No-op move. Can't split to nothing; emit something. */
23297 emit_note (NOTE_INSN_DELETED);
23298 return;
23301 /* Preserve register attributes for variable tracking. */
23302 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23303 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23304 GET_MODE_SIZE (halfmode));
23306 /* Special case of reversed high/low parts. Use VSWP. */
23307 if (src2 == dest && src1 == dest + halfregs)
23309 rtx x = gen_rtx_SET (destlo, operands[1]);
23310 rtx y = gen_rtx_SET (desthi, operands[2]);
23311 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23312 return;
23315 if (!reg_overlap_mentioned_p (operands[2], destlo))
23317 /* Try to avoid unnecessary moves if part of the result
23318 is in the right place already. */
23319 if (src1 != dest)
23320 emit_move_insn (destlo, operands[1]);
23321 if (src2 != dest + halfregs)
23322 emit_move_insn (desthi, operands[2]);
23324 else
23326 if (src2 != dest + halfregs)
23327 emit_move_insn (desthi, operands[2]);
23328 if (src1 != dest)
23329 emit_move_insn (destlo, operands[1]);
23333 /* Return the number (counting from 0) of
23334 the least significant set bit in MASK. */
23336 inline static int
23337 number_of_first_bit_set (unsigned mask)
23339 return ctz_hwi (mask);
23342 /* Like emit_multi_reg_push, but allowing for a different set of
23343 registers to be described as saved. MASK is the set of registers
23344 to be saved; REAL_REGS is the set of registers to be described as
23345 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23347 static rtx_insn *
23348 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23350 unsigned long regno;
23351 rtx par[10], tmp, reg;
23352 rtx_insn *insn;
23353 int i, j;
23355 /* Build the parallel of the registers actually being stored. */
23356 for (i = 0; mask; ++i, mask &= mask - 1)
23358 regno = ctz_hwi (mask);
23359 reg = gen_rtx_REG (SImode, regno);
23361 if (i == 0)
23362 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23363 else
23364 tmp = gen_rtx_USE (VOIDmode, reg);
23366 par[i] = tmp;
23369 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23370 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23371 tmp = gen_frame_mem (BLKmode, tmp);
23372 tmp = gen_rtx_SET (tmp, par[0]);
23373 par[0] = tmp;
23375 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23376 insn = emit_insn (tmp);
23378 /* Always build the stack adjustment note for unwind info. */
23379 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23380 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
23381 par[0] = tmp;
23383 /* Build the parallel of the registers recorded as saved for unwind. */
23384 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23386 regno = ctz_hwi (real_regs);
23387 reg = gen_rtx_REG (SImode, regno);
23389 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23390 tmp = gen_frame_mem (SImode, tmp);
23391 tmp = gen_rtx_SET (tmp, reg);
23392 RTX_FRAME_RELATED_P (tmp) = 1;
23393 par[j + 1] = tmp;
23396 if (j == 0)
23397 tmp = par[0];
23398 else
23400 RTX_FRAME_RELATED_P (par[0]) = 1;
23401 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23404 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23406 return insn;
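/* For example, pushing r4, r5 and lr (with REAL_REGS equal to MASK) emits a
   single "push {r4, r5, lr}" described by a PARALLEL whose first element is
   the UNSPEC_PUSH_MULT store, while the REG_FRAME_RELATED_EXPR note added
   above records the equivalent sp adjustment of -12 and the three
   individual stores for the unwinder.  */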
23409 /* Emit code to pop registers from the stack.  F is the
23410    assembly file.  MASK is the registers to pop.  */
23411 static void
23412 thumb_pop (FILE *f, unsigned long mask)
23414 int regno;
23415 int lo_mask = mask & 0xFF;
23416 int pushed_words = 0;
23418 gcc_assert (mask);
23420 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23422       /* Special case.  Do not generate a POP PC statement here; do it in
23423          thumb_exit ().  */
23424 thumb_exit (f, -1);
23425 return;
23428 fprintf (f, "\tpop\t{");
23430 /* Look at the low registers first. */
23431 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23433 if (lo_mask & 1)
23435 asm_fprintf (f, "%r", regno);
23437 if ((lo_mask & ~1) != 0)
23438 fprintf (f, ", ");
23440 pushed_words++;
23444 if (mask & (1 << PC_REGNUM))
23446 /* Catch popping the PC. */
23447 if (TARGET_INTERWORK || TARGET_BACKTRACE
23448 || crtl->calls_eh_return)
23450           /* The PC is never popped directly; instead
23451              it is popped into r3 and then BX is used.  */
23452 fprintf (f, "}\n");
23454 thumb_exit (f, -1);
23456 return;
23458 else
23460 if (mask & 0xFF)
23461 fprintf (f, ", ");
23463 asm_fprintf (f, "%r", PC_REGNUM);
23467 fprintf (f, "}\n");
23470 /* Generate code to return from a thumb function.
23471 If 'reg_containing_return_addr' is -1, then the return address is
23472 actually on the stack, at the stack pointer. */
23473 static void
23474 thumb_exit (FILE *f, int reg_containing_return_addr)
23476 unsigned regs_available_for_popping;
23477 unsigned regs_to_pop;
23478 int pops_needed;
23479 unsigned available;
23480 unsigned required;
23481 machine_mode mode;
23482 int size;
23483 int restore_a4 = FALSE;
23485 /* Compute the registers we need to pop. */
23486 regs_to_pop = 0;
23487 pops_needed = 0;
23489 if (reg_containing_return_addr == -1)
23491 regs_to_pop |= 1 << LR_REGNUM;
23492 ++pops_needed;
23495 if (TARGET_BACKTRACE)
23497 /* Restore the (ARM) frame pointer and stack pointer. */
23498 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23499 pops_needed += 2;
23502 /* If there is nothing to pop then just emit the BX instruction and
23503 return. */
23504 if (pops_needed == 0)
23506 if (crtl->calls_eh_return)
23507 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23509 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23510 return;
23512 /* Otherwise if we are not supporting interworking and we have not created
23513 a backtrace structure and the function was not entered in ARM mode then
23514 just pop the return address straight into the PC. */
23515 else if (!TARGET_INTERWORK
23516 && !TARGET_BACKTRACE
23517 && !is_called_in_ARM_mode (current_function_decl)
23518 && !crtl->calls_eh_return)
23520 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23521 return;
23524 /* Find out how many of the (return) argument registers we can corrupt. */
23525 regs_available_for_popping = 0;
23527 /* If returning via __builtin_eh_return, the bottom three registers
23528 all contain information needed for the return. */
23529 if (crtl->calls_eh_return)
23530 size = 12;
23531 else
23533 /* We can deduce the registers used from the function's
23534 return value. This is more reliable than examining
23535 df_regs_ever_live_p () because that will be set if the register is
23536 ever used in the function, not just if the register is used
23537 to hold a return value. */
23539 if (crtl->return_rtx != 0)
23540 mode = GET_MODE (crtl->return_rtx);
23541 else
23542 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23544 size = GET_MODE_SIZE (mode);
23546 if (size == 0)
23548 /* In a void function we can use any argument register.
23549 In a function that returns a structure on the stack
23550 we can use the second and third argument registers. */
23551 if (mode == VOIDmode)
23552 regs_available_for_popping =
23553 (1 << ARG_REGISTER (1))
23554 | (1 << ARG_REGISTER (2))
23555 | (1 << ARG_REGISTER (3));
23556 else
23557 regs_available_for_popping =
23558 (1 << ARG_REGISTER (2))
23559 | (1 << ARG_REGISTER (3));
23561 else if (size <= 4)
23562 regs_available_for_popping =
23563 (1 << ARG_REGISTER (2))
23564 | (1 << ARG_REGISTER (3));
23565 else if (size <= 8)
23566 regs_available_for_popping =
23567 (1 << ARG_REGISTER (3));
23570 /* Match registers to be popped with registers into which we pop them. */
23571 for (available = regs_available_for_popping,
23572 required = regs_to_pop;
23573 required != 0 && available != 0;
23574 available &= ~(available & - available),
23575 required &= ~(required & - required))
23576 -- pops_needed;
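/* Example of the matching loop above (illustrative values): with regs_to_pop
   == (1 << LR_REGNUM) and regs_available_for_popping == (r2|r3), a single
   iteration pairs LR with r2 and drops pops_needed to zero; the leftover r3
   is then removed from regs_available_for_popping just below.  Each
   "x &= ~(x & -x)" step clears the lowest set bit of its mask.  */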
23578 /* If we have any popping registers left over, remove them. */
23579 if (available > 0)
23580 regs_available_for_popping &= ~available;
23582 /* Otherwise if we need another popping register we can use
23583 the fourth argument register. */
23584 else if (pops_needed)
23586 /* If we have not found any free argument registers and
23587 reg a4 contains the return address, we must move it. */
23588 if (regs_available_for_popping == 0
23589 && reg_containing_return_addr == LAST_ARG_REGNUM)
23591 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23592 reg_containing_return_addr = LR_REGNUM;
23594 else if (size > 12)
23596 /* Register a4 is being used to hold part of the return value,
23597 but we have dire need of a free, low register. */
23598 restore_a4 = TRUE;
23600 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
23603 if (reg_containing_return_addr != LAST_ARG_REGNUM)
23605 /* The fourth argument register is available. */
23606 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
23608 --pops_needed;
23612 /* Pop as many registers as we can. */
23613 thumb_pop (f, regs_available_for_popping);
23615 /* Process the registers we popped. */
23616 if (reg_containing_return_addr == -1)
23618 /* The return address was popped into the lowest numbered register. */
23619 regs_to_pop &= ~(1 << LR_REGNUM);
23621 reg_containing_return_addr =
23622 number_of_first_bit_set (regs_available_for_popping);
23624 /* Remove this register from the mask of available registers, so that
23625 the return address will not be corrupted by further pops. */
23626 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
23629 /* If we popped other registers then handle them here. */
23630 if (regs_available_for_popping)
23632 int frame_pointer;
23634 /* Work out which register currently contains the frame pointer. */
23635 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
23637 /* Move it into the correct place. */
23638 asm_fprintf (f, "\tmov\t%r, %r\n",
23639 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
23641 /* (Temporarily) remove it from the mask of popped registers. */
23642 regs_available_for_popping &= ~(1 << frame_pointer);
23643 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
23645 if (regs_available_for_popping)
23647 int stack_pointer;
23649 /* We popped the stack pointer as well,
23650 find the register that contains it. */
23651 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
23653 /* Move it into the stack register. */
23654 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
23656 /* At this point we have popped all necessary registers, so
23657 do not worry about restoring regs_available_for_popping
23658 to its correct value:
23660 assert (pops_needed == 0)
23661 assert (regs_available_for_popping == (1 << frame_pointer))
23662 assert (regs_to_pop == (1 << STACK_POINTER)) */
23664 else
23666 /* Since we have just moved the popped value into the frame
23667 pointer, the popping register is available for reuse, and
23668 we know that we still have the stack pointer left to pop. */
23669 regs_available_for_popping |= (1 << frame_pointer);
23673 /* If we still have registers left on the stack, but we no longer have
23674 any registers into which we can pop them, then we must move the return
23675 address into the link register and make available the register that
23676 contained it. */
23677 if (regs_available_for_popping == 0 && pops_needed > 0)
23679 regs_available_for_popping |= 1 << reg_containing_return_addr;
23681 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
23682 reg_containing_return_addr);
23684 reg_containing_return_addr = LR_REGNUM;
23687 /* If we have registers left on the stack then pop some more.
23688 We know that at most we will want to pop FP and SP. */
23689 if (pops_needed > 0)
23691 int popped_into;
23692 int move_to;
23694 thumb_pop (f, regs_available_for_popping);
23696 /* We have popped either FP or SP.
23697 Move whichever one it is into the correct register. */
23698 popped_into = number_of_first_bit_set (regs_available_for_popping);
23699 move_to = number_of_first_bit_set (regs_to_pop);
23701 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
23703 regs_to_pop &= ~(1 << move_to);
23705 --pops_needed;
23708 /* If we still have not popped everything then we must have only
23709 had one register available to us and we are now popping the SP. */
23710 if (pops_needed > 0)
23712 int popped_into;
23714 thumb_pop (f, regs_available_for_popping);
23716 popped_into = number_of_first_bit_set (regs_available_for_popping);
23718 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
23719 /*
23720 assert (regs_to_pop == (1 << STACK_POINTER))
23721 assert (pops_needed == 1)
23722 */
23725 /* If necessary restore the a4 register. */
23726 if (restore_a4)
23728 if (reg_containing_return_addr != LR_REGNUM)
23730 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
23731 reg_containing_return_addr = LR_REGNUM;
23734 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
23737 if (crtl->calls_eh_return)
23738 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23740 /* Return to caller. */
23741 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23744 /* Scan INSN just before assembler is output for it.
23745 For Thumb-1, we track the status of the condition codes; this
23746 information is used in the cbranchsi4_insn pattern. */
23747 void
23748 thumb1_final_prescan_insn (rtx_insn *insn)
23750 if (flag_print_asm_name)
23751 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
23752 INSN_ADDRESSES (INSN_UID (insn)));
23753 /* Don't overwrite the previous setter when we get to a cbranch. */
23754 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
23756 enum attr_conds conds;
23758 if (cfun->machine->thumb1_cc_insn)
23760 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
23761 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
23762 CC_STATUS_INIT;
23764 conds = get_attr_conds (insn);
23765 if (conds == CONDS_SET)
23767 rtx set = single_set (insn);
23768 cfun->machine->thumb1_cc_insn = insn;
23769 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
23770 cfun->machine->thumb1_cc_op1 = const0_rtx;
23771 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
23772 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
23774 rtx src1 = XEXP (SET_SRC (set), 1);
23775 if (src1 == const0_rtx)
23776 cfun->machine->thumb1_cc_mode = CCmode;
23778 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
23780 /* Record the src register operand instead of dest because
23781 cprop_hardreg pass propagates src. */
23782 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
23785 else if (conds != CONDS_NOCOND)
23786 cfun->machine->thumb1_cc_insn = NULL_RTX;
23789 /* Check if an unexpected far jump is used. */
23790 if (cfun->machine->lr_save_eliminated
23791 && get_attr_far_jump (insn) == FAR_JUMP_YES)
23792 internal_error("Unexpected thumb1 far jump");
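/* Return 1 if VAL, truncated to 32 bits, is an 8-bit value shifted left by
   0..24 bits, i.e. a constant that fits in a single 8-bit window and so can
   be built from an 8-bit immediate plus a shift.  For example (illustrative
   values) 0x00ff0000 (0xff << 16) qualifies, while 0x00000101 does not,
   because its set bits span more than 8 contiguous bits.  */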
23795 int
23796 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
23798 unsigned HOST_WIDE_INT mask = 0xff;
23799 int i;
23801 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
23802 if (val == 0) /* XXX */
23803 return 0;
23805 for (i = 0; i < 25; i++)
23806 if ((val & (mask << i)) == val)
23807 return 1;
23809 return 0;
23812 /* Returns nonzero if the current function contains,
23813 or might contain, a far jump. */
23814 static int
23815 thumb_far_jump_used_p (void)
23817 rtx_insn *insn;
23818 bool far_jump = false;
23819 unsigned int func_size = 0;
23821 /* This test is only important for leaf functions. */
23822 /* assert (!leaf_function_p ()); */
23824 /* If we have already decided that far jumps may be used,
23825 do not bother checking again, and always return true even if
23826 it turns out that they are not being used. Once we have made
23827 the decision that far jumps are present (and that hence the link
23828 register will be pushed onto the stack) we cannot go back on it. */
23829 if (cfun->machine->far_jump_used)
23830 return 1;
23832 /* If this function is not being called from the prologue/epilogue
23833 generation code then it must be being called from the
23834 INITIAL_ELIMINATION_OFFSET macro. */
23835 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
23837 /* In this case we know that we are being asked about the elimination
23838 of the arg pointer register. If that register is not being used,
23839 then there are no arguments on the stack, and we do not have to
23840 worry that a far jump might force the prologue to push the link
23841 register, changing the stack offsets. In this case we can just
23842 return false, since the presence of far jumps in the function will
23843 not affect stack offsets.
23845 If the arg pointer is live (or if it was live, but has now been
23846 eliminated and so set to dead) then we do have to test to see if
23847 the function might contain a far jump. This test can lead to some
23848 false negatives, since before reload is completed, the length of
23849 branch instructions is not known, so gcc defaults to returning their
23850 longest length, which in turn sets the far jump attribute to true.
23852 A false negative will not result in bad code being generated, but it
23853 will result in a needless push and pop of the link register. We
23854 hope that this does not occur too often.
23856 If we need doubleword stack alignment this could affect the other
23857 elimination offsets so we can't risk getting it wrong. */
23858 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
23859 cfun->machine->arg_pointer_live = 1;
23860 else if (!cfun->machine->arg_pointer_live)
23861 return 0;
23864 /* We should not change far_jump_used during or after reload, as there is
23865 no chance to change stack frame layout. */
23866 if (reload_in_progress || reload_completed)
23867 return 0;
23869 /* Check to see if the function contains a branch
23870 insn with the far jump attribute set. */
23871 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23873 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
23875 far_jump = true;
23877 func_size += get_attr_length (insn);
23880 /* The far_jump attribute will always be true for thumb1 before the
23881 shorten_branches pass runs. So checking the far_jump attribute before
23882 shorten_branches isn't very useful.
23884 The following heuristic tries to estimate more accurately whether a far
23885 jump will actually be needed. The heuristic is very conservative, as
23886 there is no chance to roll back a decision not to use far jumps.
23888 The Thumb-1 long branch offset range is -2048 to 2046. In the worst case
23889 each 2-byte insn is associated with a 4-byte constant pool entry, so using
23890 a function size of 2048/3 as the threshold is conservative enough. */
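/* Worked example of the threshold (illustrative values): with func_size ==
   700, 700 * 3 == 2100 >= 2048, so in the worst case the insns plus their
   literal pools could exceed the -2048..2046 branch range and far jumps are
   assumed; with func_size == 600, 600 * 3 == 1800 < 2048 and short branches
   are assumed to suffice.  */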
23891 if (far_jump)
23893 if ((func_size * 3) >= 2048)
23895 /* Record the fact that we have decided that
23896 the function does use far jumps. */
23897 cfun->machine->far_jump_used = 1;
23898 return 1;
23902 return 0;
23905 /* Return nonzero if FUNC must be entered in ARM mode. */
23906 static bool
23907 is_called_in_ARM_mode (tree func)
23909 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
23911 /* Ignore the problem about functions whose address is taken. */
23912 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
23913 return true;
23915 #ifdef ARM_PE
23916 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
23917 #else
23918 return false;
23919 #endif
23922 /* Given the stack offsets and register mask in OFFSETS, decide how
23923 many additional registers to push instead of subtracting a constant
23924 from SP. For epilogues the principle is the same except we use pop.
23925 FOR_PROLOGUE indicates which we're generating. */
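/* For example (illustrative values): in a prologue with amount == 516 and
   r0-r3 free, this returns 2; pushing two extra registers shrinks the
   remaining adjustment to 508 bytes, which fits a single Thumb-1 "sub sp"
   immediate and so saves a separate constant load.  */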
23926 static int
23927 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
23929 HOST_WIDE_INT amount;
23930 unsigned long live_regs_mask = offsets->saved_regs_mask;
23931 /* Extract a mask of the ones we can give to the Thumb's push/pop
23932 instruction. */
23933 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
23934 /* Then count how many other high registers will need to be pushed. */
23935 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
23936 int n_free, reg_base, size;
23938 if (!for_prologue && frame_pointer_needed)
23939 amount = offsets->locals_base - offsets->saved_regs;
23940 else
23941 amount = offsets->outgoing_args - offsets->saved_regs;
23943 /* If the stack frame size is 512 exactly, we can save one load
23944 instruction, which should make this a win even when optimizing
23945 for speed. */
23946 if (!optimize_size && amount != 512)
23947 return 0;
23949 /* Can't do this if there are high registers to push. */
23950 if (high_regs_pushed != 0)
23951 return 0;
23953 /* Shouldn't do it in the prologue if no registers would normally
23954 be pushed at all. In the epilogue, also allow it if we'll have
23955 a pop insn for the PC. */
23956 if (l_mask == 0
23957 && (for_prologue
23958 || TARGET_BACKTRACE
23959 || (live_regs_mask & 1 << LR_REGNUM) == 0
23960 || TARGET_INTERWORK
23961 || crtl->args.pretend_args_size != 0))
23962 return 0;
23964 /* Don't do this if thumb_expand_prologue wants to emit instructions
23965 between the push and the stack frame allocation. */
23966 if (for_prologue
23967 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
23968 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
23969 return 0;
23971 reg_base = 0;
23972 n_free = 0;
23973 if (!for_prologue)
23975 size = arm_size_return_regs ();
23976 reg_base = ARM_NUM_INTS (size);
23977 live_regs_mask >>= reg_base;
23980 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
23981 && (for_prologue || call_used_regs[reg_base + n_free]))
23983 live_regs_mask >>= 1;
23984 n_free++;
23987 if (n_free == 0)
23988 return 0;
23989 gcc_assert (amount / 4 * 4 == amount);
23991 if (amount >= 512 && (amount - n_free * 4) < 512)
23992 return (amount - 508) / 4;
23993 if (amount <= n_free * 4)
23994 return amount / 4;
23995 return 0;
23998 /* The bits which aren't usefully expanded as rtl. */
23999 const char *
24000 thumb1_unexpanded_epilogue (void)
24002 arm_stack_offsets *offsets;
24003 int regno;
24004 unsigned long live_regs_mask = 0;
24005 int high_regs_pushed = 0;
24006 int extra_pop;
24007 int had_to_push_lr;
24008 int size;
24010 if (cfun->machine->return_used_this_function != 0)
24011 return "";
24013 if (IS_NAKED (arm_current_func_type ()))
24014 return "";
24016 offsets = arm_get_frame_offsets ();
24017 live_regs_mask = offsets->saved_regs_mask;
24018 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24020 /* We can deduce the registers used from the function's return value.
24021 This is more reliable than examining df_regs_ever_live_p () because that
24022 will be set if the register is ever used in the function, not just if
24023 the register is used to hold a return value. */
24024 size = arm_size_return_regs ();
24026 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24027 if (extra_pop > 0)
24029 unsigned long extra_mask = (1 << extra_pop) - 1;
24030 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24033 /* The prologue may have pushed some high registers to use as
24034 work registers. E.g. the testsuite file
24035 gcc/testsuite/gcc.c-torture/execute/complex-2.c
24036 compiles to produce:
24037 push {r4, r5, r6, r7, lr}
24038 mov r7, r9
24039 mov r6, r8
24040 push {r6, r7}
24041 as part of the prologue. We have to undo that pushing here. */
24043 if (high_regs_pushed)
24045 unsigned long mask = live_regs_mask & 0xff;
24046 int next_hi_reg;
24048 /* The available low registers depend on the size of the value we are
24049 returning. */
24050 if (size <= 12)
24051 mask |= 1 << 3;
24052 if (size <= 8)
24053 mask |= 1 << 2;
24055 if (mask == 0)
24056 /* Oh dear! We have no low registers into which we can pop
24057 high registers! */
24058 internal_error
24059 ("no low registers available for popping high registers");
24061 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24062 if (live_regs_mask & (1 << next_hi_reg))
24063 break;
24065 while (high_regs_pushed)
24067 /* Find lo register(s) into which the high register(s) can
24068 be popped. */
24069 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24071 if (mask & (1 << regno))
24072 high_regs_pushed--;
24073 if (high_regs_pushed == 0)
24074 break;
24077 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24079 /* Pop the values into the low register(s). */
24080 thumb_pop (asm_out_file, mask);
24082 /* Move the value(s) into the high registers. */
24083 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24085 if (mask & (1 << regno))
24087 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24088 regno);
24090 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24091 if (live_regs_mask & (1 << next_hi_reg))
24092 break;
24096 live_regs_mask &= ~0x0f00;
24099 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24100 live_regs_mask &= 0xff;
24102 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24104 /* Pop the return address into the PC. */
24105 if (had_to_push_lr)
24106 live_regs_mask |= 1 << PC_REGNUM;
24108 /* Either no argument registers were pushed or a backtrace
24109 structure was created which includes an adjusted stack
24110 pointer, so just pop everything. */
24111 if (live_regs_mask)
24112 thumb_pop (asm_out_file, live_regs_mask);
24114 /* We have either just popped the return address into the
24115 PC or it was kept in LR for the entire function.
24116 Note that thumb_pop has already called thumb_exit if the
24117 PC was in the list. */
24118 if (!had_to_push_lr)
24119 thumb_exit (asm_out_file, LR_REGNUM);
24121 else
24123 /* Pop everything but the return address. */
24124 if (live_regs_mask)
24125 thumb_pop (asm_out_file, live_regs_mask);
24127 if (had_to_push_lr)
24129 if (size > 12)
24131 /* We have no free low regs, so save one. */
24132 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24133 LAST_ARG_REGNUM);
24136 /* Get the return address into a temporary register. */
24137 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24139 if (size > 12)
24141 /* Move the return address to lr. */
24142 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24143 LAST_ARG_REGNUM);
24144 /* Restore the low register. */
24145 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24146 IP_REGNUM);
24147 regno = LR_REGNUM;
24149 else
24150 regno = LAST_ARG_REGNUM;
24152 else
24153 regno = LR_REGNUM;
24155 /* Remove the argument registers that were pushed onto the stack. */
24156 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24157 SP_REGNUM, SP_REGNUM,
24158 crtl->args.pretend_args_size);
24160 thumb_exit (asm_out_file, regno);
24163 return "";
24166 /* Functions to save and restore machine-specific function data. */
24167 static struct machine_function *
24168 arm_init_machine_status (void)
24170 struct machine_function *machine;
24171 machine = ggc_cleared_alloc<machine_function> ();
24173 #if ARM_FT_UNKNOWN != 0
24174 machine->func_type = ARM_FT_UNKNOWN;
24175 #endif
24176 return machine;
24179 /* Return an RTX indicating where the return address to the
24180 calling function can be found. */
24181 rtx
24182 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24184 if (count != 0)
24185 return NULL_RTX;
24187 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24190 /* Do anything needed before RTL is emitted for each function. */
24191 void
24192 arm_init_expanders (void)
24194 /* Arrange to initialize and mark the machine per-function status. */
24195 init_machine_status = arm_init_machine_status;
24197 /* This is to stop the combine pass optimizing away the alignment
24198 adjustment of va_arg. */
24199 /* ??? It is claimed that this should not be necessary. */
24200 if (cfun)
24201 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24205 /* Like arm_compute_initial_elimination_offset. Simpler because there
24206 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24207 to point at the base of the local variables after static stack
24208 space for a function has been allocated. */
24210 HOST_WIDE_INT
24211 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24213 arm_stack_offsets *offsets;
24215 offsets = arm_get_frame_offsets ();
24217 switch (from)
24219 case ARG_POINTER_REGNUM:
24220 switch (to)
24222 case STACK_POINTER_REGNUM:
24223 return offsets->outgoing_args - offsets->saved_args;
24225 case FRAME_POINTER_REGNUM:
24226 return offsets->soft_frame - offsets->saved_args;
24228 case ARM_HARD_FRAME_POINTER_REGNUM:
24229 return offsets->saved_regs - offsets->saved_args;
24231 case THUMB_HARD_FRAME_POINTER_REGNUM:
24232 return offsets->locals_base - offsets->saved_args;
24234 default:
24235 gcc_unreachable ();
24237 break;
24239 case FRAME_POINTER_REGNUM:
24240 switch (to)
24242 case STACK_POINTER_REGNUM:
24243 return offsets->outgoing_args - offsets->soft_frame;
24245 case ARM_HARD_FRAME_POINTER_REGNUM:
24246 return offsets->saved_regs - offsets->soft_frame;
24248 case THUMB_HARD_FRAME_POINTER_REGNUM:
24249 return offsets->locals_base - offsets->soft_frame;
24251 default:
24252 gcc_unreachable ();
24254 break;
24256 default:
24257 gcc_unreachable ();
24261 /* Generate the function's prologue. */
24263 void
24264 thumb1_expand_prologue (void)
24266 rtx_insn *insn;
24268 HOST_WIDE_INT amount;
24269 arm_stack_offsets *offsets;
24270 unsigned long func_type;
24271 int regno;
24272 unsigned long live_regs_mask;
24273 unsigned long l_mask;
24274 unsigned high_regs_pushed = 0;
24276 func_type = arm_current_func_type ();
24278 /* Naked functions don't have prologues. */
24279 if (IS_NAKED (func_type))
24280 return;
24282 if (IS_INTERRUPT (func_type))
24284 error ("interrupt Service Routines cannot be coded in Thumb mode");
24285 return;
24288 if (is_called_in_ARM_mode (current_function_decl))
24289 emit_insn (gen_prologue_thumb1_interwork ());
24291 offsets = arm_get_frame_offsets ();
24292 live_regs_mask = offsets->saved_regs_mask;
24294 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24295 l_mask = live_regs_mask & 0x40ff;
24296 /* Then count how many other high registers will need to be pushed. */
24297 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24299 if (crtl->args.pretend_args_size)
24301 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24303 if (cfun->machine->uses_anonymous_args)
24305 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24306 unsigned long mask;
24308 mask = 1ul << (LAST_ARG_REGNUM + 1);
24309 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
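/* For example, with 8 bytes of pretend args num_pushes is 2, so the mask
   becomes (1 << 4) - (1 << 2) == 0x0c, i.e. r2 and r3, the registers holding
   the anonymous arguments, are pushed.  */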
24311 insn = thumb1_emit_multi_reg_push (mask, 0);
24313 else
24315 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24316 stack_pointer_rtx, x));
24318 RTX_FRAME_RELATED_P (insn) = 1;
24321 if (TARGET_BACKTRACE)
24323 HOST_WIDE_INT offset = 0;
24324 unsigned work_register;
24325 rtx work_reg, x, arm_hfp_rtx;
24327 /* We have been asked to create a stack backtrace structure.
24328 The code looks like this:
24330 0 .align 2
24331 0 func:
24332 0 sub SP, #16 Reserve space for 4 registers.
24333 2 push {R7} Push low registers.
24334 4 add R7, SP, #20 Get the stack pointer before the push.
24335 6 str R7, [SP, #8] Store the stack pointer
24336 (before reserving the space).
24337 8 mov R7, PC Get hold of the start of this code + 12.
24338 10 str R7, [SP, #16] Store it.
24339 12 mov R7, FP Get hold of the current frame pointer.
24340 14 str R7, [SP, #4] Store it.
24341 16 mov R7, LR Get hold of the current return address.
24342 18 str R7, [SP, #12] Store it.
24343 20 add R7, SP, #16 Point at the start of the
24344 backtrace structure.
24345 22 mov FP, R7 Put this value into the frame pointer. */
24347 work_register = thumb_find_work_register (live_regs_mask);
24348 work_reg = gen_rtx_REG (SImode, work_register);
24349 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24351 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24352 stack_pointer_rtx, GEN_INT (-16)));
24353 RTX_FRAME_RELATED_P (insn) = 1;
24355 if (l_mask)
24357 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24358 RTX_FRAME_RELATED_P (insn) = 1;
24360 offset = bit_count (l_mask) * UNITS_PER_WORD;
24363 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24364 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24366 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24367 x = gen_frame_mem (SImode, x);
24368 emit_move_insn (x, work_reg);
24370 /* Make sure that the instruction fetching the PC is in the right place
24371 to calculate "start of backtrace creation code + 12". */
24372 /* ??? The stores using the common WORK_REG ought to be enough to
24373 prevent the scheduler from doing anything weird. Failing that
24374 we could always move all of the following into an UNSPEC_VOLATILE. */
24375 if (l_mask)
24377 x = gen_rtx_REG (SImode, PC_REGNUM);
24378 emit_move_insn (work_reg, x);
24380 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24381 x = gen_frame_mem (SImode, x);
24382 emit_move_insn (x, work_reg);
24384 emit_move_insn (work_reg, arm_hfp_rtx);
24386 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24387 x = gen_frame_mem (SImode, x);
24388 emit_move_insn (x, work_reg);
24390 else
24392 emit_move_insn (work_reg, arm_hfp_rtx);
24394 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24395 x = gen_frame_mem (SImode, x);
24396 emit_move_insn (x, work_reg);
24398 x = gen_rtx_REG (SImode, PC_REGNUM);
24399 emit_move_insn (work_reg, x);
24401 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24402 x = gen_frame_mem (SImode, x);
24403 emit_move_insn (x, work_reg);
24406 x = gen_rtx_REG (SImode, LR_REGNUM);
24407 emit_move_insn (work_reg, x);
24409 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24410 x = gen_frame_mem (SImode, x);
24411 emit_move_insn (x, work_reg);
24413 x = GEN_INT (offset + 12);
24414 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24416 emit_move_insn (arm_hfp_rtx, work_reg);
24418 /* Optimization: If we are not pushing any low registers but we are going
24419 to push some high registers then delay our first push. This will just
24420 be a push of LR and we can combine it with the push of the first high
24421 register. */
24422 else if ((l_mask & 0xff) != 0
24423 || (high_regs_pushed == 0 && l_mask))
24425 unsigned long mask = l_mask;
24426 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24427 insn = thumb1_emit_multi_reg_push (mask, mask);
24428 RTX_FRAME_RELATED_P (insn) = 1;
24431 if (high_regs_pushed)
24433 unsigned pushable_regs;
24434 unsigned next_hi_reg;
24435 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24436 : crtl->args.info.nregs;
24437 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24439 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24440 if (live_regs_mask & (1 << next_hi_reg))
24441 break;
24443 /* Here we need to mask out registers used for passing arguments, even
24444 if they can be pushed. This is to avoid using them to stash the high
24445 registers; such a stash may clobber the arguments. */
24446 pushable_regs = l_mask & (~arg_regs_mask) & 0xff;
24448 if (pushable_regs == 0)
24449 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24451 while (high_regs_pushed > 0)
24453 unsigned long real_regs_mask = 0;
24455 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
24457 if (pushable_regs & (1 << regno))
24459 emit_move_insn (gen_rtx_REG (SImode, regno),
24460 gen_rtx_REG (SImode, next_hi_reg));
24462 high_regs_pushed --;
24463 real_regs_mask |= (1 << next_hi_reg);
24465 if (high_regs_pushed)
24467 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24468 next_hi_reg --)
24469 if (live_regs_mask & (1 << next_hi_reg))
24470 break;
24472 else
24474 pushable_regs &= ~((1 << regno) - 1);
24475 break;
24480 /* If we had to find a work register and we have not yet
24481 saved the LR then add it to the list of regs to push. */
24482 if (l_mask == (1 << LR_REGNUM))
24484 pushable_regs |= l_mask;
24485 real_regs_mask |= l_mask;
24486 l_mask = 0;
24489 insn = thumb1_emit_multi_reg_push (pushable_regs, real_regs_mask);
24490 RTX_FRAME_RELATED_P (insn) = 1;
24494 /* Load the pic register before setting the frame pointer,
24495 so we can use r7 as a temporary work register. */
24496 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24497 arm_load_pic_register (live_regs_mask);
24499 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24500 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24501 stack_pointer_rtx);
24503 if (flag_stack_usage_info)
24504 current_function_static_stack_size
24505 = offsets->outgoing_args - offsets->saved_args;
24507 amount = offsets->outgoing_args - offsets->saved_regs;
24508 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
24509 if (amount)
24511 if (amount < 512)
24513 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24514 GEN_INT (- amount)));
24515 RTX_FRAME_RELATED_P (insn) = 1;
24517 else
24519 rtx reg, dwarf;
24521 /* The stack decrement is too big for an immediate value in a single
24522 insn. In theory we could issue multiple subtracts, but after
24523 three of them it becomes more space efficient to place the full
24524 value in the constant pool and load into a register. (Also the
24525 ARM debugger really likes to see only one stack decrement per
24526 function). So instead we look for a scratch register into which
24527 we can load the decrement, and then we subtract this from the
24528 stack pointer. Unfortunately on the thumb the only available
24529 scratch registers are the argument registers, and we cannot use
24530 these as they may hold arguments to the function. Instead we
24531 attempt to locate a call preserved register which is used by this
24532 function. If we can find one, then we know that it will have
24533 been pushed at the start of the prologue and so we can corrupt
24534 it now. */
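/* For example (illustrative values): with amount == 1024 and r4 among the
   saved registers, the sequence is conceptually a literal-pool load of -1024
   into r4 followed by "add sp, sp, r4"; r4 is safe to clobber because it was
   pushed above and will be restored by the epilogue.  */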
24535 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
24536 if (live_regs_mask & (1 << regno))
24537 break;
24539 gcc_assert(regno <= LAST_LO_REGNUM);
24541 reg = gen_rtx_REG (SImode, regno);
24543 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
24545 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24546 stack_pointer_rtx, reg));
24548 dwarf = gen_rtx_SET (stack_pointer_rtx,
24549 plus_constant (Pmode, stack_pointer_rtx,
24550 -amount));
24551 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
24552 RTX_FRAME_RELATED_P (insn) = 1;
24556 if (frame_pointer_needed)
24557 thumb_set_frame_pointer (offsets);
24559 /* If we are profiling, make sure no instructions are scheduled before
24560 the call to mcount. Similarly if the user has requested no
24561 scheduling in the prolog. Similarly if we want non-call exceptions
24562 using the EABI unwinder, to prevent faulting instructions from being
24563 swapped with a stack adjustment. */
24564 if (crtl->profile || !TARGET_SCHED_PROLOG
24565 || (arm_except_unwind_info (&global_options) == UI_TARGET
24566 && cfun->can_throw_non_call_exceptions))
24567 emit_insn (gen_blockage ());
24569 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
24570 if (live_regs_mask & 0xff)
24571 cfun->machine->lr_save_eliminated = 0;
24574 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
24575 single POP instruction can be generated. LR should be replaced by PC. All
24576 the required checks are already done by USE_RETURN_INSN (). Hence, all
24577 we really need to check here is whether a single register or multiple
24578 registers are to be returned. */
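/* For example (illustrative): when LR is the only saved register, the
   single-register path below emits a return that reloads the PC directly
   from the stack, conceptually "ldr pc, [sp], #4"; otherwise LR is dropped
   from the mask, PC is added, and a multi-register pop such as
   "pop {r4, r5, pc}" performs the return.  */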
24579 void
24580 thumb2_expand_return (bool simple_return)
24582 int i, num_regs;
24583 unsigned long saved_regs_mask;
24584 arm_stack_offsets *offsets;
24586 offsets = arm_get_frame_offsets ();
24587 saved_regs_mask = offsets->saved_regs_mask;
24589 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
24590 if (saved_regs_mask & (1 << i))
24591 num_regs++;
24593 if (!simple_return && saved_regs_mask)
24595 if (num_regs == 1)
24597 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
24598 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
24599 rtx addr = gen_rtx_MEM (SImode,
24600 gen_rtx_POST_INC (SImode,
24601 stack_pointer_rtx));
24602 set_mem_alias_set (addr, get_frame_alias_set ());
24603 XVECEXP (par, 0, 0) = ret_rtx;
24604 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
24605 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
24606 emit_jump_insn (par);
24608 else
24610 saved_regs_mask &= ~ (1 << LR_REGNUM);
24611 saved_regs_mask |= (1 << PC_REGNUM);
24612 arm_emit_multi_reg_pop (saved_regs_mask);
24615 else
24617 emit_jump_insn (simple_return_rtx);
24621 void
24622 thumb1_expand_epilogue (void)
24624 HOST_WIDE_INT amount;
24625 arm_stack_offsets *offsets;
24626 int regno;
24628 /* Naked functions don't have epilogues. */
24629 if (IS_NAKED (arm_current_func_type ()))
24630 return;
24632 offsets = arm_get_frame_offsets ();
24633 amount = offsets->outgoing_args - offsets->saved_regs;
24635 if (frame_pointer_needed)
24637 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
24638 amount = offsets->locals_base - offsets->saved_regs;
24640 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
24642 gcc_assert (amount >= 0);
24643 if (amount)
24645 emit_insn (gen_blockage ());
24647 if (amount < 512)
24648 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24649 GEN_INT (amount)));
24650 else
24652 /* r3 is always free in the epilogue. */
24653 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
24655 emit_insn (gen_movsi (reg, GEN_INT (amount)));
24656 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
24660 /* Emit a USE (stack_pointer_rtx), so that
24661 the stack adjustment will not be deleted. */
24662 emit_insn (gen_force_register_use (stack_pointer_rtx));
24664 if (crtl->profile || !TARGET_SCHED_PROLOG)
24665 emit_insn (gen_blockage ());
24667 /* Emit a clobber for each insn that will be restored in the epilogue,
24668 so that flow2 will get register lifetimes correct. */
24669 for (regno = 0; regno < 13; regno++)
24670 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
24671 emit_clobber (gen_rtx_REG (SImode, regno));
24673 if (! df_regs_ever_live_p (LR_REGNUM))
24674 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
24677 /* Epilogue code for APCS frame. */
24678 static void
24679 arm_expand_epilogue_apcs_frame (bool really_return)
24681 unsigned long func_type;
24682 unsigned long saved_regs_mask;
24683 int num_regs = 0;
24684 int i;
24685 int floats_from_frame = 0;
24686 arm_stack_offsets *offsets;
24688 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
24689 func_type = arm_current_func_type ();
24691 /* Get frame offsets for ARM. */
24692 offsets = arm_get_frame_offsets ();
24693 saved_regs_mask = offsets->saved_regs_mask;
24695 /* Find the offset of the floating-point save area in the frame. */
24696 floats_from_frame
24697 = (offsets->saved_args
24698 + arm_compute_static_chain_stack_bytes ()
24699 - offsets->frame);
24701 /* Compute how many core registers saved and how far away the floats are. */
24702 for (i = 0; i <= LAST_ARM_REGNUM; i++)
24703 if (saved_regs_mask & (1 << i))
24705 num_regs++;
24706 floats_from_frame += 4;
24709 if (TARGET_HARD_FLOAT && TARGET_VFP)
24711 int start_reg;
24712 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
24714 /* The offset is from IP_REGNUM. */
24715 int saved_size = arm_get_vfp_saved_size ();
24716 if (saved_size > 0)
24718 rtx_insn *insn;
24719 floats_from_frame += saved_size;
24720 insn = emit_insn (gen_addsi3 (ip_rtx,
24721 hard_frame_pointer_rtx,
24722 GEN_INT (-floats_from_frame)));
24723 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
24724 ip_rtx, hard_frame_pointer_rtx);
24727 /* Generate VFP register multi-pop. */
24728 start_reg = FIRST_VFP_REGNUM;
24730 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
24731 /* Look for a case where a reg does not need restoring. */
24732 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
24733 && (!df_regs_ever_live_p (i + 1)
24734 || call_used_regs[i + 1]))
24736 if (start_reg != i)
24737 arm_emit_vfp_multi_reg_pop (start_reg,
24738 (i - start_reg) / 2,
24739 gen_rtx_REG (SImode,
24740 IP_REGNUM));
24741 start_reg = i + 2;
24744 /* Restore the remaining regs that we have discovered (or possibly
24745 even all of them, if the conditional in the for loop never
24746 fired). */
24747 if (start_reg != i)
24748 arm_emit_vfp_multi_reg_pop (start_reg,
24749 (i - start_reg) / 2,
24750 gen_rtx_REG (SImode, IP_REGNUM));
24753 if (TARGET_IWMMXT)
24755 /* The frame pointer is guaranteed to be non-double-word aligned, as
24756 it is set to double-word-aligned old_stack_pointer - 4. */
24757 rtx_insn *insn;
24758 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
24760 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
24761 if (df_regs_ever_live_p (i) && !call_used_regs[i])
24763 rtx addr = gen_frame_mem (V2SImode,
24764 plus_constant (Pmode, hard_frame_pointer_rtx,
24765 - lrm_count * 4));
24766 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
24767 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24768 gen_rtx_REG (V2SImode, i),
24769 NULL_RTX);
24770 lrm_count += 2;
24774 /* saved_regs_mask should contain IP, which holds the old stack pointer
24775 from the time the activation record was created. Since SP and IP are
24776 adjacent registers, we can restore the value directly into SP. */
24777 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
24778 saved_regs_mask &= ~(1 << IP_REGNUM);
24779 saved_regs_mask |= (1 << SP_REGNUM);
24781 /* There are two registers left in saved_regs_mask - LR and PC. We
24782 only need to restore LR (the return address), but to
24783 save time we can load it directly into PC, unless we need a
24784 special function exit sequence, or we are not really returning. */
24785 if (really_return
24786 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
24787 && !crtl->calls_eh_return)
24788 /* Delete LR from the register mask, so that LR on
24789 the stack is loaded into the PC in the register mask. */
24790 saved_regs_mask &= ~(1 << LR_REGNUM);
24791 else
24792 saved_regs_mask &= ~(1 << PC_REGNUM);
24794 num_regs = bit_count (saved_regs_mask);
24795 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
24797 rtx_insn *insn;
24798 emit_insn (gen_blockage ());
24799 /* Unwind the stack to just below the saved registers. */
24800 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24801 hard_frame_pointer_rtx,
24802 GEN_INT (- 4 * num_regs)));
24804 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
24805 stack_pointer_rtx, hard_frame_pointer_rtx);
24808 arm_emit_multi_reg_pop (saved_regs_mask);
24810 if (IS_INTERRUPT (func_type))
24812 /* Interrupt handlers will have pushed the
24813 IP onto the stack, so restore it now. */
24814 rtx_insn *insn;
24815 rtx addr = gen_rtx_MEM (SImode,
24816 gen_rtx_POST_INC (SImode,
24817 stack_pointer_rtx));
24818 set_mem_alias_set (addr, get_frame_alias_set ());
24819 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
24820 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24821 gen_rtx_REG (SImode, IP_REGNUM),
24822 NULL_RTX);
24825 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
24826 return;
24828 if (crtl->calls_eh_return)
24829 emit_insn (gen_addsi3 (stack_pointer_rtx,
24830 stack_pointer_rtx,
24831 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
24833 if (IS_STACKALIGN (func_type))
24834 /* Restore the original stack pointer. Before prologue, the stack was
24835 realigned and the original stack pointer saved in r0. For details,
24836 see comment in arm_expand_prologue. */
24837 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
24839 emit_jump_insn (simple_return_rtx);
24842 /* Generate RTL to represent ARM epilogue. Really_return is true if the
24843 function is not a sibcall. */
24844 void
24845 arm_expand_epilogue (bool really_return)
24847 unsigned long func_type;
24848 unsigned long saved_regs_mask;
24849 int num_regs = 0;
24850 int i;
24851 int amount;
24852 arm_stack_offsets *offsets;
24854 func_type = arm_current_func_type ();
24856 /* Naked functions don't have epilogues. Hence, generate a return pattern and
24857 let output_return_instruction take care of instruction emission if any. */
24858 if (IS_NAKED (func_type)
24859 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
24861 if (really_return)
24862 emit_jump_insn (simple_return_rtx);
24863 return;
24866 /* If we are throwing an exception, then we really must be doing a
24867 return, so we can't tail-call. */
24868 gcc_assert (!crtl->calls_eh_return || really_return);
24870 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
24872 arm_expand_epilogue_apcs_frame (really_return);
24873 return;
24876 /* Get frame offsets for ARM. */
24877 offsets = arm_get_frame_offsets ();
24878 saved_regs_mask = offsets->saved_regs_mask;
24879 num_regs = bit_count (saved_regs_mask);
24881 if (frame_pointer_needed)
24883 rtx_insn *insn;
24884 /* Restore stack pointer if necessary. */
24885 if (TARGET_ARM)
24887 /* In ARM mode, frame pointer points to first saved register.
24888 Restore stack pointer to last saved register. */
24889 amount = offsets->frame - offsets->saved_regs;
24891 /* Force out any pending memory operations that reference stacked data
24892 before stack de-allocation occurs. */
24893 emit_insn (gen_blockage ());
24894 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24895 hard_frame_pointer_rtx,
24896 GEN_INT (amount)));
24897 arm_add_cfa_adjust_cfa_note (insn, amount,
24898 stack_pointer_rtx,
24899 hard_frame_pointer_rtx);
24901 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24902 deleted. */
24903 emit_insn (gen_force_register_use (stack_pointer_rtx));
24905 else
24907 /* In Thumb-2 mode, the frame pointer points to the last saved
24908 register. */
24909 amount = offsets->locals_base - offsets->saved_regs;
24910 if (amount)
24912 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
24913 hard_frame_pointer_rtx,
24914 GEN_INT (amount)));
24915 arm_add_cfa_adjust_cfa_note (insn, amount,
24916 hard_frame_pointer_rtx,
24917 hard_frame_pointer_rtx);
24920 /* Force out any pending memory operations that reference stacked data
24921 before stack de-allocation occurs. */
24922 emit_insn (gen_blockage ());
24923 insn = emit_insn (gen_movsi (stack_pointer_rtx,
24924 hard_frame_pointer_rtx));
24925 arm_add_cfa_adjust_cfa_note (insn, 0,
24926 stack_pointer_rtx,
24927 hard_frame_pointer_rtx);
24928 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
24929 deleted. */
24930 emit_insn (gen_force_register_use (stack_pointer_rtx));
24933 else
24935 /* Pop off outgoing args and local frame to adjust stack pointer to
24936 last saved register. */
24937 amount = offsets->outgoing_args - offsets->saved_regs;
24938 if (amount)
24940 rtx_insn *tmp;
24941 /* Force out any pending memory operations that reference stacked data
24942 before stack de-allocation occurs. */
24943 emit_insn (gen_blockage ());
24944 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
24945 stack_pointer_rtx,
24946 GEN_INT (amount)));
24947 arm_add_cfa_adjust_cfa_note (tmp, amount,
24948 stack_pointer_rtx, stack_pointer_rtx);
24949 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
24950 not deleted. */
24951 emit_insn (gen_force_register_use (stack_pointer_rtx));
24955 if (TARGET_HARD_FLOAT && TARGET_VFP)
24957 /* Generate VFP register multi-pop. */
24958 int end_reg = LAST_VFP_REGNUM + 1;
24960 /* Scan the registers in reverse order. We need to match
24961 any groupings made in the prologue and generate matching
24962 vldm operations. The need to match groups is because,
24963 unlike pop, vldm can only do consecutive regs. */
24964 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
24965 /* Look for a case where a reg does not need restoring. */
24966 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
24967 && (!df_regs_ever_live_p (i + 1)
24968 || call_used_regs[i + 1]))
24970 /* Restore the regs discovered so far (from reg+2 to
24971 end_reg). */
24972 if (end_reg > i + 2)
24973 arm_emit_vfp_multi_reg_pop (i + 2,
24974 (end_reg - (i + 2)) / 2,
24975 stack_pointer_rtx);
24976 end_reg = i;
24979 /* Restore the remaining regs that we have discovered (or possibly
24980 even all of them, if the conditional in the for loop never
24981 fired). */
24982 if (end_reg > i + 2)
24983 arm_emit_vfp_multi_reg_pop (i + 2,
24984 (end_reg - (i + 2)) / 2,
24985 stack_pointer_rtx);
24988 if (TARGET_IWMMXT)
24989 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
24990 if (df_regs_ever_live_p (i) && !call_used_regs[i])
24992 rtx_insn *insn;
24993 rtx addr = gen_rtx_MEM (V2SImode,
24994 gen_rtx_POST_INC (SImode,
24995 stack_pointer_rtx));
24996 set_mem_alias_set (addr, get_frame_alias_set ());
24997 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
24998 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
24999 gen_rtx_REG (V2SImode, i),
25000 NULL_RTX);
25001 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25002 stack_pointer_rtx, stack_pointer_rtx);
25005 if (saved_regs_mask)
25007 rtx insn;
25008 bool return_in_pc = false;
25010 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25011 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25012 && !IS_STACKALIGN (func_type)
25013 && really_return
25014 && crtl->args.pretend_args_size == 0
25015 && saved_regs_mask & (1 << LR_REGNUM)
25016 && !crtl->calls_eh_return)
25018 saved_regs_mask &= ~(1 << LR_REGNUM);
25019 saved_regs_mask |= (1 << PC_REGNUM);
25020 return_in_pc = true;
25023 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25025 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25026 if (saved_regs_mask & (1 << i))
25028 rtx addr = gen_rtx_MEM (SImode,
25029 gen_rtx_POST_INC (SImode,
25030 stack_pointer_rtx));
25031 set_mem_alias_set (addr, get_frame_alias_set ());
25033 if (i == PC_REGNUM)
25035 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25036 XVECEXP (insn, 0, 0) = ret_rtx;
25037 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25038 addr);
25039 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25040 insn = emit_jump_insn (insn);
25042 else
25044 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25045 addr));
25046 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25047 gen_rtx_REG (SImode, i),
25048 NULL_RTX);
25049 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25050 stack_pointer_rtx,
25051 stack_pointer_rtx);
25055 else
25057 if (TARGET_LDRD
25058 && current_tune->prefer_ldrd_strd
25059 && !optimize_function_for_size_p (cfun))
25061 if (TARGET_THUMB2)
25062 thumb2_emit_ldrd_pop (saved_regs_mask);
25063 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25064 arm_emit_ldrd_pop (saved_regs_mask);
25065 else
25066 arm_emit_multi_reg_pop (saved_regs_mask);
25068 else
25069 arm_emit_multi_reg_pop (saved_regs_mask);
25072 if (return_in_pc)
25073 return;
25076 if (crtl->args.pretend_args_size)
25078 int i, j;
25079 rtx dwarf = NULL_RTX;
25080 rtx_insn *tmp =
25081 emit_insn (gen_addsi3 (stack_pointer_rtx,
25082 stack_pointer_rtx,
25083 GEN_INT (crtl->args.pretend_args_size)));
25085 RTX_FRAME_RELATED_P (tmp) = 1;
25087 if (cfun->machine->uses_anonymous_args)
25089 /* Restore pretend args. Refer to arm_expand_prologue for how the
25090 pretend args are saved on the stack. */
25091 int num_regs = crtl->args.pretend_args_size / 4;
25092 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
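/* For example, with 8 bytes of pretend args num_regs is 2 and the mask is
   (0xf0 >> 2) & 0xf == 0xc, so REG_CFA_RESTORE notes are added for r2 and
   r3, matching the registers pushed by the prologue.  */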
25093 for (j = 0, i = 0; j < num_regs; i++)
25094 if (saved_regs_mask & (1 << i))
25096 rtx reg = gen_rtx_REG (SImode, i);
25097 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25098 j++;
25100 REG_NOTES (tmp) = dwarf;
25102 arm_add_cfa_adjust_cfa_note (tmp, crtl->args.pretend_args_size,
25103 stack_pointer_rtx, stack_pointer_rtx);
25106 if (!really_return)
25107 return;
25109 if (crtl->calls_eh_return)
25110 emit_insn (gen_addsi3 (stack_pointer_rtx,
25111 stack_pointer_rtx,
25112 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25114 if (IS_STACKALIGN (func_type))
25115 /* Restore the original stack pointer. Before prologue, the stack was
25116 realigned and the original stack pointer saved in r0. For details,
25117 see comment in arm_expand_prologue. */
25118 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25120 emit_jump_insn (simple_return_rtx);
25123 /* Implementation of insn prologue_thumb1_interwork. This is the first
25124 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25126 const char *
25127 thumb1_output_interwork (void)
25129 const char * name;
25130 FILE *f = asm_out_file;
25132 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25133 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25134 == SYMBOL_REF);
25135 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25137 /* Generate code sequence to switch us into Thumb mode. */
25138 /* The .code 32 directive has already been emitted by
25139 ASM_DECLARE_FUNCTION_NAME. */
25140 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25141 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25143 /* Generate a label, so that the debugger will notice the
25144 change in instruction sets. This label is also used by
25145 the assembler to bypass the ARM code when this function
25146 is called from a Thumb encoded function elsewhere in the
25147 same file. Hence the definition of STUB_NAME here must
25148 agree with the definition in gas/config/tc-arm.c. */
25150 #define STUB_NAME ".real_start_of"
25152 fprintf (f, "\t.code\t16\n");
25153 #ifdef ARM_PE
25154 if (arm_dllexport_name_p (name))
25155 name = arm_strip_name_encoding (name);
25156 #endif
25157 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25158 fprintf (f, "\t.thumb_func\n");
25159 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25161 return "";
25164 /* Handle the case of a double word load into a low register from
25165 a computed memory address. The computed address may involve a
25166 register which is overwritten by the load. */
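/* For example (illustrative): loading a doubleword whose address is in r2
   into the pair r2/r3 fetches the high word first ("ldr r3, [r2, #4]") and
   the low word last ("ldr r2, [r2]"), so the base register is not clobbered
   before its final use.  */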
25167 const char *
25168 thumb_load_double_from_address (rtx *operands)
25170 rtx addr;
25171 rtx base;
25172 rtx offset;
25173 rtx arg1;
25174 rtx arg2;
25176 gcc_assert (REG_P (operands[0]));
25177 gcc_assert (MEM_P (operands[1]));
25179 /* Get the memory address. */
25180 addr = XEXP (operands[1], 0);
25182 /* Work out how the memory address is computed. */
25183 switch (GET_CODE (addr))
25185 case REG:
25186 operands[2] = adjust_address (operands[1], SImode, 4);
25188 if (REGNO (operands[0]) == REGNO (addr))
25190 output_asm_insn ("ldr\t%H0, %2", operands);
25191 output_asm_insn ("ldr\t%0, %1", operands);
25193 else
25195 output_asm_insn ("ldr\t%0, %1", operands);
25196 output_asm_insn ("ldr\t%H0, %2", operands);
25198 break;
25200 case CONST:
25201 /* Compute <address> + 4 for the high order load. */
25202 operands[2] = adjust_address (operands[1], SImode, 4);
25204 output_asm_insn ("ldr\t%0, %1", operands);
25205 output_asm_insn ("ldr\t%H0, %2", operands);
25206 break;
25208 case PLUS:
25209 arg1 = XEXP (addr, 0);
25210 arg2 = XEXP (addr, 1);
25212 if (CONSTANT_P (arg1))
25213 base = arg2, offset = arg1;
25214 else
25215 base = arg1, offset = arg2;
25217 gcc_assert (REG_P (base));
25219 /* Catch the case of <address> = <reg> + <reg> */
25220 if (REG_P (offset))
25222 int reg_offset = REGNO (offset);
25223 int reg_base = REGNO (base);
25224 int reg_dest = REGNO (operands[0]);
25226 /* Add the base and offset registers together into the
25227 higher destination register. */
25228 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25229 reg_dest + 1, reg_base, reg_offset);
25231 /* Load the lower destination register from the address in
25232 the higher destination register. */
25233 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25234 reg_dest, reg_dest + 1);
25236 /* Load the higher destination register from its own address
25237 plus 4. */
25238 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25239 reg_dest + 1, reg_dest + 1);
25241 else
25243 /* Compute <address> + 4 for the high order load. */
25244 operands[2] = adjust_address (operands[1], SImode, 4);
25246 /* If the computed address is held in the low order register
25247 then load the high order register first, otherwise always
25248 load the low order register first. */
25249 if (REGNO (operands[0]) == REGNO (base))
25251 output_asm_insn ("ldr\t%H0, %2", operands);
25252 output_asm_insn ("ldr\t%0, %1", operands);
25254 else
25256 output_asm_insn ("ldr\t%0, %1", operands);
25257 output_asm_insn ("ldr\t%H0, %2", operands);
25260 break;
25262 case LABEL_REF:
25263 /* With no registers to worry about we can just load the value
25264 directly. */
25265 operands[2] = adjust_address (operands[1], SImode, 4);
25267 output_asm_insn ("ldr\t%H0, %2", operands);
25268 output_asm_insn ("ldr\t%0, %1", operands);
25269 break;
25271 default:
25272 gcc_unreachable ();
25275 return "";
25278 const char *
25279 thumb_output_move_mem_multiple (int n, rtx *operands)
25281 rtx tmp;
25283 switch (n)
25285 case 2:
25286 if (REGNO (operands[4]) > REGNO (operands[5]))
25288 tmp = operands[4];
25289 operands[4] = operands[5];
25290 operands[5] = tmp;
25292 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25293 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25294 break;
25296 case 3:
25297 if (REGNO (operands[4]) > REGNO (operands[5]))
25298 std::swap (operands[4], operands[5]);
25299 if (REGNO (operands[5]) > REGNO (operands[6]))
25300 std::swap (operands[5], operands[6]);
25301 if (REGNO (operands[4]) > REGNO (operands[5]))
25302 std::swap (operands[4], operands[5]);
25304 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25305 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25306 break;
25308 default:
25309 gcc_unreachable ();
25312 return "";
25315 /* Output a call-via instruction for thumb state. */
25316 const char *
25317 thumb_call_via_reg (rtx reg)
25319 int regno = REGNO (reg);
25320 rtx *labelp;
25322 gcc_assert (regno < LR_REGNUM);
25324 /* If we are in the normal text section we can use a single instance
25325 per compilation unit. If we are doing function sections, then we need
25326 an entry per section, since we can't rely on reachability. */
25327 if (in_section == text_section)
25329 thumb_call_reg_needed = 1;
25331 if (thumb_call_via_label[regno] == NULL)
25332 thumb_call_via_label[regno] = gen_label_rtx ();
25333 labelp = thumb_call_via_label + regno;
25335 else
25337 if (cfun->machine->call_via[regno] == NULL)
25338 cfun->machine->call_via[regno] = gen_label_rtx ();
25339 labelp = cfun->machine->call_via + regno;
25342 output_asm_insn ("bl\t%a0", labelp);
25343 return "";
25346 /* Routines for generating rtl. */
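/* Expand a block copy of INTVAL (operands[2]) bytes from operands[1] to
   operands[0] for Thumb-1: copy 12- and 8-byte chunks with the movmem12b
   and movmem8b ldm/stm patterns (which post-increment both pointers), then
   finish with at most one word, one halfword and one byte copy.  */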
25347 void
25348 thumb_expand_movmemqi (rtx *operands)
25350 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25351 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25352 HOST_WIDE_INT len = INTVAL (operands[2]);
25353 HOST_WIDE_INT offset = 0;
25355 while (len >= 12)
25357 emit_insn (gen_movmem12b (out, in, out, in));
25358 len -= 12;
25361 if (len >= 8)
25363 emit_insn (gen_movmem8b (out, in, out, in));
25364 len -= 8;
25367 if (len >= 4)
25369 rtx reg = gen_reg_rtx (SImode);
25370 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
25371 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
25372 len -= 4;
25373 offset += 4;
25376 if (len >= 2)
25378 rtx reg = gen_reg_rtx (HImode);
25379 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
25380 plus_constant (Pmode, in,
25381 offset))));
25382 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
25383 offset)),
25384 reg));
25385 len -= 2;
25386 offset += 2;
25389 if (len)
25391 rtx reg = gen_reg_rtx (QImode);
25392 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
25393 plus_constant (Pmode, in,
25394 offset))));
25395 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
25396 offset)),
25397 reg));
25401 void
25402 thumb_reload_out_hi (rtx *operands)
25404 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
25407 /* Handle reading a half-word from memory during reload. */
25408 void
25409 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
25411 gcc_unreachable ();
25414 /* Return the length of a function name prefix
25415 that starts with the character 'c'. */
25416 static int
25417 arm_get_strip_length (int c)
25419 switch (c)
25421 ARM_NAME_ENCODING_LENGTHS
25422 default: return 0;
25426 /* Return a pointer to a function's name with any
25427 and all prefix encodings stripped from it. */
25428 const char *
25429 arm_strip_name_encoding (const char *name)
25431 int skip;
25433 while ((skip = arm_get_strip_length (* name)))
25434 name += skip;
25436 return name;
25439 /* If there is a '*' anywhere in the name's prefix, then
25440 emit the stripped name verbatim, otherwise prepend an
25441 underscore if leading underscores are being used. */
25442 void
25443 arm_asm_output_labelref (FILE *stream, const char *name)
25445 int skip;
25446 int verbatim = 0;
25448 while ((skip = arm_get_strip_length (* name)))
25450 verbatim |= (*name == '*');
25451 name += skip;
25454 if (verbatim)
25455 fputs (name, stream);
25456 else
25457 asm_fprintf (stream, "%U%s", name);
25460 /* This function is used to emit an EABI tag and its associated value.
25461 We emit the numerical value of the tag in case the assembler does not
25462 support textual tags (e.g. gas prior to 2.20).  If requested we include
25463 the tag name in a comment so that anyone reading the assembler output
25464 will know which tag is being set.
25466 This function is not static because arm-c.c needs it too. */
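/* For example (illustrative), arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1)
   produces:
       .eabi_attribute 28, 1   @ Tag_ABI_VFP_args
   where the trailing comment appears only with -fverbose-asm or -dA.  */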
25468 void
25469 arm_emit_eabi_attribute (const char *name, int num, int val)
25471 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
25472 if (flag_verbose_asm || flag_debug_asm)
25473 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
25474 asm_fprintf (asm_out_file, "\n");
25477 /* This function is used to print CPU tuning information as comment
25478 in assembler file. Pointers are not printed for now. */
25480 void
25481 arm_print_tune_info (void)
25483 asm_fprintf (asm_out_file, "\t@.tune parameters\n");
25484 asm_fprintf (asm_out_file, "\t\t@constant_limit:\t%d\n",
25485 current_tune->constant_limit);
25486 asm_fprintf (asm_out_file, "\t\t@max_insns_skipped:\t%d\n",
25487 current_tune->max_insns_skipped);
25488 asm_fprintf (asm_out_file, "\t\t@prefetch.num_slots:\t%d\n",
25489 current_tune->prefetch.num_slots);
25490 asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_size:\t%d\n",
25491 current_tune->prefetch.l1_cache_size);
25492 asm_fprintf (asm_out_file, "\t\t@prefetch.l1_cache_line_size:\t%d\n",
25493 current_tune->prefetch.l1_cache_line_size);
25494 asm_fprintf (asm_out_file, "\t\t@prefer_constant_pool:\t%d\n",
25495 (int) current_tune->prefer_constant_pool);
25496 asm_fprintf (asm_out_file, "\t\t@branch_cost:\t(s:speed, p:predictable)\n");
25497 asm_fprintf (asm_out_file, "\t\t\t\ts&p\tcost\n");
25498 asm_fprintf (asm_out_file, "\t\t\t\t00\t%d\n",
25499 current_tune->branch_cost (false, false));
25500 asm_fprintf (asm_out_file, "\t\t\t\t01\t%d\n",
25501 current_tune->branch_cost (false, true));
25502 asm_fprintf (asm_out_file, "\t\t\t\t10\t%d\n",
25503 current_tune->branch_cost (true, false));
25504 asm_fprintf (asm_out_file, "\t\t\t\t11\t%d\n",
25505 current_tune->branch_cost (true, true));
25506 asm_fprintf (asm_out_file, "\t\t@prefer_ldrd_strd:\t%d\n",
25507 (int) current_tune->prefer_ldrd_strd);
25508 asm_fprintf (asm_out_file, "\t\t@logical_op_non_short_circuit:\t[%d,%d]\n",
25509 (int) current_tune->logical_op_non_short_circuit_thumb,
25510 (int) current_tune->logical_op_non_short_circuit_arm);
25511 asm_fprintf (asm_out_file, "\t\t@prefer_neon_for_64bits:\t%d\n",
25512 (int) current_tune->prefer_neon_for_64bits);
25513 asm_fprintf (asm_out_file,
25514 "\t\t@disparage_flag_setting_t16_encodings:\t%d\n",
25515 (int) current_tune->disparage_flag_setting_t16_encodings);
25516 asm_fprintf (asm_out_file, "\t\t@string_ops_prefer_neon:\t%d\n",
25517 (int) current_tune->string_ops_prefer_neon);
25518 asm_fprintf (asm_out_file, "\t\t@max_insns_inline_memset:\t%d\n",
25519 current_tune->max_insns_inline_memset);
25520 asm_fprintf (asm_out_file, "\t\t@fuseable_ops:\t%u\n",
25521 current_tune->fuseable_ops);
25522 asm_fprintf (asm_out_file, "\t\t@sched_autopref:\t%d\n",
25523 (int) current_tune->sched_autopref);
25526 static void
25527 arm_file_start (void)
25529 int val;
25531 if (TARGET_UNIFIED_ASM)
25532 asm_fprintf (asm_out_file, "\t.syntax unified\n");
25534 if (TARGET_BPABI)
25536 const char *fpu_name;
25537 if (arm_selected_arch)
25539 /* armv7ve doesn't support any extensions. */
25540 if (strcmp (arm_selected_arch->name, "armv7ve") == 0)
25542 /* Keep backward compatibility for assemblers
25543 which don't support armv7ve. */
25544 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
25545 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
25546 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
25547 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
25548 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
25550 else
25552 const char* pos = strchr (arm_selected_arch->name, '+');
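/* Split an architecture name carrying an extension suffix, e.g. a
   (hypothetical) "armv8-a+crc", into ".arch armv8-a" followed by
   ".arch_extension crc".  */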
25553 if (pos)
25555 char buf[15];
25556 gcc_assert (strlen (arm_selected_arch->name)
25557 <= sizeof (buf) / sizeof (*pos));
25558 strncpy (buf, arm_selected_arch->name,
25559 (pos - arm_selected_arch->name) * sizeof (*pos));
25560 buf[pos - arm_selected_arch->name] = '\0';
25561 asm_fprintf (asm_out_file, "\t.arch %s\n", buf);
25562 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", pos + 1);
25564 else
25565 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_arch->name);
25568 else if (strncmp (arm_selected_cpu->name, "generic", 7) == 0)
25569 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_selected_cpu->name + 8);
25570 else
25572 const char* truncated_name
25573 = arm_rewrite_selected_cpu (arm_selected_cpu->name);
25574 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
25577 if (print_tune_info)
25578 arm_print_tune_info ();
25580 if (TARGET_SOFT_FLOAT)
25582 fpu_name = "softvfp";
25584 else
25586 fpu_name = arm_fpu_desc->name;
25587 if (arm_fpu_desc->model == ARM_FP_MODEL_VFP)
25589 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
25590 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
25592 if (TARGET_HARD_FLOAT_ABI)
25593 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
25596 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_name);
25598 /* Some of these attributes only apply when the corresponding features
25599 are used. However we don't have any easy way of figuring this out.
25600 Conservatively record the setting that would have been used. */
25602 if (flag_rounding_math)
25603 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
25605 if (!flag_unsafe_math_optimizations)
25607 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
25608 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
25610 if (flag_signaling_nans)
25611 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
25613 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
25614 flag_finite_math_only ? 1 : 3);
25616 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
25617 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
25618 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
25619 flag_short_enums ? 1 : 2);
25621 /* Tag_ABI_optimization_goals. */
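/* Values as assumed from the ABI attribute addenda: 1 = prefer speed,
   2 = aggressively prefer speed, 4 = aggressively prefer size,
   6 = best debugging.  */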
25622 if (optimize_size)
25623 val = 4;
25624 else if (optimize >= 2)
25625 val = 2;
25626 else if (optimize)
25627 val = 1;
25628 else
25629 val = 6;
25630 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
25632 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
25633 unaligned_access);
25635 if (arm_fp16_format)
25636 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
25637 (int) arm_fp16_format);
25639 if (arm_lang_output_object_attributes_hook)
25640 arm_lang_output_object_attributes_hook();
25643 default_file_start ();
25646 static void
25647 arm_file_end (void)
25649 int regno;
25651 if (NEED_INDICATE_EXEC_STACK)
25652 /* Add .note.GNU-stack. */
25653 file_end_indicate_exec_stack ();
25655 if (! thumb_call_reg_needed)
25656 return;
25658 switch_to_section (text_section);
25659 asm_fprintf (asm_out_file, "\t.code 16\n");
25660 ASM_OUTPUT_ALIGN (asm_out_file, 1);
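/* Emit the indirection stubs referenced by thumb_call_via_reg: each is
   just its label followed by a "bx" to the corresponding register.  */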
25662 for (regno = 0; regno < LR_REGNUM; regno++)
25664 rtx label = thumb_call_via_label[regno];
25666 if (label != 0)
25668 targetm.asm_out.internal_label (asm_out_file, "L",
25669 CODE_LABEL_NUMBER (label));
25670 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
25675 #ifndef ARM_PE
25676 /* Symbols in the text segment can be accessed without indirecting via the
25677 constant pool; it may take an extra binary operation, but this is still
25678 faster than indirecting via memory. Don't do this when not optimizing,
25679 since we won't be calculating all of the offsets necessary to do this
25680 simplification. */
25682 static void
25683 arm_encode_section_info (tree decl, rtx rtl, int first)
25685 if (optimize > 0 && TREE_CONSTANT (decl))
25686 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
25688 default_encode_section_info (decl, rtl, first);
25690 #endif /* !ARM_PE */
25692 static void
25693 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
25695 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
25696 && !strcmp (prefix, "L"))
25698 arm_ccfsm_state = 0;
25699 arm_target_insn = NULL;
25701 default_internal_label (stream, prefix, labelno);
25704 /* Output code to add DELTA to the first argument, and then jump
25705 to FUNCTION. Used for C++ multiple inheritance. */
25706 static void
25707 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
25708 HOST_WIDE_INT delta,
25709 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
25710 tree function)
25712 static int thunk_label = 0;
25713 char label[256];
25714 char labelpc[256];
25715 int mi_delta = delta;
25716 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
25717 int shift = 0;
25718 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
25719 ? 1 : 0);
25720 if (mi_delta < 0)
25721 mi_delta = - mi_delta;
25723 final_start_function (emit_barrier (), file, 1);
25725 if (TARGET_THUMB1)
25727 int labelno = thunk_label++;
25728 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
25729 /* Thunks are entered in ARM mode when available. */
25730 if (TARGET_THUMB1_ONLY)
25732 /* push r3 so we can use it as a temporary. */
25733 /* TODO: Omit this save if r3 is not used. */
25734 fputs ("\tpush {r3}\n", file);
25735 fputs ("\tldr\tr3, ", file);
25737 else
25739 fputs ("\tldr\tr12, ", file);
25741 assemble_name (file, label);
25742 fputc ('\n', file);
25743 if (flag_pic)
25745 /* If we are generating PIC, the ldr instruction below loads
25746 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
25747 the address of the add + 8, so we have:
25749 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
25750 = target + 1.
25752 Note that we have "+ 1" because some versions of GNU ld
25753 don't set the low bit of the result for R_ARM_REL32
25754 relocations against thumb function symbols.
25755 On ARMv6M this is +4, not +8. */
25756 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
25757 assemble_name (file, labelpc);
25758 fputs (":\n", file);
25759 if (TARGET_THUMB1_ONLY)
25761 /* This is 2 insns after the start of the thunk, so we know it
25762 is 4-byte aligned. */
25763 fputs ("\tadd\tr3, pc, r3\n", file);
25764 fputs ("\tmov r12, r3\n", file);
25766 else
25767 fputs ("\tadd\tr12, pc, r12\n", file);
25769 else if (TARGET_THUMB1_ONLY)
25770 fputs ("\tmov r12, r3\n", file);
25772 if (TARGET_THUMB1_ONLY)
25774 if (mi_delta > 255)
25776 fputs ("\tldr\tr3, ", file);
25777 assemble_name (file, label);
25778 fputs ("+4\n", file);
25779 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
25780 mi_op, this_regno, this_regno);
25782 else if (mi_delta != 0)
25784 /* Thumb1 unified syntax requires s suffix in instruction name when
25785 one of the operands is immediate. */
25786 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
25787 mi_op, this_regno, this_regno,
25788 mi_delta);
25791 else
25793 /* TODO: Use movw/movt for large constants when available. */
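/* Decompose the delta into add/sub immediates of at most 8 significant
   bits each.  For example, a delta of 0x12345 is emitted as three
   instructions adding #0x45, #0x2300 and #0x10000 in turn.  */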
25794 while (mi_delta != 0)
25796 if ((mi_delta & (3 << shift)) == 0)
25797 shift += 2;
25798 else
25800 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
25801 mi_op, this_regno, this_regno,
25802 mi_delta & (0xff << shift));
25803 mi_delta &= ~(0xff << shift);
25804 shift += 8;
25808 if (TARGET_THUMB1)
25810 if (TARGET_THUMB1_ONLY)
25811 fputs ("\tpop\t{r3}\n", file);
25813 fprintf (file, "\tbx\tr12\n");
25814 ASM_OUTPUT_ALIGN (file, 2);
25815 assemble_name (file, label);
25816 fputs (":\n", file);
25817 if (flag_pic)
25819 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
25820 rtx tem = XEXP (DECL_RTL (function), 0);
25821 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
25822 pipeline offset is four rather than eight. Adjust the offset
25823 accordingly. */
25824 tem = plus_constant (GET_MODE (tem), tem,
25825 TARGET_THUMB1_ONLY ? -3 : -7);
25826 tem = gen_rtx_MINUS (GET_MODE (tem),
25827 tem,
25828 gen_rtx_SYMBOL_REF (Pmode,
25829 ggc_strdup (labelpc)));
25830 assemble_integer (tem, 4, BITS_PER_WORD, 1);
25832 else
25833 /* Output ".word .LTHUNKn". */
25834 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
25836 if (TARGET_THUMB1_ONLY && mi_delta > 255)
25837 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
25839 else
25841 fputs ("\tb\t", file);
25842 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
25843 if (NEED_PLT_RELOC)
25844 fputs ("(PLT)", file);
25845 fputc ('\n', file);
25848 final_end_function ();
25852 arm_emit_vector_const (FILE *file, rtx x)
25854 int i;
25855 const char * pattern;
25857 gcc_assert (GET_CODE (x) == CONST_VECTOR);
25859 switch (GET_MODE (x))
25861 case V2SImode: pattern = "%08x"; break;
25862 case V4HImode: pattern = "%04x"; break;
25863 case V8QImode: pattern = "%02x"; break;
25864 default: gcc_unreachable ();
25867 fprintf (file, "0x");
25868 for (i = CONST_VECTOR_NUNITS (x); i--;)
25870 rtx element;
25872 element = CONST_VECTOR_ELT (x, i);
25873 fprintf (file, pattern, INTVAL (element));
25876 return 1;
25879 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
25880 HFmode constant pool entries are actually loaded with ldr. */
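/* For example (illustrative), half-precision 1.0 is the bit pattern 0x3c00;
   it is emitted as a 16-bit word followed by two bytes of zero padding,
   with the padding emitted first on big-endian targets.  */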
25881 void
25882 arm_emit_fp16_const (rtx c)
25884 REAL_VALUE_TYPE r;
25885 long bits;
25887 REAL_VALUE_FROM_CONST_DOUBLE (r, c);
25888 bits = real_to_target (NULL, &r, HFmode);
25889 if (WORDS_BIG_ENDIAN)
25890 assemble_zeros (2);
25891 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
25892 if (!WORDS_BIG_ENDIAN)
25893 assemble_zeros (2);
25896 const char *
25897 arm_output_load_gr (rtx *operands)
25899 rtx reg;
25900 rtx offset;
25901 rtx wcgr;
25902 rtx sum;
25904 if (!MEM_P (operands [1])
25905 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
25906 || !REG_P (reg = XEXP (sum, 0))
25907 || !CONST_INT_P (offset = XEXP (sum, 1))
25908 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
25909 return "wldrw%?\t%0, %1";
25911 /* Fix up an out-of-range load of a GR register. */
25912 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
25913 wcgr = operands[0];
25914 operands[0] = reg;
25915 output_asm_insn ("ldr%?\t%0, %1", operands);
25917 operands[0] = wcgr;
25918 operands[1] = reg;
25919 output_asm_insn ("tmcr%?\t%0, %1", operands);
25920 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
25922 return "";
25925 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
25927 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
25928 named arg and all anonymous args onto the stack.
25929 XXX I know the prologue shouldn't be pushing registers, but it is faster
25930 that way. */
25932 static void
25933 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
25934 machine_mode mode,
25935 tree type,
25936 int *pretend_size,
25937 int second_time ATTRIBUTE_UNUSED)
25939 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
25940 int nregs;
25942 cfun->machine->uses_anonymous_args = 1;
25943 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
25945 nregs = pcum->aapcs_ncrn;
25946 if ((nregs & 1) && arm_needs_doubleword_align (mode, type))
25947 nregs++;
25949 else
25950 nregs = pcum->nregs;
25952 if (nregs < NUM_ARG_REGS)
25953 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
25956 /* We can't rely on the caller doing the proper promotion when
25957 using APCS or ATPCS. */
25959 static bool
25960 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
25962 return !TARGET_AAPCS_BASED;
25965 static machine_mode
25966 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
25967 machine_mode mode,
25968 int *punsignedp ATTRIBUTE_UNUSED,
25969 const_tree fntype ATTRIBUTE_UNUSED,
25970 int for_return ATTRIBUTE_UNUSED)
25972 if (GET_MODE_CLASS (mode) == MODE_INT
25973 && GET_MODE_SIZE (mode) < 4)
25974 return SImode;
25976 return mode;
25979 /* AAPCS based ABIs use short enums by default. */
25981 static bool
25982 arm_default_short_enums (void)
25984 return TARGET_AAPCS_BASED && arm_abi != ARM_ABI_AAPCS_LINUX;
25988 /* AAPCS requires that anonymous bitfields affect structure alignment. */
25990 static bool
25991 arm_align_anon_bitfield (void)
25993 return TARGET_AAPCS_BASED;
25997 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
25999 static tree
26000 arm_cxx_guard_type (void)
26002 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26006 /* The EABI says test the least significant bit of a guard variable. */
26008 static bool
26009 arm_cxx_guard_mask_bit (void)
26011 return TARGET_AAPCS_BASED;
26015 /* The EABI specifies that all array cookies are 8 bytes long. */
26017 static tree
26018 arm_get_cookie_size (tree type)
26020 tree size;
26022 if (!TARGET_AAPCS_BASED)
26023 return default_cxx_get_cookie_size (type);
26025 size = build_int_cst (sizetype, 8);
26026 return size;
26030 /* The EABI says that array cookies should also contain the element size. */
26032 static bool
26033 arm_cookie_has_size (void)
26035 return TARGET_AAPCS_BASED;
26039 /* The EABI says constructors and destructors should return a pointer to
26040 the object constructed/destroyed. */
26042 static bool
26043 arm_cxx_cdtor_returns_this (void)
26045 return TARGET_AAPCS_BASED;
26048 /* The EABI says that an inline function may never be the key
26049 method. */
26051 static bool
26052 arm_cxx_key_method_may_be_inline (void)
26054 return !TARGET_AAPCS_BASED;
26057 static void
26058 arm_cxx_determine_class_data_visibility (tree decl)
26060 if (!TARGET_AAPCS_BASED
26061 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26062 return;
26064 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26065 is exported. However, on systems without dynamic vague linkage,
26066 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26067 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26068 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26069 else
26070 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26071 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26074 static bool
26075 arm_cxx_class_data_always_comdat (void)
26077 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26078 vague linkage if the class has no key function. */
26079 return !TARGET_AAPCS_BASED;
26083 /* The EABI says __aeabi_atexit should be used to register static
26084 destructors. */
26086 static bool
26087 arm_cxx_use_aeabi_atexit (void)
26089 return TARGET_AAPCS_BASED;
26093 void
26094 arm_set_return_address (rtx source, rtx scratch)
26096 arm_stack_offsets *offsets;
26097 HOST_WIDE_INT delta;
26098 rtx addr;
26099 unsigned long saved_regs;
26101 offsets = arm_get_frame_offsets ();
26102 saved_regs = offsets->saved_regs_mask;
26104 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26105 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26106 else
26108 if (frame_pointer_needed)
26109 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26110 else
26112 /* LR will be the first saved register. */
26113 delta = offsets->outgoing_args - (offsets->frame + 4);
26116 if (delta >= 4096)
26118 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26119 GEN_INT (delta & ~4095)));
26120 addr = scratch;
26121 delta &= 4095;
26123 else
26124 addr = stack_pointer_rtx;
26126 addr = plus_constant (Pmode, addr, delta);
26128 /* The store needs to be marked as frame related in order to prevent
26129 DSE from deleting it as dead if it is based on fp. */
26130 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26131 RTX_FRAME_RELATED_P (insn) = 1;
26132 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26137 void
26138 thumb_set_return_address (rtx source, rtx scratch)
26140 arm_stack_offsets *offsets;
26141 HOST_WIDE_INT delta;
26142 HOST_WIDE_INT limit;
26143 int reg;
26144 rtx addr;
26145 unsigned long mask;
26147 emit_use (source);
26149 offsets = arm_get_frame_offsets ();
26150 mask = offsets->saved_regs_mask;
26151 if (mask & (1 << LR_REGNUM))
26153 limit = 1024;
26154 /* Find the saved regs. */
26155 if (frame_pointer_needed)
26157 delta = offsets->soft_frame - offsets->saved_args;
26158 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26159 if (TARGET_THUMB1)
26160 limit = 128;
26162 else
26164 delta = offsets->outgoing_args - offsets->saved_args;
26165 reg = SP_REGNUM;
26167 /* Allow for the stack frame. */
26168 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26169 delta -= 16;
26170 /* The link register is always the first saved register. */
26171 delta -= 4;
26173 /* Construct the address. */
26174 addr = gen_rtx_REG (SImode, reg);
26175 if (delta > limit)
26177 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26178 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26179 addr = scratch;
26181 else
26182 addr = plus_constant (Pmode, addr, delta);
26184 /* The store needs to be marked as frame related in order to prevent
26185 DSE from deleting it as dead if it is based on fp. */
26186 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26187 RTX_FRAME_RELATED_P (insn) = 1;
26188 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26190 else
26191 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26194 /* Implements target hook vector_mode_supported_p. */
26195 bool
26196 arm_vector_mode_supported_p (machine_mode mode)
26198 /* Neon also supports V2SImode, etc. listed in the clause below. */
26199 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26200 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode || mode == V2DImode))
26201 return true;
26203 if ((TARGET_NEON || TARGET_IWMMXT)
26204 && ((mode == V2SImode)
26205 || (mode == V4HImode)
26206 || (mode == V8QImode)))
26207 return true;
26209 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26210 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26211 || mode == V2HAmode))
26212 return true;
26214 return false;
26217 /* Implements target hook array_mode_supported_p. */
26219 static bool
26220 arm_array_mode_supported_p (machine_mode mode,
26221 unsigned HOST_WIDE_INT nelems)
26223 if (TARGET_NEON
26224 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26225 && (nelems >= 2 && nelems <= 4))
26226 return true;
26228 return false;
26231 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26232 registers when autovectorizing for Neon, at least until multiple vector
26233 widths are supported properly by the middle-end. */
26235 static machine_mode
26236 arm_preferred_simd_mode (machine_mode mode)
26238 if (TARGET_NEON)
26239 switch (mode)
26241 case SFmode:
26242 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26243 case SImode:
26244 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26245 case HImode:
26246 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26247 case QImode:
26248 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26249 case DImode:
26250 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26251 return V2DImode;
26252 break;
26254 default:;
26257 if (TARGET_REALLY_IWMMXT)
26258 switch (mode)
26260 case SImode:
26261 return V2SImode;
26262 case HImode:
26263 return V4HImode;
26264 case QImode:
26265 return V8QImode;
26267 default:;
26270 return word_mode;
26273 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26275 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26276 using r0-r4 for function arguments, r7 for the stack frame and don't have
26277 enough left over to do doubleword arithmetic. For Thumb-2 all the
26278 potentially problematic instructions accept high registers so this is not
26279 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
26280 that require many low registers. */
26281 static bool
26282 arm_class_likely_spilled_p (reg_class_t rclass)
26284 if ((TARGET_THUMB1 && rclass == LO_REGS)
26285 || rclass == CC_REG)
26286 return true;
26288 return false;
26291 /* Implements target hook small_register_classes_for_mode_p. */
26292 bool
26293 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
26295 return TARGET_THUMB1;
26298 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
26299 ARM insns and therefore guarantee that the shift count is modulo 256.
26300 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
26301 guarantee no particular behavior for out-of-range counts. */
26303 static unsigned HOST_WIDE_INT
26304 arm_shift_truncation_mask (machine_mode mode)
26306 return mode == SImode ? 255 : 0;
26310 /* Map internal gcc register numbers to DWARF2 register numbers. */
26312 unsigned int
26313 arm_dbx_register_number (unsigned int regno)
26315 if (regno < 16)
26316 return regno;
26318 if (IS_VFP_REGNUM (regno))
26320 /* See comment in arm_dwarf_register_span. */
26321 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26322 return 64 + regno - FIRST_VFP_REGNUM;
26323 else
26324 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
26327 if (IS_IWMMXT_GR_REGNUM (regno))
26328 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
26330 if (IS_IWMMXT_REGNUM (regno))
26331 return 112 + regno - FIRST_IWMMXT_REGNUM;
26333 gcc_unreachable ();
26336 /* Dwarf models VFPv3 registers as 32 64-bit registers.
26337 GCC models them as 64 32-bit registers, so we need to describe this to
26338 the DWARF generation code. Other registers can use the default. */
26339 static rtx
26340 arm_dwarf_register_span (rtx rtl)
26342 machine_mode mode;
26343 unsigned regno;
26344 rtx parts[16];
26345 int nregs;
26346 int i;
26348 regno = REGNO (rtl);
26349 if (!IS_VFP_REGNUM (regno))
26350 return NULL_RTX;
26352 /* XXX FIXME: The EABI defines two VFP register ranges:
26353 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
26354 256-287: D0-D31
26355 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
26356 corresponding D register. Until GDB supports this, we shall use the
26357 legacy encodings. We also use these encodings for D0-D15 for
26358 compatibility with older debuggers. */
26359 mode = GET_MODE (rtl);
26360 if (GET_MODE_SIZE (mode) < 8)
26361 return NULL_RTX;
26363 if (VFP_REGNO_OK_FOR_SINGLE (regno))
26365 nregs = GET_MODE_SIZE (mode) / 4;
26366 for (i = 0; i < nregs; i += 2)
26367 if (TARGET_BIG_END)
26369 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
26370 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
26372 else
26374 parts[i] = gen_rtx_REG (SImode, regno + i);
26375 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
26378 else
26380 nregs = GET_MODE_SIZE (mode) / 8;
26381 for (i = 0; i < nregs; i++)
26382 parts[i] = gen_rtx_REG (DImode, regno + i);
26385 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs, parts));
26388 #if ARM_UNWIND_INFO
26389 /* Emit unwind directives for a store-multiple instruction or stack pointer
26390 push during alignment.
26391 These should only ever be generated by the function prologue code, so
26392 expect them to have a particular form.
26393 The store-multiple instruction sometimes pushes pc as the last register,
26394 although it should not be tracked into unwind information, or for -Os
26395 sometimes pushes some dummy registers before the first register that needs
26396 to be tracked in unwind information; such dummy registers are there just
26397 to avoid separate stack adjustment, and will not be restored in the
26398 epilogue. */
26400 static void
26401 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
26403 int i;
26404 HOST_WIDE_INT offset;
26405 HOST_WIDE_INT nregs;
26406 int reg_size;
26407 unsigned reg;
26408 unsigned lastreg;
26409 unsigned padfirst = 0, padlast = 0;
26410 rtx e;
26412 e = XVECEXP (p, 0, 0);
26413 gcc_assert (GET_CODE (e) == SET);
26415 /* First insn will adjust the stack pointer. */
26416 gcc_assert (GET_CODE (e) == SET
26417 && REG_P (SET_DEST (e))
26418 && REGNO (SET_DEST (e)) == SP_REGNUM
26419 && GET_CODE (SET_SRC (e)) == PLUS);
26421 offset = -INTVAL (XEXP (SET_SRC (e), 1));
26422 nregs = XVECLEN (p, 0) - 1;
26423 gcc_assert (nregs);
26425 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
26426 if (reg < 16)
26428 /* For -Os dummy registers can be pushed at the beginning to
26429 avoid separate stack pointer adjustment. */
26430 e = XVECEXP (p, 0, 1);
26431 e = XEXP (SET_DEST (e), 0);
26432 if (GET_CODE (e) == PLUS)
26433 padfirst = INTVAL (XEXP (e, 1));
26434 gcc_assert (padfirst == 0 || optimize_size);
26435 /* The function prologue may also push pc, but not annotate it as it is
26436 never restored. We turn this into a stack pointer adjustment. */
26437 e = XVECEXP (p, 0, nregs);
26438 e = XEXP (SET_DEST (e), 0);
26439 if (GET_CODE (e) == PLUS)
26440 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
26441 else
26442 padlast = offset - 4;
26443 gcc_assert (padlast == 0 || padlast == 4);
26444 if (padlast == 4)
26445 fprintf (asm_out_file, "\t.pad #4\n");
26446 reg_size = 4;
26447 fprintf (asm_out_file, "\t.save {");
26449 else if (IS_VFP_REGNUM (reg))
26451 reg_size = 8;
26452 fprintf (asm_out_file, "\t.vsave {");
26454 else
26455 /* Unknown register type. */
26456 gcc_unreachable ();
26458 /* If the stack increment doesn't match the size of the saved registers,
26459 something has gone horribly wrong. */
26460 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
26462 offset = padfirst;
26463 lastreg = 0;
26464 /* The remaining insns will describe the stores. */
26465 for (i = 1; i <= nregs; i++)
26467 /* Expect (set (mem <addr>) (reg)).
26468 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
26469 e = XVECEXP (p, 0, i);
26470 gcc_assert (GET_CODE (e) == SET
26471 && MEM_P (SET_DEST (e))
26472 && REG_P (SET_SRC (e)));
26474 reg = REGNO (SET_SRC (e));
26475 gcc_assert (reg >= lastreg);
26477 if (i != 1)
26478 fprintf (asm_out_file, ", ");
26479 /* We can't use %r for vfp because we need to use the
26480 double precision register names. */
26481 if (IS_VFP_REGNUM (reg))
26482 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
26483 else
26484 asm_fprintf (asm_out_file, "%r", reg);
26486 #ifdef ENABLE_CHECKING
26487 /* Check that the addresses are consecutive. */
26488 e = XEXP (SET_DEST (e), 0);
26489 if (GET_CODE (e) == PLUS)
26490 gcc_assert (REG_P (XEXP (e, 0))
26491 && REGNO (XEXP (e, 0)) == SP_REGNUM
26492 && CONST_INT_P (XEXP (e, 1))
26493 && offset == INTVAL (XEXP (e, 1)));
26494 else
26495 gcc_assert (i == 1
26496 && REG_P (e)
26497 && REGNO (e) == SP_REGNUM);
26498 offset += reg_size;
26499 #endif
26501 fprintf (asm_out_file, "}\n");
26502 if (padfirst)
26503 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
26506 /* Emit unwind directives for a SET. */
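/* For example, a single-register push such as
       (set (mem (pre_dec (reg sp))) (reg r4))
   becomes ".save {r4}", while a stack adjustment
       (set (reg sp) (plus (reg sp) (const_int -8)))
   becomes ".pad #8".  (Illustrative, simplified RTL.)  */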
26508 static void
26509 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
26511 rtx e0;
26512 rtx e1;
26513 unsigned reg;
26515 e0 = XEXP (p, 0);
26516 e1 = XEXP (p, 1);
26517 switch (GET_CODE (e0))
26519 case MEM:
26520 /* Pushing a single register. */
26521 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
26522 || !REG_P (XEXP (XEXP (e0, 0), 0))
26523 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
26524 abort ();
26526 asm_fprintf (asm_out_file, "\t.save ");
26527 if (IS_VFP_REGNUM (REGNO (e1)))
26528 asm_fprintf(asm_out_file, "{d%d}\n",
26529 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
26530 else
26531 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
26532 break;
26534 case REG:
26535 if (REGNO (e0) == SP_REGNUM)
26537 /* A stack increment. */
26538 if (GET_CODE (e1) != PLUS
26539 || !REG_P (XEXP (e1, 0))
26540 || REGNO (XEXP (e1, 0)) != SP_REGNUM
26541 || !CONST_INT_P (XEXP (e1, 1)))
26542 abort ();
26544 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
26545 -INTVAL (XEXP (e1, 1)));
26547 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
26549 HOST_WIDE_INT offset;
26551 if (GET_CODE (e1) == PLUS)
26553 if (!REG_P (XEXP (e1, 0))
26554 || !CONST_INT_P (XEXP (e1, 1)))
26555 abort ();
26556 reg = REGNO (XEXP (e1, 0));
26557 offset = INTVAL (XEXP (e1, 1));
26558 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
26559 HARD_FRAME_POINTER_REGNUM, reg,
26560 offset);
26562 else if (REG_P (e1))
26564 reg = REGNO (e1);
26565 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
26566 HARD_FRAME_POINTER_REGNUM, reg);
26568 else
26569 abort ();
26571 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
26573 /* Move from sp to reg. */
26574 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
26576 else if (GET_CODE (e1) == PLUS
26577 && REG_P (XEXP (e1, 0))
26578 && REGNO (XEXP (e1, 0)) == SP_REGNUM
26579 && CONST_INT_P (XEXP (e1, 1)))
26581 /* Set reg to offset from sp. */
26582 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
26583 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
26585 else
26586 abort ();
26587 break;
26589 default:
26590 abort ();
26595 /* Emit unwind directives for the given insn. */
26597 static void
26598 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
26600 rtx note, pat;
26601 bool handled_one = false;
26603 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26604 return;
26606 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26607 && (TREE_NOTHROW (current_function_decl)
26608 || crtl->all_throwers_are_sibcalls))
26609 return;
26611 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
26612 return;
26614 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
26616 switch (REG_NOTE_KIND (note))
26618 case REG_FRAME_RELATED_EXPR:
26619 pat = XEXP (note, 0);
26620 goto found;
26622 case REG_CFA_REGISTER:
26623 pat = XEXP (note, 0);
26624 if (pat == NULL)
26626 pat = PATTERN (insn);
26627 if (GET_CODE (pat) == PARALLEL)
26628 pat = XVECEXP (pat, 0, 0);
26631 /* Only emitted for IS_STACKALIGN re-alignment. */
26633 rtx dest, src;
26634 unsigned reg;
26636 src = SET_SRC (pat);
26637 dest = SET_DEST (pat);
26639 gcc_assert (src == stack_pointer_rtx);
26640 reg = REGNO (dest);
26641 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
26642 reg + 0x90, reg);
26644 handled_one = true;
26645 break;
26647 /* The INSN is generated in the epilogue.  It is set as RTX_FRAME_RELATED_P
26648 to get correct dwarf information for shrink-wrap. We should not
26649 emit unwind information for it because these are used either for
26650 pretend arguments or notes to adjust sp and restore registers from
26651 stack. */
26652 case REG_CFA_DEF_CFA:
26653 case REG_CFA_ADJUST_CFA:
26654 case REG_CFA_RESTORE:
26655 return;
26657 case REG_CFA_EXPRESSION:
26658 case REG_CFA_OFFSET:
26659 /* ??? Only handling here what we actually emit. */
26660 gcc_unreachable ();
26662 default:
26663 break;
26666 if (handled_one)
26667 return;
26668 pat = PATTERN (insn);
26669 found:
26671 switch (GET_CODE (pat))
26673 case SET:
26674 arm_unwind_emit_set (asm_out_file, pat);
26675 break;
26677 case SEQUENCE:
26678 /* Store multiple. */
26679 arm_unwind_emit_sequence (asm_out_file, pat);
26680 break;
26682 default:
26683 abort();
26688 /* Output a reference from a function exception table to the type_info
26689 object X. The EABI specifies that the symbol should be relocated by
26690 an R_ARM_TARGET2 relocation. */
26692 static bool
26693 arm_output_ttype (rtx x)
26695 fputs ("\t.word\t", asm_out_file);
26696 output_addr_const (asm_out_file, x);
26697 /* Use special relocations for symbol references. */
26698 if (!CONST_INT_P (x))
26699 fputs ("(TARGET2)", asm_out_file);
26700 fputc ('\n', asm_out_file);
26702 return TRUE;
26705 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
26707 static void
26708 arm_asm_emit_except_personality (rtx personality)
26710 fputs ("\t.personality\t", asm_out_file);
26711 output_addr_const (asm_out_file, personality);
26712 fputc ('\n', asm_out_file);
26715 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
26717 static void
26718 arm_asm_init_sections (void)
26720 exception_section = get_unnamed_section (0, output_section_asm_op,
26721 "\t.handlerdata");
26723 #endif /* ARM_UNWIND_INFO */
26725 /* Output unwind directives for the start/end of a function. */
26727 void
26728 arm_output_fn_unwind (FILE * f, bool prologue)
26730 if (arm_except_unwind_info (&global_options) != UI_TARGET)
26731 return;
26733 if (prologue)
26734 fputs ("\t.fnstart\n", f);
26735 else
26737 /* If this function will never be unwound, then mark it as such.
26738 The same condition is used in arm_unwind_emit to suppress
26739 the frame annotations. */
26740 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
26741 && (TREE_NOTHROW (current_function_decl)
26742 || crtl->all_throwers_are_sibcalls))
26743 fputs("\t.cantunwind\n", f);
26745 fputs ("\t.fnend\n", f);
26749 static bool
26750 arm_emit_tls_decoration (FILE *fp, rtx x)
26752 enum tls_reloc reloc;
26753 rtx val;
26755 val = XVECEXP (x, 0, 0);
26756 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
26758 output_addr_const (fp, val);
26760 switch (reloc)
26762 case TLS_GD32:
26763 fputs ("(tlsgd)", fp);
26764 break;
26765 case TLS_LDM32:
26766 fputs ("(tlsldm)", fp);
26767 break;
26768 case TLS_LDO32:
26769 fputs ("(tlsldo)", fp);
26770 break;
26771 case TLS_IE32:
26772 fputs ("(gottpoff)", fp);
26773 break;
26774 case TLS_LE32:
26775 fputs ("(tpoff)", fp);
26776 break;
26777 case TLS_DESCSEQ:
26778 fputs ("(tlsdesc)", fp);
26779 break;
26780 default:
26781 gcc_unreachable ();
26784 switch (reloc)
26786 case TLS_GD32:
26787 case TLS_LDM32:
26788 case TLS_IE32:
26789 case TLS_DESCSEQ:
26790 fputs (" + (. - ", fp);
26791 output_addr_const (fp, XVECEXP (x, 0, 2));
26792 /* For DESCSEQ the 3rd operand encodes thumbness, and is added rather than subtracted. */
26793 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
26794 output_addr_const (fp, XVECEXP (x, 0, 3));
26795 fputc (')', fp);
26796 break;
26797 default:
26798 break;
26801 return TRUE;
26804 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
26806 static void
26807 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
26809 gcc_assert (size == 4);
26810 fputs ("\t.word\t", file);
26811 output_addr_const (file, x);
26812 fputs ("(tlsldo)", file);
26815 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
26817 static bool
26818 arm_output_addr_const_extra (FILE *fp, rtx x)
26820 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
26821 return arm_emit_tls_decoration (fp, x);
26822 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
26824 char label[256];
26825 int labelno = INTVAL (XVECEXP (x, 0, 0));
26827 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
26828 assemble_name_raw (fp, label);
26830 return TRUE;
26832 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
26834 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
26835 if (GOT_PCREL)
26836 fputs ("+.", fp);
26837 fputs ("-(", fp);
26838 output_addr_const (fp, XVECEXP (x, 0, 0));
26839 fputc (')', fp);
26840 return TRUE;
26842 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
26844 output_addr_const (fp, XVECEXP (x, 0, 0));
26845 if (GOT_PCREL)
26846 fputs ("+.", fp);
26847 fputs ("-(", fp);
26848 output_addr_const (fp, XVECEXP (x, 0, 1));
26849 fputc (')', fp);
26850 return TRUE;
26852 else if (GET_CODE (x) == CONST_VECTOR)
26853 return arm_emit_vector_const (fp, x);
26855 return FALSE;
26858 /* Output assembly for a shift instruction.
26859 SET_FLAGS determines how the instruction modifies the condition codes.
26860 0 - Do not set condition codes.
26861 1 - Set condition codes.
26862 2 - Use smallest instruction. */
26863 const char *
26864 arm_output_shift(rtx * operands, int set_flags)
26866 char pattern[100];
26867 static const char flag_chars[3] = {'?', '.', '!'};
26868 const char *shift;
26869 HOST_WIDE_INT val;
26870 char c;
26872 c = flag_chars[set_flags];
26873 if (TARGET_UNIFIED_ASM)
26875 shift = shift_op(operands[3], &val);
26876 if (shift)
26878 if (val != -1)
26879 operands[2] = GEN_INT(val);
26880 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
26882 else
26883 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
26885 else
26886 sprintf (pattern, "mov%%%c\t%%0, %%1%%S3", c);
26887 output_asm_insn (pattern, operands);
26888 return "";
26891 /* Output assembly for a WMMX immediate shift instruction. */
26892 const char *
26893 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
26895 int shift = INTVAL (operands[2]);
26896 char templ[50];
26897 machine_mode opmode = GET_MODE (operands[0]);
26899 gcc_assert (shift >= 0);
26901 /* If the shift count exceeds the maximum for the mode (63 for the D
26902 qualifier, 31 for W, 15 for H), wror/wsra are emitted as #32 shifts and any other operation just zeroes the destination. */
26903 if (((opmode == V4HImode) && (shift > 15))
26904 || ((opmode == V2SImode) && (shift > 31))
26905 || ((opmode == DImode) && (shift > 63)))
26907 if (wror_or_wsra)
26909 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
26910 output_asm_insn (templ, operands);
26911 if (opmode == DImode)
26913 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
26914 output_asm_insn (templ, operands);
26917 else
26919 /* The destination register will contain all zeros. */
26920 sprintf (templ, "wzero\t%%0");
26921 output_asm_insn (templ, operands);
26923 return "";
26926 if ((opmode == DImode) && (shift > 32))
26928 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
26929 output_asm_insn (templ, operands);
26930 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
26931 output_asm_insn (templ, operands);
26933 else
26935 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
26936 output_asm_insn (templ, operands);
26938 return "";
26941 /* Output assembly for a WMMX tinsr instruction. */
26942 const char *
26943 arm_output_iwmmxt_tinsr (rtx *operands)
26945 int mask = INTVAL (operands[3]);
26946 int i;
26947 char templ[50];
26948 int units = mode_nunits[GET_MODE (operands[0])];
26949 gcc_assert ((mask & (mask - 1)) == 0);
26950 for (i = 0; i < units; ++i)
26952 if ((mask & 0x01) == 1)
26954 break;
26956 mask >>= 1;
26958 gcc_assert (i < units);
26960 switch (GET_MODE (operands[0]))
26962 case V8QImode:
26963 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
26964 break;
26965 case V4HImode:
26966 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
26967 break;
26968 case V2SImode:
26969 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
26970 break;
26971 default:
26972 gcc_unreachable ();
26973 break;
26975 output_asm_insn (templ, operands);
26977 return "";
26980 /* Output a Thumb-1 casesi dispatch sequence. */
26981 const char *
26982 thumb1_output_casesi (rtx *operands)
26984 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
26986 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
26988 switch (GET_MODE(diff_vec))
26990 case QImode:
26991 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
26992 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
26993 case HImode:
26994 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
26995 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
26996 case SImode:
26997 return "bl\t%___gnu_thumb1_case_si";
26998 default:
26999 gcc_unreachable ();
27003 /* Output a Thumb-2 casesi instruction. */
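/* After the bounds check, dispatch uses the compact tbb/tbh table-branch
   instructions when the jump-table entries are byte or halfword offsets;
   for word-sized entries the target address is loaded explicitly, adding
   the table base when generating PIC.  */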
27004 const char *
27005 thumb2_output_casesi (rtx *operands)
27007 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27009 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27011 output_asm_insn ("cmp\t%0, %1", operands);
27012 output_asm_insn ("bhi\t%l3", operands);
27013 switch (GET_MODE(diff_vec))
27015 case QImode:
27016 return "tbb\t[%|pc, %0]";
27017 case HImode:
27018 return "tbh\t[%|pc, %0, lsl #1]";
27019 case SImode:
27020 if (flag_pic)
27022 output_asm_insn ("adr\t%4, %l2", operands);
27023 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27024 output_asm_insn ("add\t%4, %4, %5", operands);
27025 return "bx\t%4";
27027 else
27029 output_asm_insn ("adr\t%4, %l2", operands);
27030 return "ldr\t%|pc, [%4, %0, lsl #2]";
27032 default:
27033 gcc_unreachable ();
27037 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27038 per-core tuning structs. */
27039 static int
27040 arm_issue_rate (void)
27042 return current_tune->issue_rate;
27045 /* Return how many instructions the scheduler should look ahead to choose
27046 the best one. */
27047 static int
27048 arm_first_cycle_multipass_dfa_lookahead (void)
27050 int issue_rate = arm_issue_rate ();
27052 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27055 /* Enable modeling of L2 auto-prefetcher. */
27056 static int
27057 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27059 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
27062 const char *
27063 arm_mangle_type (const_tree type)
27065 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27066 has to be mangled as if it were in the "std" namespace. */
27067 if (TARGET_AAPCS_BASED
27068 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27069 return "St9__va_list";
27071 /* Half-precision float. */
27072 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27073 return "Dh";
27075 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27076 builtin type. */
27077 if (TYPE_NAME (type) != NULL)
27078 return arm_mangle_builtin_type (type);
27080 /* Use the default mangling. */
27081 return NULL;
27084 /* Order of allocation of core registers for Thumb: this allocation is
27085 written over the corresponding initial entries of the array
27086 initialized with REG_ALLOC_ORDER. We allocate all low registers
27087 first. Saving and restoring a low register is usually cheaper than
27088 using a call-clobbered high register. */
27090 static const int thumb_core_reg_alloc_order[] =
27092 3, 2, 1, 0, 4, 5, 6, 7,
27093 14, 12, 8, 9, 10, 11
27096 /* Adjust register allocation order when compiling for Thumb. */
27098 void
27099 arm_order_regs_for_local_alloc (void)
27101 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27102 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27103 if (TARGET_THUMB)
27104 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27105 sizeof (thumb_core_reg_alloc_order));
27108 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27110 bool
27111 arm_frame_pointer_required (void)
27113 return (cfun->has_nonlocal_label
27114 || SUBTARGET_FRAME_POINTER_REQUIRED
27115 || (TARGET_ARM && TARGET_APCS_FRAME && ! leaf_function_p ()));
27118 /* Only thumb1 can't support conditional execution, so return true if
27119 the target is not thumb1. */
27120 static bool
27121 arm_have_conditional_execution (void)
27123 return !TARGET_THUMB1;
27126 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27127 static HOST_WIDE_INT
27128 arm_vector_alignment (const_tree type)
27130 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27132 if (TARGET_AAPCS_BASED)
27133 align = MIN (align, 64);
27135 return align;
27138 static unsigned int
27139 arm_autovectorize_vector_sizes (void)
27141 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27144 static bool
27145 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27147 /* Vectors which aren't in packed structures will not be less aligned than
27148 the natural alignment of their element type, so this is safe. */
27149 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27150 return !is_packed;
27152 return default_builtin_vector_alignment_reachable (type, is_packed);
27155 static bool
27156 arm_builtin_support_vector_misalignment (machine_mode mode,
27157 const_tree type, int misalignment,
27158 bool is_packed)
27160 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27162 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27164 if (is_packed)
27165 return align == 1;
27167 /* If the misalignment is unknown, we should be able to handle the access
27168 so long as it is not to a member of a packed data structure. */
27169 if (misalignment == -1)
27170 return true;
27172 /* Return true if the misalignment is a multiple of the natural alignment
27173 of the vector's element type. This is probably always going to be
27174 true in practice, since we've already established that this isn't a
27175 packed access. */
27176 return ((misalignment % align) == 0);
27179 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27180 is_packed);
27183 static void
27184 arm_conditional_register_usage (void)
27186 int regno;
27188 if (TARGET_THUMB1 && optimize_size)
27190 /* When optimizing for size on Thumb-1, it's better not
27191 to use the HI regs, because of the overhead of
27192 stacking them. */
27193 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
27194 fixed_regs[regno] = call_used_regs[regno] = 1;
27197 /* The link register can be clobbered by any branch insn,
27198 but we have no way to track that at present, so mark
27199 it as unavailable. */
27200 if (TARGET_THUMB1)
27201 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27203 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_VFP)
27205 /* VFPv3 registers are disabled when earlier VFP
27206 versions are selected due to the definition of
27207 LAST_VFP_REGNUM. */
27208 for (regno = FIRST_VFP_REGNUM;
27209 regno <= LAST_VFP_REGNUM; ++ regno)
27211 fixed_regs[regno] = 0;
27212 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27213 || regno >= FIRST_VFP_REGNUM + 32;
27217 if (TARGET_REALLY_IWMMXT)
27219 regno = FIRST_IWMMXT_GR_REGNUM;
27220 /* The 2002/10/09 revision of the XScale ABI has wCG0
27221 and wCG1 as call-preserved registers. The 2002/11/21
27222 revision changed this so that all wCG registers are
27223 scratch registers. */
27224 for (regno = FIRST_IWMMXT_GR_REGNUM;
27225 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27226 fixed_regs[regno] = 0;
27227 /* The XScale ABI has wR0 - wR9 as scratch registers,
27228 the rest as call-preserved registers. */
27229 for (regno = FIRST_IWMMXT_REGNUM;
27230 regno <= LAST_IWMMXT_REGNUM; ++ regno)
27232 fixed_regs[regno] = 0;
27233 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
27237 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
27239 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27240 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
27242 else if (TARGET_APCS_STACK)
27244 fixed_regs[10] = 1;
27245 call_used_regs[10] = 1;
27247 /* -mcaller-super-interworking reserves r11 for calls to
27248 _interwork_r11_call_via_rN(). Making the register global
27249 is an easy way of ensuring that it remains valid for all
27250 calls. */
27251 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
27252 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
27254 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27255 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27256 if (TARGET_CALLER_INTERWORKING)
27257 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
27259 SUBTARGET_CONDITIONAL_REGISTER_USAGE
27262 static reg_class_t
27263 arm_preferred_rename_class (reg_class_t rclass)
27265 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
27266 using GENERAL_REGS.  During the register rename pass we prefer LO_REGS,
27267 which can reduce code size. */
27268 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
27269 return LO_REGS;
27270 else
27271 return NO_REGS;
27274 /* Compute the attribute "length" of insn "*push_multi".
27275 So this function MUST be kept in sync with that insn pattern. */
27277 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
27279 int i, regno, hi_reg;
27280 int num_saves = XVECLEN (parallel_op, 0);
27282 /* ARM mode. */
27283 if (TARGET_ARM)
27284 return 4;
27285 /* Thumb1 mode. */
27286 if (TARGET_THUMB1)
27287 return 2;
27289 /* Thumb2 mode. */
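/* A 16-bit push encoding can only name r0-r7 and lr, so any other high
   register in the list forces the 32-bit encoding (length 4).  */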
27290 regno = REGNO (first_op);
27291 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27292 for (i = 1; i < num_saves && !hi_reg; i++)
27294 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
27295 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
27298 if (!hi_reg)
27299 return 2;
27300 return 4;
27303 /* Compute the number of instructions emitted by output_move_double. */
27305 arm_count_output_move_double_insns (rtx *operands)
27307 int count;
27308 rtx ops[2];
27309 /* output_move_double may modify the operands array, so call it
27310 here on a copy of the array. */
27311 ops[0] = operands[0];
27312 ops[1] = operands[1];
27313 output_move_double (ops, false, &count);
27314 return count;
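/* If OPERAND is a CONST_DOUBLE whose reciprocal is exactly a power of two
   that fits in 32 bits, return the base-2 log of that reciprocal (i.e. the
   number of fractional bits); otherwise return 0.  */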
27318 vfp3_const_double_for_fract_bits (rtx operand)
27320 REAL_VALUE_TYPE r0;
27322 if (!CONST_DOUBLE_P (operand))
27323 return 0;
27325 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
27326 if (exact_real_inverse (DFmode, &r0))
27328 if (exact_real_truncate (DFmode, &r0))
27330 HOST_WIDE_INT value = real_to_integer (&r0);
27331 value = value & 0xffffffff;
27332 if ((value != 0) && ( (value & (value - 1)) == 0))
27333 return int_log2 (value);
27336 return 0;
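/* As above, but without taking the reciprocal: return the base-2 log of
   OPERAND itself when it is exactly a 32-bit power of two, else 0.  */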
27340 vfp3_const_double_for_bits (rtx operand)
27342 REAL_VALUE_TYPE r0;
27344 if (!CONST_DOUBLE_P (operand))
27345 return 0;
27347 REAL_VALUE_FROM_CONST_DOUBLE (r0, operand);
27348 if (exact_real_truncate (DFmode, &r0))
27350 HOST_WIDE_INT value = real_to_integer (&r0);
27351 value = value & 0xffffffff;
27352 if ((value != 0) && ( (value & (value - 1)) == 0))
27353 return int_log2 (value);
27356 return 0;
27359 /* Emit a memory barrier around an atomic sequence according to MODEL. */
27361 static void
27362 arm_pre_atomic_barrier (enum memmodel model)
27364 if (need_atomic_barrier_p (model, true))
27365 emit_insn (gen_memory_barrier ());
27368 static void
27369 arm_post_atomic_barrier (enum memmodel model)
27371 if (need_atomic_barrier_p (model, false))
27372 emit_insn (gen_memory_barrier ());
27375 /* Emit the load-exclusive and store-exclusive instructions.
27376 Use acquire and release versions if necessary. */
27378 static void
27379 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
27381 rtx (*gen) (rtx, rtx);
27383 if (acq)
27385 switch (mode)
27387 case QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
27388 case HImode: gen = gen_arm_load_acquire_exclusivehi; break;
27389 case SImode: gen = gen_arm_load_acquire_exclusivesi; break;
27390 case DImode: gen = gen_arm_load_acquire_exclusivedi; break;
27391 default:
27392 gcc_unreachable ();
27395 else
27397 switch (mode)
27399 case QImode: gen = gen_arm_load_exclusiveqi; break;
27400 case HImode: gen = gen_arm_load_exclusivehi; break;
27401 case SImode: gen = gen_arm_load_exclusivesi; break;
27402 case DImode: gen = gen_arm_load_exclusivedi; break;
27403 default:
27404 gcc_unreachable ();
27408 emit_insn (gen (rval, mem));
27411 static void
27412 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
27413 rtx mem, bool rel)
27415 rtx (*gen) (rtx, rtx, rtx);
27417 if (rel)
27419 switch (mode)
27421 case QImode: gen = gen_arm_store_release_exclusiveqi; break;
27422 case HImode: gen = gen_arm_store_release_exclusivehi; break;
27423 case SImode: gen = gen_arm_store_release_exclusivesi; break;
27424 case DImode: gen = gen_arm_store_release_exclusivedi; break;
27425 default:
27426 gcc_unreachable ();
27429 else
27431 switch (mode)
27433 case QImode: gen = gen_arm_store_exclusiveqi; break;
27434 case HImode: gen = gen_arm_store_exclusivehi; break;
27435 case SImode: gen = gen_arm_store_exclusivesi; break;
27436 case DImode: gen = gen_arm_store_exclusivedi; break;
27437 default:
27438 gcc_unreachable ();
27442 emit_insn (gen (bval, rval, mem));
27445 /* Mark the previous jump instruction as unlikely. */
27447 static void
27448 emit_unlikely_jump (rtx insn)
27450 int very_unlikely = REG_BR_PROB_BASE / 100 - 1;
27452 insn = emit_jump_insn (insn);
27453 add_int_reg_note (insn, REG_BR_PROB, very_unlikely);
27456 /* Expand a compare and swap pattern. */
27458 void
27459 arm_expand_compare_and_swap (rtx operands[])
27461 rtx bval, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
27462 machine_mode mode;
27463 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx);
27465 bval = operands[0];
27466 rval = operands[1];
27467 mem = operands[2];
27468 oldval = operands[3];
27469 newval = operands[4];
27470 is_weak = operands[5];
27471 mod_s = operands[6];
27472 mod_f = operands[7];
27473 mode = GET_MODE (mem);
27475 /* Normally the succ memory model must be stronger than fail, but in the
27476 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
27477 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
27479 if (TARGET_HAVE_LDACQ
27480 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
27481 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
27482 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
27484 switch (mode)
27486 case QImode:
27487 case HImode:
27488 /* For narrow modes, we're going to perform the comparison in SImode,
27489 so do the zero-extension now. */
27490 rval = gen_reg_rtx (SImode);
27491 oldval = convert_modes (SImode, mode, oldval, true);
27492 /* FALLTHRU */
27494 case SImode:
27495 /* Force the value into a register if needed. We waited until after
27496 the zero-extension above to do this properly. */
27497 if (!arm_add_operand (oldval, SImode))
27498 oldval = force_reg (SImode, oldval);
27499 break;
27501 case DImode:
27502 if (!cmpdi_operand (oldval, mode))
27503 oldval = force_reg (mode, oldval);
27504 break;
27506 default:
27507 gcc_unreachable ();
27510 switch (mode)
27512 case QImode: gen = gen_atomic_compare_and_swapqi_1; break;
27513 case HImode: gen = gen_atomic_compare_and_swaphi_1; break;
27514 case SImode: gen = gen_atomic_compare_and_swapsi_1; break;
27515 case DImode: gen = gen_atomic_compare_and_swapdi_1; break;
27516 default:
27517 gcc_unreachable ();
27520 emit_insn (gen (rval, mem, oldval, newval, is_weak, mod_s, mod_f));
27522 if (mode == QImode || mode == HImode)
27523 emit_move_insn (operands[1], gen_lowpart (mode, rval));
27525 /* In all cases, we arrange for success to be signaled by Z set.
27526 This arrangement allows for the boolean result to be used directly
27527 in a subsequent branch, post optimization. */
27528 x = gen_rtx_REG (CCmode, CC_REGNUM);
27529 x = gen_rtx_EQ (SImode, x, const0_rtx);
27530 emit_insn (gen_rtx_SET (bval, x));
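/* An illustrative source-level use of the operation expanded above; the
   function and variable names are hypothetical.  The boolean result of the
   builtin corresponds to the Z-flag convention arranged at the end of
   arm_expand_compare_and_swap.  */
static inline int ATTRIBUTE_UNUSED
cas_example (int *shared, int old_value, int new_value)
{
  int expected = old_value;
  return __atomic_compare_exchange_n (shared, &expected, new_value,
				      0 /* strong */, __ATOMIC_SEQ_CST,
				      __ATOMIC_SEQ_CST);
}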
27533 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
27534 another memory store between the load-exclusive and store-exclusive can
27535 reset the monitor from Exclusive to Open state. This means we must wait
27536 until after reload to split the pattern, lest we get a register spill in
27537 the middle of the atomic sequence. */
27539 void
27540 arm_split_compare_and_swap (rtx operands[])
27542 rtx rval, mem, oldval, newval, scratch;
27543 machine_mode mode;
27544 enum memmodel mod_s, mod_f;
27545 bool is_weak;
27546 rtx_code_label *label1, *label2;
27547 rtx x, cond;
27549 rval = operands[0];
27550 mem = operands[1];
27551 oldval = operands[2];
27552 newval = operands[3];
27553 is_weak = (operands[4] != const0_rtx);
27554 mod_s = memmodel_from_int (INTVAL (operands[5]));
27555 mod_f = memmodel_from_int (INTVAL (operands[6]));
27556 scratch = operands[7];
27557 mode = GET_MODE (mem);
27559 bool use_acquire = TARGET_HAVE_LDACQ
27560 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
27561 || is_mm_release (mod_s));
27563 bool use_release = TARGET_HAVE_LDACQ
27564 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
27565 || is_mm_acquire (mod_s));
27567 /* Checks whether a barrier is needed and emits one accordingly. */
27568 if (!(use_acquire || use_release))
27569 arm_pre_atomic_barrier (mod_s);
27571 label1 = NULL;
27572 if (!is_weak)
27574 label1 = gen_label_rtx ();
27575 emit_label (label1);
27577 label2 = gen_label_rtx ();
27579 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
27581 cond = arm_gen_compare_reg (NE, rval, oldval, scratch);
27582 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27583 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27584 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
27585 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
27587 arm_emit_store_exclusive (mode, scratch, mem, newval, use_release);
27589 /* Weak or strong, we want EQ to be true for success, so that we
27590 match the flags that we got from the compare above. */
27591 cond = gen_rtx_REG (CCmode, CC_REGNUM);
27592 x = gen_rtx_COMPARE (CCmode, scratch, const0_rtx);
27593 emit_insn (gen_rtx_SET (cond, x));
27595 if (!is_weak)
27597 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27598 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
27599 gen_rtx_LABEL_REF (Pmode, label1), pc_rtx);
27600 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
27603 if (!is_mm_relaxed (mod_f))
27604 emit_label (label2);
27606 /* Checks whether a barrier is needed and emits one accordingly. */
27607 if (!(use_acquire || use_release))
27608 arm_post_atomic_barrier (mod_s);
27610 if (is_mm_relaxed (mod_f))
27611 emit_label (label2);
27614 void
27615 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
27616 rtx value, rtx model_rtx, rtx cond)
27618 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
27619 machine_mode mode = GET_MODE (mem);
27620 machine_mode wmode = (mode == DImode ? DImode : SImode);
27621 rtx_code_label *label;
27622 rtx x;
27624 bool use_acquire = TARGET_HAVE_LDACQ
27625 && !(is_mm_relaxed (model) || is_mm_consume (model)
27626 || is_mm_release (model));
27628 bool use_release = TARGET_HAVE_LDACQ
27629 && !(is_mm_relaxed (model) || is_mm_consume (model)
27630 || is_mm_acquire (model));
27632 /* Checks whether a barrier is needed and emits one accordingly. */
27633 if (!(use_acquire || use_release))
27634 arm_pre_atomic_barrier (model);
27636 label = gen_label_rtx ();
27637 emit_label (label);
27639 if (new_out)
27640 new_out = gen_lowpart (wmode, new_out);
27641 if (old_out)
27642 old_out = gen_lowpart (wmode, old_out);
27643 else
27644 old_out = new_out;
27645 value = simplify_gen_subreg (wmode, value, mode, 0);
27647 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
27649 switch (code)
27651 case SET:
27652 new_out = value;
27653 break;
27655 case NOT:
27656 x = gen_rtx_AND (wmode, old_out, value);
27657 emit_insn (gen_rtx_SET (new_out, x));
27658 x = gen_rtx_NOT (wmode, new_out);
27659 emit_insn (gen_rtx_SET (new_out, x));
27660 break;
27662 case MINUS:
27663 if (CONST_INT_P (value))
27665 value = GEN_INT (-INTVAL (value));
27666 code = PLUS;
27668 /* FALLTHRU */
27670 case PLUS:
27671 if (mode == DImode)
27673 /* DImode plus/minus need to clobber flags. */
27674 /* The adddi3 and subdi3 patterns are incorrectly written so that
27675 they require matching operands, even when we could easily support
27676 three operands. Thankfully, this can be fixed up post-splitting,
27677 as the individual add+adc patterns do accept three operands and
27678 post-reload cprop can make these moves go away. */
27679 emit_move_insn (new_out, old_out);
27680 if (code == PLUS)
27681 x = gen_adddi3 (new_out, new_out, value);
27682 else
27683 x = gen_subdi3 (new_out, new_out, value);
27684 emit_insn (x);
27685 break;
27687 /* FALLTHRU */
27689 default:
27690 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
27691 emit_insn (gen_rtx_SET (new_out, x));
27692 break;
27695 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
27696 use_release);
27698 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
27699 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
27701 /* Checks whether a barrier is needed and emits one accordingly. */
27702 if (!(use_acquire || use_release))
27703 arm_post_atomic_barrier (model);
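/* An illustrative source-level operation of the kind this splitter lowers
   into a load-exclusive / operate / store-exclusive retry loop; the names
   are hypothetical.  */
static inline int ATTRIBUTE_UNUSED
fetch_add_example (int *counter)
{
  return __atomic_fetch_add (counter, 1, __ATOMIC_SEQ_CST);
}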
27706 #define MAX_VECT_LEN 16
27708 struct expand_vec_perm_d
27710 rtx target, op0, op1;
27711 unsigned char perm[MAX_VECT_LEN];
27712 machine_mode vmode;
27713 unsigned char nelt;
27714 bool one_vector_p;
27715 bool testing_p;
27718 /* Generate a variable permutation. */
27720 static void
27721 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
27723 machine_mode vmode = GET_MODE (target);
27724 bool one_vector_p = rtx_equal_p (op0, op1);
27726 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
27727 gcc_checking_assert (GET_MODE (op0) == vmode);
27728 gcc_checking_assert (GET_MODE (op1) == vmode);
27729 gcc_checking_assert (GET_MODE (sel) == vmode);
27730 gcc_checking_assert (TARGET_NEON);
27732 if (one_vector_p)
27734 if (vmode == V8QImode)
27735 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
27736 else
27737 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
27739 else
27741 rtx pair;
27743 if (vmode == V8QImode)
27745 pair = gen_reg_rtx (V16QImode);
27746 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
27747 pair = gen_lowpart (TImode, pair);
27748 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
27750 else
27752 pair = gen_reg_rtx (OImode);
27753 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
27754 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
27759 void
27760 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
27762 machine_mode vmode = GET_MODE (target);
27763 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
27764 bool one_vector_p = rtx_equal_p (op0, op1);
27765 rtx rmask[MAX_VECT_LEN], mask;
27767 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
27768 numbering of elements for big-endian, we must reverse the order. */
27769 gcc_checking_assert (!BYTES_BIG_ENDIAN);
27771 /* The VTBL instruction does not use a modulo index, so we must take care
27772 of that ourselves. */
27773 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
27774 for (i = 0; i < nelt; ++i)
27775 rmask[i] = mask;
27776 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
27777 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
27779 arm_expand_vec_perm_1 (target, op0, op1, sel);
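/* A standalone sketch of the modulo-index masking performed above before
   handing the selector to VTBL; the helper name is hypothetical.  With one
   input vector the indexes wrap at NELT, with two they wrap at 2*NELT.  */
static void ATTRIBUTE_UNUSED
mask_selector_example (unsigned char *sel, unsigned int nelt,
		       int one_vector_p)
{
  unsigned int i;
  unsigned char mask = one_vector_p ? nelt - 1 : 2 * nelt - 1;

  for (i = 0; i < nelt; i++)
    sel[i] &= mask;
}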
27782 /* Generate or test for an insn that supports a constant permutation. */
27784 /* Recognize patterns for the VUZP insns. */
27786 static bool
27787 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
27789 unsigned int i, odd, mask, nelt = d->nelt;
27790 rtx out0, out1, in0, in1, x;
27791 rtx (*gen)(rtx, rtx, rtx, rtx);
27793 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27794 return false;
27796 /* Note that these are little-endian tests. Adjust for big-endian later. */
27797 if (d->perm[0] == 0)
27798 odd = 0;
27799 else if (d->perm[0] == 1)
27800 odd = 1;
27801 else
27802 return false;
27803 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27805 for (i = 0; i < nelt; i++)
27807 unsigned elt = (i * 2 + odd) & mask;
27808 if (d->perm[i] != elt)
27809 return false;
27812 /* Success! */
27813 if (d->testing_p)
27814 return true;
27816 switch (d->vmode)
27818 case V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
27819 case V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
27820 case V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
27821 case V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
27822 case V4SImode: gen = gen_neon_vuzpv4si_internal; break;
27823 case V2SImode: gen = gen_neon_vuzpv2si_internal; break;
27824 case V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
27825 case V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
27826 default:
27827 gcc_unreachable ();
27830 in0 = d->op0;
27831 in1 = d->op1;
27832 if (BYTES_BIG_ENDIAN)
27834 x = in0, in0 = in1, in1 = x;
27835 odd = !odd;
27838 out0 = d->target;
27839 out1 = gen_reg_rtx (d->vmode);
27840 if (odd)
27841 x = out0, out0 = out1, out1 = x;
27843 emit_insn (gen (out0, in0, in1, out1));
27844 return true;
27847 /* Recognize patterns for the VZIP insns. */
27849 static bool
27850 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
27852 unsigned int i, high, mask, nelt = d->nelt;
27853 rtx out0, out1, in0, in1, x;
27854 rtx (*gen)(rtx, rtx, rtx, rtx);
27856 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27857 return false;
27859 /* Note that these are little-endian tests. Adjust for big-endian later. */
27860 high = nelt / 2;
27861 if (d->perm[0] == high)
27863 else if (d->perm[0] == 0)
27864 high = 0;
27865 else
27866 return false;
27867 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
27869 for (i = 0; i < nelt / 2; i++)
27871 unsigned elt = (i + high) & mask;
27872 if (d->perm[i * 2] != elt)
27873 return false;
27874 elt = (elt + nelt) & mask;
27875 if (d->perm[i * 2 + 1] != elt)
27876 return false;
27879 /* Success! */
27880 if (d->testing_p)
27881 return true;
27883 switch (d->vmode)
27885 case V16QImode: gen = gen_neon_vzipv16qi_internal; break;
27886 case V8QImode: gen = gen_neon_vzipv8qi_internal; break;
27887 case V8HImode: gen = gen_neon_vzipv8hi_internal; break;
27888 case V4HImode: gen = gen_neon_vzipv4hi_internal; break;
27889 case V4SImode: gen = gen_neon_vzipv4si_internal; break;
27890 case V2SImode: gen = gen_neon_vzipv2si_internal; break;
27891 case V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
27892 case V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
27893 default:
27894 gcc_unreachable ();
27897 in0 = d->op0;
27898 in1 = d->op1;
27899 if (BYTES_BIG_ENDIAN)
27901 x = in0, in0 = in1, in1 = x;
27902 high = !high;
27905 out0 = d->target;
27906 out1 = gen_reg_rtx (d->vmode);
27907 if (high)
27908 x = out0, out0 = out1, out1 = x;
27910 emit_insn (gen (out0, in0, in1, out1));
27911 return true;
27914 /* Recognize patterns for the VREV insns. */
27916 static bool
27917 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
27919 unsigned int i, j, diff, nelt = d->nelt;
27920 rtx (*gen)(rtx, rtx);
27922 if (!d->one_vector_p)
27923 return false;
27925 diff = d->perm[0];
27926 switch (diff)
27928 case 7:
27929 switch (d->vmode)
27931 case V16QImode: gen = gen_neon_vrev64v16qi; break;
27932 case V8QImode: gen = gen_neon_vrev64v8qi; break;
27933 default:
27934 return false;
27936 break;
27937 case 3:
27938 switch (d->vmode)
27940 case V16QImode: gen = gen_neon_vrev32v16qi; break;
27941 case V8QImode: gen = gen_neon_vrev32v8qi; break;
27942 case V8HImode: gen = gen_neon_vrev64v8hi; break;
27943 case V4HImode: gen = gen_neon_vrev64v4hi; break;
27944 default:
27945 return false;
27947 break;
27948 case 1:
27949 switch (d->vmode)
27951 case V16QImode: gen = gen_neon_vrev16v16qi; break;
27952 case V8QImode: gen = gen_neon_vrev16v8qi; break;
27953 case V8HImode: gen = gen_neon_vrev32v8hi; break;
27954 case V4HImode: gen = gen_neon_vrev32v4hi; break;
27955 case V4SImode: gen = gen_neon_vrev64v4si; break;
27956 case V2SImode: gen = gen_neon_vrev64v2si; break;
27957 case V4SFmode: gen = gen_neon_vrev64v4sf; break;
27958 case V2SFmode: gen = gen_neon_vrev64v2sf; break;
27959 default:
27960 return false;
27962 break;
27963 default:
27964 return false;
27967 for (i = 0; i < nelt ; i += diff + 1)
27968 for (j = 0; j <= diff; j += 1)
27970 /* This is guaranteed to be true, as the value of diff
27971 is 7, 3 or 1 and we should have enough elements to
27972 generate the permutation. Getting a vector mask whose
27973 diff is anything other than these values implies that
27974 something went wrong before we got here. */
27975 gcc_assert (i + j < nelt);
27976 if (d->perm[i + j] != i + diff - j)
27977 return false;
27980 /* Success! */
27981 if (d->testing_p)
27982 return true;
27984 emit_insn (gen (d->target, d->op0));
27985 return true;
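/* Example (illustrative): for V8QImode and diff == 3 the accepted
   permutation is { 3, 2, 1, 0, 7, 6, 5, 4 }, i.e. a VREV32.8 that
   reverses the bytes within each 32-bit group.  */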
27988 /* Recognize patterns for the VTRN insns. */
27990 static bool
27991 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
27993 unsigned int i, odd, mask, nelt = d->nelt;
27994 rtx out0, out1, in0, in1, x;
27995 rtx (*gen)(rtx, rtx, rtx, rtx);
27997 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
27998 return false;
28000 /* Note that these are little-endian tests. Adjust for big-endian later. */
28001 if (d->perm[0] == 0)
28002 odd = 0;
28003 else if (d->perm[0] == 1)
28004 odd = 1;
28005 else
28006 return false;
28007 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28009 for (i = 0; i < nelt; i += 2)
28011 if (d->perm[i] != i + odd)
28012 return false;
28013 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
28014 return false;
28017 /* Success! */
28018 if (d->testing_p)
28019 return true;
28021 switch (d->vmode)
28023 case V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
28024 case V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
28025 case V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
28026 case V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
28027 case V4SImode: gen = gen_neon_vtrnv4si_internal; break;
28028 case V2SImode: gen = gen_neon_vtrnv2si_internal; break;
28029 case V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
28030 case V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
28031 default:
28032 gcc_unreachable ();
28035 in0 = d->op0;
28036 in1 = d->op1;
28037 if (BYTES_BIG_ENDIAN)
28039 x = in0, in0 = in1, in1 = x;
28040 odd = !odd;
28043 out0 = d->target;
28044 out1 = gen_reg_rtx (d->vmode);
28045 if (odd)
28046 x = out0, out0 = out1, out1 = x;
28048 emit_insn (gen (out0, in0, in1, out1));
28049 return true;
28052 /* Recognize patterns for the VEXT insns. */
28054 static bool
28055 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
28057 unsigned int i, nelt = d->nelt;
28058 rtx (*gen) (rtx, rtx, rtx, rtx);
28059 rtx offset;
28061 unsigned int location;
28063 unsigned int next = d->perm[0] + 1;
28065 /* TODO: Handle GCC's numbering of elements for big-endian. */
28066 if (BYTES_BIG_ENDIAN)
28067 return false;
28069 /* Check if the extracted indexes are increasing by one. */
28070 for (i = 1; i < nelt; next++, i++)
28072 /* If we hit the most significant element of the 2nd vector in
28073 the previous iteration, no need to test further. */
28074 if (next == 2 * nelt)
28075 return false;
28077 /* If we are operating on only one vector: it could be a
28078 rotation. If there are only two elements of size < 64, let
28079 arm_evpc_neon_vrev catch it. */
28080 if (d->one_vector_p && (next == nelt))
28082 if ((nelt == 2) && (d->vmode != V2DImode))
28083 return false;
28084 else
28085 next = 0;
28088 if (d->perm[i] != next)
28089 return false;
28092 location = d->perm[0];
28094 switch (d->vmode)
28096 case V16QImode: gen = gen_neon_vextv16qi; break;
28097 case V8QImode: gen = gen_neon_vextv8qi; break;
28098 case V4HImode: gen = gen_neon_vextv4hi; break;
28099 case V8HImode: gen = gen_neon_vextv8hi; break;
28100 case V2SImode: gen = gen_neon_vextv2si; break;
28101 case V4SImode: gen = gen_neon_vextv4si; break;
28102 case V2SFmode: gen = gen_neon_vextv2sf; break;
28103 case V4SFmode: gen = gen_neon_vextv4sf; break;
28104 case V2DImode: gen = gen_neon_vextv2di; break;
28105 default:
28106 return false;
28109 /* Success! */
28110 if (d->testing_p)
28111 return true;
28113 offset = GEN_INT (location);
28114 emit_insn (gen (d->target, d->op0, d->op1, offset));
28115 return true;
28118 /* The NEON VTBL instruction is a fully variable permutation that's even
28119 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
28120 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
28121 can do slightly better by expanding this as a constant where we don't
28122 have to apply a mask. */
28124 static bool
28125 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
28127 rtx rperm[MAX_VECT_LEN], sel;
28128 machine_mode vmode = d->vmode;
28129 unsigned int i, nelt = d->nelt;
28131 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28132 numbering of elements for big-endian, we must reverse the order. */
28133 if (BYTES_BIG_ENDIAN)
28134 return false;
28136 if (d->testing_p)
28137 return true;
28139 /* Generic code will try constant permutation twice: once with the
28140 original mode and again with the elements lowered to QImode.
28141 So wait and don't do the selector expansion ourselves. */
28142 if (vmode != V8QImode && vmode != V16QImode)
28143 return false;
28145 for (i = 0; i < nelt; ++i)
28146 rperm[i] = GEN_INT (d->perm[i]);
28147 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
28148 sel = force_reg (vmode, sel);
28150 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
28151 return true;
28154 static bool
28155 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
28157 /* Check if the input mask matches vext before reordering the
28158 operands. */
28159 if (TARGET_NEON)
28160 if (arm_evpc_neon_vext (d))
28161 return true;
28163 /* The pattern matching functions above are written to look for a small
28164 number to begin the sequence (0, 1, N/2). If we begin with an index
28165 from the second operand, we can swap the operands. */
28166 if (d->perm[0] >= d->nelt)
28168 unsigned i, nelt = d->nelt;
28169 rtx x;
28171 for (i = 0; i < nelt; ++i)
28172 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
28174 x = d->op0;
28175 d->op0 = d->op1;
28176 d->op1 = x;
28179 if (TARGET_NEON)
28181 if (arm_evpc_neon_vuzp (d))
28182 return true;
28183 if (arm_evpc_neon_vzip (d))
28184 return true;
28185 if (arm_evpc_neon_vrev (d))
28186 return true;
28187 if (arm_evpc_neon_vtrn (d))
28188 return true;
28189 return arm_evpc_neon_vtbl (d);
28191 return false;
28194 /* Expand a vec_perm_const pattern. */
28196 bool
28197 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
28199 struct expand_vec_perm_d d;
28200 int i, nelt, which;
28202 d.target = target;
28203 d.op0 = op0;
28204 d.op1 = op1;
28206 d.vmode = GET_MODE (target);
28207 gcc_assert (VECTOR_MODE_P (d.vmode));
28208 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28209 d.testing_p = false;
28211 for (i = which = 0; i < nelt; ++i)
28213 rtx e = XVECEXP (sel, 0, i);
28214 int ei = INTVAL (e) & (2 * nelt - 1);
28215 which |= (ei < nelt ? 1 : 2);
28216 d.perm[i] = ei;
28219 switch (which)
28221 default:
28222 gcc_unreachable();
28224 case 3:
28225 d.one_vector_p = false;
28226 if (!rtx_equal_p (op0, op1))
28227 break;
28229 /* The elements of PERM do not suggest that only the first operand
28230 is used, but both operands are identical. Allow easier matching
28231 of the permutation by folding the permutation into the single
28232 input vector. */
28233 /* FALLTHRU */
28234 case 2:
28235 for (i = 0; i < nelt; ++i)
28236 d.perm[i] &= nelt - 1;
28237 d.op0 = op1;
28238 d.one_vector_p = true;
28239 break;
28241 case 1:
28242 d.op1 = op0;
28243 d.one_vector_p = true;
28244 break;
28247 return arm_expand_vec_perm_const_1 (&d);
28250 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
28252 static bool
28253 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
28254 const unsigned char *sel)
28256 struct expand_vec_perm_d d;
28257 unsigned int i, nelt, which;
28258 bool ret;
28260 d.vmode = vmode;
28261 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
28262 d.testing_p = true;
28263 memcpy (d.perm, sel, nelt);
28265 /* Categorize the set of elements in the selector. */
28266 for (i = which = 0; i < nelt; ++i)
28268 unsigned char e = d.perm[i];
28269 gcc_assert (e < 2 * nelt);
28270 which |= (e < nelt ? 1 : 2);
28273 /* For all elements from second vector, fold the elements to first. */
28274 if (which == 2)
28275 for (i = 0; i < nelt; ++i)
28276 d.perm[i] -= nelt;
28278 /* Check whether the mask can be applied to the vector type. */
28279 d.one_vector_p = (which != 3);
28281 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
28282 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
28283 if (!d.one_vector_p)
28284 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
28286 start_sequence ();
28287 ret = arm_expand_vec_perm_const_1 (&d);
28288 end_sequence ();
28290 return ret;
28293 bool
28294 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
28296 /* If we are soft float, all auto-increment forms are OK as long as we
28297 have LDRD or the access fits in a single word. */
28298 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
28299 return true;
28301 switch (code)
28303 /* Post increment and Pre Decrement are supported for all
28304 instruction forms except for vector forms. */
28305 case ARM_POST_INC:
28306 case ARM_PRE_DEC:
28307 if (VECTOR_MODE_P (mode))
28309 if (code != ARM_PRE_DEC)
28310 return true;
28311 else
28312 return false;
28315 return true;
28317 case ARM_POST_DEC:
28318 case ARM_PRE_INC:
28319 /* Without LDRD, and with a mode larger than a word,
28320 there is no point in auto-incrementing because
28321 ldm and stm will not have these forms. */
28322 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
28323 return false;
28325 /* Vector and floating point modes do not support
28326 these auto increment forms. */
28327 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
28328 return false;
28330 return true;
28332 default:
28333 return false;
28337 return false;
28340 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
28341 on ARM, since we know that shifts by negative amounts are no-ops.
28342 Additionally, the default expansion code is not available or suitable
28343 for post-reload insn splits (this can occur when the register allocator
28344 chooses not to do a shift in NEON).
28346 This function is used in both initial expand and post-reload splits, and
28347 handles all kinds of 64-bit shifts.
28349 Input requirements:
28350 - It is safe for the input and output to be the same register, but
28351 early-clobber rules apply for the shift amount and scratch registers.
28352 - Shift by register requires both scratch registers. In all other cases
28353 the scratch registers may be NULL.
28354 - Ashiftrt by a register also clobbers the CC register. */
28355 void
28356 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
28357 rtx amount, rtx scratch1, rtx scratch2)
28359 rtx out_high = gen_highpart (SImode, out);
28360 rtx out_low = gen_lowpart (SImode, out);
28361 rtx in_high = gen_highpart (SImode, in);
28362 rtx in_low = gen_lowpart (SImode, in);
28364 /* Terminology:
28365 in = the register pair containing the input value.
28366 out = the destination register pair.
28367 up = the high- or low-part of each pair.
28368 down = the opposite part to "up".
28369 In a shift, we can consider bits to shift from "up"-stream to
28370 "down"-stream, so in a left-shift "up" is the low-part and "down"
28371 is the high-part of each register pair. */
28373 rtx out_up = code == ASHIFT ? out_low : out_high;
28374 rtx out_down = code == ASHIFT ? out_high : out_low;
28375 rtx in_up = code == ASHIFT ? in_low : in_high;
28376 rtx in_down = code == ASHIFT ? in_high : in_low;
28378 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
28379 gcc_assert (out
28380 && (REG_P (out) || GET_CODE (out) == SUBREG)
28381 && GET_MODE (out) == DImode);
28382 gcc_assert (in
28383 && (REG_P (in) || GET_CODE (in) == SUBREG)
28384 && GET_MODE (in) == DImode);
28385 gcc_assert (amount
28386 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
28387 && GET_MODE (amount) == SImode)
28388 || CONST_INT_P (amount)));
28389 gcc_assert (scratch1 == NULL
28390 || (GET_CODE (scratch1) == SCRATCH)
28391 || (GET_MODE (scratch1) == SImode
28392 && REG_P (scratch1)));
28393 gcc_assert (scratch2 == NULL
28394 || (GET_CODE (scratch2) == SCRATCH)
28395 || (GET_MODE (scratch2) == SImode
28396 && REG_P (scratch2)));
28397 gcc_assert (!REG_P (out) || !REG_P (amount)
28398 || !HARD_REGISTER_P (out)
28399 || (REGNO (out) != REGNO (amount)
28400 && REGNO (out) + 1 != REGNO (amount)));
28402 /* Macros to make following code more readable. */
28403 #define SUB_32(DEST,SRC) \
28404 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
28405 #define RSB_32(DEST,SRC) \
28406 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
28407 #define SUB_S_32(DEST,SRC) \
28408 gen_addsi3_compare0 ((DEST), (SRC), \
28409 GEN_INT (-32))
28410 #define SET(DEST,SRC) \
28411 gen_rtx_SET ((DEST), (SRC))
28412 #define SHIFT(CODE,SRC,AMOUNT) \
28413 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
28414 #define LSHIFT(CODE,SRC,AMOUNT) \
28415 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
28416 SImode, (SRC), (AMOUNT))
28417 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
28418 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
28419 SImode, (SRC), (AMOUNT))
28420 #define ORR(A,B) \
28421 gen_rtx_IOR (SImode, (A), (B))
28422 #define BRANCH(COND,LABEL) \
28423 gen_arm_cond_branch ((LABEL), \
28424 gen_rtx_ ## COND (CCmode, cc_reg, \
28425 const0_rtx), \
28426 cc_reg)
28428 /* Shifts by register and shifts by constant are handled separately. */
28429 if (CONST_INT_P (amount))
28431 /* We have a shift-by-constant. */
28433 /* First, handle out-of-range shift amounts.
28434 In both cases we try to match the result an ARM instruction in a
28435 shift-by-register would give. This helps reduce execution
28436 differences between optimization levels, but it won't stop other
28437 parts of the compiler doing different things. This is "undefined
28438 behaviour", in any case. */
28439 if (INTVAL (amount) <= 0)
28440 emit_insn (gen_movdi (out, in));
28441 else if (INTVAL (amount) >= 64)
28443 if (code == ASHIFTRT)
28445 rtx const31_rtx = GEN_INT (31);
28446 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
28447 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
28449 else
28450 emit_insn (gen_movdi (out, const0_rtx));
28453 /* Now handle valid shifts. */
28454 else if (INTVAL (amount) < 32)
28456 /* Shifts by a constant less than 32. */
28457 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
28459 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28460 emit_insn (SET (out_down,
28461 ORR (REV_LSHIFT (code, in_up, reverse_amount),
28462 out_down)));
28463 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28465 else
28467 /* Shifts by a constant greater than 31. */
28468 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
28470 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
28471 if (code == ASHIFTRT)
28472 emit_insn (gen_ashrsi3 (out_up, in_up,
28473 GEN_INT (31)));
28474 else
28475 emit_insn (SET (out_up, const0_rtx));
28478 else
28480 /* We have a shift-by-register. */
28481 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
28483 /* This alternative requires the scratch registers. */
28484 gcc_assert (scratch1 && REG_P (scratch1));
28485 gcc_assert (scratch2 && REG_P (scratch2));
28487 /* We will need the values "amount-32" and "32-amount" later.
28488 Swapping them around now allows the later code to be more general. */
28489 switch (code)
28491 case ASHIFT:
28492 emit_insn (SUB_32 (scratch1, amount));
28493 emit_insn (RSB_32 (scratch2, amount));
28494 break;
28495 case ASHIFTRT:
28496 emit_insn (RSB_32 (scratch1, amount));
28497 /* Also set CC = amount > 32. */
28498 emit_insn (SUB_S_32 (scratch2, amount));
28499 break;
28500 case LSHIFTRT:
28501 emit_insn (RSB_32 (scratch1, amount));
28502 emit_insn (SUB_32 (scratch2, amount));
28503 break;
28504 default:
28505 gcc_unreachable ();
28508 /* Emit code like this:
28510 arithmetic-left:
28511 out_down = in_down << amount;
28512 out_down = (in_up << (amount - 32)) | out_down;
28513 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
28514 out_up = in_up << amount;
28516 arithmetic-right:
28517 out_down = in_down >> amount;
28518 out_down = (in_up << (32 - amount)) | out_down;
28519 if (amount < 32)
28520 out_down = ((signed)in_up >> (amount - 32)) | out_down;
28521 out_up = in_up << amount;
28523 logical-right:
28524 out_down = in_down >> amount;
28525 out_down = (in_up << (32 - amount)) | out_down;
28526 if (amount < 32)
28527 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
28528 out_up = in_up << amount;
28530 The ARM and Thumb2 variants are the same but implemented slightly
28531 differently. If this were only called during expand we could just
28532 use the Thumb2 case and let combine do the right thing, but this
28533 can also be called from post-reload splitters. */
28535 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
28537 if (!TARGET_THUMB2)
28539 /* Emit code for ARM mode. */
28540 emit_insn (SET (out_down,
28541 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
28542 if (code == ASHIFTRT)
28544 rtx_code_label *done_label = gen_label_rtx ();
28545 emit_jump_insn (BRANCH (LT, done_label));
28546 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
28547 out_down)));
28548 emit_label (done_label);
28550 else
28551 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
28552 out_down)));
28554 else
28556 /* Emit code for Thumb2 mode.
28557 Thumb2 can't do shift and or in one insn. */
28558 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
28559 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
28561 if (code == ASHIFTRT)
28563 rtx_code_label *done_label = gen_label_rtx ();
28564 emit_jump_insn (BRANCH (LT, done_label));
28565 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
28566 emit_insn (SET (out_down, ORR (out_down, scratch2)));
28567 emit_label (done_label);
28569 else
28571 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
28572 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
28576 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
28579 #undef SUB_32
28580 #undef RSB_32
28581 #undef SUB_S_32
28582 #undef SET
28583 #undef SHIFT
28584 #undef LSHIFT
28585 #undef REV_LSHIFT
28586 #undef ORR
28587 #undef BRANCH
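/* A standalone sketch of the constant-amount decomposition emitted above,
   written out in plain C for a logical left shift by 0 < amount < 32; the
   function and parameter names are hypothetical.  For a left shift "up" is
   the low half and "down" is the high half, so the high word collects the
   bits that spill over from the low word.  */
static void ATTRIBUTE_UNUSED
shift64_left_example (unsigned int in_low, unsigned int in_high,
		      unsigned int amount,
		      unsigned int *out_low, unsigned int *out_high)
{
  *out_high = (in_high << amount) | (in_low >> (32 - amount));
  *out_low = in_low << amount;
}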
28591 /* Return true if *COMPARISON is a valid comparison operation, and
28592 force the operands into a form that is valid for it. */
28593 bool
28594 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
28596 enum rtx_code code = GET_CODE (*comparison);
28597 int code_int;
28598 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
28599 ? GET_MODE (*op2) : GET_MODE (*op1);
28601 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
28603 if (code == UNEQ || code == LTGT)
28604 return false;
28606 code_int = (int)code;
28607 arm_canonicalize_comparison (&code_int, op1, op2, 0);
28608 PUT_CODE (*comparison, (enum rtx_code)code_int);
28610 switch (mode)
28612 case SImode:
28613 if (!arm_add_operand (*op1, mode))
28614 *op1 = force_reg (mode, *op1);
28615 if (!arm_add_operand (*op2, mode))
28616 *op2 = force_reg (mode, *op2);
28617 return true;
28619 case DImode:
28620 if (!cmpdi_operand (*op1, mode))
28621 *op1 = force_reg (mode, *op1);
28622 if (!cmpdi_operand (*op2, mode))
28623 *op2 = force_reg (mode, *op2);
28624 return true;
28626 case SFmode:
28627 case DFmode:
28628 if (!arm_float_compare_operand (*op1, mode))
28629 *op1 = force_reg (mode, *op1);
28630 if (!arm_float_compare_operand (*op2, mode))
28631 *op2 = force_reg (mode, *op2);
28632 return true;
28633 default:
28634 break;
28637 return false;
28641 /* Maximum number of instructions to set block of memory. */
28642 static int
28643 arm_block_set_max_insns (void)
28645 if (optimize_function_for_size_p (cfun))
28646 return 4;
28647 else
28648 return current_tune->max_insns_inline_memset;
28651 /* Return TRUE if it's profitable to set block of memory for
28652 non-vectorized case. VAL is the value to set the memory
28653 with. LENGTH is the number of bytes to set. ALIGN is the
28654 alignment of the destination memory in bytes. UNALIGNED_P
28655 is TRUE if we can only set the memory with instructions
28656 meeting alignment requirements. USE_STRD_P is TRUE if we
28657 can use strd to set the memory. */
28658 static bool
28659 arm_block_set_non_vect_profit_p (rtx val,
28660 unsigned HOST_WIDE_INT length,
28661 unsigned HOST_WIDE_INT align,
28662 bool unaligned_p, bool use_strd_p)
28664 int num = 0;
28665 /* For a leftover of 0-7 bytes, the memory block can be set with
28666 strb/strh/str using the minimum number of instructions. */
28667 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
28669 if (unaligned_p)
28671 num = arm_const_inline_cost (SET, val);
28672 num += length / align + length % align;
28674 else if (use_strd_p)
28676 num = arm_const_double_inline_cost (val);
28677 num += (length >> 3) + leftover[length & 7];
28679 else
28681 num = arm_const_inline_cost (SET, val);
28682 num += (length >> 2) + leftover[length & 3];
28685 /* We may be able to combine last pair STRH/STRB into a single STR
28686 by shifting one byte back. */
28687 if (unaligned_access && length > 3 && (length & 3) == 3)
28688 num--;
28690 return (num <= arm_block_set_max_insns ());
28693 /* Return TRUE if it's profitable to set block of memory for
28694 vectorized case. LENGTH is the number of bytes to set.
28695 ALIGN is the alignment of destination memory in bytes.
28696 MODE is the vector mode used to set the memory. */
28697 static bool
28698 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
28699 unsigned HOST_WIDE_INT align,
28700 machine_mode mode)
28702 int num;
28703 bool unaligned_p = ((align & 3) != 0);
28704 unsigned int nelt = GET_MODE_NUNITS (mode);
28706 /* Instruction loading constant value. */
28707 num = 1;
28708 /* Instructions storing the memory. */
28709 num += (length + nelt - 1) / nelt;
28710 /* Instructions adjusting the address expression. We only need to
28711 adjust it if the block is 4-byte aligned and the leftover bytes
28712 can only be stored with a misaligned store instruction. */
28713 if (!unaligned_p && (length & 3) != 0)
28714 num++;
28716 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
28717 if (!unaligned_p && mode == V16QImode)
28718 num--;
28720 return (num <= arm_block_set_max_insns ());
28723 /* Set a block of memory using vectorization instructions for the
28724 unaligned case. We fill the first LENGTH bytes of the memory
28725 area starting from DSTBASE with byte constant VALUE. ALIGN is
28726 the alignment requirement of memory. Return TRUE if succeeded. */
28727 static bool
28728 arm_block_set_unaligned_vect (rtx dstbase,
28729 unsigned HOST_WIDE_INT length,
28730 unsigned HOST_WIDE_INT value,
28731 unsigned HOST_WIDE_INT align)
28733 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
28734 rtx dst, mem;
28735 rtx val_elt, val_vec, reg;
28736 rtx rval[MAX_VECT_LEN];
28737 rtx (*gen_func) (rtx, rtx);
28738 machine_mode mode;
28739 unsigned HOST_WIDE_INT v = value;
28741 gcc_assert ((align & 0x3) != 0);
28742 nelt_v8 = GET_MODE_NUNITS (V8QImode);
28743 nelt_v16 = GET_MODE_NUNITS (V16QImode);
28744 if (length >= nelt_v16)
28746 mode = V16QImode;
28747 gen_func = gen_movmisalignv16qi;
28749 else
28751 mode = V8QImode;
28752 gen_func = gen_movmisalignv8qi;
28754 nelt_mode = GET_MODE_NUNITS (mode);
28755 gcc_assert (length >= nelt_mode);
28756 /* Skip if it isn't profitable. */
28757 if (!arm_block_set_vect_profit_p (length, align, mode))
28758 return false;
28760 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28761 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28763 v = sext_hwi (v, BITS_PER_WORD);
28764 val_elt = GEN_INT (v);
28765 for (j = 0; j < nelt_mode; j++)
28766 rval[j] = val_elt;
28768 reg = gen_reg_rtx (mode);
28769 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
28770 /* Emit instruction loading the constant value. */
28771 emit_move_insn (reg, val_vec);
28773 /* Handle nelt_mode bytes in a vector. */
28774 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
28776 emit_insn ((*gen_func) (mem, reg));
28777 if (i + 2 * nelt_mode <= length)
28778 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
28781 /* If at least nelt_v8 bytes are left over, we must be in
28782 V16QImode. */
28783 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
28785 /* Handle (8, 16) bytes leftover. */
28786 if (i + nelt_v8 < length)
28788 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
28789 /* We are shifting bytes back, set the alignment accordingly. */
28790 if ((length & 1) != 0 && align >= 2)
28791 set_mem_align (mem, BITS_PER_UNIT);
28793 emit_insn (gen_movmisalignv16qi (mem, reg));
28795 /* Handle (0, 8] bytes leftover. */
28796 else if (i < length && i + nelt_v8 >= length)
28798 if (mode == V16QImode)
28800 reg = gen_lowpart (V8QImode, reg);
28801 mem = adjust_automodify_address (dstbase, V8QImode, dst, 0);
28803 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
28804 + (nelt_mode - nelt_v8))));
28805 /* We are shifting bytes back, set the alignment accordingly. */
28806 if ((length & 1) != 0 && align >= 2)
28807 set_mem_align (mem, BITS_PER_UNIT);
28809 emit_insn (gen_movmisalignv8qi (mem, reg));
28812 return true;
28815 /* Set a block of memory using vectorization instructions for the
28816 aligned case. We fill the first LENGTH bytes of the memory area
28817 starting from DSTBASE with byte constant VALUE. ALIGN is the
28818 alignment requirement of memory. Return TRUE if succeeded. */
28819 static bool
28820 arm_block_set_aligned_vect (rtx dstbase,
28821 unsigned HOST_WIDE_INT length,
28822 unsigned HOST_WIDE_INT value,
28823 unsigned HOST_WIDE_INT align)
28825 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
28826 rtx dst, addr, mem;
28827 rtx val_elt, val_vec, reg;
28828 rtx rval[MAX_VECT_LEN];
28829 machine_mode mode;
28830 unsigned HOST_WIDE_INT v = value;
28832 gcc_assert ((align & 0x3) == 0);
28833 nelt_v8 = GET_MODE_NUNITS (V8QImode);
28834 nelt_v16 = GET_MODE_NUNITS (V16QImode);
28835 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
28836 mode = V16QImode;
28837 else
28838 mode = V8QImode;
28840 nelt_mode = GET_MODE_NUNITS (mode);
28841 gcc_assert (length >= nelt_mode);
28842 /* Skip if it isn't profitable. */
28843 if (!arm_block_set_vect_profit_p (length, align, mode))
28844 return false;
28846 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28848 v = sext_hwi (v, BITS_PER_WORD);
28849 val_elt = GEN_INT (v);
28850 for (j = 0; j < nelt_mode; j++)
28851 rval[j] = val_elt;
28853 reg = gen_reg_rtx (mode);
28854 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
28855 /* Emit instruction loading the constant value. */
28856 emit_move_insn (reg, val_vec);
28858 i = 0;
28859 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
28860 if (mode == V16QImode)
28862 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28863 emit_insn (gen_movmisalignv16qi (mem, reg));
28864 i += nelt_mode;
28865 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
28866 if (i + nelt_v8 < length && i + nelt_v16 > length)
28868 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
28869 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28870 /* We are shifting bytes back, set the alignment accordingly. */
28871 if ((length & 0x3) == 0)
28872 set_mem_align (mem, BITS_PER_UNIT * 4);
28873 else if ((length & 0x1) == 0)
28874 set_mem_align (mem, BITS_PER_UNIT * 2);
28875 else
28876 set_mem_align (mem, BITS_PER_UNIT);
28878 emit_insn (gen_movmisalignv16qi (mem, reg));
28879 return true;
28881 /* Fall through for bytes leftover. */
28882 mode = V8QImode;
28883 nelt_mode = GET_MODE_NUNITS (mode);
28884 reg = gen_lowpart (V8QImode, reg);
28887 /* Handle 8 bytes in a vector. */
28888 for (; (i + nelt_mode <= length); i += nelt_mode)
28890 addr = plus_constant (Pmode, dst, i);
28891 mem = adjust_automodify_address (dstbase, mode, addr, i);
28892 emit_move_insn (mem, reg);
28895 /* Handle single word leftover by shifting 4 bytes back. We can
28896 use aligned access for this case. */
28897 if (i + UNITS_PER_WORD == length)
28899 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
28900 mem = adjust_automodify_address (dstbase, mode,
28901 addr, i - UNITS_PER_WORD);
28902 /* We are shifting 4 bytes back, set the alignment accordingly. */
28903 if (align > UNITS_PER_WORD)
28904 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
28906 emit_move_insn (mem, reg);
28908 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
28909 We have to use unaligned access for this case. */
28910 else if (i < length)
28912 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
28913 mem = adjust_automodify_address (dstbase, mode, dst, 0);
28914 /* We are shifting bytes back, set the alignment accordingly. */
28915 if ((length & 1) == 0)
28916 set_mem_align (mem, BITS_PER_UNIT * 2);
28917 else
28918 set_mem_align (mem, BITS_PER_UNIT);
28920 emit_insn (gen_movmisalignv8qi (mem, reg));
28923 return true;
28926 /* Set a block of memory using plain strh/strb instructions, using
28927 only the instructions allowed by ALIGN on the processor. We fill the
28928 first LENGTH bytes of the memory area starting from DSTBASE
28929 with byte constant VALUE. ALIGN is the alignment requirement
28930 of memory. */
28931 static bool
28932 arm_block_set_unaligned_non_vect (rtx dstbase,
28933 unsigned HOST_WIDE_INT length,
28934 unsigned HOST_WIDE_INT value,
28935 unsigned HOST_WIDE_INT align)
28937 unsigned int i;
28938 rtx dst, addr, mem;
28939 rtx val_exp, val_reg, reg;
28940 machine_mode mode;
28941 HOST_WIDE_INT v = value;
28943 gcc_assert (align == 1 || align == 2);
28945 if (align == 2)
28946 v |= (value << BITS_PER_UNIT);
28948 v = sext_hwi (v, BITS_PER_WORD);
28949 val_exp = GEN_INT (v);
28950 /* Skip if it isn't profitable. */
28951 if (!arm_block_set_non_vect_profit_p (val_exp, length,
28952 align, true, false))
28953 return false;
28955 dst = copy_addr_to_reg (XEXP (dstbase, 0));
28956 mode = (align == 2 ? HImode : QImode);
28957 val_reg = force_reg (SImode, val_exp);
28958 reg = gen_lowpart (mode, val_reg);
28960 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
28962 addr = plus_constant (Pmode, dst, i);
28963 mem = adjust_automodify_address (dstbase, mode, addr, i);
28964 emit_move_insn (mem, reg);
28967 /* Handle single byte leftover. */
28968 if (i + 1 == length)
28970 reg = gen_lowpart (QImode, val_reg);
28971 addr = plus_constant (Pmode, dst, i);
28972 mem = adjust_automodify_address (dstbase, QImode, addr, i);
28973 emit_move_insn (mem, reg);
28974 i++;
28977 gcc_assert (i == length);
28978 return true;
28981 /* Set a block of memory using plain strd/str/strh/strb instructions,
28982 to permit unaligned copies on processors which support unaligned
28983 semantics for those instructions. We fill the first LENGTH bytes
28984 of the memory area starting from DSTBASE with byte constant VALUE.
28985 ALIGN is the alignment requirement of memory. */
28986 static bool
28987 arm_block_set_aligned_non_vect (rtx dstbase,
28988 unsigned HOST_WIDE_INT length,
28989 unsigned HOST_WIDE_INT value,
28990 unsigned HOST_WIDE_INT align)
28992 unsigned int i;
28993 rtx dst, addr, mem;
28994 rtx val_exp, val_reg, reg;
28995 unsigned HOST_WIDE_INT v;
28996 bool use_strd_p;
28998 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
28999 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
29001 v = (value | (value << 8) | (value << 16) | (value << 24));
29002 if (length < UNITS_PER_WORD)
29003 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
29005 if (use_strd_p)
29006 v |= (v << BITS_PER_WORD);
29007 else
29008 v = sext_hwi (v, BITS_PER_WORD);
29010 val_exp = GEN_INT (v);
29011 /* Skip if it isn't profitable. */
29012 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29013 align, false, use_strd_p))
29015 if (!use_strd_p)
29016 return false;
29018 /* Try without strd. */
29019 v = (v >> BITS_PER_WORD);
29020 v = sext_hwi (v, BITS_PER_WORD);
29021 val_exp = GEN_INT (v);
29022 use_strd_p = false;
29023 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29024 align, false, use_strd_p))
29025 return false;
29028 i = 0;
29029 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29030 /* Handle double words using strd if possible. */
29031 if (use_strd_p)
29033 val_reg = force_reg (DImode, val_exp);
29034 reg = val_reg;
29035 for (; (i + 8 <= length); i += 8)
29037 addr = plus_constant (Pmode, dst, i);
29038 mem = adjust_automodify_address (dstbase, DImode, addr, i);
29039 emit_move_insn (mem, reg);
29042 else
29043 val_reg = force_reg (SImode, val_exp);
29045 /* Handle words. */
29046 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
29047 for (; (i + 4 <= length); i += 4)
29049 addr = plus_constant (Pmode, dst, i);
29050 mem = adjust_automodify_address (dstbase, SImode, addr, i);
29051 if ((align & 3) == 0)
29052 emit_move_insn (mem, reg);
29053 else
29054 emit_insn (gen_unaligned_storesi (mem, reg));
29057 /* Merge last pair of STRH and STRB into a STR if possible. */
29058 if (unaligned_access && i > 0 && (i + 3) == length)
29060 addr = plus_constant (Pmode, dst, i - 1);
29061 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
29062 /* We are shifting one byte back, set the alignment accordingly. */
29063 if ((align & 1) == 0)
29064 set_mem_align (mem, BITS_PER_UNIT);
29066 /* Most likely this is an unaligned access, and we can't tell at
29067 compilation time. */
29068 emit_insn (gen_unaligned_storesi (mem, reg));
29069 return true;
29072 /* Handle half word leftover. */
29073 if (i + 2 <= length)
29075 reg = gen_lowpart (HImode, val_reg);
29076 addr = plus_constant (Pmode, dst, i);
29077 mem = adjust_automodify_address (dstbase, HImode, addr, i);
29078 if ((align & 1) == 0)
29079 emit_move_insn (mem, reg);
29080 else
29081 emit_insn (gen_unaligned_storehi (mem, reg));
29083 i += 2;
29086 /* Handle single byte leftover. */
29087 if (i + 1 == length)
29089 reg = gen_lowpart (QImode, val_reg);
29090 addr = plus_constant (Pmode, dst, i);
29091 mem = adjust_automodify_address (dstbase, QImode, addr, i);
29092 emit_move_insn (mem, reg);
29095 return true;
29098 /* Set a block of memory using vectorization instructions for both
29099 aligned and unaligned cases. We fill the first LENGTH bytes of
29100 the memory area starting from DSTBASE with byte constant VALUE.
29101 ALIGN is the alignment requirement of memory. */
29102 static bool
29103 arm_block_set_vect (rtx dstbase,
29104 unsigned HOST_WIDE_INT length,
29105 unsigned HOST_WIDE_INT value,
29106 unsigned HOST_WIDE_INT align)
29108 /* Check whether we need to use unaligned store instruction. */
29109 if (((align & 3) != 0 || (length & 3) != 0)
29110 /* Check whether unaligned store instruction is available. */
29111 && (!unaligned_access || BYTES_BIG_ENDIAN))
29112 return false;
29114 if ((align & 3) == 0)
29115 return arm_block_set_aligned_vect (dstbase, length, value, align);
29116 else
29117 return arm_block_set_unaligned_vect (dstbase, length, value, align);
29120 /* Expand a block set (memset) operation. First we try to do it using
29121 vectorization instructions, then with ARM unaligned access and
29122 double-word stores if profitable. OPERANDS[0] is the destination,
29123 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
29124 initialize the memory with, OPERANDS[3] is the known alignment of the
29125 destination. */
29126 bool
29127 arm_gen_setmem (rtx *operands)
29129 rtx dstbase = operands[0];
29130 unsigned HOST_WIDE_INT length;
29131 unsigned HOST_WIDE_INT value;
29132 unsigned HOST_WIDE_INT align;
29134 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
29135 return false;
29137 length = UINTVAL (operands[1]);
29138 if (length > 64)
29139 return false;
29141 value = (UINTVAL (operands[2]) & 0xFF);
29142 align = UINTVAL (operands[3]);
29143 if (TARGET_NEON && length >= 8
29144 && current_tune->string_ops_prefer_neon
29145 && arm_block_set_vect (dstbase, length, value, align))
29146 return true;
29148 if (!unaligned_access && (align & 3) != 0)
29149 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
29151 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
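/* A standalone sketch of the byte replication used by the non-vectorized
   paths above; the helper name is hypothetical.  A byte value of 0xAB
   becomes 0xABABABAB for word stores and is doubled again into a 64-bit
   value when STRD is used.  */
static unsigned int ATTRIBUTE_UNUSED
replicate_byte_example (unsigned int value)
{
  value &= 0xFF;
  return value | (value << 8) | (value << 16) | (value << 24);
}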
29155 static bool
29156 arm_macro_fusion_p (void)
29158 return current_tune->fuseable_ops != tune_params::FUSE_NOTHING;
29162 static bool
29163 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
29165 rtx set_dest;
29166 rtx prev_set = single_set (prev);
29167 rtx curr_set = single_set (curr);
29169 if (!prev_set
29170 || !curr_set)
29171 return false;
29173 if (any_condjump_p (curr))
29174 return false;
29176 if (!arm_macro_fusion_p ())
29177 return false;
29179 if (current_tune->fuseable_ops & tune_params::FUSE_MOVW_MOVT)
29181 /* We are trying to fuse
29182 movw imm / movt imm
29183 instructions as a group that gets scheduled together. */
29185 set_dest = SET_DEST (curr_set);
29187 if (GET_MODE (set_dest) != SImode)
29188 return false;
29190 /* We are trying to match:
29191 prev (movw) == (set (reg r0) (const_int imm16))
29192 curr (movt) == (set (zero_extract (reg r0)
29193 (const_int 16)
29194 (const_int 16))
29195 (const_int imm16_1))
29197 prev (movw) == (set (reg r1)
29198 (high (symbol_ref ("SYM"))))
29199 curr (movt) == (set (reg r0)
29200 (lo_sum (reg r1)
29201 (symbol_ref ("SYM")))) */
29202 if (GET_CODE (set_dest) == ZERO_EXTRACT)
29204 if (CONST_INT_P (SET_SRC (curr_set))
29205 && CONST_INT_P (SET_SRC (prev_set))
29206 && REG_P (XEXP (set_dest, 0))
29207 && REG_P (SET_DEST (prev_set))
29208 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
29209 return true;
29211 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
29212 && REG_P (SET_DEST (curr_set))
29213 && REG_P (SET_DEST (prev_set))
29214 && GET_CODE (SET_SRC (prev_set)) == HIGH
29215 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
29216 return true;
29218 return false;
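/* Example (illustrative): the kind of pair FUSE_MOVW_MOVT lets the
   scheduler keep adjacent, e.g.
	movw	r0, #:lower16:sym
	movt	r0, #:upper16:sym
   which some cores can treat as a single 32-bit immediate move.  */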
29221 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
29223 static unsigned HOST_WIDE_INT
29224 arm_asan_shadow_offset (void)
29226 return (unsigned HOST_WIDE_INT) 1 << 29;
29230 /* This is a temporary fix for PR60655. Ideally we need
29231 to handle most of these cases in the generic part but
29232 currently we reject minus (..) (sym_ref). We try to
29233 ameliorate the case with minus (sym_ref1) (sym_ref2)
29234 where they are in the same section. */
29236 static bool
29237 arm_const_not_ok_for_debug_p (rtx p)
29239 tree decl_op0 = NULL;
29240 tree decl_op1 = NULL;
29242 if (GET_CODE (p) == MINUS)
29244 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
29246 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
29247 if (decl_op1
29248 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
29249 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
29251 if ((TREE_CODE (decl_op1) == VAR_DECL
29252 || TREE_CODE (decl_op1) == CONST_DECL)
29253 && (TREE_CODE (decl_op0) == VAR_DECL
29254 || TREE_CODE (decl_op0) == CONST_DECL))
29255 return (get_variable_section (decl_op1, false)
29256 != get_variable_section (decl_op0, false));
29258 if (TREE_CODE (decl_op1) == LABEL_DECL
29259 && TREE_CODE (decl_op0) == LABEL_DECL)
29260 return (DECL_CONTEXT (decl_op1)
29261 != DECL_CONTEXT (decl_op0));
29264 return true;
29268 return false;
29271 /* Return TRUE if X is a reference to a value in a constant pool. */
29272 extern bool
29273 arm_is_constant_pool_ref (rtx x)
29275 return (MEM_P (x)
29276 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
29277 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
29280 void
29281 arm_declare_function_name (FILE *stream, const char *name, tree decl)
29283 if (TARGET_THUMB)
29285 if (is_called_in_ARM_mode (decl)
29286 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
29287 && cfun->is_thunk))
29288 fprintf (stream, "\t.code 32\n");
29289 else if (TARGET_THUMB1)
29290 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
29291 else
29292 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
29295 if (TARGET_POKE_FUNCTION_NAME)
29296 arm_poke_function_name (stream, (const char *) name);
29299 /* If MEM is in the form of [base+offset], extract the two parts
29300 of the address and store them in BASE and OFFSET; otherwise return
29301 false after clearing BASE and OFFSET. */
29303 static bool
29304 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
29306 rtx addr;
29308 gcc_assert (MEM_P (mem));
29310 addr = XEXP (mem, 0);
29312 /* Strip off const from addresses like (const (addr)). */
29313 if (GET_CODE (addr) == CONST)
29314 addr = XEXP (addr, 0);
29316 if (GET_CODE (addr) == REG)
29318 *base = addr;
29319 *offset = const0_rtx;
29320 return true;
29323 if (GET_CODE (addr) == PLUS
29324 && GET_CODE (XEXP (addr, 0)) == REG
29325 && CONST_INT_P (XEXP (addr, 1)))
29327 *base = XEXP (addr, 0);
29328 *offset = XEXP (addr, 1);
29329 return true;
29332 *base = NULL_RTX;
29333 *offset = NULL_RTX;
29335 return false;
29338 /* If INSN is a load or store whose address is in the form of
29339 [base+offset], extract the two parts and store them in BASE and
29340 OFFSET. IS_LOAD is set to TRUE if it's a load. Return TRUE if INSN
29341 is such an instruction, otherwise return FALSE. */
29343 static bool
29344 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
29346 rtx x, dest, src;
29348 gcc_assert (INSN_P (insn));
29349 x = PATTERN (insn);
29350 if (GET_CODE (x) != SET)
29351 return false;
29353 src = SET_SRC (x);
29354 dest = SET_DEST (x);
29355 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
29357 *is_load = false;
29358 extract_base_offset_in_addr (dest, base, offset);
29360 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
29362 *is_load = true;
29363 extract_base_offset_in_addr (src, base, offset);
29365 else
29366 return false;
29368 return (*base != NULL_RTX && *offset != NULL_RTX);
29371 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
29373 Currently we only support fusing ldr or str instructions, so FUSION_PRI
29374 and PRI are only calculated for these instructions. For other instructions,
29375 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds of
29376 instruction fusion can be supported by returning different priorities.
29378 It's important that irrelevant instructions get the largest FUSION_PRI. */
29380 static void
29381 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
29382 int *fusion_pri, int *pri)
29384 int tmp, off_val;
29385 bool is_load;
29386 rtx base, offset;
29388 gcc_assert (INSN_P (insn));
29390 tmp = max_pri - 1;
29391 if (!fusion_load_store (insn, &base, &offset, &is_load))
29393 *pri = tmp;
29394 *fusion_pri = tmp;
29395 return;
29398 /* Load goes first. */
29399 if (is_load)
29400 *fusion_pri = tmp - 1;
29401 else
29402 *fusion_pri = tmp - 2;
29404 tmp /= 2;
29406 /* INSN with smaller base register goes first. */
29407 tmp -= ((REGNO (base) & 0xff) << 20);
29409 /* INSN with smaller offset goes first. */
29410 off_val = (int)(INTVAL (offset));
29411 if (off_val >= 0)
29412 tmp -= (off_val & 0xfffff);
29413 else
29414 tmp += ((- off_val) & 0xfffff);
29416 *pri = tmp;
29417 return;
29419 #include "gt-arm.h"