1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
4 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
5 and Martin Simmons (@harleqn.co.uk).
6 More major hacks by Richard Earnshaw (rearnsha@arm.com).
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published
12 by the Free Software Foundation; either version 2, or (at your
13 option) any later version.
15 GCC is distributed in the hope that it will be useful, but WITHOUT
16 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
17 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
18 License for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING. If not, write to
22 the Free Software Foundation, 59 Temple Place - Suite 330,
23 Boston, MA 02111-1307, USA. */
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "obstack.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "real.h"
35 #include "insn-config.h"
36 #include "conditions.h"
37 #include "output.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "reload.h"
41 #include "function.h"
42 #include "expr.h"
43 #include "optabs.h"
44 #include "toplev.h"
45 #include "recog.h"
46 #include "ggc.h"
47 #include "except.h"
48 #include "c-pragma.h"
49 #include "integrate.h"
50 #include "tm_p.h"
51 #include "target.h"
52 #include "target-def.h"
53 #include "debug.h"
54 #include "langhooks.h"
56 /* Forward definitions of types. */
57 typedef struct minipool_node Mnode;
58 typedef struct minipool_fixup Mfix;
60 const struct attribute_spec arm_attribute_table[];
62 /* Forward function declarations. */
63 static arm_stack_offsets *arm_get_frame_offsets (void);
64 static void arm_add_gc_roots (void);
65 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
66 HOST_WIDE_INT, rtx, rtx, int, int);
67 static unsigned bit_count (unsigned long);
68 static int arm_address_register_rtx_p (rtx, int);
69 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
70 static int thumb_base_register_rtx_p (rtx, enum machine_mode, int);
71 inline static int thumb_index_register_rtx_p (rtx, int);
72 static int thumb_far_jump_used_p (void);
73 static bool thumb_force_lr_save (void);
74 static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
75 static rtx emit_sfm (int, int);
76 static int arm_size_return_regs (void);
77 #ifndef AOF_ASSEMBLER
78 static bool arm_assemble_integer (rtx, unsigned int, int);
79 #endif
80 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
81 static arm_cc get_arm_condition_code (rtx);
82 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
83 static rtx is_jump_table (rtx);
84 static const char *output_multi_immediate (rtx *, const char *, const char *,
85 int, HOST_WIDE_INT);
86 static const char *shift_op (rtx, HOST_WIDE_INT *);
87 static struct machine_function *arm_init_machine_status (void);
88 static void thumb_exit (FILE *, int);
89 static rtx is_jump_table (rtx);
90 static HOST_WIDE_INT get_jump_table_size (rtx);
91 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
92 static Mnode *add_minipool_forward_ref (Mfix *);
93 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
94 static Mnode *add_minipool_backward_ref (Mfix *);
95 static void assign_minipool_offsets (Mfix *);
96 static void arm_print_value (FILE *, rtx);
97 static void dump_minipool (rtx);
98 static int arm_barrier_cost (rtx);
99 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
100 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
101 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
102 rtx);
103 static void arm_reorg (void);
104 static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
105 static int current_file_function_operand (rtx);
106 static unsigned long arm_compute_save_reg0_reg12_mask (void);
107 static unsigned long arm_compute_save_reg_mask (void);
108 static unsigned long arm_isr_value (tree);
109 static unsigned long arm_compute_func_type (void);
110 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
111 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
112 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
113 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
114 #endif
115 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
116 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
117 static void thumb_output_function_prologue (FILE *, HOST_WIDE_INT);
118 static int arm_comp_type_attributes (tree, tree);
119 static void arm_set_default_type_attributes (tree);
120 static int arm_adjust_cost (rtx, rtx, rtx, int);
121 static int count_insns_for_constant (HOST_WIDE_INT, int);
122 static int arm_get_strip_length (int);
123 static bool arm_function_ok_for_sibcall (tree, tree);
124 static void arm_internal_label (FILE *, const char *, unsigned long);
125 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
126 tree);
127 static int arm_rtx_costs_1 (rtx, enum rtx_code, enum rtx_code);
128 static bool arm_size_rtx_costs (rtx, int, int, int *);
129 static bool arm_slowmul_rtx_costs (rtx, int, int, int *);
130 static bool arm_fastmul_rtx_costs (rtx, int, int, int *);
131 static bool arm_xscale_rtx_costs (rtx, int, int, int *);
132 static bool arm_9e_rtx_costs (rtx, int, int, int *);
133 static int arm_address_cost (rtx);
134 static bool arm_memory_load_p (rtx);
135 static bool arm_cirrus_insn_p (rtx);
136 static void cirrus_reorg (rtx);
137 static void arm_init_builtins (void);
138 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
139 static void arm_init_iwmmxt_builtins (void);
140 static rtx safe_vector_operand (rtx, enum machine_mode);
141 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
142 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
143 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
144 static void emit_constant_insn (rtx cond, rtx pattern);
145 static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
146 tree, bool);
148 #ifdef OBJECT_FORMAT_ELF
149 static void arm_elf_asm_constructor (rtx, int);
150 #endif
151 #ifndef ARM_PE
152 static void arm_encode_section_info (tree, rtx, int);
153 #endif
155 static void arm_file_end (void);
157 #ifdef AOF_ASSEMBLER
158 static void aof_globalize_label (FILE *, const char *);
159 static void aof_dump_imports (FILE *);
160 static void aof_dump_pic_table (FILE *);
161 static void aof_file_start (void);
162 static void aof_file_end (void);
163 #endif
164 static rtx arm_struct_value_rtx (tree, int);
165 static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
166 tree, int *, int);
167 static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
168 enum machine_mode, tree, bool);
169 static bool arm_promote_prototypes (tree);
170 static bool arm_default_short_enums (void);
171 static bool arm_align_anon_bitfield (void);
172 static bool arm_return_in_msb (tree);
173 static bool arm_must_pass_in_stack (enum machine_mode, tree);
175 static tree arm_cxx_guard_type (void);
176 static bool arm_cxx_guard_mask_bit (void);
177 static tree arm_get_cookie_size (tree);
178 static bool arm_cookie_has_size (void);
179 static bool arm_cxx_cdtor_returns_this (void);
180 static bool arm_cxx_key_method_may_be_inline (void);
181 static void arm_cxx_determine_class_data_visibility (tree);
182 static bool arm_cxx_class_data_always_comdat (void);
183 static bool arm_cxx_use_aeabi_atexit (void);
184 static void arm_init_libfuncs (void);
185 static bool arm_handle_option (size_t, const char *, int);
186 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
188 /* Initialize the GCC target structure. */
189 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
190 #undef TARGET_MERGE_DECL_ATTRIBUTES
191 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
192 #endif
194 #undef TARGET_ATTRIBUTE_TABLE
195 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
197 #undef TARGET_ASM_FILE_END
198 #define TARGET_ASM_FILE_END arm_file_end
200 #ifdef AOF_ASSEMBLER
201 #undef TARGET_ASM_BYTE_OP
202 #define TARGET_ASM_BYTE_OP "\tDCB\t"
203 #undef TARGET_ASM_ALIGNED_HI_OP
204 #define TARGET_ASM_ALIGNED_HI_OP "\tDCW\t"
205 #undef TARGET_ASM_ALIGNED_SI_OP
206 #define TARGET_ASM_ALIGNED_SI_OP "\tDCD\t"
207 #undef TARGET_ASM_GLOBALIZE_LABEL
208 #define TARGET_ASM_GLOBALIZE_LABEL aof_globalize_label
209 #undef TARGET_ASM_FILE_START
210 #define TARGET_ASM_FILE_START aof_file_start
211 #undef TARGET_ASM_FILE_END
212 #define TARGET_ASM_FILE_END aof_file_end
213 #else
214 #undef TARGET_ASM_ALIGNED_SI_OP
215 #define TARGET_ASM_ALIGNED_SI_OP NULL
216 #undef TARGET_ASM_INTEGER
217 #define TARGET_ASM_INTEGER arm_assemble_integer
218 #endif
220 #undef TARGET_ASM_FUNCTION_PROLOGUE
221 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
223 #undef TARGET_ASM_FUNCTION_EPILOGUE
224 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
226 #undef TARGET_DEFAULT_TARGET_FLAGS
227 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
228 #undef TARGET_HANDLE_OPTION
229 #define TARGET_HANDLE_OPTION arm_handle_option
231 #undef TARGET_COMP_TYPE_ATTRIBUTES
232 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
234 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
235 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
237 #undef TARGET_SCHED_ADJUST_COST
238 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
240 #undef TARGET_ENCODE_SECTION_INFO
241 #ifdef ARM_PE
242 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
243 #else
244 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
245 #endif
247 #undef TARGET_STRIP_NAME_ENCODING
248 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
250 #undef TARGET_ASM_INTERNAL_LABEL
251 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
253 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
254 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
256 #undef TARGET_ASM_OUTPUT_MI_THUNK
257 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
258 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
259 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
261 /* This will be overridden in arm_override_options. */
262 #undef TARGET_RTX_COSTS
263 #define TARGET_RTX_COSTS arm_slowmul_rtx_costs
264 #undef TARGET_ADDRESS_COST
265 #define TARGET_ADDRESS_COST arm_address_cost
267 #undef TARGET_SHIFT_TRUNCATION_MASK
268 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
269 #undef TARGET_VECTOR_MODE_SUPPORTED_P
270 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
272 #undef TARGET_MACHINE_DEPENDENT_REORG
273 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
275 #undef TARGET_INIT_BUILTINS
276 #define TARGET_INIT_BUILTINS arm_init_builtins
277 #undef TARGET_EXPAND_BUILTIN
278 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
280 #undef TARGET_INIT_LIBFUNCS
281 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
283 #undef TARGET_PROMOTE_FUNCTION_ARGS
284 #define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true
285 #undef TARGET_PROMOTE_FUNCTION_RETURN
286 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
287 #undef TARGET_PROMOTE_PROTOTYPES
288 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
289 #undef TARGET_PASS_BY_REFERENCE
290 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
291 #undef TARGET_ARG_PARTIAL_BYTES
292 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
294 #undef TARGET_STRUCT_VALUE_RTX
295 #define TARGET_STRUCT_VALUE_RTX arm_struct_value_rtx
297 #undef TARGET_SETUP_INCOMING_VARARGS
298 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
300 #undef TARGET_DEFAULT_SHORT_ENUMS
301 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
303 #undef TARGET_ALIGN_ANON_BITFIELD
304 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
306 #undef TARGET_CXX_GUARD_TYPE
307 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
309 #undef TARGET_CXX_GUARD_MASK_BIT
310 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
312 #undef TARGET_CXX_GET_COOKIE_SIZE
313 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
315 #undef TARGET_CXX_COOKIE_HAS_SIZE
316 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
318 #undef TARGET_CXX_CDTOR_RETURNS_THIS
319 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
321 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
322 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
324 #undef TARGET_CXX_USE_AEABI_ATEXIT
325 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
327 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
328 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
329 arm_cxx_determine_class_data_visibility
331 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
332 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
334 #undef TARGET_RETURN_IN_MSB
335 #define TARGET_RETURN_IN_MSB arm_return_in_msb
337 #undef TARGET_MUST_PASS_IN_STACK
338 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
340 struct gcc_target targetm = TARGET_INITIALIZER;
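/* TARGET_INITIALIZER picks up every TARGET_* hook macro redefined above,
   so target-independent code that goes through the hook table, for
   example (schematically)

       if (targetm.rtx_costs (x, code, outer_code, &total))
         ...

   ends up in the ARM-specific implementation; rtx_costs starts out as
   arm_slowmul_rtx_costs and is replaced in arm_override_options
   according to the CPU being tuned for.  */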
342 /* Obstack for minipool constant handling. */
343 static struct obstack minipool_obstack;
344 static char * minipool_startobj;
346 /* The maximum number of insns skipped which
347 will be conditionalised if possible. */
348 static int max_insns_skipped = 5;
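/* As an illustration, a guarded statement such as

       if (a < b)
         x = 0;

   can be emitted as a conditionally executed sequence (roughly
   "cmp; movlt") instead of a compare-and-branch, but only when the
   guarded body is no longer than max_insns_skipped instructions.  */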
350 extern FILE * asm_out_file;
352 /* True if we are currently building a constant table. */
353 int making_const_table;
355 /* Define the information needed to generate branch insns. This is
356 stored from the compare operation. */
357 rtx arm_compare_op0, arm_compare_op1;
359 /* The processor for which instructions should be scheduled. */
360 enum processor_type arm_tune = arm_none;
362 /* Which floating point model to use. */
363 enum arm_fp_model arm_fp_model;
365 /* Which floating point hardware is available. */
366 enum fputype arm_fpu_arch;
368 /* Which floating point hardware to schedule for. */
369 enum fputype arm_fpu_tune;
371 /* Whether to use floating point hardware. */
372 enum float_abi_type arm_float_abi;
374 /* Which ABI to use. */
375 enum arm_abi_type arm_abi;
377 /* Set by the -mfpu=... option. */
378 static const char * target_fpu_name = NULL;
380 /* Set by the -mfpe=... option. */
381 static const char * target_fpe_name = NULL;
383 /* Set by the -mfloat-abi=... option. */
384 static const char * target_float_abi_name = NULL;
386 /* Set by the -mabi=... option. */
387 static const char * target_abi_name = NULL;
389 /* Used to parse -mstructure_size_boundary command line option. */
390 static const char * structure_size_string = NULL;
391 int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;
393 /* Used for Thumb call_via trampolines. */
394 rtx thumb_call_via_label[14];
395 static int thumb_call_reg_needed;
397 /* Bit values used to identify processor capabilities. */
398 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
399 #define FL_ARCH3M (1 << 1) /* Extended multiply */
400 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
401 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
402 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
403 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
404 #define FL_THUMB (1 << 6) /* Thumb aware */
405 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
406 #define FL_STRONG (1 << 8) /* StrongARM */
407 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
408 #define FL_XSCALE (1 << 10) /* XScale */
409 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
410 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
411 media instructions. */
412 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
413 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
414 Note: ARM6 & 7 derivatives only. */
416 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
418 #define FL_FOR_ARCH2 0
419 #define FL_FOR_ARCH3 FL_MODE32
420 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
421 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
422 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
423 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
424 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
425 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
426 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
427 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
428 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
429 #define FL_FOR_ARCH6J FL_FOR_ARCH6
430 #define FL_FOR_ARCH6K FL_FOR_ARCH6
431 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
432 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6
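/* Each FL_FOR_ARCHn macro folds in the flags of the architectures it
   extends; for example FL_FOR_ARCH5TE expands to
   FL_MODE32 | FL_ARCH3M | FL_ARCH4 | FL_ARCH5 | FL_ARCH5E | FL_THUMB.  */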
434 /* The bits in this mask specify which
435 instructions we are allowed to generate. */
436 static unsigned long insn_flags = 0;
438 /* The bits in this mask specify which instruction scheduling options should
439 be used. */
440 static unsigned long tune_flags = 0;
442 /* The following are used in the arm.md file as equivalents to bits
443 in the above two flag variables. */
445 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
446 int arm_arch3m = 0;
448 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
449 int arm_arch4 = 0;
451 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
452 int arm_arch4t = 0;
454 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
455 int arm_arch5 = 0;
457 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
458 int arm_arch5e = 0;
460 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
461 int arm_arch6 = 0;
463 /* Nonzero if this chip can benefit from load scheduling. */
464 int arm_ld_sched = 0;
466 /* Nonzero if this chip is a StrongARM. */
467 int arm_tune_strongarm = 0;
469 /* Nonzero if this chip is a Cirrus variant. */
470 int arm_arch_cirrus = 0;
472 /* Nonzero if this chip supports Intel Wireless MMX technology. */
473 int arm_arch_iwmmxt = 0;
475 /* Nonzero if this chip is an XScale. */
476 int arm_arch_xscale = 0;
478 /* Nonzero if tuning for XScale */
479 int arm_tune_xscale = 0;
481 /* Nonzero if we want to tune for stores that access the write-buffer.
482 This typically means an ARM6 or ARM7 with MMU or MPU. */
483 int arm_tune_wbuf = 0;
485 /* Nonzero if generating Thumb instructions. */
486 int thumb_code = 0;
488 /* Nonzero if we should define __THUMB_INTERWORK__ in the
489 preprocessor.
490 XXX This is a bit of a hack, it's intended to help work around
491 problems in GLD which doesn't understand that armv5t code is
492 interworking clean. */
493 int arm_cpp_interwork = 0;
495 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we
496 must report the mode of the memory reference from PRINT_OPERAND to
497 PRINT_OPERAND_ADDRESS. */
498 enum machine_mode output_memory_reference_mode;
500 /* The register number to be used for the PIC offset register. */
501 static const char * arm_pic_register_string = NULL;
502 int arm_pic_register = INVALID_REGNUM;
504 /* Set to 1 when a return insn is output, this means that the epilogue
505 is not needed. */
506 int return_used_this_function;
508 /* Set to 1 after arm_reorg has started. Reset to start at the start of
509 the next function. */
510 static int after_arm_reorg = 0;
512 /* The maximum number of insns to be used when loading a constant. */
513 static int arm_constant_limit = 3;
515 /* For an explanation of these variables, see final_prescan_insn below. */
516 int arm_ccfsm_state;
517 enum arm_cond_code arm_current_cc;
518 rtx arm_target_insn;
519 int arm_target_label;
521 /* The condition codes of the ARM, and the inverse function. */
522 static const char * const arm_condition_codes[] =
524 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
525 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
528 #define streq(string1, string2) (strcmp (string1, string2) == 0)
530 /* Initialization code. */
532 struct processors
534 const char *const name;
535 enum processor_type core;
536 const char *arch;
537 const unsigned long flags;
538 bool (* rtx_costs) (rtx, int, int, int *);
541 /* Not all of these give usefully different compilation alternatives,
542 but there is no simple way of generalizing them. */
543 static const struct processors all_cores[] =
545 /* ARM Cores */
546 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
547 {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, arm_##COSTS##_rtx_costs},
548 #include "arm-cores.def"
549 #undef ARM_CORE
550 {NULL, arm_none, NULL, 0, NULL}
553 static const struct processors all_architectures[] =
555 /* ARM Architectures */
556 /* We don't specify rtx_costs here as it will be figured out
557 from the core. */
559 {"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
560 {"armv2a", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
561 {"armv3", arm6, "3", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
562 {"armv3m", arm7m, "3M", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
563 {"armv4", arm7tdmi, "4", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
564 /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
565 implementations that support it, so we will leave it out for now. */
566 {"armv4t", arm7tdmi, "4T", FL_CO_PROC | FL_FOR_ARCH4T, NULL},
567 {"armv5", arm10tdmi, "5", FL_CO_PROC | FL_FOR_ARCH5, NULL},
568 {"armv5t", arm10tdmi, "5T", FL_CO_PROC | FL_FOR_ARCH5T, NULL},
569 {"armv5e", arm1026ejs, "5E", FL_CO_PROC | FL_FOR_ARCH5E, NULL},
570 {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
571 {"armv6", arm1136js, "6", FL_CO_PROC | FL_FOR_ARCH6, NULL},
572 {"armv6j", arm1136js, "6J", FL_CO_PROC | FL_FOR_ARCH6J, NULL},
573 {"armv6k", mpcore, "6K", FL_CO_PROC | FL_FOR_ARCH6K, NULL},
574 {"armv6z", arm1176jzs, "6Z", FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
575 {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
576 {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
577 {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
578 {NULL, arm_none, NULL, 0 , NULL}
581 struct arm_cpu_select
583 const char * string;
584 const char * name;
585 const struct processors * processors;
588 /* This is a magic structure. The 'string' field is magically filled in
589 with a pointer to the value specified by the user on the command line
590 assuming that the user has specified such a value. */
592 static struct arm_cpu_select arm_select[] =
594 /* string name processors */
595 { NULL, "-mcpu=", all_cores },
596 { NULL, "-march=", all_architectures },
597 { NULL, "-mtune=", all_cores }
600 /* Defines representing the indexes into the above table. */
601 #define ARM_OPT_SET_CPU 0
602 #define ARM_OPT_SET_ARCH 1
603 #define ARM_OPT_SET_TUNE 2
605 /* The name of the preprocessor macro to define for this architecture. */
607 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
609 struct fpu_desc
611 const char * name;
612 enum fputype fpu;
616 /* Available values for -mfpu=. */
618 static const struct fpu_desc all_fpus[] =
620 {"fpa", FPUTYPE_FPA},
621 {"fpe2", FPUTYPE_FPA_EMU2},
622 {"fpe3", FPUTYPE_FPA_EMU2},
623 {"maverick", FPUTYPE_MAVERICK},
624 {"vfp", FPUTYPE_VFP}
628 /* Floating point models used by the different hardware.
629 See fputype in arm.h. */
631 static const enum fputype fp_model_for_fpu[] =
633 /* No FP hardware. */
634 ARM_FP_MODEL_UNKNOWN, /* FPUTYPE_NONE */
635 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA */
636 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU2 */
637 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU3 */
638 ARM_FP_MODEL_MAVERICK, /* FPUTYPE_MAVERICK */
639 ARM_FP_MODEL_VFP /* FPUTYPE_VFP */
643 struct float_abi
645 const char * name;
646 enum float_abi_type abi_type;
650 /* Available values for -mfloat-abi=. */
652 static const struct float_abi all_float_abis[] =
654 {"soft", ARM_FLOAT_ABI_SOFT},
655 {"softfp", ARM_FLOAT_ABI_SOFTFP},
656 {"hard", ARM_FLOAT_ABI_HARD}
660 struct abi_name
662 const char *name;
663 enum arm_abi_type abi_type;
667 /* Available values for -mabi=. */
669 static const struct abi_name arm_all_abis[] =
671 {"apcs-gnu", ARM_ABI_APCS},
672 {"atpcs", ARM_ABI_ATPCS},
673 {"aapcs", ARM_ABI_AAPCS},
674 {"iwmmxt", ARM_ABI_IWMMXT}
677 /* Return the number of bits set in VALUE. */
678 static unsigned
679 bit_count (unsigned long value)
681 unsigned long count = 0;
683 while (value)
685 count++;
686 value &= value - 1; /* Clear the least-significant set bit. */
689 return count;
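/* Worked example: for value == 0x29 (binary 101001) the loop clears
   the lowest set bit on each iteration (101001 -> 101000 -> 100000 -> 0)
   and returns 3.  "value &= value - 1" is the standard trick for
   clearing the least-significant set bit.  */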
692 /* Set up library functions unique to ARM. */
694 static void
695 arm_init_libfuncs (void)
697 /* There are no special library functions unless we are using the
698 ARM BPABI. */
699 if (!TARGET_BPABI)
700 return;
702 /* The functions below are described in Section 4 of the "Run-Time
703 ABI for the ARM architecture", Version 1.0. */
705 /* Double-precision floating-point arithmetic. Table 2. */
706 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
707 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
708 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
709 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
710 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
712 /* Double-precision comparisons. Table 3. */
713 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
714 set_optab_libfunc (ne_optab, DFmode, NULL);
715 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
716 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
717 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
718 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
719 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
721 /* Single-precision floating-point arithmetic. Table 4. */
722 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
723 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
724 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
725 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
726 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
728 /* Single-precision comparisons. Table 5. */
729 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
730 set_optab_libfunc (ne_optab, SFmode, NULL);
731 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
732 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
733 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
734 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
735 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
737 /* Floating-point to integer conversions. Table 6. */
738 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
739 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
740 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
741 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
742 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
743 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
744 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
745 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
747 /* Conversions between floating types. Table 7. */
748 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
749 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
751 /* Integer to floating-point conversions. Table 8. */
752 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
753 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
754 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
755 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
756 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
757 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
758 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
759 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
761 /* Long long. Table 9. */
762 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
763 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
764 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
765 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
766 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
767 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
768 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
769 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
771 /* Integer (32/32->32) division. \S 4.3.1. */
772 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
773 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
775 /* The divmod functions are designed so that they can be used for
776 plain division, even though they return both the quotient and the
777 remainder. The quotient is returned in the usual location (i.e.,
778 r0 for SImode, {r0, r1} for DImode), just as would be expected
779 for an ordinary division routine. Because the AAPCS calling
780 conventions specify that all of { r0, r1, r2, r3 } are
781 call-clobbered registers, there is no need to tell the compiler
782 explicitly that those registers are clobbered by these
783 routines. */
784 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
785 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
786 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idivmod");
787 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidivmod");
789 /* We don't have mod libcalls. Fortunately gcc knows how to use the
790 divmod libcalls instead. */
791 set_optab_libfunc (smod_optab, DImode, NULL);
792 set_optab_libfunc (umod_optab, DImode, NULL);
793 set_optab_libfunc (smod_optab, SImode, NULL);
794 set_optab_libfunc (umod_optab, SImode, NULL);
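/* With the mappings above (only used for TARGET_BPABI, as tested at the
   top of this function), source such as

       long long q = a / b;

   compiles to a single call to __aeabi_ldivmod; the remainder half of
   the result, returned in { r2, r3 }, is simply ignored.  */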
797 /* Implement TARGET_HANDLE_OPTION. */
799 static bool
800 arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
802 switch (code)
804 case OPT_mabi_:
805 target_abi_name = arg;
806 return true;
808 case OPT_march_:
809 arm_select[1].string = arg;
810 return true;
812 case OPT_mcpu_:
813 arm_select[0].string = arg;
814 return true;
816 case OPT_mfloat_abi_:
817 target_float_abi_name = arg;
818 return true;
820 case OPT_mfp_:
821 case OPT_mfpe_:
822 target_fpe_name = arg;
823 return true;
825 case OPT_mfpu_:
826 target_fpu_name = arg;
827 return true;
829 case OPT_mhard_float:
830 target_float_abi_name = "hard";
831 return true;
833 case OPT_mpic_register_:
834 arm_pic_register_string = arg;
835 return true;
837 case OPT_msoft_float:
838 target_float_abi_name = "soft";
839 return true;
841 case OPT_mstructure_size_boundary_:
842 structure_size_string = arg;
843 return true;
845 case OPT_mtune_:
846 arm_select[2].string = arg;
847 return true;
849 default:
850 return true;
854 /* Fix up any incompatible options that the user has specified.
855 This has now turned into a maze. */
856 void
857 arm_override_options (void)
859 unsigned i;
860 enum processor_type target_arch_cpu = arm_none;
862 /* Set up the flags based on the cpu/architecture selected by the user. */
863 for (i = ARRAY_SIZE (arm_select); i--;)
865 struct arm_cpu_select * ptr = arm_select + i;
867 if (ptr->string != NULL && ptr->string[0] != '\0')
869 const struct processors * sel;
871 for (sel = ptr->processors; sel->name != NULL; sel++)
872 if (streq (ptr->string, sel->name))
874 /* Set the architecture define. */
875 if (i != ARM_OPT_SET_TUNE)
876 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
878 /* Determine the processor core for which we should
879 tune code-generation. */
880 if (/* -mcpu= is a sensible default. */
881 i == ARM_OPT_SET_CPU
882 /* -mtune= overrides -mcpu= and -march=. */
883 || i == ARM_OPT_SET_TUNE)
884 arm_tune = (enum processor_type) (sel - ptr->processors);
886 /* Remember the CPU associated with this architecture.
887 If no other option is used to set the CPU type,
888 we'll use this to guess the most suitable tuning
889 options. */
890 if (i == ARM_OPT_SET_ARCH)
891 target_arch_cpu = sel->core;
893 if (i != ARM_OPT_SET_TUNE)
895 /* If we have been given an architecture and a processor
896 make sure that they are compatible. We only generate
897 a warning though, and we prefer the CPU over the
898 architecture. */
899 if (insn_flags != 0 && (insn_flags ^ sel->flags))
900 warning (0, "switch -mcpu=%s conflicts with -march= switch",
901 ptr->string);
903 insn_flags = sel->flags;
906 break;
909 if (sel->name == NULL)
910 error ("bad value (%s) for %s switch", ptr->string, ptr->name);
914 /* Guess the tuning options from the architecture if necessary. */
915 if (arm_tune == arm_none)
916 arm_tune = target_arch_cpu;
918 /* If the user did not specify a processor, choose one for them. */
919 if (insn_flags == 0)
921 const struct processors * sel;
922 unsigned int sought;
923 enum processor_type cpu;
925 cpu = TARGET_CPU_DEFAULT;
926 if (cpu == arm_none)
928 #ifdef SUBTARGET_CPU_DEFAULT
929 /* Use the subtarget default CPU if none was specified by
930 configure. */
931 cpu = SUBTARGET_CPU_DEFAULT;
932 #endif
933 /* Default to ARM6. */
934 if (cpu == arm_none)
935 cpu = arm6;
937 sel = &all_cores[cpu];
939 insn_flags = sel->flags;
941 /* Now check to see if the user has specified some command line
942 switch that requires certain abilities from the cpu. */
943 sought = 0;
945 if (TARGET_INTERWORK || TARGET_THUMB)
947 sought |= (FL_THUMB | FL_MODE32);
949 /* There are no ARM processors that support both APCS-26 and
950 interworking. Therefore we force FL_MODE26 to be removed
951 from insn_flags here (if it was set), so that the search
952 below will always be able to find a compatible processor. */
953 insn_flags &= ~FL_MODE26;
956 if (sought != 0 && ((sought & insn_flags) != sought))
958 /* Try to locate a CPU type that supports all of the abilities
959 of the default CPU, plus the extra abilities requested by
960 the user. */
961 for (sel = all_cores; sel->name != NULL; sel++)
962 if ((sel->flags & sought) == (sought | insn_flags))
963 break;
965 if (sel->name == NULL)
967 unsigned current_bit_count = 0;
968 const struct processors * best_fit = NULL;
970 /* Ideally we would like to issue an error message here
971 saying that it was not possible to find a CPU compatible
972 with the default CPU, but which also supports the command
973 line options specified by the programmer, and so they
974 ought to use the -mcpu=<name> command line option to
975 override the default CPU type.
977 If we cannot find a cpu that has both the
978 characteristics of the default cpu and the given
979 command line options we scan the array again looking
980 for a best match. */
981 for (sel = all_cores; sel->name != NULL; sel++)
982 if ((sel->flags & sought) == sought)
984 unsigned count;
986 count = bit_count (sel->flags & insn_flags);
988 if (count >= current_bit_count)
990 best_fit = sel;
991 current_bit_count = count;
995 gcc_assert (best_fit);
996 sel = best_fit;
999 insn_flags = sel->flags;
1001 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
1002 if (arm_tune == arm_none)
1003 arm_tune = (enum processor_type) (sel - all_cores);
1006 /* The processor for which we should tune should now have been
1007 chosen. */
1008 gcc_assert (arm_tune != arm_none);
1010 tune_flags = all_cores[(int)arm_tune].flags;
1011 if (optimize_size)
1012 targetm.rtx_costs = arm_size_rtx_costs;
1013 else
1014 targetm.rtx_costs = all_cores[(int)arm_tune].rtx_costs;
1016 /* Make sure that the processor choice does not conflict with any of the
1017 other command line choices. */
1018 if (TARGET_INTERWORK && !(insn_flags & FL_THUMB))
1020 warning (0, "target CPU does not support interworking" );
1021 target_flags &= ~MASK_INTERWORK;
1024 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1026 warning (0, "target CPU does not support THUMB instructions");
1027 target_flags &= ~MASK_THUMB;
1030 if (TARGET_APCS_FRAME && TARGET_THUMB)
1032 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1033 target_flags &= ~MASK_APCS_FRAME;
1036 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1037 from here where no function is being compiled currently. */
1038 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1039 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1041 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1042 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1044 if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
1045 warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");
1047 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1049 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1050 target_flags |= MASK_APCS_FRAME;
1053 if (TARGET_POKE_FUNCTION_NAME)
1054 target_flags |= MASK_APCS_FRAME;
1056 if (TARGET_APCS_REENT && flag_pic)
1057 error ("-fpic and -mapcs-reent are incompatible");
1059 if (TARGET_APCS_REENT)
1060 warning (0, "APCS reentrant code not supported. Ignored");
1062 /* If this target is normally configured to use APCS frames, warn if they
1063 are turned off and debugging is turned on. */
1064 if (TARGET_ARM
1065 && write_symbols != NO_DEBUG
1066 && !TARGET_APCS_FRAME
1067 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1068 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1070 /* If stack checking is disabled, we can use r10 as the PIC register,
1071 which keeps r9 available. */
1072 if (flag_pic)
1073 arm_pic_register = TARGET_APCS_STACK ? 9 : 10;
1075 if (TARGET_APCS_FLOAT)
1076 warning (0, "passing floating point arguments in fp regs not yet supported");
1078 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1079 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1080 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1081 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1082 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1083 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1084 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1085 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1086 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1088 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1089 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1090 thumb_code = (TARGET_ARM == 0);
1091 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1092 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1093 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1095 /* V5 code we generate is completely interworking capable, so we turn off
1096 TARGET_INTERWORK here to avoid many tests later on. */
1098 /* XXX However, we must pass the right pre-processor defines to CPP
1099 or GLD can get confused. This is a hack. */
1100 if (TARGET_INTERWORK)
1101 arm_cpp_interwork = 1;
1103 if (arm_arch5)
1104 target_flags &= ~MASK_INTERWORK;
1106 if (target_abi_name)
1108 for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
1110 if (streq (arm_all_abis[i].name, target_abi_name))
1112 arm_abi = arm_all_abis[i].abi_type;
1113 break;
1116 if (i == ARRAY_SIZE (arm_all_abis))
1117 error ("invalid ABI option: -mabi=%s", target_abi_name);
1119 else
1120 arm_abi = ARM_DEFAULT_ABI;
1122 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1123 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1125 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1126 error ("iwmmxt abi requires an iwmmxt capable cpu");
1128 arm_fp_model = ARM_FP_MODEL_UNKNOWN;
1129 if (target_fpu_name == NULL && target_fpe_name != NULL)
1131 if (streq (target_fpe_name, "2"))
1132 target_fpu_name = "fpe2";
1133 else if (streq (target_fpe_name, "3"))
1134 target_fpu_name = "fpe3";
1135 else
1136 error ("invalid floating point emulation option: -mfpe=%s",
1137 target_fpe_name);
1139 if (target_fpu_name != NULL)
1141 /* The user specified a FPU. */
1142 for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
1144 if (streq (all_fpus[i].name, target_fpu_name))
1146 arm_fpu_arch = all_fpus[i].fpu;
1147 arm_fpu_tune = arm_fpu_arch;
1148 arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
1149 break;
1152 if (arm_fp_model == ARM_FP_MODEL_UNKNOWN)
1153 error ("invalid floating point option: -mfpu=%s", target_fpu_name);
1155 else
1157 #ifdef FPUTYPE_DEFAULT
1158 /* Use the default if it is specified for this platform. */
1159 arm_fpu_arch = FPUTYPE_DEFAULT;
1160 arm_fpu_tune = FPUTYPE_DEFAULT;
1161 #else
1162 /* Pick one based on CPU type. */
1163 /* ??? Some targets assume FPA is the default.
1164 if ((insn_flags & FL_VFP) != 0)
1165 arm_fpu_arch = FPUTYPE_VFP;
1166 else
1168 if (arm_arch_cirrus)
1169 arm_fpu_arch = FPUTYPE_MAVERICK;
1170 else
1171 arm_fpu_arch = FPUTYPE_FPA_EMU2;
1172 #endif
1173 if (tune_flags & FL_CO_PROC && arm_fpu_arch == FPUTYPE_FPA_EMU2)
1174 arm_fpu_tune = FPUTYPE_FPA;
1175 else
1176 arm_fpu_tune = arm_fpu_arch;
1177 arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
1178 gcc_assert (arm_fp_model != ARM_FP_MODEL_UNKNOWN);
1181 if (target_float_abi_name != NULL)
1183 /* The user specified a FP ABI. */
1184 for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
1186 if (streq (all_float_abis[i].name, target_float_abi_name))
1188 arm_float_abi = all_float_abis[i].abi_type;
1189 break;
1192 if (i == ARRAY_SIZE (all_float_abis))
1193 error ("invalid floating point abi: -mfloat-abi=%s",
1194 target_float_abi_name);
1196 else
1197 arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
1199 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1200 sorry ("-mfloat-abi=hard and VFP");
1202 /* If soft-float is specified then don't use FPU. */
1203 if (TARGET_SOFT_FLOAT)
1204 arm_fpu_arch = FPUTYPE_NONE;
1206 /* For arm2/3 there is no need to do any scheduling if there is only
1207 a floating point emulator, or we are doing software floating-point. */
1208 if ((TARGET_SOFT_FLOAT
1209 || arm_fpu_tune == FPUTYPE_FPA_EMU2
1210 || arm_fpu_tune == FPUTYPE_FPA_EMU3)
1211 && (tune_flags & FL_MODE32) == 0)
1212 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1214 /* Override the default structure alignment for AAPCS ABI. */
1215 if (arm_abi == ARM_ABI_AAPCS)
1216 arm_structure_size_boundary = 8;
1218 if (structure_size_string != NULL)
1220 int size = strtol (structure_size_string, NULL, 0);
1222 if (size == 8 || size == 32
1223 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1224 arm_structure_size_boundary = size;
1225 else
1226 warning (0, "structure size boundary can only be set to %s",
1227 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
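/* For example, with -mstructure-size-boundary=32 a structure holding a
   single "char" is padded out to 4 bytes, whereas with the boundary set
   to 8 it occupies one byte.  */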
1230 if (arm_pic_register_string != NULL)
1232 int pic_register = decode_reg_name (arm_pic_register_string);
1234 if (!flag_pic)
1235 warning (0, "-mpic-register= is useless without -fpic");
1237 /* Prevent the user from choosing an obviously stupid PIC register. */
1238 else if (pic_register < 0 || call_used_regs[pic_register]
1239 || pic_register == HARD_FRAME_POINTER_REGNUM
1240 || pic_register == STACK_POINTER_REGNUM
1241 || pic_register >= PC_REGNUM)
1242 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1243 else
1244 arm_pic_register = pic_register;
1247 if (TARGET_THUMB && flag_schedule_insns)
1249 /* Don't warn since it's on by default in -O2. */
1250 flag_schedule_insns = 0;
1253 if (optimize_size)
1255 /* There's some dispute as to whether this should be 1 or 2. However,
1256 experiments seem to show that in pathological cases a setting of
1257 1 degrades less severely than a setting of 2. This could change if
1258 other parts of the compiler change their behavior. */
1259 arm_constant_limit = 1;
1261 /* If optimizing for size, bump the number of instructions that we
1262 are prepared to conditionally execute (even on a StrongARM). */
1263 max_insns_skipped = 6;
1265 else
1267 /* For processors with load scheduling, it never costs more than
1268 2 cycles to load a constant, and the load scheduler may well
1269 reduce that to 1. */
1270 if (arm_ld_sched)
1271 arm_constant_limit = 1;
1273 /* On XScale the longer latency of a load makes it more difficult
1274 to achieve a good schedule, so it's faster to synthesize
1275 constants that can be done in two insns. */
1276 if (arm_tune_xscale)
1277 arm_constant_limit = 2;
1279 /* StrongARM has early execution of branches, so a sequence
1280 that is worth skipping is shorter. */
1281 if (arm_tune_strongarm)
1282 max_insns_skipped = 3;
1285 /* Register global variables with the garbage collector. */
1286 arm_add_gc_roots ();
1289 static void
1290 arm_add_gc_roots (void)
1292 gcc_obstack_init(&minipool_obstack);
1293 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
1296 /* A table of known ARM exception types.
1297 For use with the interrupt function attribute. */
1299 typedef struct
1301 const char *const arg;
1302 const unsigned long return_value;
1304 isr_attribute_arg;
1306 static const isr_attribute_arg isr_attribute_args [] =
1308 { "IRQ", ARM_FT_ISR },
1309 { "irq", ARM_FT_ISR },
1310 { "FIQ", ARM_FT_FIQ },
1311 { "fiq", ARM_FT_FIQ },
1312 { "ABORT", ARM_FT_ISR },
1313 { "abort", ARM_FT_ISR },
1314 { "ABORT", ARM_FT_ISR },
1315 { "abort", ARM_FT_ISR },
1316 { "UNDEF", ARM_FT_EXCEPTION },
1317 { "undef", ARM_FT_EXCEPTION },
1318 { "SWI", ARM_FT_EXCEPTION },
1319 { "swi", ARM_FT_EXCEPTION },
1320 { NULL, ARM_FT_NORMAL }
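/* These strings are the arguments accepted by the "isr"/"interrupt"
   function attribute, e.g.

       void handler (void) __attribute__ ((interrupt ("IRQ")));

   arm_isr_value below maps the string onto the corresponding ARM_FT_*
   value, returning ARM_FT_UNKNOWN for anything not in the table.  */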
1323 /* Returns the (interrupt) function type of the current
1324 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1326 static unsigned long
1327 arm_isr_value (tree argument)
1329 const isr_attribute_arg * ptr;
1330 const char * arg;
1332 /* No argument - default to IRQ. */
1333 if (argument == NULL_TREE)
1334 return ARM_FT_ISR;
1336 /* Get the value of the argument. */
1337 if (TREE_VALUE (argument) == NULL_TREE
1338 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
1339 return ARM_FT_UNKNOWN;
1341 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
1343 /* Check it against the list of known arguments. */
1344 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
1345 if (streq (arg, ptr->arg))
1346 return ptr->return_value;
1348 /* An unrecognized interrupt type. */
1349 return ARM_FT_UNKNOWN;
1352 /* Computes the type of the current function. */
1354 static unsigned long
1355 arm_compute_func_type (void)
1357 unsigned long type = ARM_FT_UNKNOWN;
1358 tree a;
1359 tree attr;
1361 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
1363 /* Decide if the current function is volatile. Such functions
1364 never return, and many memory cycles can be saved by not storing
1365 register values that will never be needed again. This optimization
1366 was added to speed up context switching in a kernel application. */
1367 if (optimize > 0
1368 && TREE_NOTHROW (current_function_decl)
1369 && TREE_THIS_VOLATILE (current_function_decl))
1370 type |= ARM_FT_VOLATILE;
1372 if (cfun->static_chain_decl != NULL)
1373 type |= ARM_FT_NESTED;
1375 attr = DECL_ATTRIBUTES (current_function_decl);
1377 a = lookup_attribute ("naked", attr);
1378 if (a != NULL_TREE)
1379 type |= ARM_FT_NAKED;
1381 a = lookup_attribute ("isr", attr);
1382 if (a == NULL_TREE)
1383 a = lookup_attribute ("interrupt", attr);
1385 if (a == NULL_TREE)
1386 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
1387 else
1388 type |= arm_isr_value (TREE_VALUE (a));
1390 return type;
1393 /* Returns the type of the current function. */
1395 unsigned long
1396 arm_current_func_type (void)
1398 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
1399 cfun->machine->func_type = arm_compute_func_type ();
1401 return cfun->machine->func_type;
1404 /* Return 1 if it is possible to return using a single instruction.
1405 If SIBLING is non-null, this is a test for a return before a sibling
1406 call. SIBLING is the call insn, so we can examine its register usage. */
1409 use_return_insn (int iscond, rtx sibling)
1411 int regno;
1412 unsigned int func_type;
1413 unsigned long saved_int_regs;
1414 unsigned HOST_WIDE_INT stack_adjust;
1415 arm_stack_offsets *offsets;
1417 /* Never use a return instruction before reload has run. */
1418 if (!reload_completed)
1419 return 0;
1421 func_type = arm_current_func_type ();
1423 /* Naked functions and volatile functions need special
1424 consideration. */
1425 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED))
1426 return 0;
1428 /* So do interrupt functions that use the frame pointer. */
1429 if (IS_INTERRUPT (func_type) && frame_pointer_needed)
1430 return 0;
1432 offsets = arm_get_frame_offsets ();
1433 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
1435 /* As do variadic functions. */
1436 if (current_function_pretend_args_size
1437 || cfun->machine->uses_anonymous_args
1438 /* Or if the function calls __builtin_eh_return () */
1439 || current_function_calls_eh_return
1440 /* Or if the function calls alloca */
1441 || current_function_calls_alloca
1442 /* Or if there is a stack adjustment. However, if the stack pointer
1443 is saved on the stack, we can use a pre-incrementing stack load. */
1444 || !(stack_adjust == 0 || (frame_pointer_needed && stack_adjust == 4)))
1445 return 0;
1447 saved_int_regs = arm_compute_save_reg_mask ();
1449 /* Unfortunately, the insn
1451 ldmib sp, {..., sp, ...}
1453 triggers a bug on most SA-110 based devices, such that the stack
1454 pointer won't be correctly restored if the instruction takes a
1455 page fault. We work around this problem by popping r3 along with
1456 the other registers, since that is never slower than executing
1457 another instruction.
1459 We test for !arm_arch5 here, because code for any architecture
1460 less than this could potentially be run on one of the buggy
1461 chips. */
1462 if (stack_adjust == 4 && !arm_arch5)
1464 /* Validate that r3 is a call-clobbered register (always true in
1465 the default abi) ... */
1466 if (!call_used_regs[3])
1467 return 0;
1469 /* ... that it isn't being used for a return value ... */
1470 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
1471 return 0;
1473 /* ... or for a tail-call argument ... */
1474 if (sibling)
1476 gcc_assert (GET_CODE (sibling) == CALL_INSN);
1478 if (find_regno_fusage (sibling, USE, 3))
1479 return 0;
1482 /* ... and that there are no call-saved registers in r0-r2
1483 (always true in the default ABI). */
1484 if (saved_int_regs & 0x7)
1485 return 0;
1488 /* Can't be done if interworking with Thumb, and any registers have been
1489 stacked. */
1490 if (TARGET_INTERWORK && saved_int_regs != 0)
1491 return 0;
1493 /* On StrongARM, conditional returns are expensive if they aren't
1494 taken and multiple registers have been stacked. */
1495 if (iscond && arm_tune_strongarm)
1497 /* Conditional return when just the LR is stored is a simple
1498 conditional-load instruction, that's not expensive. */
1499 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
1500 return 0;
1502 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
1503 return 0;
1506 /* If there are saved registers but the LR isn't saved, then we need
1507 two instructions for the return. */
1508 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
1509 return 0;
1511 /* Can't be done if any of the FPA regs are pushed,
1512 since this also requires an insn. */
1513 if (TARGET_HARD_FLOAT && TARGET_FPA)
1514 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
1515 if (regs_ever_live[regno] && !call_used_regs[regno])
1516 return 0;
1518 /* Likewise VFP regs. */
1519 if (TARGET_HARD_FLOAT && TARGET_VFP)
1520 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
1521 if (regs_ever_live[regno] && !call_used_regs[regno])
1522 return 0;
1524 if (TARGET_REALLY_IWMMXT)
1525 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
1526 if (regs_ever_live[regno] && ! call_used_regs [regno])
1527 return 0;
1529 return 1;
1532 /* Return TRUE if int I is a valid immediate ARM constant. */
1535 const_ok_for_arm (HOST_WIDE_INT i)
1537 int lowbit;
1539 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
1540 be all zero, or all one. */
1541 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
1542 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
1543 != ((~(unsigned HOST_WIDE_INT) 0)
1544 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
1545 return FALSE;
1547 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
1549 /* Fast return for 0 and small values. We must do this for zero, since
1550 the code below can't handle that one case. */
1551 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
1552 return TRUE;
1554 /* Get the number of trailing zeros, rounded down to the nearest even
1555 number. */
1556 lowbit = (ffs ((int) i) - 1) & ~1;
1558 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
1559 return TRUE;
1560 else if (lowbit <= 4
1561 && ((i & ~0xc000003f) == 0
1562 || (i & ~0xf000000f) == 0
1563 || (i & ~0xfc000003) == 0))
1564 return TRUE;
1566 return FALSE;
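/* Examples: 0x000000ff, 0x0000ff00, 0xff000000 and 0xc000003f are all
   valid ARM immediates (an 8-bit value rotated right by an even number
   of bits, possibly wrapping around the word), whereas 0x000001fe and
   0x00010001 are not and must be synthesized or loaded from the
   constant pool.  */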
1569 /* Return true if I is a valid constant for the operation CODE. */
1570 static int
1571 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
1573 if (const_ok_for_arm (i))
1574 return 1;
1576 switch (code)
1578 case PLUS:
1579 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
1581 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
1582 case XOR:
1583 case IOR:
1584 return 0;
1586 case AND:
1587 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
1589 default:
1590 gcc_unreachable ();
1594 /* Emit a sequence of insns to handle a large constant.
1595 CODE is the code of the operation required, it can be any of SET, PLUS,
1596 IOR, AND, XOR, MINUS;
1597 MODE is the mode in which the operation is being performed;
1598 VAL is the integer to operate on;
1599 SOURCE is the other operand (a register, or a null-pointer for SET);
1600 SUBTARGETS means it is safe to create scratch registers if that will
1601 either produce a simpler sequence, or we will want to cse the values.
1602 Return value is the number of insns emitted. */
1605 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
1606 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
1608 rtx cond;
1610 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
1611 cond = COND_EXEC_TEST (PATTERN (insn));
1612 else
1613 cond = NULL_RTX;
1615 if (subtargets || code == SET
1616 || (GET_CODE (target) == REG && GET_CODE (source) == REG
1617 && REGNO (target) != REGNO (source)))
1619 /* After arm_reorg has been called, we can't fix up expensive
1620 constants by pushing them into memory so we must synthesize
1621 them in-line, regardless of the cost. This is only likely to
1622 be more costly on chips that have load delay slots and we are
1623 compiling without running the scheduler (so no splitting
1624 occurred before the final instruction emission).
1626 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
1628 if (!after_arm_reorg
1629 && !cond
1630 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
1631 1, 0)
1632 > arm_constant_limit + (code != SET)))
1634 if (code == SET)
1636 /* Currently SET is the only monadic value for CODE; all
1637 the rest are dyadic. */
1638 emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (val)));
1639 return 1;
1641 else
1643 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
1645 emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (val)));
1646 /* For MINUS, the value is subtracted from, since we never
1647 have subtraction of a constant. */
1648 if (code == MINUS)
1649 emit_insn (gen_rtx_SET (VOIDmode, target,
1650 gen_rtx_MINUS (mode, temp, source)));
1651 else
1652 emit_insn (gen_rtx_SET (VOIDmode, target,
1653 gen_rtx_fmt_ee (code, mode, source, temp)));
1654 return 2;
1659 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
1663 static int
1664 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
1666 HOST_WIDE_INT temp1;
1667 int num_insns = 0;
1670 int end;
1672 if (i <= 0)
1673 i += 32;
1674 if (remainder & (3 << (i - 2)))
1676 end = i - 8;
1677 if (end < 0)
1678 end += 32;
1679 temp1 = remainder & ((0x0ff << end)
1680 | ((i < end) ? (0xff >> (32 - end)) : 0));
1681 remainder &= ~temp1;
1682 num_insns++;
1683 i -= 6;
1685 i -= 2;
1686 } while (remainder);
1687 return num_insns;
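/* As an example of the splitting performed by arm_gen_constant below,
   the constant 0x0000ff01 is not a valid immediate, so a SET of it is
   synthesized from two instructions built out of valid 8-bit chunks,
   roughly

       mov   rD, #1
       orr   rD, rD, #0xff00

   which is why arm_constant_limit (the number of insns we are prepared
   to spend) matters above.  */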
1690 /* Emit an instruction with the indicated PATTERN. If COND is
1691 non-NULL, conditionalize the execution of the instruction on COND
1692 being true. */
1694 static void
1695 emit_constant_insn (rtx cond, rtx pattern)
1697 if (cond)
1698 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
1699 emit_insn (pattern);
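/* Schematically, with COND the test from a COND_EXEC and PATTERN a
   (set (reg r0) (const_int 5)), the emitted insn becomes

       (cond_exec (eq ...) (set (reg r0) (const_int 5)))

   which the output templates print as a predicated "moveq r0, #5".  */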
1702 /* As above, but extra parameter GENERATE which, if clear, suppresses
1703 RTL generation. */
1705 static int
1706 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
1707 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
1708 int generate)
1710 int can_invert = 0;
1711 int can_negate = 0;
1712 int can_negate_initial = 0;
1713 int can_shift = 0;
1714 int i;
1715 int num_bits_set = 0;
1716 int set_sign_bit_copies = 0;
1717 int clear_sign_bit_copies = 0;
1718 int clear_zero_bit_copies = 0;
1719 int set_zero_bit_copies = 0;
1720 int insns = 0;
1721 unsigned HOST_WIDE_INT temp1, temp2;
1722 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
1724 /* Find out which operations are safe for a given CODE. Also do a quick
1725 check for degenerate cases; these can occur when DImode operations
1726 are split. */
1727 switch (code)
1729 case SET:
1730 can_invert = 1;
1731 can_shift = 1;
1732 can_negate = 1;
1733 break;
1735 case PLUS:
1736 can_negate = 1;
1737 can_negate_initial = 1;
1738 break;
1740 case IOR:
1741 if (remainder == 0xffffffff)
1743 if (generate)
1744 emit_constant_insn (cond,
1745 gen_rtx_SET (VOIDmode, target,
1746 GEN_INT (ARM_SIGN_EXTEND (val))));
1747 return 1;
1749 if (remainder == 0)
1751 if (reload_completed && rtx_equal_p (target, source))
1752 return 0;
1753 if (generate)
1754 emit_constant_insn (cond,
1755 gen_rtx_SET (VOIDmode, target, source));
1756 return 1;
1758 break;
1760 case AND:
1761 if (remainder == 0)
1763 if (generate)
1764 emit_constant_insn (cond,
1765 gen_rtx_SET (VOIDmode, target, const0_rtx));
1766 return 1;
1768 if (remainder == 0xffffffff)
1770 if (reload_completed && rtx_equal_p (target, source))
1771 return 0;
1772 if (generate)
1773 emit_constant_insn (cond,
1774 gen_rtx_SET (VOIDmode, target, source));
1775 return 1;
1777 can_invert = 1;
1778 break;
1780 case XOR:
1781 if (remainder == 0)
1783 if (reload_completed && rtx_equal_p (target, source))
1784 return 0;
1785 if (generate)
1786 emit_constant_insn (cond,
1787 gen_rtx_SET (VOIDmode, target, source));
1788 return 1;
1791 /* We don't know how to handle other cases yet. */
1792 gcc_assert (remainder == 0xffffffff);
1794 if (generate)
1795 emit_constant_insn (cond,
1796 gen_rtx_SET (VOIDmode, target,
1797 gen_rtx_NOT (mode, source)));
1798 return 1;
1800 case MINUS:
1801 /* We treat MINUS as (val - source), since (source - val) is always
1802 passed as (source + (-val)). */
1803 if (remainder == 0)
1805 if (generate)
1806 emit_constant_insn (cond,
1807 gen_rtx_SET (VOIDmode, target,
1808 gen_rtx_NEG (mode, source)));
1809 return 1;
1811 if (const_ok_for_arm (val))
1813 if (generate)
1814 emit_constant_insn (cond,
1815 gen_rtx_SET (VOIDmode, target,
1816 gen_rtx_MINUS (mode, GEN_INT (val),
1817 source)));
1818 return 1;
1820 can_negate = 1;
1822 break;
1824 default:
1825 gcc_unreachable ();
1828 /* If we can do it in one insn get out quickly. */
1829 if (const_ok_for_arm (val)
1830 || (can_negate_initial && const_ok_for_arm (-val))
1831 || (can_invert && const_ok_for_arm (~val)))
1833 if (generate)
1834 emit_constant_insn (cond,
1835 gen_rtx_SET (VOIDmode, target,
1836 (source
1837 ? gen_rtx_fmt_ee (code, mode, source,
1838 GEN_INT (val))
1839 : GEN_INT (val))));
1840 return 1;
1843 /* Calculate a few attributes that may be useful for specific
1844 optimizations. */
1845 for (i = 31; i >= 0; i--)
1847 if ((remainder & (1 << i)) == 0)
1848 clear_sign_bit_copies++;
1849 else
1850 break;
1853 for (i = 31; i >= 0; i--)
1855 if ((remainder & (1 << i)) != 0)
1856 set_sign_bit_copies++;
1857 else
1858 break;
1861 for (i = 0; i <= 31; i++)
1863 if ((remainder & (1 << i)) == 0)
1864 clear_zero_bit_copies++;
1865 else
1866 break;
1869 for (i = 0; i <= 31; i++)
1871 if ((remainder & (1 << i)) != 0)
1872 set_zero_bit_copies++;
1873 else
1874 break;
1877 switch (code)
1879 case SET:
1880 /* See if we can do this by sign_extending a constant that is known
1881 to be negative. This is a good way of doing it, since the shift
1882 may well merge into a subsequent insn. */
1883 if (set_sign_bit_copies > 1)
1885 if (const_ok_for_arm
1886 (temp1 = ARM_SIGN_EXTEND (remainder
1887 << (set_sign_bit_copies - 1))))
1889 if (generate)
1891 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
1892 emit_constant_insn (cond,
1893 gen_rtx_SET (VOIDmode, new_src,
1894 GEN_INT (temp1)));
1895 emit_constant_insn (cond,
1896 gen_ashrsi3 (target, new_src,
1897 GEN_INT (set_sign_bit_copies - 1)));
1899 return 2;
1901 /* For an inverted constant, we will need to set the low bits;
1902 these will be shifted out of harm's way. */
1903 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
1904 if (const_ok_for_arm (~temp1))
1906 if (generate)
1908 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
1909 emit_constant_insn (cond,
1910 gen_rtx_SET (VOIDmode, new_src,
1911 GEN_INT (temp1)));
1912 emit_constant_insn (cond,
1913 gen_ashrsi3 (target, new_src,
1914 GEN_INT (set_sign_bit_copies - 1)));
1916 return 2;
1920 /* See if we can calculate the value as the difference between two
1921 valid immediates. */
1922 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
1924 int topshift = clear_sign_bit_copies & ~1;
1926 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
1927 & (0xff000000 >> topshift));
1929 /* If temp1 is zero, then that means the 9 most significant
1930 bits of remainder were 1 and we've caused it to overflow.
1931 When topshift is 0 we don't need to do anything since we
1932 can borrow from 'bit 32'. */
1933 if (temp1 == 0 && topshift != 0)
1934 temp1 = 0x80000000 >> (topshift - 1);
1936 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
1938 if (const_ok_for_arm (temp2))
1940 if (generate)
1942 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
1943 emit_constant_insn (cond,
1944 gen_rtx_SET (VOIDmode, new_src,
1945 GEN_INT (temp1)));
1946 emit_constant_insn (cond,
1947 gen_addsi3 (target, new_src,
1948 GEN_INT (-temp2)));
1951 return 2;
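	  /* Worked example (added for illustration; rT and rD are placeholder
	     registers): for remainder 0x00fffff0, clear_sign_bit_copies is 8
	     and clear_zero_bit_copies is 4, so topshift is 8, temp1 becomes
	     0x01000000 and temp2 becomes 0x10, both valid immediates.  The
	     resulting

	         mov     rT, #0x01000000
	         sub     rD, rT, #0x10

	     loads the value in two insns, where a chunk-by-chunk synthesis
	     would need three.  */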
1955 /* See if we can generate this by setting the bottom (or the top)
1956 16 bits, and then shifting these into the other half of the
1957 word. We only look for the simplest cases; to do more would cost
1958 too much. Be careful, however, not to generate this when the
1959 alternative would take fewer insns. */
1960 if (val & 0xffff0000)
1962 temp1 = remainder & 0xffff0000;
1963 temp2 = remainder & 0x0000ffff;
1965 /* Overlaps outside this range are best done using other methods. */
1966 for (i = 9; i < 24; i++)
1968 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
1969 && !const_ok_for_arm (temp2))
1971 rtx new_src = (subtargets
1972 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
1973 : target);
1974 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
1975 source, subtargets, generate);
1976 source = new_src;
1977 if (generate)
1978 emit_constant_insn
1979 (cond,
1980 gen_rtx_SET
1981 (VOIDmode, target,
1982 gen_rtx_IOR (mode,
1983 gen_rtx_ASHIFT (mode, source,
1984 GEN_INT (i)),
1985 source)));
1986 return insns + 1;
1990 /* Don't duplicate cases already considered. */
1991 for (i = 17; i < 24; i++)
1993 if (((temp1 | (temp1 >> i)) == remainder)
1994 && !const_ok_for_arm (temp1))
1996 rtx new_src = (subtargets
1997 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
1998 : target);
1999 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2000 source, subtargets, generate);
2001 source = new_src;
2002 if (generate)
2003 emit_constant_insn
2004 (cond,
2005 gen_rtx_SET (VOIDmode, target,
2006 gen_rtx_IOR
2007 (mode,
2008 gen_rtx_LSHIFTRT (mode, source,
2009 GEN_INT (i)),
2010 source)));
2011 return insns + 1;
2015 break;
2017 case IOR:
2018 case XOR:
2019 /* If we have IOR or XOR, and the constant can be loaded in a
2020 single instruction, and we can find a temporary to put it in,
2021 then this can be done in two instructions instead of 3-4. */
2022 if (subtargets
2023 /* TARGET can't be NULL if SUBTARGETS is 0 */
2024 || (reload_completed && !reg_mentioned_p (target, source)))
2026 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2028 if (generate)
2030 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2032 emit_constant_insn (cond,
2033 gen_rtx_SET (VOIDmode, sub,
2034 GEN_INT (val)));
2035 emit_constant_insn (cond,
2036 gen_rtx_SET (VOIDmode, target,
2037 gen_rtx_fmt_ee (code, mode,
2038 source, sub)));
2040 return 2;
2044 if (code == XOR)
2045 break;
2047 if (set_sign_bit_copies > 8
2048 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2050 if (generate)
2052 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2053 rtx shift = GEN_INT (set_sign_bit_copies);
2055 emit_constant_insn
2056 (cond,
2057 gen_rtx_SET (VOIDmode, sub,
2058 gen_rtx_NOT (mode,
2059 gen_rtx_ASHIFT (mode,
2060 source,
2061 shift))));
2062 emit_constant_insn
2063 (cond,
2064 gen_rtx_SET (VOIDmode, target,
2065 gen_rtx_NOT (mode,
2066 gen_rtx_LSHIFTRT (mode, sub,
2067 shift))));
2069 return 2;
2072 if (set_zero_bit_copies > 8
2073 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2075 if (generate)
2077 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2078 rtx shift = GEN_INT (set_zero_bit_copies);
2080 emit_constant_insn
2081 (cond,
2082 gen_rtx_SET (VOIDmode, sub,
2083 gen_rtx_NOT (mode,
2084 gen_rtx_LSHIFTRT (mode,
2085 source,
2086 shift))));
2087 emit_constant_insn
2088 (cond,
2089 gen_rtx_SET (VOIDmode, target,
2090 gen_rtx_NOT (mode,
2091 gen_rtx_ASHIFT (mode, sub,
2092 shift))));
2094 return 2;
2097 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2099 if (generate)
2101 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2102 emit_constant_insn (cond,
2103 gen_rtx_SET (VOIDmode, sub,
2104 gen_rtx_NOT (mode, source)));
2105 source = sub;
2106 if (subtargets)
2107 sub = gen_reg_rtx (mode);
2108 emit_constant_insn (cond,
2109 gen_rtx_SET (VOIDmode, sub,
2110 gen_rtx_AND (mode, source,
2111 GEN_INT (temp1))));
2112 emit_constant_insn (cond,
2113 gen_rtx_SET (VOIDmode, target,
2114 gen_rtx_NOT (mode, sub)));
2116 return 3;
2118 break;
2120 case AND:
2121 /* See if two shifts will do 2 or more insns' worth of work. */
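      /* Example (added): (x & 0xffff) cannot use a single AND, since 0xffff
	 is not a valid immediate, but with clear_sign_bit_copies == 16 the
	 mask can be applied as

	     mov     rT, rS, lsl #16
	     mov     rD, rT, lsr #16

	 using two insns and no constant at all (rS, rT and rD are
	 placeholder registers).  */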
2122 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
2124 HOST_WIDE_INT shift_mask = ((0xffffffff
2125 << (32 - clear_sign_bit_copies))
2126 & 0xffffffff);
2128 if ((remainder | shift_mask) != 0xffffffff)
2130 if (generate)
2132 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2133 insns = arm_gen_constant (AND, mode, cond,
2134 remainder | shift_mask,
2135 new_src, source, subtargets, 1);
2136 source = new_src;
2138 else
2140 rtx targ = subtargets ? NULL_RTX : target;
2141 insns = arm_gen_constant (AND, mode, cond,
2142 remainder | shift_mask,
2143 targ, source, subtargets, 0);
2147 if (generate)
2149 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2150 rtx shift = GEN_INT (clear_sign_bit_copies);
2152 emit_insn (gen_ashlsi3 (new_src, source, shift));
2153 emit_insn (gen_lshrsi3 (target, new_src, shift));
2156 return insns + 2;
2159 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
2161 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
2163 if ((remainder | shift_mask) != 0xffffffff)
2165 if (generate)
2167 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2169 insns = arm_gen_constant (AND, mode, cond,
2170 remainder | shift_mask,
2171 new_src, source, subtargets, 1);
2172 source = new_src;
2174 else
2176 rtx targ = subtargets ? NULL_RTX : target;
2178 insns = arm_gen_constant (AND, mode, cond,
2179 remainder | shift_mask,
2180 targ, source, subtargets, 0);
2184 if (generate)
2186 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2187 rtx shift = GEN_INT (clear_zero_bit_copies);
2189 emit_insn (gen_lshrsi3 (new_src, source, shift));
2190 emit_insn (gen_ashlsi3 (target, new_src, shift));
2193 return insns + 2;
2196 break;
2198 default:
2199 break;
2202 for (i = 0; i < 32; i++)
2203 if (remainder & (1 << i))
2204 num_bits_set++;
2206 if (code == AND || (can_invert && num_bits_set > 16))
2207 remainder = (~remainder) & 0xffffffff;
2208 else if (code == PLUS && num_bits_set > 16)
2209 remainder = (-remainder) & 0xffffffff;
2210 else
2212 can_invert = 0;
2213 can_negate = 0;
2216 /* Now try to find a way of doing the job in either two or three
2217 instructions.
2218 We start by looking for the largest block of zeros that is aligned on
2219 a 2-bit boundary; we then fill up the temps, wrapping around to the
2220 top of the word when we drop off the bottom.
2221 In the worst case this code should produce no more than four insns. */
2223 int best_start = 0;
2224 int best_consecutive_zeros = 0;
2226 for (i = 0; i < 32; i += 2)
2228 int consecutive_zeros = 0;
2230 if (!(remainder & (3 << i)))
2232 while ((i < 32) && !(remainder & (3 << i)))
2234 consecutive_zeros += 2;
2235 i += 2;
2237 if (consecutive_zeros > best_consecutive_zeros)
2239 best_consecutive_zeros = consecutive_zeros;
2240 best_start = i - consecutive_zeros;
2242 i -= 2;
2246 /* So long as it won't require any more insns to do so, it's
2247 desirable to emit a small constant (in bits 0...9) in the last
2248 insn. This way there is more chance that it can be combined with
2249 a later addressing insn to form a pre-indexed load or store
2250 operation. Consider:
2252 *((volatile int *)0xe0000100) = 1;
2253 *((volatile int *)0xe0000110) = 2;
2255 We want this to wind up as:
2257 mov rA, #0xe0000000
2258 mov rB, #1
2259 str rB, [rA, #0x100]
2260 mov rB, #2
2261 str rB, [rA, #0x110]
2263 rather than having to synthesize both large constants from scratch.
2265 Therefore, we calculate how many insns would be required to emit
2266 the constant starting from `best_start', and also starting from
2267 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2268 yield a shorter sequence, we may as well use zero. */
2269 if (best_start != 0
2270 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2271 && (count_insns_for_constant (remainder, 0) <=
2272 count_insns_for_constant (remainder, best_start)))
2273 best_start = 0;
2275 /* Now start emitting the insns. */
2276 i = best_start;
2279 int end;
2281 if (i <= 0)
2282 i += 32;
2283 if (remainder & (3 << (i - 2)))
2285 end = i - 8;
2286 if (end < 0)
2287 end += 32;
2288 temp1 = remainder & ((0x0ff << end)
2289 | ((i < end) ? (0xff >> (32 - end)) : 0));
2290 remainder &= ~temp1;
2292 if (generate)
2294 rtx new_src, temp1_rtx;
2296 if (code == SET || code == MINUS)
2298 new_src = (subtargets ? gen_reg_rtx (mode) : target);
2299 if (can_invert && code != MINUS)
2300 temp1 = ~temp1;
2302 else
2304 if (remainder && subtargets)
2305 new_src = gen_reg_rtx (mode);
2306 else
2307 new_src = target;
2308 if (can_invert)
2309 temp1 = ~temp1;
2310 else if (can_negate)
2311 temp1 = -temp1;
2314 temp1 = trunc_int_for_mode (temp1, mode);
2315 temp1_rtx = GEN_INT (temp1);
2317 if (code == SET)
2319 else if (code == MINUS)
2320 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
2321 else
2322 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
2324 emit_constant_insn (cond,
2325 gen_rtx_SET (VOIDmode, new_src,
2326 temp1_rtx));
2327 source = new_src;
2330 if (code == SET)
2332 can_invert = 0;
2333 code = PLUS;
2335 else if (code == MINUS)
2336 code = PLUS;
2338 insns++;
2339 i -= 6;
2341 i -= 2;
2343 while (remainder);
2346 return insns;
2349 /* Canonicalize a comparison so that we are more likely to recognize it.
2350 This can be done for a few constant compares, where we can make the
2351 immediate value easier to load. */
2353 enum rtx_code
2354 arm_canonicalize_comparison (enum rtx_code code, rtx * op1)
2356 unsigned HOST_WIDE_INT i = INTVAL (*op1);
2358 switch (code)
2360 case EQ:
2361 case NE:
2362 return code;
2364 case GT:
2365 case LE:
2366 if (i != ((((unsigned HOST_WIDE_INT) 1) << (HOST_BITS_PER_WIDE_INT - 1)) - 1)
2367 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
2369 *op1 = GEN_INT (i + 1);
2370 return code == GT ? GE : LT;
2372 break;
2374 case GE:
2375 case LT:
2376 if (i != (((unsigned HOST_WIDE_INT) 1) << (HOST_BITS_PER_WIDE_INT - 1))
2377 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
2379 *op1 = GEN_INT (i - 1);
2380 return code == GE ? GT : LE;
2382 break;
2384 case GTU:
2385 case LEU:
2386 if (i != ~((unsigned HOST_WIDE_INT) 0)
2387 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
2389 *op1 = GEN_INT (i + 1);
2390 return code == GTU ? GEU : LTU;
2392 break;
2394 case GEU:
2395 case LTU:
2396 if (i != 0
2397 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
2399 *op1 = GEN_INT (i - 1);
2400 return code == GEU ? GTU : LEU;
2402 break;
2404 default:
2405 gcc_unreachable ();
2408 return code;
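/* Worked example (added for illustration): for (x > 0xfff) the constant
   0xfff is not a valid ARM immediate, but 0x1000 is, so the code above
   rewrites the test as (x >= 0x1000), which a single cmp can handle.  */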
2412 /* Define how to find the value returned by a function. */
2415 arm_function_value (tree type, tree func ATTRIBUTE_UNUSED)
2417 enum machine_mode mode;
2418 int unsignedp ATTRIBUTE_UNUSED;
2419 rtx r ATTRIBUTE_UNUSED;
2421 mode = TYPE_MODE (type);
2422 /* Promote integer types. */
2423 if (INTEGRAL_TYPE_P (type))
2424 PROMOTE_FUNCTION_MODE (mode, unsignedp, type);
2426 /* Promote small structs returned in a register to full-word size
2427 for big-endian AAPCS. */
2428 if (arm_return_in_msb (type))
2430 HOST_WIDE_INT size = int_size_in_bytes (type);
2431 if (size % UNITS_PER_WORD != 0)
2433 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
2434 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
2438 return LIBCALL_VALUE (mode);
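/* Illustrative example (added, not in the original source): under
   big-endian AAPCS a 6-byte struct returned in registers has its size
   rounded up to 8 by the code above, so LIBCALL_VALUE is asked for a
   DImode value (r0/r1); the padding keeps the data at the end that
   arm_return_in_msb expects.  */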
2441 /* Determine the amount of memory needed to store the possible return
2442 registers of an untyped call. */
2444 arm_apply_result_size (void)
2446 int size = 16;
2448 if (TARGET_ARM)
2450 if (TARGET_HARD_FLOAT_ABI)
2452 if (TARGET_FPA)
2453 size += 12;
2454 if (TARGET_MAVERICK)
2455 size += 8;
2457 if (TARGET_IWMMXT_ABI)
2458 size += 8;
2461 return size;
2464 /* Decide whether a type should be returned in memory (true)
2465 or in a register (false). This is called by the macro
2466 RETURN_IN_MEMORY. */
2468 arm_return_in_memory (tree type)
2470 HOST_WIDE_INT size;
2472 if (!AGGREGATE_TYPE_P (type)
2473 && (TREE_CODE (type) != VECTOR_TYPE)
2474 && !(TARGET_AAPCS_BASED && TREE_CODE (type) == COMPLEX_TYPE))
2475 /* All simple types are returned in registers.
2476 For AAPCS, complex types are treated the same as aggregates. */
2477 return 0;
2479 size = int_size_in_bytes (type);
2481 if (arm_abi != ARM_ABI_APCS)
2483 /* ATPCS and later return aggregate types in memory only if they are
2484 larger than a word (or are variable size). */
2485 return (size < 0 || size > UNITS_PER_WORD);
2488 /* To maximize backwards compatibility with previous versions of gcc,
2489 return vectors up to 4 words in registers. */
2490 if (TREE_CODE (type) == VECTOR_TYPE)
2491 return (size < 0 || size > (4 * UNITS_PER_WORD));
2493 /* For the arm-wince targets we choose to be compatible with Microsoft's
2494 ARM and Thumb compilers, which always return aggregates in memory. */
2495 #ifndef ARM_WINCE
2496 /* All structures/unions bigger than one word are returned in memory.
2497 Also catch the case where int_size_in_bytes returns -1. In this case
2498 the aggregate is either huge or of variable size, and in either case
2499 we will want to return it via memory and not in a register. */
2500 if (size < 0 || size > UNITS_PER_WORD)
2501 return 1;
2503 if (TREE_CODE (type) == RECORD_TYPE)
2505 tree field;
2507 /* For a struct the APCS says that we only return in a register
2508 if the type is 'integer like' and every addressable element
2509 has an offset of zero. For practical purposes this means
2510 that the structure can have at most one non bit-field element
2511 and that this element must be the first one in the structure. */
2513 /* Find the first field, ignoring non FIELD_DECL things which will
2514 have been created by C++. */
2515 for (field = TYPE_FIELDS (type);
2516 field && TREE_CODE (field) != FIELD_DECL;
2517 field = TREE_CHAIN (field))
2518 continue;
2520 if (field == NULL)
2521 return 0; /* An empty structure. Allowed by an extension to ANSI C. */
2523 /* Check that the first field is valid for returning in a register. */
2525 /* ... Floats are not allowed */
2526 if (FLOAT_TYPE_P (TREE_TYPE (field)))
2527 return 1;
2529 /* ... Aggregates that are not themselves valid for returning in
2530 a register are not allowed. */
2531 if (RETURN_IN_MEMORY (TREE_TYPE (field)))
2532 return 1;
2534 /* Now check the remaining fields, if any. Only bitfields are allowed,
2535 since they are not addressable. */
2536 for (field = TREE_CHAIN (field);
2537 field;
2538 field = TREE_CHAIN (field))
2540 if (TREE_CODE (field) != FIELD_DECL)
2541 continue;
2543 if (!DECL_BIT_FIELD_TYPE (field))
2544 return 1;
2547 return 0;
2550 if (TREE_CODE (type) == UNION_TYPE)
2552 tree field;
2554 /* Unions can be returned in registers if every element is
2555 integral, or can be returned in an integer register. */
2556 for (field = TYPE_FIELDS (type);
2557 field;
2558 field = TREE_CHAIN (field))
2560 if (TREE_CODE (field) != FIELD_DECL)
2561 continue;
2563 if (FLOAT_TYPE_P (TREE_TYPE (field)))
2564 return 1;
2566 if (RETURN_IN_MEMORY (TREE_TYPE (field)))
2567 return 1;
2570 return 0;
2572 #endif /* not ARM_WINCE */
2574 /* Return all other types in memory. */
2575 return 1;
2578 /* Indicate whether or not words of a double are in big-endian order. */
2581 arm_float_words_big_endian (void)
2583 if (TARGET_MAVERICK)
2584 return 0;
2586 /* For FPA, float words are always big-endian. For VFP, float words
2587 follow the memory system mode. */
2589 if (TARGET_FPA)
2591 return 1;
2594 if (TARGET_VFP)
2595 return (TARGET_BIG_END ? 1 : 0);
2597 return 1;
2600 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2601 for a call to a function whose data type is FNTYPE.
2602 For a library call, FNTYPE is NULL. */
2603 void
2604 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
2605 rtx libname ATTRIBUTE_UNUSED,
2606 tree fndecl ATTRIBUTE_UNUSED)
2608 /* On the ARM, the offset starts at 0. */
2609 pcum->nregs = ((fntype && aggregate_value_p (TREE_TYPE (fntype), fntype)) ? 1 : 0);
2610 pcum->iwmmxt_nregs = 0;
2611 pcum->can_split = true;
2613 pcum->call_cookie = CALL_NORMAL;
2615 if (TARGET_LONG_CALLS)
2616 pcum->call_cookie = CALL_LONG;
2618 /* Check for long call/short call attributes. The attributes
2619 override any command line option. */
2620 if (fntype)
2622 if (lookup_attribute ("short_call", TYPE_ATTRIBUTES (fntype)))
2623 pcum->call_cookie = CALL_SHORT;
2624 else if (lookup_attribute ("long_call", TYPE_ATTRIBUTES (fntype)))
2625 pcum->call_cookie = CALL_LONG;
2628 /* Varargs vectors are treated the same as long long.
2629 named_count avoids having to change the way arm handles 'named'. */
2630 pcum->named_count = 0;
2631 pcum->nargs = 0;
2633 if (TARGET_REALLY_IWMMXT && fntype)
2635 tree fn_arg;
2637 for (fn_arg = TYPE_ARG_TYPES (fntype);
2638 fn_arg;
2639 fn_arg = TREE_CHAIN (fn_arg))
2640 pcum->named_count += 1;
2642 if (! pcum->named_count)
2643 pcum->named_count = INT_MAX;
2648 /* Return true if mode/type need doubleword alignment. */
2649 bool
2650 arm_needs_doubleword_align (enum machine_mode mode, tree type)
2652 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
2653 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
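/* Example (added, assuming the AAPCS where ARM_DOUBLEWORD_ALIGN is set):
   DImode has 64-bit alignment, which exceeds the 32-bit PARM_BOUNDARY, so
   a long long argument answers true here; arm_function_arg below then
   rounds the register number up, so for f (int, long long) the int goes
   in r0, r1 is skipped, and the long long lands in r2/r3.  */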
2657 /* Determine where to put an argument to a function.
2658 Value is zero to push the argument on the stack,
2659 or a hard register in which to store the argument.
2661 MODE is the argument's machine mode.
2662 TYPE is the data type of the argument (as a tree).
2663 This is null for libcalls where that information may
2664 not be available.
2665 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2666 the preceding args and about the function being called.
2667 NAMED is nonzero if this argument is a named parameter
2668 (otherwise it is an extra parameter matching an ellipsis). */
2671 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
2672 tree type, int named)
2674 int nregs;
2676 /* Varargs vectors are treated the same as long long.
2677 named_count avoids having to change the way arm handles 'named'. */
2678 if (TARGET_IWMMXT_ABI
2679 && arm_vector_mode_supported_p (mode)
2680 && pcum->named_count > pcum->nargs + 1)
2682 if (pcum->iwmmxt_nregs <= 9)
2683 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
2684 else
2686 pcum->can_split = false;
2687 return NULL_RTX;
2691 /* Put doubleword aligned quantities in even register pairs. */
2692 if (pcum->nregs & 1
2693 && ARM_DOUBLEWORD_ALIGN
2694 && arm_needs_doubleword_align (mode, type))
2695 pcum->nregs++;
2697 if (mode == VOIDmode)
2698 /* Compute operand 2 of the call insn. */
2699 return GEN_INT (pcum->call_cookie);
2701 /* Only allow splitting an arg between regs and memory if all preceding
2702 args were allocated to regs. For args passed by reference we only count
2703 the reference pointer. */
2704 if (pcum->can_split)
2705 nregs = 1;
2706 else
2707 nregs = ARM_NUM_REGS2 (mode, type);
2709 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
2710 return NULL_RTX;
2712 return gen_rtx_REG (mode, pcum->nregs);
2715 static int
2716 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
2717 tree type, bool named ATTRIBUTE_UNUSED)
2719 int nregs = pcum->nregs;
2721 if (arm_vector_mode_supported_p (mode))
2722 return 0;
2724 if (NUM_ARG_REGS > nregs
2725 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
2726 && pcum->can_split)
2727 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
2729 return 0;
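/* Worked example (added): for a call such as f (int, int, int, long long)
   under the APCS (no doubleword alignment), the three ints take r0-r2 and
   the long long is split; the function above reports 4 bytes passed in r3,
   with the remaining 4 bytes going on the stack.  */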
2732 /* Variable sized types are passed by reference. This is a GCC
2733 extension to the ARM ABI. */
2735 static bool
2736 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2737 enum machine_mode mode ATTRIBUTE_UNUSED,
2738 tree type, bool named ATTRIBUTE_UNUSED)
2740 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
2743 /* Encode the current state of the #pragma [no_]long_calls. */
2744 typedef enum
2746 OFF, /* No #pragma [no_]long_calls is in effect. */
2747 LONG, /* #pragma long_calls is in effect. */
2748 SHORT /* #pragma no_long_calls is in effect. */
2749 } arm_pragma_enum;
2751 static arm_pragma_enum arm_pragma_long_calls = OFF;
2753 void
2754 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
2756 arm_pragma_long_calls = LONG;
2759 void
2760 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
2762 arm_pragma_long_calls = SHORT;
2765 void
2766 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
2768 arm_pragma_long_calls = OFF;
2771 /* Table of machine attributes. */
2772 const struct attribute_spec arm_attribute_table[] =
2774 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2775 /* Function calls made to this symbol must be done indirectly, because
2776 it may lie outside of the 26 bit addressing range of a normal function
2777 call. */
2778 { "long_call", 0, 0, false, true, true, NULL },
2779 /* Whereas these functions are always known to reside within the 26 bit
2780 addressing range. */
2781 { "short_call", 0, 0, false, true, true, NULL },
2782 /* Interrupt Service Routines have special prologue and epilogue requirements. */
2783 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
2784 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
2785 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute },
2786 #ifdef ARM_PE
2787 /* ARM/PE has three new attributes:
2788 interfacearm - ?
2789 dllexport - for exporting a function/variable that will live in a dll
2790 dllimport - for importing a function/variable from a dll
2792 Microsoft allows multiple declspecs in one __declspec, separating
2793 them with spaces. We do NOT support this. Instead, use __declspec
2794 multiple times.
2796 { "dllimport", 0, 0, true, false, false, NULL },
2797 { "dllexport", 0, 0, true, false, false, NULL },
2798 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute },
2799 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
2800 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2801 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2802 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
2803 #endif
2804 { NULL, 0, 0, false, false, false, NULL }
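/* Usage sketch (added for illustration): the attributes above are written
   in the usual GNU way, for example

       void far_away (void) __attribute__ ((long_call));
       void irq_handler (void) __attribute__ ((interrupt ("IRQ")));

   the first forces an indirect (long) call sequence, the second selects
   the interrupt prologue/epilogue via arm_handle_isr_attribute.  */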
2807 /* Handle an attribute requiring a FUNCTION_DECL;
2808 arguments as in struct attribute_spec.handler. */
2809 static tree
2810 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
2811 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
2813 if (TREE_CODE (*node) != FUNCTION_DECL)
2815 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2816 IDENTIFIER_POINTER (name));
2817 *no_add_attrs = true;
2820 return NULL_TREE;
2823 /* Handle an "interrupt" or "isr" attribute;
2824 arguments as in struct attribute_spec.handler. */
2825 static tree
2826 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
2827 bool *no_add_attrs)
2829 if (DECL_P (*node))
2831 if (TREE_CODE (*node) != FUNCTION_DECL)
2833 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2834 IDENTIFIER_POINTER (name));
2835 *no_add_attrs = true;
2837 /* FIXME: the argument if any is checked for type attributes;
2838 should it be checked for decl ones? */
2840 else
2842 if (TREE_CODE (*node) == FUNCTION_TYPE
2843 || TREE_CODE (*node) == METHOD_TYPE)
2845 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
2847 warning (OPT_Wattributes, "%qs attribute ignored",
2848 IDENTIFIER_POINTER (name));
2849 *no_add_attrs = true;
2852 else if (TREE_CODE (*node) == POINTER_TYPE
2853 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
2854 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
2855 && arm_isr_value (args) != ARM_FT_UNKNOWN)
2857 *node = build_variant_type_copy (*node);
2858 TREE_TYPE (*node) = build_type_attribute_variant
2859 (TREE_TYPE (*node),
2860 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
2861 *no_add_attrs = true;
2863 else
2865 /* Possibly pass this attribute on from the type to a decl. */
2866 if (flags & ((int) ATTR_FLAG_DECL_NEXT
2867 | (int) ATTR_FLAG_FUNCTION_NEXT
2868 | (int) ATTR_FLAG_ARRAY_NEXT))
2870 *no_add_attrs = true;
2871 return tree_cons (name, args, NULL_TREE);
2873 else
2875 warning (OPT_Wattributes, "%qs attribute ignored",
2876 IDENTIFIER_POINTER (name));
2881 return NULL_TREE;
2884 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2885 /* Handle the "notshared" attribute. This attribute is another way of
2886 requesting hidden visibility. ARM's compiler supports
2887 "__declspec(notshared)"; we support the same thing via an
2888 attribute. */
2890 static tree
2891 arm_handle_notshared_attribute (tree *node,
2892 tree name ATTRIBUTE_UNUSED,
2893 tree args ATTRIBUTE_UNUSED,
2894 int flags ATTRIBUTE_UNUSED,
2895 bool *no_add_attrs)
2897 tree decl = TYPE_NAME (*node);
2899 if (decl)
2901 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
2902 DECL_VISIBILITY_SPECIFIED (decl) = 1;
2903 *no_add_attrs = false;
2905 return NULL_TREE;
2907 #endif
2909 /* Return 0 if the attributes for two types are incompatible, 1 if they
2910 are compatible, and 2 if they are nearly compatible (which causes a
2911 warning to be generated). */
2912 static int
2913 arm_comp_type_attributes (tree type1, tree type2)
2915 int l1, l2, s1, s2;
2917 /* Check for mismatch of non-default calling convention. */
2918 if (TREE_CODE (type1) != FUNCTION_TYPE)
2919 return 1;
2921 /* Check for mismatched call attributes. */
2922 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
2923 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
2924 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
2925 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
2927 /* Only bother to check if an attribute is defined. */
2928 if (l1 | l2 | s1 | s2)
2930 /* If one type has an attribute, the other must have the same attribute. */
2931 if ((l1 != l2) || (s1 != s2))
2932 return 0;
2934 /* Disallow mixed attributes. */
2935 if ((l1 & s2) || (l2 & s1))
2936 return 0;
2939 /* Check for mismatched ISR attribute. */
2940 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
2941 if (! l1)
2942 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
2943 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
2944 if (! l2)
2945 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
2946 if (l1 != l2)
2947 return 0;
2949 return 1;
2952 /* Encode long_call or short_call attribute by prefixing
2953 symbol name in DECL with a special character FLAG. */
2954 void
2955 arm_encode_call_attribute (tree decl, int flag)
2957 const char * str = XSTR (XEXP (DECL_RTL (decl), 0), 0);
2958 int len = strlen (str);
2959 char * newstr;
2961 /* Do not allow weak functions to be treated as short call. */
2962 if (DECL_WEAK (decl) && flag == SHORT_CALL_FLAG_CHAR)
2963 return;
2965 newstr = alloca (len + 2);
2966 newstr[0] = flag;
2967 strcpy (newstr + 1, str);
2969 newstr = (char *) ggc_alloc_string (newstr, len + 1);
2970 XSTR (XEXP (DECL_RTL (decl), 0), 0) = newstr;
2973 /* Assigns default attributes to newly defined type. This is used to
2974 set short_call/long_call attributes for function types of
2975 functions defined inside corresponding #pragma scopes. */
2976 static void
2977 arm_set_default_type_attributes (tree type)
2979 /* Add __attribute__ ((long_call)) to all functions, when
2980 inside #pragma long_calls or __attribute__ ((short_call)),
2981 when inside #pragma no_long_calls. */
2982 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
2984 tree type_attr_list, attr_name;
2985 type_attr_list = TYPE_ATTRIBUTES (type);
2987 if (arm_pragma_long_calls == LONG)
2988 attr_name = get_identifier ("long_call");
2989 else if (arm_pragma_long_calls == SHORT)
2990 attr_name = get_identifier ("short_call");
2991 else
2992 return;
2994 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
2995 TYPE_ATTRIBUTES (type) = type_attr_list;
2999 /* Return 1 if the operand is a SYMBOL_REF for a function known to be
3000 defined within the current compilation unit. If this cannot be
3001 determined, then 0 is returned. */
3002 static int
3003 current_file_function_operand (rtx sym_ref)
3005 /* This is a bit of a fib. A function will have a short call flag
3006 applied to its name if it has the short call attribute, or it has
3007 already been defined within the current compilation unit. */
3008 if (ENCODED_SHORT_CALL_ATTR_P (XSTR (sym_ref, 0)))
3009 return 1;
3011 /* The current function is always defined within the current compilation
3012 unit. If it is a weak definition, however, then this may not be the real
3013 definition of the function, and so we have to say no. */
3014 if (sym_ref == XEXP (DECL_RTL (current_function_decl), 0)
3015 && !DECL_WEAK (current_function_decl))
3016 return 1;
3018 /* We cannot make the determination - default to returning 0. */
3019 return 0;
3022 /* Return nonzero if a 32 bit "long_call" should be generated for
3023 this call. We generate a long_call if the function:
3025 a. has an __attribute__ ((long_call))
3026 or b. is within the scope of a #pragma long_calls
3027 or c. the -mlong-calls command line switch has been specified,
3028 and either:
3029 1. -ffunction-sections is in effect
3030 or 2. the current function has __attribute__ ((section))
3031 or 3. the target function has __attribute__ ((section))
3033 However we do not generate a long call if the function:
3035 d. has an __attribute__ ((short_call))
3036 or e. is inside the scope of a #pragma no_long_calls
3037 or f. is defined within the current compilation unit.
3039 This function will be called by C fragments contained in the machine
3040 description file. SYM_REF and CALL_COOKIE correspond to the matched
3041 rtl operands. CALL_SYMBOL is used to distinguish between
3042 two different callers of the function. It is set to 1 in the
3043 "call_symbol" and "call_symbol_value" patterns and to 0 in the "call"
3044 and "call_value" patterns. This is because of the difference in the
3045 SYM_REFs passed by these patterns. */
3047 arm_is_longcall_p (rtx sym_ref, int call_cookie, int call_symbol)
3049 if (!call_symbol)
3051 if (GET_CODE (sym_ref) != MEM)
3052 return 0;
3054 sym_ref = XEXP (sym_ref, 0);
3057 if (GET_CODE (sym_ref) != SYMBOL_REF)
3058 return 0;
3060 if (call_cookie & CALL_SHORT)
3061 return 0;
3063 if (TARGET_LONG_CALLS)
3065 if (flag_function_sections
3066 || DECL_SECTION_NAME (current_function_decl))
3067 /* c.3 is handled by the definition of the
3068 ARM_DECLARE_FUNCTION_SIZE macro. */
3069 return 1;
3072 if (current_file_function_operand (sym_ref))
3073 return 0;
3075 return (call_cookie & CALL_LONG)
3076 || ENCODED_LONG_CALL_ATTR_P (XSTR (sym_ref, 0))
3077 || TARGET_LONG_CALLS;
3080 /* Return nonzero if it is ok to make a tail-call to DECL. */
3081 static bool
3082 arm_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
3084 int call_type = TARGET_LONG_CALLS ? CALL_LONG : CALL_NORMAL;
3086 if (cfun->machine->sibcall_blocked)
3087 return false;
3089 /* Never tailcall something for which we have no decl, or if we
3090 are in Thumb mode. */
3091 if (decl == NULL || TARGET_THUMB)
3092 return false;
3094 /* Get the calling method. */
3095 if (lookup_attribute ("short_call", TYPE_ATTRIBUTES (TREE_TYPE (decl))))
3096 call_type = CALL_SHORT;
3097 else if (lookup_attribute ("long_call", TYPE_ATTRIBUTES (TREE_TYPE (decl))))
3098 call_type = CALL_LONG;
3100 /* Cannot tail-call to long calls, since these are out of range of
3101 a branch instruction. However, if not compiling PIC, we know
3102 we can reach the symbol if it is in this compilation unit. */
3103 if (call_type == CALL_LONG && (flag_pic || !TREE_ASM_WRITTEN (decl)))
3104 return false;
3106 /* If we are interworking and the function is not declared static
3107 then we can't tail-call it unless we know that it exists in this
3108 compilation unit (since it might be a Thumb routine). */
3109 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
3110 return false;
3112 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
3113 if (IS_INTERRUPT (arm_current_func_type ()))
3114 return false;
3116 /* Everything else is ok. */
3117 return true;
3121 /* Addressing mode support functions. */
3123 /* Return nonzero if X is a legitimate immediate operand when compiling
3124 for PIC. */
3126 legitimate_pic_operand_p (rtx x)
3128 if (CONSTANT_P (x)
3129 && flag_pic
3130 && (GET_CODE (x) == SYMBOL_REF
3131 || (GET_CODE (x) == CONST
3132 && GET_CODE (XEXP (x, 0)) == PLUS
3133 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)))
3134 return 0;
3136 return 1;
3140 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
3142 if (GET_CODE (orig) == SYMBOL_REF
3143 || GET_CODE (orig) == LABEL_REF)
3145 #ifndef AOF_ASSEMBLER
3146 rtx pic_ref, address;
3147 #endif
3148 rtx insn;
3149 int subregs = 0;
3151 if (reg == 0)
3153 gcc_assert (!no_new_pseudos);
3154 reg = gen_reg_rtx (Pmode);
3156 subregs = 1;
3159 #ifdef AOF_ASSEMBLER
3160 /* The AOF assembler can generate relocations for these directly, and
3161 understands that the PIC register has to be added into the offset. */
3162 insn = emit_insn (gen_pic_load_addr_based (reg, orig));
3163 #else
3164 if (subregs)
3165 address = gen_reg_rtx (Pmode);
3166 else
3167 address = reg;
3169 if (TARGET_ARM)
3170 emit_insn (gen_pic_load_addr_arm (address, orig));
3171 else
3172 emit_insn (gen_pic_load_addr_thumb (address, orig));
3174 if ((GET_CODE (orig) == LABEL_REF
3175 || (GET_CODE (orig) == SYMBOL_REF &&
3176 SYMBOL_REF_LOCAL_P (orig)))
3177 && NEED_GOT_RELOC)
3178 pic_ref = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, address);
3179 else
3181 pic_ref = gen_const_mem (Pmode,
3182 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
3183 address));
3186 insn = emit_move_insn (reg, pic_ref);
3187 #endif
3188 current_function_uses_pic_offset_table = 1;
3189 /* Put a REG_EQUAL note on this insn, so that it can be optimized
3190 by the loop optimizer. */
3191 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, orig,
3192 REG_NOTES (insn));
3193 return reg;
3195 else if (GET_CODE (orig) == CONST)
3197 rtx base, offset;
3199 if (GET_CODE (XEXP (orig, 0)) == PLUS
3200 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
3201 return orig;
3203 if (reg == 0)
3205 gcc_assert (!no_new_pseudos);
3206 reg = gen_reg_rtx (Pmode);
3209 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
3211 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
3212 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
3213 base == reg ? 0 : reg);
3215 if (GET_CODE (offset) == CONST_INT)
3217 /* The base register doesn't really matter; we only want to
3218 test the index for the appropriate mode. */
3219 if (!arm_legitimate_index_p (mode, offset, SET, 0))
3221 gcc_assert (!no_new_pseudos);
3222 offset = force_reg (Pmode, offset);
3225 if (GET_CODE (offset) == CONST_INT)
3226 return plus_constant (base, INTVAL (offset));
3229 if (GET_MODE_SIZE (mode) > 4
3230 && (GET_MODE_CLASS (mode) == MODE_INT
3231 || TARGET_SOFT_FLOAT))
3233 emit_insn (gen_addsi3 (reg, base, offset));
3234 return reg;
3237 return gen_rtx_PLUS (Pmode, base, offset);
3240 return orig;
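/* Rough shape of what the SYMBOL_REF path above produces for a global
   symbol (illustrative, not from the original source; rT and rD are
   placeholder registers and rPIC stands for the PIC base register):

       ldr     rT, .LCx          @ GOT offset of the symbol, from the pool
       ldr     rD, [rPIC, rT]    @ load the symbol's address from the GOT

   Local symbols and labels skip the GOT load and just add rPIC.  */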
3244 /* Find a spare low register to use during the prolog of a function. */
3246 static int
3247 thumb_find_work_register (unsigned long pushed_regs_mask)
3249 int reg;
3251 /* Check the argument registers first as these are call-used. The
3252 register allocation order means that sometimes r3 might be used
3253 but earlier argument registers might not, so check them all. */
3254 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
3255 if (!regs_ever_live[reg])
3256 return reg;
3258 /* Before going on to check the call-saved registers we can try a couple
3259 more ways of deducing that r3 is available. The first is when we are
3260 pushing anonymous arguments onto the stack and we have fewer than 4
3261 registers' worth of fixed arguments (*). In this case r3 will be part of
3262 the variable argument list and so we can be sure that it will be
3263 pushed right at the start of the function. Hence it will be available
3264 for the rest of the prologue.
3265 (*): i.e. current_function_pretend_args_size is greater than 0. */
3266 if (cfun->machine->uses_anonymous_args
3267 && current_function_pretend_args_size > 0)
3268 return LAST_ARG_REGNUM;
3270 /* The other case is when we have fixed arguments but fewer than 4
3271 registers' worth. In this case r3 might be used in the body of the function, but
3272 it is not being used to convey an argument into the function. In theory
3273 we could just check current_function_args_size to see how many bytes are
3274 being passed in argument registers, but it seems that it is unreliable.
3275 Sometimes it will have the value 0 when in fact arguments are being
3276 passed. (See testcase execute/20021111-1.c for an example). So we also
3277 check the args_info.nregs field as well. The problem with this field is
3278 that it makes no allowances for arguments that are passed to the
3279 function but which are not used. Hence we could miss an opportunity
3280 when a function has an unused argument in r3. But it is better to be
3281 safe than to be sorry. */
3282 if (! cfun->machine->uses_anonymous_args
3283 && current_function_args_size >= 0
3284 && current_function_args_size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
3285 && cfun->args_info.nregs < 4)
3286 return LAST_ARG_REGNUM;
3288 /* Otherwise look for a call-saved register that is going to be pushed. */
3289 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
3290 if (pushed_regs_mask & (1 << reg))
3291 return reg;
3293 /* Something went wrong - thumb_compute_save_reg_mask()
3294 should have arranged for a suitable register to be pushed. */
3295 gcc_unreachable ();
3299 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
3300 low register. */
3302 void
3303 arm_load_pic_register (unsigned int scratch)
3305 #ifndef AOF_ASSEMBLER
3306 rtx l1, pic_tmp, pic_tmp2, pic_rtx;
3307 rtx global_offset_table;
3309 if (current_function_uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
3310 return;
3312 gcc_assert (flag_pic);
3314 l1 = gen_label_rtx ();
3316 global_offset_table = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3317 /* On the ARM the PC register contains 'dot + 8' at the time of the
3318 addition, on the Thumb it is 'dot + 4'. */
3319 pic_tmp = plus_constant (gen_rtx_LABEL_REF (Pmode, l1), TARGET_ARM ? 8 : 4);
3320 if (GOT_PCREL)
3321 pic_tmp2 = gen_rtx_CONST (VOIDmode,
3322 gen_rtx_PLUS (Pmode, global_offset_table, pc_rtx));
3323 else
3324 pic_tmp2 = gen_rtx_CONST (VOIDmode, global_offset_table);
3326 pic_rtx = gen_rtx_CONST (Pmode, gen_rtx_MINUS (Pmode, pic_tmp2, pic_tmp));
3328 if (TARGET_ARM)
3330 emit_insn (gen_pic_load_addr_arm (pic_offset_table_rtx, pic_rtx));
3331 emit_insn (gen_pic_add_dot_plus_eight (pic_offset_table_rtx, l1));
3333 else
3335 if (REGNO (pic_offset_table_rtx) > LAST_LO_REGNUM)
3337 /* We will have pushed the pic register, so should always be
3338 able to find a work register. */
3339 pic_tmp = gen_rtx_REG (SImode, scratch);
3340 emit_insn (gen_pic_load_addr_thumb (pic_tmp, pic_rtx));
3341 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
3343 else
3344 emit_insn (gen_pic_load_addr_thumb (pic_offset_table_rtx, pic_rtx));
3345 emit_insn (gen_pic_add_dot_plus_four (pic_offset_table_rtx, l1));
3348 /* Need to emit this whether or not we obey regdecls,
3349 since setjmp/longjmp can cause life info to screw up. */
3350 emit_insn (gen_rtx_USE (VOIDmode, pic_offset_table_rtx));
3351 #endif /* AOF_ASSEMBLER */
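/* For reference (illustrative, not part of the original source; rPIC
   below stands for whatever register PIC_OFFSET_TABLE_REGNUM selects),
   the ARM-mode sequence emitted above is roughly:

       ldr     rPIC, .LPIC_off
   .LPIC0:
       add     rPIC, pc, rPIC

   with .LPIC_off holding _GLOBAL_OFFSET_TABLE_ - (.LPIC0 + 8); the +8
   matches the 'dot + 8' PC bias mentioned in the comment above.  */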
3355 /* Return nonzero if X is valid as an ARM state addressing register. */
3356 static int
3357 arm_address_register_rtx_p (rtx x, int strict_p)
3359 int regno;
3361 if (GET_CODE (x) != REG)
3362 return 0;
3364 regno = REGNO (x);
3366 if (strict_p)
3367 return ARM_REGNO_OK_FOR_BASE_P (regno);
3369 return (regno <= LAST_ARM_REGNUM
3370 || regno >= FIRST_PSEUDO_REGISTER
3371 || regno == FRAME_POINTER_REGNUM
3372 || regno == ARG_POINTER_REGNUM);
3375 /* Return nonzero if X is a valid ARM state address operand. */
3377 arm_legitimate_address_p (enum machine_mode mode, rtx x, RTX_CODE outer,
3378 int strict_p)
3380 bool use_ldrd;
3381 enum rtx_code code = GET_CODE (x);
3383 if (arm_address_register_rtx_p (x, strict_p))
3384 return 1;
3386 use_ldrd = (TARGET_LDRD
3387 && (mode == DImode
3388 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
3390 if (code == POST_INC || code == PRE_DEC
3391 || ((code == PRE_INC || code == POST_DEC)
3392 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
3393 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
3395 else if ((code == POST_MODIFY || code == PRE_MODIFY)
3396 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
3397 && GET_CODE (XEXP (x, 1)) == PLUS
3398 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
3400 rtx addend = XEXP (XEXP (x, 1), 1);
3402 /* Don't allow ldrd post-increment by register because it's hard
3403 to fix up invalid register choices. */
3404 if (use_ldrd
3405 && GET_CODE (x) == POST_MODIFY
3406 && GET_CODE (addend) == REG)
3407 return 0;
3409 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
3410 && arm_legitimate_index_p (mode, addend, outer, strict_p));
3413 /* After reload constants split into minipools will have addresses
3414 from a LABEL_REF. */
3415 else if (reload_completed
3416 && (code == LABEL_REF
3417 || (code == CONST
3418 && GET_CODE (XEXP (x, 0)) == PLUS
3419 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
3420 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
3421 return 1;
3423 else if (mode == TImode)
3424 return 0;
3426 else if (code == PLUS)
3428 rtx xop0 = XEXP (x, 0);
3429 rtx xop1 = XEXP (x, 1);
3431 return ((arm_address_register_rtx_p (xop0, strict_p)
3432 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
3433 || (arm_address_register_rtx_p (xop1, strict_p)
3434 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
3437 #if 0
3438 /* Reload currently can't handle MINUS, so disable this for now */
3439 else if (GET_CODE (x) == MINUS)
3441 rtx xop0 = XEXP (x, 0);
3442 rtx xop1 = XEXP (x, 1);
3444 return (arm_address_register_rtx_p (xop0, strict_p)
3445 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
3447 #endif
3449 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
3450 && code == SYMBOL_REF
3451 && CONSTANT_POOL_ADDRESS_P (x)
3452 && ! (flag_pic
3453 && symbol_mentioned_p (get_pool_constant (x))))
3454 return 1;
3456 return 0;
3459 /* Return nonzero if INDEX is valid for an address index operand in
3460 ARM state. */
3461 static int
3462 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
3463 int strict_p)
3465 HOST_WIDE_INT range;
3466 enum rtx_code code = GET_CODE (index);
3468 /* Standard coprocessor addressing modes. */
3469 if (TARGET_HARD_FLOAT
3470 && (TARGET_FPA || TARGET_MAVERICK)
3471 && (GET_MODE_CLASS (mode) == MODE_FLOAT
3472 || (TARGET_MAVERICK && mode == DImode)))
3473 return (code == CONST_INT && INTVAL (index) < 1024
3474 && INTVAL (index) > -1024
3475 && (INTVAL (index) & 3) == 0);
3477 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
3478 return (code == CONST_INT
3479 && INTVAL (index) < 1024
3480 && INTVAL (index) > -1024
3481 && (INTVAL (index) & 3) == 0);
3483 if (arm_address_register_rtx_p (index, strict_p)
3484 && (GET_MODE_SIZE (mode) <= 4))
3485 return 1;
3487 if (mode == DImode || mode == DFmode)
3489 if (code == CONST_INT)
3491 HOST_WIDE_INT val = INTVAL (index);
3493 if (TARGET_LDRD)
3494 return val > -256 && val < 256;
3495 else
3496 return val > -4096 && val < 4092;
3499 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
3502 if (GET_MODE_SIZE (mode) <= 4
3503 && ! (arm_arch4
3504 && (mode == HImode
3505 || (mode == QImode && outer == SIGN_EXTEND))))
3507 if (code == MULT)
3509 rtx xiop0 = XEXP (index, 0);
3510 rtx xiop1 = XEXP (index, 1);
3512 return ((arm_address_register_rtx_p (xiop0, strict_p)
3513 && power_of_two_operand (xiop1, SImode))
3514 || (arm_address_register_rtx_p (xiop1, strict_p)
3515 && power_of_two_operand (xiop0, SImode)));
3517 else if (code == LSHIFTRT || code == ASHIFTRT
3518 || code == ASHIFT || code == ROTATERT)
3520 rtx op = XEXP (index, 1);
3522 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
3523 && GET_CODE (op) == CONST_INT
3524 && INTVAL (op) > 0
3525 && INTVAL (op) <= 31);
3529 /* For ARM v4 we may be doing a sign-extend operation during the
3530 load. */
3531 if (arm_arch4)
3533 if (mode == HImode || (outer == SIGN_EXTEND && mode == QImode))
3534 range = 256;
3535 else
3536 range = 4096;
3538 else
3539 range = (mode == HImode) ? 4095 : 4096;
3541 return (code == CONST_INT
3542 && INTVAL (index) < range
3543 && INTVAL (index) > -range);
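/* A few concrete ARM-state addresses accepted by the routines above for
   SImode (illustrative; r0 and r1 are arbitrary registers): [r0, #4095],
   [r0, r1], [r0, r1, lsl #3] and post-increment [r0], #4.  For DImode
   the immediate range is just under +/-4096 without ldrd and within
   +/-255 when ldrd/strd are used; HImode on ARMv4 is limited to +/-255.  */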
3546 /* Return nonzero if X is valid as a Thumb state base register. */
3547 static int
3548 thumb_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
3550 int regno;
3552 if (GET_CODE (x) != REG)
3553 return 0;
3555 regno = REGNO (x);
3557 if (strict_p)
3558 return THUMB_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
3560 return (regno <= LAST_LO_REGNUM
3561 || regno > LAST_VIRTUAL_REGISTER
3562 || regno == FRAME_POINTER_REGNUM
3563 || (GET_MODE_SIZE (mode) >= 4
3564 && (regno == STACK_POINTER_REGNUM
3565 || regno >= FIRST_PSEUDO_REGISTER
3566 || x == hard_frame_pointer_rtx
3567 || x == arg_pointer_rtx)));
3570 /* Return nonzero if x is a legitimate index register. This is the case
3571 for any base register that can access a QImode object. */
3572 inline static int
3573 thumb_index_register_rtx_p (rtx x, int strict_p)
3575 return thumb_base_register_rtx_p (x, QImode, strict_p);
3578 /* Return nonzero if x is a legitimate Thumb-state address.
3580 The AP may be eliminated to either the SP or the FP, so we use the
3581 least common denominator, e.g. SImode, and offsets from 0 to 64.
3583 ??? Verify whether the above is the right approach.
3585 ??? Also, the FP may be eliminated to the SP, so perhaps that
3586 needs special handling also.
3588 ??? Look at how the mips16 port solves this problem. It probably uses
3589 better ways to solve some of these problems.
3591 Although it is not incorrect, we don't accept QImode and HImode
3592 addresses based on the frame pointer or arg pointer until the
3593 reload pass starts. This is so that eliminating such addresses
3594 into stack based ones won't produce impossible code. */
3596 thumb_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
3598 /* ??? Not clear if this is right. Experiment. */
3599 if (GET_MODE_SIZE (mode) < 4
3600 && !(reload_in_progress || reload_completed)
3601 && (reg_mentioned_p (frame_pointer_rtx, x)
3602 || reg_mentioned_p (arg_pointer_rtx, x)
3603 || reg_mentioned_p (virtual_incoming_args_rtx, x)
3604 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
3605 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
3606 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
3607 return 0;
3609 /* Accept any base register. SP only in SImode or larger. */
3610 else if (thumb_base_register_rtx_p (x, mode, strict_p))
3611 return 1;
3613 /* This is PC relative data before arm_reorg runs. */
3614 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
3615 && GET_CODE (x) == SYMBOL_REF
3616 && CONSTANT_POOL_ADDRESS_P (x) && ! flag_pic)
3617 return 1;
3619 /* This is PC relative data after arm_reorg runs. */
3620 else if (GET_MODE_SIZE (mode) >= 4 && reload_completed
3621 && (GET_CODE (x) == LABEL_REF
3622 || (GET_CODE (x) == CONST
3623 && GET_CODE (XEXP (x, 0)) == PLUS
3624 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
3625 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
3626 return 1;
3628 /* Post-inc indexing only supported for SImode and larger. */
3629 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
3630 && thumb_index_register_rtx_p (XEXP (x, 0), strict_p))
3631 return 1;
3633 else if (GET_CODE (x) == PLUS)
3635 /* REG+REG address can be any two index registers. */
3636 /* We disallow FRAME+REG addressing since we know that FRAME
3637 will be replaced with STACK, and SP relative addressing only
3638 permits SP+OFFSET. */
3639 if (GET_MODE_SIZE (mode) <= 4
3640 && XEXP (x, 0) != frame_pointer_rtx
3641 && XEXP (x, 1) != frame_pointer_rtx
3642 && thumb_index_register_rtx_p (XEXP (x, 0), strict_p)
3643 && thumb_index_register_rtx_p (XEXP (x, 1), strict_p))
3644 return 1;
3646 /* REG+const has 5-7 bit offset for non-SP registers. */
3647 else if ((thumb_index_register_rtx_p (XEXP (x, 0), strict_p)
3648 || XEXP (x, 0) == arg_pointer_rtx)
3649 && GET_CODE (XEXP (x, 1)) == CONST_INT
3650 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
3651 return 1;
3653 /* REG+const has 10 bit offset for SP, but only SImode and
3654 larger is supported. */
3655 /* ??? Should probably check for DI/DFmode overflow here
3656 just like GO_IF_LEGITIMATE_OFFSET does. */
3657 else if (GET_CODE (XEXP (x, 0)) == REG
3658 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
3659 && GET_MODE_SIZE (mode) >= 4
3660 && GET_CODE (XEXP (x, 1)) == CONST_INT
3661 && INTVAL (XEXP (x, 1)) >= 0
3662 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
3663 && (INTVAL (XEXP (x, 1)) & 3) == 0)
3664 return 1;
3666 else if (GET_CODE (XEXP (x, 0)) == REG
3667 && REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
3668 && GET_MODE_SIZE (mode) >= 4
3669 && GET_CODE (XEXP (x, 1)) == CONST_INT
3670 && (INTVAL (XEXP (x, 1)) & 3) == 0)
3671 return 1;
3674 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
3675 && GET_MODE_SIZE (mode) == 4
3676 && GET_CODE (x) == SYMBOL_REF
3677 && CONSTANT_POOL_ADDRESS_P (x)
3678 && !(flag_pic
3679 && symbol_mentioned_p (get_pool_constant (x))))
3680 return 1;
3682 return 0;
3685 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
3686 instruction of mode MODE. */
3688 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
3690 switch (GET_MODE_SIZE (mode))
3692 case 1:
3693 return val >= 0 && val < 32;
3695 case 2:
3696 return val >= 0 && val < 64 && (val & 1) == 0;
3698 default:
3699 return (val >= 0
3700 && (val + GET_MODE_SIZE (mode)) <= 128
3701 && (val & 3) == 0);
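/* Illustration (added): these limits mirror the Thumb-1 load/store
   encodings: ldrb takes a 5-bit byte offset (0-31), ldrh a 5-bit halfword
   offset (0-62, even), and ldr a 5-bit word offset (0-124, word aligned).
   So an SImode access at offset 124 is accepted here, while one at offset
   128 has to go through a register instead.  */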
3705 /* Try machine-dependent ways of modifying an illegitimate address
3706 to be legitimate. If we find one, return the new, valid address. */
3708 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
3710 if (GET_CODE (x) == PLUS)
3712 rtx xop0 = XEXP (x, 0);
3713 rtx xop1 = XEXP (x, 1);
3715 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
3716 xop0 = force_reg (SImode, xop0);
3718 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
3719 xop1 = force_reg (SImode, xop1);
3721 if (ARM_BASE_REGISTER_RTX_P (xop0)
3722 && GET_CODE (xop1) == CONST_INT)
3724 HOST_WIDE_INT n, low_n;
3725 rtx base_reg, val;
3726 n = INTVAL (xop1);
3728 /* VFP addressing modes actually allow greater offsets, but for
3729 now we just stick with the lowest common denominator. */
3730 if (mode == DImode
3731 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
3733 low_n = n & 0x0f;
3734 n &= ~0x0f;
3735 if (low_n > 4)
3737 n += 16;
3738 low_n -= 16;
3741 else
3743 low_n = ((mode) == TImode ? 0
3744 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
3745 n -= low_n;
3748 base_reg = gen_reg_rtx (SImode);
3749 val = force_operand (gen_rtx_PLUS (SImode, xop0,
3750 GEN_INT (n)), NULL_RTX);
3751 emit_move_insn (base_reg, val);
3752 x = (low_n == 0 ? base_reg
3753 : gen_rtx_PLUS (SImode, base_reg, GEN_INT (low_n)));
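/* Worked example (illustrative): an SImode access at base + 0x3004 takes
the general branch above, giving low_n = 4 and n = 0x3000; the 0x3000
part is added to the base in a scratch register and the access keeps
the small +4 offset. */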
3755 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
3756 x = gen_rtx_PLUS (SImode, xop0, xop1);
3759 /* XXX We don't allow MINUS any more -- see comment in
3760 arm_legitimate_address_p (). */
3761 else if (GET_CODE (x) == MINUS)
3763 rtx xop0 = XEXP (x, 0);
3764 rtx xop1 = XEXP (x, 1);
3766 if (CONSTANT_P (xop0))
3767 xop0 = force_reg (SImode, xop0);
3769 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
3770 xop1 = force_reg (SImode, xop1);
3772 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
3773 x = gen_rtx_MINUS (SImode, xop0, xop1);
3776 if (flag_pic)
3778 /* We need to find and carefully transform any SYMBOL and LABEL
3779 references; so go back to the original address expression. */
3780 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
3782 if (new_x != orig_x)
3783 x = new_x;
3786 return x;
3790 /* Try machine-dependent ways of modifying an illegitimate Thumb address
3791 to be legitimate. If we find one, return the new, valid address. */
3793 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
3795 if (GET_CODE (x) == PLUS
3796 && GET_CODE (XEXP (x, 1)) == CONST_INT
3797 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
3798 || INTVAL (XEXP (x, 1)) < 0))
3800 rtx xop0 = XEXP (x, 0);
3801 rtx xop1 = XEXP (x, 1);
3802 HOST_WIDE_INT offset = INTVAL (xop1);
3804 /* Try and fold the offset into a biasing of the base register and
3805 then offsetting that. Don't do this when optimizing for space
3806 since it can cause too many CSEs. */
3807 if (optimize_size && offset >= 0
3808 && offset < 256 + 31 * GET_MODE_SIZE (mode))
3810 HOST_WIDE_INT delta;
3812 if (offset >= 256)
3813 delta = offset - (256 - GET_MODE_SIZE (mode));
3814 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
3815 delta = 31 * GET_MODE_SIZE (mode);
3816 else
3817 delta = offset & (~31 * GET_MODE_SIZE (mode));
3819 xop0 = force_operand (plus_constant (xop0, offset - delta),
3820 NULL_RTX);
3821 x = plus_constant (xop0, delta);
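/* Worked example (illustrative): when this path is taken for an SImode
access at offset 300, delta = 300 - (256 - 4) = 48, so the base register
is biased by 252 and the final address keeps the in-range offset 48. */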
3823 else if (offset < 0 && offset > -256)
3824 /* Small negative offsets are best done with a subtract before the
3825 dereference; forcing these into a register normally takes two
3826 instructions. */
3827 x = force_operand (x, NULL_RTX);
3828 else
3830 /* For the remaining cases, force the constant into a register. */
3831 xop1 = force_reg (SImode, xop1);
3832 x = gen_rtx_PLUS (SImode, xop0, xop1);
3835 else if (GET_CODE (x) == PLUS
3836 && s_register_operand (XEXP (x, 1), SImode)
3837 && !s_register_operand (XEXP (x, 0), SImode))
3839 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
3841 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
3844 if (flag_pic)
3846 /* We need to find and carefully transform any SYMBOL and LABEL
3847 references; so go back to the original address expression. */
3848 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
3850 if (new_x != orig_x)
3851 x = new_x;
3854 return x;
3859 #define REG_OR_SUBREG_REG(X) \
3860 (GET_CODE (X) == REG \
3861 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
3863 #define REG_OR_SUBREG_RTX(X) \
3864 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
3866 #ifndef COSTS_N_INSNS
3867 #define COSTS_N_INSNS(N) ((N) * 4 - 2)
3868 #endif
3869 static inline int
3870 thumb_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
3872 enum machine_mode mode = GET_MODE (x);
3874 switch (code)
3876 case ASHIFT:
3877 case ASHIFTRT:
3878 case LSHIFTRT:
3879 case ROTATERT:
3880 case PLUS:
3881 case MINUS:
3882 case COMPARE:
3883 case NEG:
3884 case NOT:
3885 return COSTS_N_INSNS (1);
3887 case MULT:
3888 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
3890 int cycles = 0;
3891 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
3893 while (i)
3895 i >>= 2;
3896 cycles++;
3898 return COSTS_N_INSNS (2) + cycles;
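/* The loop above consumes two bits of the multiplier per step; e.g. a
multiplier of 0x55 gives cycles = 4, for a cost of COSTS_N_INSNS (2) + 4. */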
3900 return COSTS_N_INSNS (1) + 16;
3902 case SET:
3903 return (COSTS_N_INSNS (1)
3904 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
3905 + (GET_CODE (SET_DEST (x)) == MEM)));
3907 case CONST_INT:
3908 if (outer == SET)
3910 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
3911 return 0;
3912 if (thumb_shiftable_const (INTVAL (x)))
3913 return COSTS_N_INSNS (2);
3914 return COSTS_N_INSNS (3);
3916 else if ((outer == PLUS || outer == COMPARE)
3917 && INTVAL (x) < 256 && INTVAL (x) > -256)
3918 return 0;
3919 else if (outer == AND
3920 && INTVAL (x) < 256 && INTVAL (x) >= -256)
3921 return COSTS_N_INSNS (1);
3922 else if (outer == ASHIFT || outer == ASHIFTRT
3923 || outer == LSHIFTRT)
3924 return 0;
3925 return COSTS_N_INSNS (2);
3927 case CONST:
3928 case CONST_DOUBLE:
3929 case LABEL_REF:
3930 case SYMBOL_REF:
3931 return COSTS_N_INSNS (3);
3933 case UDIV:
3934 case UMOD:
3935 case DIV:
3936 case MOD:
3937 return 100;
3939 case TRUNCATE:
3940 return 99;
3942 case AND:
3943 case XOR:
3944 case IOR:
3945 /* XXX guess. */
3946 return 8;
3948 case MEM:
3949 /* XXX another guess. */
3950 /* Memory costs quite a lot for the first word, but subsequent words
3951 load at the equivalent of a single insn each. */
3952 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
3953 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
3954 ? 4 : 0));
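/* For illustration: an SImode load costs 10 here and a DImode load 14,
one extra step for each additional word. */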
3956 case IF_THEN_ELSE:
3957 /* XXX a guess. */
3958 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
3959 return 14;
3960 return 2;
3962 case ZERO_EXTEND:
3963 /* XXX still guessing. */
3964 switch (GET_MODE (XEXP (x, 0)))
3966 case QImode:
3967 return (1 + (mode == DImode ? 4 : 0)
3968 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
3970 case HImode:
3971 return (4 + (mode == DImode ? 4 : 0)
3972 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
3974 case SImode:
3975 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
3977 default:
3978 return 99;
3981 default:
3982 return 99;
3987 /* Worker routine for arm_rtx_costs. */
3988 static inline int
3989 arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer)
3991 enum machine_mode mode = GET_MODE (x);
3992 enum rtx_code subcode;
3993 int extra_cost;
3995 switch (code)
3997 case MEM:
3998 /* Memory costs quite a lot for the first word, but subsequent words
3999 load at the equivalent of a single insn each. */
4000 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
4001 + (GET_CODE (x) == SYMBOL_REF
4002 && CONSTANT_POOL_ADDRESS_P (x) ? 4 : 0));
4004 case DIV:
4005 case MOD:
4006 case UDIV:
4007 case UMOD:
4008 return optimize_size ? COSTS_N_INSNS (2) : 100;
4010 case ROTATE:
4011 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
4012 return 4;
4013 /* Fall through */
4014 case ROTATERT:
4015 if (mode != SImode)
4016 return 8;
4017 /* Fall through */
4018 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
4019 if (mode == DImode)
4020 return (8 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : 8)
4021 + ((GET_CODE (XEXP (x, 0)) == REG
4022 || (GET_CODE (XEXP (x, 0)) == SUBREG
4023 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
4024 ? 0 : 8));
4025 return (1 + ((GET_CODE (XEXP (x, 0)) == REG
4026 || (GET_CODE (XEXP (x, 0)) == SUBREG
4027 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
4028 ? 0 : 4)
4029 + ((GET_CODE (XEXP (x, 1)) == REG
4030 || (GET_CODE (XEXP (x, 1)) == SUBREG
4031 && GET_CODE (SUBREG_REG (XEXP (x, 1))) == REG)
4032 || (GET_CODE (XEXP (x, 1)) == CONST_INT))
4033 ? 0 : 4));
4035 case MINUS:
4036 if (mode == DImode)
4037 return (4 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 8)
4038 + ((REG_OR_SUBREG_REG (XEXP (x, 0))
4039 || (GET_CODE (XEXP (x, 0)) == CONST_INT
4040 && const_ok_for_arm (INTVAL (XEXP (x, 0)))))
4041 ? 0 : 8));
4043 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4044 return (2 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4045 || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
4046 && arm_const_double_rtx (XEXP (x, 1))))
4047 ? 0 : 8)
4048 + ((REG_OR_SUBREG_REG (XEXP (x, 0))
4049 || (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
4050 && arm_const_double_rtx (XEXP (x, 0))))
4051 ? 0 : 8));
4053 if (((GET_CODE (XEXP (x, 0)) == CONST_INT
4054 && const_ok_for_arm (INTVAL (XEXP (x, 0)))
4055 && REG_OR_SUBREG_REG (XEXP (x, 1))))
4056 || (((subcode = GET_CODE (XEXP (x, 1))) == ASHIFT
4057 || subcode == ASHIFTRT || subcode == LSHIFTRT
4058 || subcode == ROTATE || subcode == ROTATERT
4059 || (subcode == MULT
4060 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
4061 && ((INTVAL (XEXP (XEXP (x, 1), 1)) &
4062 (INTVAL (XEXP (XEXP (x, 1), 1)) - 1)) == 0)))
4063 && REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 0))
4064 && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 1))
4065 || GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
4066 && REG_OR_SUBREG_REG (XEXP (x, 0))))
4067 return 1;
4068 /* Fall through */
4070 case PLUS:
4071 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4072 return (2 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
4073 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4074 || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
4075 && arm_const_double_rtx (XEXP (x, 1))))
4076 ? 0 : 8));
4078 /* Fall through */
4079 case AND: case XOR: case IOR:
4080 extra_cost = 0;
4082 /* Normally the frame registers will be spilt into reg+const during
4083 reload, so it is a bad idea to combine them with other instructions,
4084 since then they might not be moved outside of loops. As a compromise
4085 we allow integration with ops that have a constant as their second
4086 operand. */
4087 if ((REG_OR_SUBREG_REG (XEXP (x, 0))
4088 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
4089 && GET_CODE (XEXP (x, 1)) != CONST_INT)
4090 || (REG_OR_SUBREG_REG (XEXP (x, 0))
4091 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))))
4092 extra_cost = 4;
4094 if (mode == DImode)
4095 return (4 + extra_cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
4096 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4097 || (GET_CODE (XEXP (x, 1)) == CONST_INT
4098 && const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
4099 ? 0 : 8));
4101 if (REG_OR_SUBREG_REG (XEXP (x, 0)))
4102 return (1 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : extra_cost)
4103 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4104 || (GET_CODE (XEXP (x, 1)) == CONST_INT
4105 && const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
4106 ? 0 : 4));
4108 else if (REG_OR_SUBREG_REG (XEXP (x, 1)))
4109 return (1 + extra_cost
4110 + ((((subcode = GET_CODE (XEXP (x, 0))) == ASHIFT
4111 || subcode == LSHIFTRT || subcode == ASHIFTRT
4112 || subcode == ROTATE || subcode == ROTATERT
4113 || (subcode == MULT
4114 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4115 && ((INTVAL (XEXP (XEXP (x, 0), 1)) &
4116 (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0)))
4117 && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 0)))
4118 && ((REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 1)))
4119 || GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT))
4120 ? 0 : 4));
4122 return 8;
4124 case MULT:
4125 /* This should have been handled by the CPU specific routines. */
4126 gcc_unreachable ();
4128 case TRUNCATE:
4129 if (arm_arch3m && mode == SImode
4130 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
4131 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
4132 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
4133 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
4134 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
4135 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
4136 return 8;
4137 return 99;
4139 case NEG:
4140 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4141 return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 6);
4142 /* Fall through */
4143 case NOT:
4144 if (mode == DImode)
4145 return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
4147 return 1 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
4149 case IF_THEN_ELSE:
4150 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
4151 return 14;
4152 return 2;
4154 case COMPARE:
4155 return 1;
4157 case ABS:
4158 return 4 + (mode == DImode ? 4 : 0);
4160 case SIGN_EXTEND:
4161 if (GET_MODE (XEXP (x, 0)) == QImode)
4162 return (4 + (mode == DImode ? 4 : 0)
4163 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4164 /* Fall through */
4165 case ZERO_EXTEND:
4166 switch (GET_MODE (XEXP (x, 0)))
4168 case QImode:
4169 return (1 + (mode == DImode ? 4 : 0)
4170 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4172 case HImode:
4173 return (4 + (mode == DImode ? 4 : 0)
4174 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4176 case SImode:
4177 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4179 case V8QImode:
4180 case V4HImode:
4181 case V2SImode:
4182 case V4QImode:
4183 case V2HImode:
4184 return 1;
4186 default:
4187 gcc_unreachable ();
4189 gcc_unreachable ();
4191 case CONST_INT:
4192 if (const_ok_for_arm (INTVAL (x)))
4193 return outer == SET ? 2 : -1;
4194 else if (outer == AND
4195 && const_ok_for_arm (~INTVAL (x)))
4196 return -1;
4197 else if ((outer == COMPARE
4198 || outer == PLUS || outer == MINUS)
4199 && const_ok_for_arm (-INTVAL (x)))
4200 return -1;
4201 else
4202 return 5;
4204 case CONST:
4205 case LABEL_REF:
4206 case SYMBOL_REF:
4207 return 6;
4209 case CONST_DOUBLE:
4210 if (arm_const_double_rtx (x))
4211 return outer == SET ? 2 : -1;
4212 else if ((outer == COMPARE || outer == PLUS)
4213 && neg_const_double_rtx_ok_for_fpa (x))
4214 return -1;
4215 return 7;
4217 default:
4218 return 99;
4222 /* RTX costs when optimizing for size. */
4223 static bool
4224 arm_size_rtx_costs (rtx x, int code, int outer_code, int *total)
4226 enum machine_mode mode = GET_MODE (x);
4228 if (TARGET_THUMB)
4230 /* XXX TBD. For now, use the standard costs. */
4231 *total = thumb_rtx_costs (x, code, outer_code);
4232 return true;
4235 switch (code)
4237 case MEM:
4238 /* A memory access costs 1 insn if the mode is small, or the address is
4239 a single register, otherwise it costs one insn per word. */
4240 if (REG_P (XEXP (x, 0)))
4241 *total = COSTS_N_INSNS (1);
4242 else
4243 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
4244 return true;
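/* For illustration: a DImode load through a bare register address is
counted as one insn, while the same load through a reg+offset address
is counted as two. */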
4246 case DIV:
4247 case MOD:
4248 case UDIV:
4249 case UMOD:
4250 /* Needs a libcall, so it costs about this. */
4251 *total = COSTS_N_INSNS (2);
4252 return false;
4254 case ROTATE:
4255 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
4257 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code);
4258 return true;
4260 /* Fall through */
4261 case ROTATERT:
4262 case ASHIFT:
4263 case LSHIFTRT:
4264 case ASHIFTRT:
4265 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
4267 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code);
4268 return true;
4270 else if (mode == SImode)
4272 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code);
4273 /* Slightly disparage register shifts, but not by much. */
4274 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
4275 *total += 1 + rtx_cost (XEXP (x, 1), code);
4276 return true;
4279 /* Needs a libcall. */
4280 *total = COSTS_N_INSNS (2);
4281 return false;
4283 case MINUS:
4284 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
4286 *total = COSTS_N_INSNS (1);
4287 return false;
4290 if (mode == SImode)
4292 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
4293 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
4295 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
4296 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
4297 || subcode1 == ROTATE || subcode1 == ROTATERT
4298 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
4299 || subcode1 == ASHIFTRT)
4301 /* It's just the cost of the two operands. */
4302 *total = 0;
4303 return false;
4306 *total = COSTS_N_INSNS (1);
4307 return false;
4310 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
4311 return false;
4313 case PLUS:
4314 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
4316 *total = COSTS_N_INSNS (1);
4317 return false;
4320 /* Fall through */
4321 case AND: case XOR: case IOR:
4322 if (mode == SImode)
4324 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
4326 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
4327 || subcode == LSHIFTRT || subcode == ASHIFTRT
4328 || (code == AND && subcode == NOT))
4330 /* It's just the cost of the two operands. */
4331 *total = 0;
4332 return false;
4336 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
4337 return false;
4339 case MULT:
4340 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
4341 return false;
4343 case NEG:
4344 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
4345 { *total = COSTS_N_INSNS (1); return false; }
4346 /* Fall through */
4347 case NOT:
4348 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
4350 return false;
4352 case IF_THEN_ELSE:
4353 *total = 0;
4354 return false;
4356 case COMPARE:
4357 if (cc_register (XEXP (x, 0), VOIDmode))
4358 *total = 0;
4359 else
4360 *total = COSTS_N_INSNS (1);
4361 return false;
4363 case ABS:
4364 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
4365 *total = COSTS_N_INSNS (1);
4366 else
4367 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
4368 return false;
4370 case SIGN_EXTEND:
4371 *total = 0;
4372 if (GET_MODE_SIZE (GET_MODE (XEXP (x, 0))) < 4)
4374 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
4375 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
4377 if (mode == DImode)
4378 *total += COSTS_N_INSNS (1);
4379 return false;
4381 case ZERO_EXTEND:
4382 *total = 0;
4383 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
4385 switch (GET_MODE (XEXP (x, 0)))
4387 case QImode:
4388 *total += COSTS_N_INSNS (1);
4389 break;
4391 case HImode:
4392 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
4394 case SImode:
4395 break;
4397 default:
4398 *total += COSTS_N_INSNS (2);
4402 if (mode == DImode)
4403 *total += COSTS_N_INSNS (1);
4405 return false;
4407 case CONST_INT:
4408 if (const_ok_for_arm (INTVAL (x)))
4409 *total = COSTS_N_INSNS (outer_code == SET ? 1 : 0);
4410 else if (const_ok_for_arm (~INTVAL (x)))
4411 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
4412 else if (const_ok_for_arm (-INTVAL (x)))
4414 if (outer_code == COMPARE || outer_code == PLUS
4415 || outer_code == MINUS)
4416 *total = 0;
4417 else
4418 *total = COSTS_N_INSNS (1);
4420 else
4421 *total = COSTS_N_INSNS (2);
4422 return true;
4424 case CONST:
4425 case LABEL_REF:
4426 case SYMBOL_REF:
4427 *total = COSTS_N_INSNS (2);
4428 return true;
4430 case CONST_DOUBLE:
4431 *total = COSTS_N_INSNS (4);
4432 return true;
4434 default:
4435 if (mode != VOIDmode)
4436 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
4437 else
4438 *total = COSTS_N_INSNS (4); /* Who knows? */
4439 return false;
4443 /* RTX costs for cores with a slow MUL implementation. */
4445 static bool
4446 arm_slowmul_rtx_costs (rtx x, int code, int outer_code, int *total)
4448 enum machine_mode mode = GET_MODE (x);
4450 if (TARGET_THUMB)
4452 *total = thumb_rtx_costs (x, code, outer_code);
4453 return true;
4456 switch (code)
4458 case MULT:
4459 if (GET_MODE_CLASS (mode) == MODE_FLOAT
4460 || mode == DImode)
4462 *total = 30;
4463 return true;
4466 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4468 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
4469 & (unsigned HOST_WIDE_INT) 0xffffffff);
4470 int cost, const_ok = const_ok_for_arm (i);
4471 int j, booth_unit_size;
4473 /* Tune as appropriate. */
4474 cost = const_ok ? 4 : 8;
4475 booth_unit_size = 2;
4476 for (j = 0; i && j < 32; j += booth_unit_size)
4478 i >>= booth_unit_size;
4479 cost += 2;
4482 *total = cost;
4483 return true;
4486 *total = 30 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
4487 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
4488 return true;
4490 default:
4491 *total = arm_rtx_costs_1 (x, code, outer_code);
4492 return true;
4497 /* RTX cost for cores with a fast multiply unit (M variants). */
4499 static bool
4500 arm_fastmul_rtx_costs (rtx x, int code, int outer_code, int *total)
4502 enum machine_mode mode = GET_MODE (x);
4504 if (TARGET_THUMB)
4506 *total = thumb_rtx_costs (x, code, outer_code);
4507 return true;
4510 switch (code)
4512 case MULT:
4513 /* There is no point basing this on the tuning, since it is always the
4514 fast variant if it exists at all. */
4515 if (mode == DImode
4516 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
4517 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4518 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
4520 *total = 8;
4521 return true;
4525 if (GET_MODE_CLASS (mode) == MODE_FLOAT
4526 || mode == DImode)
4528 *total = 30;
4529 return true;
4532 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4534 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
4535 & (unsigned HOST_WIDE_INT) 0xffffffff);
4536 int cost, const_ok = const_ok_for_arm (i);
4537 int j, booth_unit_size;
4539 /* Tune as appropriate. */
4540 cost = const_ok ? 4 : 8;
4541 booth_unit_size = 8;
4542 for (j = 0; i && j < 32; j += booth_unit_size)
4544 i >>= booth_unit_size;
4545 cost += 2;
4548 *total = cost;
4549 return true;
4552 *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
4553 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
4554 return true;
4556 default:
4557 *total = arm_rtx_costs_1 (x, code, outer_code);
4558 return true;
4563 /* RTX cost for XScale CPUs. */
4565 static bool
4566 arm_xscale_rtx_costs (rtx x, int code, int outer_code, int *total)
4568 enum machine_mode mode = GET_MODE (x);
4570 if (TARGET_THUMB)
4572 *total = thumb_rtx_costs (x, code, outer_code);
4573 return true;
4576 switch (code)
4578 case MULT:
4579 /* There is no point basing this on the tuning, since it is always the
4580 fast variant if it exists at all. */
4581 if (mode == DImode
4582 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
4583 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4584 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
4586 *total = 8;
4587 return true;
4591 if (GET_MODE_CLASS (mode) == MODE_FLOAT
4592 || mode == DImode)
4594 *total = 30;
4595 return true;
4598 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4600 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
4601 & (unsigned HOST_WIDE_INT) 0xffffffff);
4602 int cost, const_ok = const_ok_for_arm (i);
4603 unsigned HOST_WIDE_INT masked_const;
4605 /* The cost will be related to two insns.
4606 First a load of the constant (MOV or LDR), then a multiply. */
4607 cost = 2;
4608 if (! const_ok)
4609 cost += 1; /* LDR is probably more expensive because
4610 of longer result latency. */
4611 masked_const = i & 0xffff8000;
4612 if (masked_const != 0 && masked_const != 0xffff8000)
4614 masked_const = i & 0xf8000000;
4615 if (masked_const == 0 || masked_const == 0xf8000000)
4616 cost += 1;
4617 else
4618 cost += 2;
4620 *total = cost;
4621 return true;
4624 *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
4625 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
4626 return true;
4628 case COMPARE:
4629 /* A COMPARE of a MULT is slow on XScale; the muls instruction
4630 will stall until the multiplication is complete. */
4631 if (GET_CODE (XEXP (x, 0)) == MULT)
4632 *total = 4 + rtx_cost (XEXP (x, 0), code);
4633 else
4634 *total = arm_rtx_costs_1 (x, code, outer_code);
4635 return true;
4637 default:
4638 *total = arm_rtx_costs_1 (x, code, outer_code);
4639 return true;
4644 /* RTX costs for 9e (and later) cores. */
4646 static bool
4647 arm_9e_rtx_costs (rtx x, int code, int outer_code, int *total)
4649 enum machine_mode mode = GET_MODE (x);
4650 int nonreg_cost;
4651 int cost;
4653 if (TARGET_THUMB)
4655 switch (code)
4657 case MULT:
4658 *total = COSTS_N_INSNS (3);
4659 return true;
4661 default:
4662 *total = thumb_rtx_costs (x, code, outer_code);
4663 return true;
4667 switch (code)
4669 case MULT:
4670 /* There is no point basing this on the tuning, since it is always the
4671 fast variant if it exists at all. */
4672 if (mode == DImode
4673 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
4674 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4675 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
4677 *total = 3;
4678 return true;
4682 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4684 *total = 30;
4685 return true;
4687 if (mode == DImode)
4689 cost = 7;
4690 nonreg_cost = 8;
4692 else
4694 cost = 2;
4695 nonreg_cost = 4;
4699 *total = cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : nonreg_cost)
4700 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : nonreg_cost);
4701 return true;
4703 default:
4704 *total = arm_rtx_costs_1 (x, code, outer_code);
4705 return true;
4708 /* All address computations that can be done are free, but rtx cost returns
4709 the same for practically all of them. So we weight the different types
4710 of address here in the order (most pref first):
4711 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
4712 static inline int
4713 arm_arm_address_cost (rtx x)
4715 enum rtx_code c = GET_CODE (x);
4717 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
4718 return 0;
4719 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
4720 return 10;
4722 if (c == PLUS || c == MINUS)
4724 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
4725 return 2;
4727 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
4728 return 3;
4730 return 4;
4733 return 6;
4736 static inline int
4737 arm_thumb_address_cost (rtx x)
4739 enum rtx_code c = GET_CODE (x);
4741 if (c == REG)
4742 return 1;
4743 if (c == PLUS
4744 && GET_CODE (XEXP (x, 0)) == REG
4745 && GET_CODE (XEXP (x, 1)) == CONST_INT)
4746 return 1;
4748 return 2;
4751 static int
4752 arm_address_cost (rtx x)
4754 return TARGET_ARM ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
4757 static int
4758 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
4760 rtx i_pat, d_pat;
4762 /* Some true dependencies can have a higher cost depending
4763 on precisely how certain input operands are used. */
4764 if (arm_tune_xscale
4765 && REG_NOTE_KIND (link) == 0
4766 && recog_memoized (insn) >= 0
4767 && recog_memoized (dep) >= 0)
4769 int shift_opnum = get_attr_shift (insn);
4770 enum attr_type attr_type = get_attr_type (dep);
4772 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
4773 operand for INSN. If we have a shifted input operand and the
4774 instruction we depend on is another ALU instruction, then we may
4775 have to account for an additional stall. */
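/* For illustration: if this insn uses a shifted operand, say r2 in
'add r0, r1, r2, asl #1', and the instruction it depends on is an
ALU-shift operation that writes r2, the dependence is costed at 2 to
reflect the extra stall. */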
4776 if (shift_opnum != 0
4777 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
4779 rtx shifted_operand;
4780 int opno;
4782 /* Get the shifted operand. */
4783 extract_insn (insn);
4784 shifted_operand = recog_data.operand[shift_opnum];
4786 /* Iterate over all the operands in DEP. If we write an operand
4787 that overlaps with SHIFTED_OPERAND, then we have to increase the
4788 cost of this dependency. */
4789 extract_insn (dep);
4790 preprocess_constraints ();
4791 for (opno = 0; opno < recog_data.n_operands; opno++)
4793 /* We can ignore strict inputs. */
4794 if (recog_data.operand_type[opno] == OP_IN)
4795 continue;
4797 if (reg_overlap_mentioned_p (recog_data.operand[opno],
4798 shifted_operand))
4799 return 2;
4804 /* XXX This is not strictly true for the FPA. */
4805 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
4806 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
4807 return 0;
4809 /* Call insns don't incur a stall, even if they follow a load. */
4810 if (REG_NOTE_KIND (link) == 0
4811 && GET_CODE (insn) == CALL_INSN)
4812 return 1;
4814 if ((i_pat = single_set (insn)) != NULL
4815 && GET_CODE (SET_SRC (i_pat)) == MEM
4816 && (d_pat = single_set (dep)) != NULL
4817 && GET_CODE (SET_DEST (d_pat)) == MEM)
4819 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
4820 /* This is a load after a store; there is no conflict if the load reads
4821 from a cached area. Assume that loads from the stack and from the
4822 constant pool are cached, and that others will miss. This is a
4823 hack. */
4825 if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem))
4826 || reg_mentioned_p (stack_pointer_rtx, src_mem)
4827 || reg_mentioned_p (frame_pointer_rtx, src_mem)
4828 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
4829 return 1;
4832 return cost;
4835 static int fp_consts_inited = 0;
4837 /* Only zero is valid for VFP. Other values are also valid for FPA. */
4838 static const char * const strings_fp[8] =
4840 "0", "1", "2", "3",
4841 "4", "5", "0.5", "10"
4844 static REAL_VALUE_TYPE values_fp[8];
4846 static void
4847 init_fp_table (void)
4849 int i;
4850 REAL_VALUE_TYPE r;
4852 if (TARGET_VFP)
4853 fp_consts_inited = 1;
4854 else
4855 fp_consts_inited = 8;
4857 for (i = 0; i < fp_consts_inited; i++)
4859 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
4860 values_fp[i] = r;
4864 /* Return TRUE if rtx X is a valid immediate FP constant. */
4866 arm_const_double_rtx (rtx x)
4868 REAL_VALUE_TYPE r;
4869 int i;
4871 if (!fp_consts_inited)
4872 init_fp_table ();
4874 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4875 if (REAL_VALUE_MINUS_ZERO (r))
4876 return 0;
4878 for (i = 0; i < fp_consts_inited; i++)
4879 if (REAL_VALUES_EQUAL (r, values_fp[i]))
4880 return 1;
4882 return 0;
4885 /* Return TRUE if rtx X is a valid immediate FPA constant. */
4887 neg_const_double_rtx_ok_for_fpa (rtx x)
4889 REAL_VALUE_TYPE r;
4890 int i;
4892 if (!fp_consts_inited)
4893 init_fp_table ();
4895 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4896 r = REAL_VALUE_NEGATE (r);
4897 if (REAL_VALUE_MINUS_ZERO (r))
4898 return 0;
4900 for (i = 0; i < 8; i++)
4901 if (REAL_VALUES_EQUAL (r, values_fp[i]))
4902 return 1;
4904 return 0;
4907 /* Predicates for `match_operand' and `match_operator'. */
4909 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
4911 cirrus_memory_offset (rtx op)
4913 /* Reject eliminable registers. */
4914 if (! (reload_in_progress || reload_completed)
4915 && ( reg_mentioned_p (frame_pointer_rtx, op)
4916 || reg_mentioned_p (arg_pointer_rtx, op)
4917 || reg_mentioned_p (virtual_incoming_args_rtx, op)
4918 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
4919 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
4920 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
4921 return 0;
4923 if (GET_CODE (op) == MEM)
4925 rtx ind;
4927 ind = XEXP (op, 0);
4929 /* Match: (mem (reg)). */
4930 if (GET_CODE (ind) == REG)
4931 return 1;
4933 /* Match:
4934 (mem (plus (reg)
4935 (const))). */
4936 if (GET_CODE (ind) == PLUS
4937 && GET_CODE (XEXP (ind, 0)) == REG
4938 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
4939 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
4940 return 1;
4943 return 0;
4946 /* Return TRUE if OP is a valid VFP memory address pattern.
4947 WB is true if writeback address modes are allowed. */
4950 arm_coproc_mem_operand (rtx op, bool wb)
4952 rtx ind;
4954 /* Reject eliminable registers. */
4955 if (! (reload_in_progress || reload_completed)
4956 && ( reg_mentioned_p (frame_pointer_rtx, op)
4957 || reg_mentioned_p (arg_pointer_rtx, op)
4958 || reg_mentioned_p (virtual_incoming_args_rtx, op)
4959 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
4960 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
4961 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
4962 return FALSE;
4964 /* Constants are converted into offsets from labels. */
4965 if (GET_CODE (op) != MEM)
4966 return FALSE;
4968 ind = XEXP (op, 0);
4970 if (reload_completed
4971 && (GET_CODE (ind) == LABEL_REF
4972 || (GET_CODE (ind) == CONST
4973 && GET_CODE (XEXP (ind, 0)) == PLUS
4974 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
4975 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
4976 return TRUE;
4978 /* Match: (mem (reg)). */
4979 if (GET_CODE (ind) == REG)
4980 return arm_address_register_rtx_p (ind, 0);
4982 /* Autoincrement addressing modes. */
4983 if (wb
4984 && (GET_CODE (ind) == PRE_INC
4985 || GET_CODE (ind) == POST_INC
4986 || GET_CODE (ind) == PRE_DEC
4987 || GET_CODE (ind) == POST_DEC))
4988 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
4990 if (wb
4991 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
4992 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
4993 && GET_CODE (XEXP (ind, 1)) == PLUS
4994 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
4995 ind = XEXP (ind, 1);
4997 /* Match:
4998 (plus (reg)
4999 (const)). */
5000 if (GET_CODE (ind) == PLUS
5001 && GET_CODE (XEXP (ind, 0)) == REG
5002 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
5003 && GET_CODE (XEXP (ind, 1)) == CONST_INT
5004 && INTVAL (XEXP (ind, 1)) > -1024
5005 && INTVAL (XEXP (ind, 1)) < 1024
5006 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
5007 return TRUE;
5009 return FALSE;
5012 /* Return true if X is a register that will be eliminated later on. */
5014 arm_eliminable_register (rtx x)
5016 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
5017 || REGNO (x) == ARG_POINTER_REGNUM
5018 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
5019 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
5022 /* Return GENERAL_REGS if a scratch register required to reload x to/from
5023 VFP registers. Otherwise return NO_REGS. */
5025 enum reg_class
5026 vfp_secondary_reload_class (enum machine_mode mode, rtx x)
5028 if (arm_coproc_mem_operand (x, FALSE) || s_register_operand (x, mode))
5029 return NO_REGS;
5031 return GENERAL_REGS;
5034 /* Values which must be returned in the most-significant end of the return
5035 register. */
5037 static bool
5038 arm_return_in_msb (tree valtype)
5040 return (TARGET_AAPCS_BASED
5041 && BYTES_BIG_ENDIAN
5042 && (AGGREGATE_TYPE_P (valtype)
5043 || TREE_CODE (valtype) == COMPLEX_TYPE));
5046 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
5047 Used by the Cirrus Maverick code, which has to work around
5048 a hardware bug triggered by such instructions. */
5049 static bool
5050 arm_memory_load_p (rtx insn)
5052 rtx body, lhs, rhs;
5054 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
5055 return false;
5057 body = PATTERN (insn);
5059 if (GET_CODE (body) != SET)
5060 return false;
5062 lhs = XEXP (body, 0);
5063 rhs = XEXP (body, 1);
5065 lhs = REG_OR_SUBREG_RTX (lhs);
5067 /* If the destination is not a general purpose
5068 register we do not have to worry. */
5069 if (GET_CODE (lhs) != REG
5070 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
5071 return false;
5073 /* As well as loads from memory we also have to react
5074 to loads of invalid constants which will be turned
5075 into loads from the minipool. */
5076 return (GET_CODE (rhs) == MEM
5077 || GET_CODE (rhs) == SYMBOL_REF
5078 || note_invalid_constants (insn, -1, false));
5081 /* Return TRUE if INSN is a Cirrus instruction. */
5082 static bool
5083 arm_cirrus_insn_p (rtx insn)
5085 enum attr_cirrus attr;
5087 /* get_attr cannot accept USE or CLOBBER. */
5088 if (!insn
5089 || GET_CODE (insn) != INSN
5090 || GET_CODE (PATTERN (insn)) == USE
5091 || GET_CODE (PATTERN (insn)) == CLOBBER)
5092 return 0;
5094 attr = get_attr_cirrus (insn);
5096 return attr != CIRRUS_NOT;
5099 /* Cirrus reorg for invalid instruction combinations. */
5100 static void
5101 cirrus_reorg (rtx first)
5103 enum attr_cirrus attr;
5104 rtx body = PATTERN (first);
5105 rtx t;
5106 int nops;
5108 /* Any branch must be followed by 2 non Cirrus instructions. */
5109 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
5111 nops = 0;
5112 t = next_nonnote_insn (first);
5114 if (arm_cirrus_insn_p (t))
5115 ++ nops;
5117 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
5118 ++ nops;
5120 while (nops --)
5121 emit_insn_after (gen_nop (), first);
5123 return;
5126 /* (float (blah)) is in parallel with a clobber. */
5127 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
5128 body = XVECEXP (body, 0, 0);
5130 if (GET_CODE (body) == SET)
5132 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
5134 /* cfldrd, cfldr64, cfstrd, cfstr64 must
5135 be followed by a non Cirrus insn. */
5136 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
5138 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
5139 emit_insn_after (gen_nop (), first);
5141 return;
5143 else if (arm_memory_load_p (first))
5145 unsigned int arm_regno;
5147 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
5148 ldr/cfmv64hr combination where the Rd field is the same
5149 in both instructions must be split with a non Cirrus
5150 insn. Example:
5152 ldr r0, blah
5154 cfmvsr mvf0, r0. */
5156 /* Get Arm register number for ldr insn. */
5157 if (GET_CODE (lhs) == REG)
5158 arm_regno = REGNO (lhs);
5159 else
5161 gcc_assert (GET_CODE (rhs) == REG);
5162 arm_regno = REGNO (rhs);
5165 /* Next insn. */
5166 first = next_nonnote_insn (first);
5168 if (! arm_cirrus_insn_p (first))
5169 return;
5171 body = PATTERN (first);
5173 /* (float (blah)) is in parallel with a clobber. */
5174 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
5175 body = XVECEXP (body, 0, 0);
5177 if (GET_CODE (body) == FLOAT)
5178 body = XEXP (body, 0);
5180 if (get_attr_cirrus (first) == CIRRUS_MOVE
5181 && GET_CODE (XEXP (body, 1)) == REG
5182 && arm_regno == REGNO (XEXP (body, 1)))
5183 emit_insn_after (gen_nop (), first);
5185 return;
5189 /* get_attr cannot accept USE or CLOBBER. */
5190 if (!first
5191 || GET_CODE (first) != INSN
5192 || GET_CODE (PATTERN (first)) == USE
5193 || GET_CODE (PATTERN (first)) == CLOBBER)
5194 return;
5196 attr = get_attr_cirrus (first);
5198 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
5199 must be followed by a non-coprocessor instruction. */
5200 if (attr == CIRRUS_COMPARE)
5202 nops = 0;
5204 t = next_nonnote_insn (first);
5206 if (arm_cirrus_insn_p (t))
5207 ++ nops;
5209 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
5210 ++ nops;
5212 while (nops --)
5213 emit_insn_after (gen_nop (), first);
5215 return;
5219 /* Return TRUE if X references a SYMBOL_REF. */
5221 symbol_mentioned_p (rtx x)
5223 const char * fmt;
5224 int i;
5226 if (GET_CODE (x) == SYMBOL_REF)
5227 return 1;
5229 fmt = GET_RTX_FORMAT (GET_CODE (x));
5231 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5233 if (fmt[i] == 'E')
5235 int j;
5237 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5238 if (symbol_mentioned_p (XVECEXP (x, i, j)))
5239 return 1;
5241 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
5242 return 1;
5245 return 0;
5248 /* Return TRUE if X references a LABEL_REF. */
5250 label_mentioned_p (rtx x)
5252 const char * fmt;
5253 int i;
5255 if (GET_CODE (x) == LABEL_REF)
5256 return 1;
5258 fmt = GET_RTX_FORMAT (GET_CODE (x));
5259 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5261 if (fmt[i] == 'E')
5263 int j;
5265 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5266 if (label_mentioned_p (XVECEXP (x, i, j)))
5267 return 1;
5269 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
5270 return 1;
5273 return 0;
5276 enum rtx_code
5277 minmax_code (rtx x)
5279 enum rtx_code code = GET_CODE (x);
5281 switch (code)
5283 case SMAX:
5284 return GE;
5285 case SMIN:
5286 return LE;
5287 case UMIN:
5288 return LEU;
5289 case UMAX:
5290 return GEU;
5291 default:
5292 gcc_unreachable ();
5296 /* Return 1 if memory locations are adjacent. */
5298 adjacent_mem_locations (rtx a, rtx b)
5300 /* We don't guarantee to preserve the order of these memory refs. */
5301 if (volatile_refs_p (a) || volatile_refs_p (b))
5302 return 0;
5304 if ((GET_CODE (XEXP (a, 0)) == REG
5305 || (GET_CODE (XEXP (a, 0)) == PLUS
5306 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
5307 && (GET_CODE (XEXP (b, 0)) == REG
5308 || (GET_CODE (XEXP (b, 0)) == PLUS
5309 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
5311 HOST_WIDE_INT val0 = 0, val1 = 0;
5312 rtx reg0, reg1;
5313 int val_diff;
5315 if (GET_CODE (XEXP (a, 0)) == PLUS)
5317 reg0 = XEXP (XEXP (a, 0), 0);
5318 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
5320 else
5321 reg0 = XEXP (a, 0);
5323 if (GET_CODE (XEXP (b, 0)) == PLUS)
5325 reg1 = XEXP (XEXP (b, 0), 0);
5326 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
5328 else
5329 reg1 = XEXP (b, 0);
5331 /* Don't accept any offset that will require multiple
5332 instructions to handle, since this would cause the
5333 arith_adjacentmem pattern to output an overlong sequence. */
5334 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
5335 return 0;
5337 /* Don't allow an eliminable register: register elimination can make
5338 the offset too large. */
5339 if (arm_eliminable_register (reg0))
5340 return 0;
5342 val_diff = val1 - val0;
5344 if (arm_ld_sched)
5346 /* If the target has load delay slots, then there's no benefit
5347 to using an ldm instruction unless the offset is zero and
5348 we are optimizing for size. */
5349 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
5350 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
5351 && (val_diff == 4 || val_diff == -4));
5354 return ((REGNO (reg0) == REGNO (reg1))
5355 && (val_diff == 4 || val_diff == -4));
5358 return 0;
5362 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
5363 HOST_WIDE_INT *load_offset)
5365 int unsorted_regs[4];
5366 HOST_WIDE_INT unsorted_offsets[4];
5367 int order[4];
5368 int base_reg = -1;
5369 int i;
5371 /* Can only handle 2, 3, or 4 insns at present,
5372 though could be easily extended if required. */
5373 gcc_assert (nops >= 2 && nops <= 4);
5375 /* Loop over the operands and check that the memory references are
5376 suitable (i.e. immediate offsets from the same base register). At
5377 the same time, extract the target register, and the memory
5378 offsets. */
5379 for (i = 0; i < nops; i++)
5381 rtx reg;
5382 rtx offset;
5384 /* Convert a subreg of a mem into the mem itself. */
5385 if (GET_CODE (operands[nops + i]) == SUBREG)
5386 operands[nops + i] = alter_subreg (operands + (nops + i));
5388 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
5390 /* Don't reorder volatile memory references; it doesn't seem worth
5391 looking for the case where the order is ok anyway. */
5392 if (MEM_VOLATILE_P (operands[nops + i]))
5393 return 0;
5395 offset = const0_rtx;
5397 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
5398 || (GET_CODE (reg) == SUBREG
5399 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
5400 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
5401 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
5402 == REG)
5403 || (GET_CODE (reg) == SUBREG
5404 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
5405 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
5406 == CONST_INT)))
5408 if (i == 0)
5410 base_reg = REGNO (reg);
5411 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
5412 ? REGNO (operands[i])
5413 : REGNO (SUBREG_REG (operands[i])));
5414 order[0] = 0;
5416 else
5418 if (base_reg != (int) REGNO (reg))
5419 /* Not addressed from the same base register. */
5420 return 0;
5422 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
5423 ? REGNO (operands[i])
5424 : REGNO (SUBREG_REG (operands[i])));
5425 if (unsorted_regs[i] < unsorted_regs[order[0]])
5426 order[0] = i;
5429 /* If it isn't an integer register, or if it overwrites the
5430 base register but isn't the last insn in the list, then
5431 we can't do this. */
5432 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14
5433 || (i != nops - 1 && unsorted_regs[i] == base_reg))
5434 return 0;
5436 unsorted_offsets[i] = INTVAL (offset);
5438 else
5439 /* Not a suitable memory address. */
5440 return 0;
5443 /* All the useful information has now been extracted from the
5444 operands into unsorted_regs and unsorted_offsets; additionally,
5445 order[0] has been set to the lowest numbered register in the
5446 list. Sort the registers into order, and check that the memory
5447 offsets are ascending and adjacent. */
5449 for (i = 1; i < nops; i++)
5451 int j;
5453 order[i] = order[i - 1];
5454 for (j = 0; j < nops; j++)
5455 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
5456 && (order[i] == order[i - 1]
5457 || unsorted_regs[j] < unsorted_regs[order[i]]))
5458 order[i] = j;
5460 /* Have we found a suitable register? If not, one must be used more
5461 than once. */
5462 if (order[i] == order[i - 1])
5463 return 0;
5465 /* Is the memory address adjacent and ascending? */
5466 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
5467 return 0;
5470 if (base)
5472 *base = base_reg;
5474 for (i = 0; i < nops; i++)
5475 regs[i] = unsorted_regs[order[i]];
5477 *load_offset = unsorted_offsets[order[0]];
5480 if (unsorted_offsets[order[0]] == 0)
5481 return 1; /* ldmia */
5483 if (unsorted_offsets[order[0]] == 4)
5484 return 2; /* ldmib */
5486 if (unsorted_offsets[order[nops - 1]] == 0)
5487 return 3; /* ldmda */
5489 if (unsorted_offsets[order[nops - 1]] == -4)
5490 return 4; /* ldmdb */
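/* For illustration: ascending offsets {0, 4, 8} select ldmia, {4, 8, 12}
ldmib, {-8, -4, 0} ldmda and {-12, -8, -4} ldmdb. */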
5492 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
5493 if the offset isn't small enough. The reason 2 ldrs are faster
5494 is because these ARMs are able to do more than one cache access
5495 in a single cycle. The ARM9 and StrongARM have Harvard caches,
5496 whilst the ARM8 has a double bandwidth cache. This means that
5497 these cores can do both an instruction fetch and a data fetch in
5498 a single cycle, so the trick of calculating the address into a
5499 scratch register (one of the result regs) and then doing a load
5500 multiple actually becomes slower (and no smaller in code size).
5501 That is the transformation
5503 ldr rd1, [rbase + offset]
5504 ldr rd2, [rbase + offset + 4]
5506 to
5508 add rd1, rbase, offset
5509 ldmia rd1, {rd1, rd2}
5511 produces worse code -- '3 cycles + any stalls on rd2' instead of
5512 '2 cycles + any stalls on rd2'. On ARMs with only one cache
5513 access per cycle, the first sequence could never complete in less
5514 than 6 cycles, whereas the ldm sequence would only take 5 and
5515 would make better use of sequential accesses if not hitting the
5516 cache.
5518 We cheat here and test 'arm_ld_sched' which we currently know to
5519 only be true for the ARM8, ARM9 and StrongARM. If this ever
5520 changes, then the test below needs to be reworked. */
5521 if (nops == 2 && arm_ld_sched)
5522 return 0;
5524 /* Can't do it without setting up the offset; only do this if it takes
5525 no more than one insn. */
5526 return (const_ok_for_arm (unsorted_offsets[order[0]])
5527 || const_ok_for_arm (-unsorted_offsets[order[0]])) ? 5 : 0;
5530 const char *
5531 emit_ldm_seq (rtx *operands, int nops)
5533 int regs[4];
5534 int base_reg;
5535 HOST_WIDE_INT offset;
5536 char buf[100];
5537 int i;
5539 switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset))
5541 case 1:
5542 strcpy (buf, "ldm%?ia\t");
5543 break;
5545 case 2:
5546 strcpy (buf, "ldm%?ib\t");
5547 break;
5549 case 3:
5550 strcpy (buf, "ldm%?da\t");
5551 break;
5553 case 4:
5554 strcpy (buf, "ldm%?db\t");
5555 break;
5557 case 5:
5558 if (offset >= 0)
5559 sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
5560 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
5561 (long) offset);
5562 else
5563 sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
5564 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
5565 (long) -offset);
5566 output_asm_insn (buf, operands);
5567 base_reg = regs[0];
5568 strcpy (buf, "ldm%?ia\t");
5569 break;
5571 default:
5572 gcc_unreachable ();
5575 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
5576 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
5578 for (i = 1; i < nops; i++)
5579 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
5580 reg_names[regs[i]]);
5582 strcat (buf, "}\t%@ phole ldm");
5584 output_asm_insn (buf, operands);
5585 return "";
5589 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
5590 HOST_WIDE_INT * load_offset)
5592 int unsorted_regs[4];
5593 HOST_WIDE_INT unsorted_offsets[4];
5594 int order[4];
5595 int base_reg = -1;
5596 int i;
5598 /* Can only handle 2, 3, or 4 insns at present, though could be easily
5599 extended if required. */
5600 gcc_assert (nops >= 2 && nops <= 4);
5602 /* Loop over the operands and check that the memory references are
5603 suitable (i.e. immediate offsets from the same base register). At
5604 the same time, extract the target register, and the memory
5605 offsets. */
5606 for (i = 0; i < nops; i++)
5608 rtx reg;
5609 rtx offset;
5611 /* Convert a subreg of a mem into the mem itself. */
5612 if (GET_CODE (operands[nops + i]) == SUBREG)
5613 operands[nops + i] = alter_subreg (operands + (nops + i));
5615 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
5617 /* Don't reorder volatile memory references; it doesn't seem worth
5618 looking for the case where the order is ok anyway. */
5619 if (MEM_VOLATILE_P (operands[nops + i]))
5620 return 0;
5622 offset = const0_rtx;
5624 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
5625 || (GET_CODE (reg) == SUBREG
5626 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
5627 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
5628 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
5629 == REG)
5630 || (GET_CODE (reg) == SUBREG
5631 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
5632 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
5633 == CONST_INT)))
5635 if (i == 0)
5637 base_reg = REGNO (reg);
5638 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
5639 ? REGNO (operands[i])
5640 : REGNO (SUBREG_REG (operands[i])));
5641 order[0] = 0;
5643 else
5645 if (base_reg != (int) REGNO (reg))
5646 /* Not addressed from the same base register. */
5647 return 0;
5649 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
5650 ? REGNO (operands[i])
5651 : REGNO (SUBREG_REG (operands[i])));
5652 if (unsorted_regs[i] < unsorted_regs[order[0]])
5653 order[0] = i;
5656 /* If it isn't an integer register, then we can't do this. */
5657 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
5658 return 0;
5660 unsorted_offsets[i] = INTVAL (offset);
5662 else
5663 /* Not a suitable memory address. */
5664 return 0;
5667 /* All the useful information has now been extracted from the
5668 operands into unsorted_regs and unsorted_offsets; additionally,
5669 order[0] has been set to the lowest numbered register in the
5670 list. Sort the registers into order, and check that the memory
5671 offsets are ascending and adjacent. */
5673 for (i = 1; i < nops; i++)
5675 int j;
5677 order[i] = order[i - 1];
5678 for (j = 0; j < nops; j++)
5679 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
5680 && (order[i] == order[i - 1]
5681 || unsorted_regs[j] < unsorted_regs[order[i]]))
5682 order[i] = j;
5684 /* Have we found a suitable register? If not, one must be used more
5685 than once. */
5686 if (order[i] == order[i - 1])
5687 return 0;
5689 /* Is the memory address adjacent and ascending? */
5690 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
5691 return 0;
5694 if (base)
5696 *base = base_reg;
5698 for (i = 0; i < nops; i++)
5699 regs[i] = unsorted_regs[order[i]];
5701 *load_offset = unsorted_offsets[order[0]];
5704 if (unsorted_offsets[order[0]] == 0)
5705 return 1; /* stmia */
5707 if (unsorted_offsets[order[0]] == 4)
5708 return 2; /* stmib */
5710 if (unsorted_offsets[order[nops - 1]] == 0)
5711 return 3; /* stmda */
5713 if (unsorted_offsets[order[nops - 1]] == -4)
5714 return 4; /* stmdb */
5716 return 0;
5719 const char *
5720 emit_stm_seq (rtx *operands, int nops)
5722 int regs[4];
5723 int base_reg;
5724 HOST_WIDE_INT offset;
5725 char buf[100];
5726 int i;
5728 switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
5730 case 1:
5731 strcpy (buf, "stm%?ia\t");
5732 break;
5734 case 2:
5735 strcpy (buf, "stm%?ib\t");
5736 break;
5738 case 3:
5739 strcpy (buf, "stm%?da\t");
5740 break;
5742 case 4:
5743 strcpy (buf, "stm%?db\t");
5744 break;
5746 default:
5747 gcc_unreachable ();
5750 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
5751 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
5753 for (i = 1; i < nops; i++)
5754 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
5755 reg_names[regs[i]]);
5757 strcat (buf, "}\t%@ phole stm");
5759 output_asm_insn (buf, operands);
5760 return "";
5764 /* Routines for use in generating RTL. */
5767 arm_gen_load_multiple (int base_regno, int count, rtx from, int up,
5768 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
5770 HOST_WIDE_INT offset = *offsetp;
5771 int i = 0, j;
5772 rtx result;
5773 int sign = up ? 1 : -1;
5774 rtx mem, addr;
5776 /* XScale has load-store double instructions, but they have stricter
5777 alignment requirements than load-store multiple, so we cannot
5778 use them.
5780 For XScale ldm requires 2 + NREGS cycles to complete and blocks
5781 the pipeline until completion.
5783 NREGS CYCLES
5784 1 3
5785 2 4
5786 3 5
5787 4 6
5789 An ldr instruction takes 1-3 cycles, but does not block the
5790 pipeline.
5792 NREGS CYCLES
5793 1 1-3
5794 2 2-6
5795 3 3-9
5796 4 4-12
5798 Best case ldr will always win. However, the more ldr instructions
5799 we issue, the less likely we are to be able to schedule them well.
5800 Using ldr instructions also increases code size.
5802 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
5803 for counts of 3 or 4 regs. */
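/* Hence the test below: on XScale, separate ldr instructions are used only
for 1 or 2 registers, and only when not optimizing for size. */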
5804 if (arm_tune_xscale && count <= 2 && ! optimize_size)
5806 rtx seq;
5808 start_sequence ();
5810 for (i = 0; i < count; i++)
5812 addr = plus_constant (from, i * 4 * sign);
5813 mem = adjust_automodify_address (basemem, SImode, addr, offset);
5814 emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
5815 offset += 4 * sign;
5818 if (write_back)
5820 emit_move_insn (from, plus_constant (from, count * 4 * sign));
5821 *offsetp = offset;
5824 seq = get_insns ();
5825 end_sequence ();
5827 return seq;
5830 result = gen_rtx_PARALLEL (VOIDmode,
5831 rtvec_alloc (count + (write_back ? 1 : 0)));
5832 if (write_back)
5834 XVECEXP (result, 0, 0)
5835 = gen_rtx_SET (GET_MODE (from), from,
5836 plus_constant (from, count * 4 * sign));
5837 i = 1;
5838 count++;
5841 for (j = 0; i < count; i++, j++)
5843 addr = plus_constant (from, j * 4 * sign);
5844 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
5845 XVECEXP (result, 0, i)
5846 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem);
5847 offset += 4 * sign;
5850 if (write_back)
5851 *offsetp = offset;
5853 return result;
5857 arm_gen_store_multiple (int base_regno, int count, rtx to, int up,
5858 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
5860 HOST_WIDE_INT offset = *offsetp;
5861 int i = 0, j;
5862 rtx result;
5863 int sign = up ? 1 : -1;
5864 rtx mem, addr;
5866 /* See arm_gen_load_multiple for discussion of
5867 the pros/cons of ldm/stm usage for XScale. */
5868 if (arm_tune_xscale && count <= 2 && ! optimize_size)
5870 rtx seq;
5872 start_sequence ();
5874 for (i = 0; i < count; i++)
5876 addr = plus_constant (to, i * 4 * sign);
5877 mem = adjust_automodify_address (basemem, SImode, addr, offset);
5878 emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
5879 offset += 4 * sign;
5882 if (write_back)
5884 emit_move_insn (to, plus_constant (to, count * 4 * sign));
5885 *offsetp = offset;
5888 seq = get_insns ();
5889 end_sequence ();
5891 return seq;
5894 result = gen_rtx_PARALLEL (VOIDmode,
5895 rtvec_alloc (count + (write_back ? 1 : 0)));
5896 if (write_back)
5898 XVECEXP (result, 0, 0)
5899 = gen_rtx_SET (GET_MODE (to), to,
5900 plus_constant (to, count * 4 * sign));
5901 i = 1;
5902 count++;
5905 for (j = 0; i < count; i++, j++)
5907 addr = plus_constant (to, j * 4 * sign);
5908 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
5909 XVECEXP (result, 0, i)
5910 = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j));
5911 offset += 4 * sign;
5914 if (write_back)
5915 *offsetp = offset;
5917 return result;
5921 arm_gen_movmemqi (rtx *operands)
5923 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
5924 HOST_WIDE_INT srcoffset, dstoffset;
5925 int i;
5926 rtx src, dst, srcbase, dstbase;
5927 rtx part_bytes_reg = NULL;
5928 rtx mem;
5930 if (GET_CODE (operands[2]) != CONST_INT
5931 || GET_CODE (operands[3]) != CONST_INT
5932 || INTVAL (operands[2]) > 64
5933 || INTVAL (operands[3]) & 3)
5934 return 0;
5936 dstbase = operands[0];
5937 srcbase = operands[1];
5939 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
5940 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
5942 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
5943 out_words_to_go = INTVAL (operands[2]) / 4;
5944 last_bytes = INTVAL (operands[2]) & 3;
5945 dstoffset = srcoffset = 0;
5947 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
5948 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
5950 for (i = 0; in_words_to_go >= 2; i+=4)
5952 if (in_words_to_go > 4)
5953 emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE,
5954 srcbase, &srcoffset));
5955 else
5956 emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE,
5957 FALSE, srcbase, &srcoffset));
5959 if (out_words_to_go)
5961 if (out_words_to_go > 4)
5962 emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE,
5963 dstbase, &dstoffset));
5964 else if (out_words_to_go != 1)
5965 emit_insn (arm_gen_store_multiple (0, out_words_to_go,
5966 dst, TRUE,
5967 (last_bytes == 0
5968 ? FALSE : TRUE),
5969 dstbase, &dstoffset));
5970 else
5972 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
5973 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
5974 if (last_bytes != 0)
5976 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
5977 dstoffset += 4;
5982 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
5983 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
5986 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
5987 if (out_words_to_go)
5989 rtx sreg;
5991 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
5992 sreg = copy_to_reg (mem);
5994 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
5995 emit_move_insn (mem, sreg);
5996 in_words_to_go--;
5998 gcc_assert (!in_words_to_go); /* Sanity check */
6001 if (in_words_to_go)
6003 gcc_assert (in_words_to_go > 0);
6005 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
6006 part_bytes_reg = copy_to_mode_reg (SImode, mem);
6009 gcc_assert (!last_bytes || part_bytes_reg);
6011 if (BYTES_BIG_ENDIAN && last_bytes)
6013 rtx tmp = gen_reg_rtx (SImode);
6015 /* The bytes we want are in the top end of the word. */
6016 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
6017 GEN_INT (8 * (4 - last_bytes))));
6018 part_bytes_reg = tmp;
6020 while (last_bytes)
6022 mem = adjust_automodify_address (dstbase, QImode,
6023 plus_constant (dst, last_bytes - 1),
6024 dstoffset + last_bytes - 1);
6025 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
6027 if (--last_bytes)
6029 tmp = gen_reg_rtx (SImode);
6030 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
6031 part_bytes_reg = tmp;
6036 else
6038 if (last_bytes > 1)
6040 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
6041 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
6042 last_bytes -= 2;
6043 if (last_bytes)
6045 rtx tmp = gen_reg_rtx (SImode);
6046 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
6047 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
6048 part_bytes_reg = tmp;
6049 dstoffset += 2;
6053 if (last_bytes)
6055 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
6056 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
6060 return 1;
6063 /* Generate a memory reference for a half word, such that it will be loaded
6064 into the top 16 bits of the word. We can assume that the address is
6065 known to be alignable and of the form reg, or plus (reg, const). */
6068 arm_gen_rotated_half_load (rtx memref)
6070 HOST_WIDE_INT offset = 0;
6071 rtx base = XEXP (memref, 0);
6073 if (GET_CODE (base) == PLUS)
6075 offset = INTVAL (XEXP (base, 1));
6076 base = XEXP (base, 0);
6079 /* If we aren't allowed to generate unaligned addresses, then fail. */
6080 if ((BYTES_BIG_ENDIAN ? 1 : 0) ^ ((offset & 2) == 0))
6081 return NULL;
6083 base = gen_rtx_MEM (SImode, plus_constant (base, offset & ~2));
6085 if ((BYTES_BIG_ENDIAN ? 1 : 0) ^ ((offset & 2) == 2))
6086 return base;
6088 return gen_rtx_ROTATE (SImode, base, GEN_INT (16));
6091 /* Select a dominance comparison mode if possible for a test of the general
6092 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
6093 COND_OR == DOM_CC_X_AND_Y => (X && Y)
6094 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
6095 COND_OR == DOM_CC_X_OR_Y => (X || Y)
6096 In all cases OP will be either EQ or NE, but we don't need to know which
6097 here. If we are unable to support a dominance comparison we return
6098 CC mode. This will then fail to match for the RTL expressions that
6099 generate this call. */
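/* Illustrative example: for (ne (ior (eq a b) (le a c)) (const_int 0))
   the individual codes are EQ and LE; EQ dominates LE, so with COND_OR ==
   DOM_CC_X_OR_Y the switch below selects CC_DLEmode.  */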
6100 enum machine_mode
6101 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
6103 enum rtx_code cond1, cond2;
6104 int swapped = 0;
6106 /* Currently we will probably get the wrong result if the individual
6107 comparisons are not simple. This also ensures that it is safe to
6108 reverse a comparison if necessary. */
6109 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
6110 != CCmode)
6111 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
6112 != CCmode))
6113 return CCmode;
6115 /* The if_then_else variant of this tests the second condition if the
6116 first passes, but is true if the first fails. Reverse the first
6117 condition to get a true "inclusive-or" expression. */
6118 if (cond_or == DOM_CC_NX_OR_Y)
6119 cond1 = reverse_condition (cond1);
6121 /* If the comparisons are not equal, and one doesn't dominate the other,
6122 then we can't do this. */
6123 if (cond1 != cond2
6124 && !comparison_dominates_p (cond1, cond2)
6125 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
6126 return CCmode;
6128 if (swapped)
6130 enum rtx_code temp = cond1;
6131 cond1 = cond2;
6132 cond2 = temp;
6135 switch (cond1)
6137 case EQ:
6138 if (cond_or == DOM_CC_X_AND_Y)
6139 return CC_DEQmode;
6141 switch (cond2)
6143 case EQ: return CC_DEQmode;
6144 case LE: return CC_DLEmode;
6145 case LEU: return CC_DLEUmode;
6146 case GE: return CC_DGEmode;
6147 case GEU: return CC_DGEUmode;
6148 default: gcc_unreachable ();
6151 case LT:
6152 if (cond_or == DOM_CC_X_AND_Y)
6153 return CC_DLTmode;
6155 switch (cond2)
6157 case LT:
6158 return CC_DLTmode;
6159 case LE:
6160 return CC_DLEmode;
6161 case NE:
6162 return CC_DNEmode;
6163 default:
6164 gcc_unreachable ();
6167 case GT:
6168 if (cond_or == DOM_CC_X_AND_Y)
6169 return CC_DGTmode;
6171 switch (cond2)
6173 case GT:
6174 return CC_DGTmode;
6175 case GE:
6176 return CC_DGEmode;
6177 case NE:
6178 return CC_DNEmode;
6179 default:
6180 gcc_unreachable ();
6183 case LTU:
6184 if (cond_or == DOM_CC_X_AND_Y)
6185 return CC_DLTUmode;
6187 switch (cond2)
6189 case LTU:
6190 return CC_DLTUmode;
6191 case LEU:
6192 return CC_DLEUmode;
6193 case NE:
6194 return CC_DNEmode;
6195 default:
6196 gcc_unreachable ();
6199 case GTU:
6200 if (cond_or == DOM_CC_X_AND_Y)
6201 return CC_DGTUmode;
6203 switch (cond2)
6205 case GTU:
6206 return CC_DGTUmode;
6207 case GEU:
6208 return CC_DGEUmode;
6209 case NE:
6210 return CC_DNEmode;
6211 default:
6212 gcc_unreachable ();
6215 /* The remaining cases only occur when both comparisons are the
6216 same. */
6217 case NE:
6218 gcc_assert (cond1 == cond2);
6219 return CC_DNEmode;
6221 case LE:
6222 gcc_assert (cond1 == cond2);
6223 return CC_DLEmode;
6225 case GE:
6226 gcc_assert (cond1 == cond2);
6227 return CC_DGEmode;
6229 case LEU:
6230 gcc_assert (cond1 == cond2);
6231 return CC_DLEUmode;
6233 case GEU:
6234 gcc_assert (cond1 == cond2);
6235 return CC_DGEUmode;
6237 default:
6238 gcc_unreachable ();
6242 enum machine_mode
6243 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
6245 /* All floating point compares return CCFP if it is an equality
6246 comparison, and CCFPE otherwise. */
6247 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
6249 switch (op)
6251 case EQ:
6252 case NE:
6253 case UNORDERED:
6254 case ORDERED:
6255 case UNLT:
6256 case UNLE:
6257 case UNGT:
6258 case UNGE:
6259 case UNEQ:
6260 case LTGT:
6261 return CCFPmode;
6263 case LT:
6264 case LE:
6265 case GT:
6266 case GE:
6267 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
6268 return CCFPmode;
6269 return CCFPEmode;
6271 default:
6272 gcc_unreachable ();
6276 /* A compare with a shifted operand. Because of canonicalization, the
6277 comparison will have to be swapped when we emit the assembler. */
6278 if (GET_MODE (y) == SImode && GET_CODE (y) == REG
6279 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
6280 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
6281 || GET_CODE (x) == ROTATERT))
6282 return CC_SWPmode;
6284 /* This operation is performed swapped, but since we only rely on the Z
6285 flag we don't need an additional mode. */
6286 if (GET_MODE (y) == SImode && REG_P (y)
6287 && GET_CODE (x) == NEG
6288 && (op == EQ || op == NE))
6289 return CC_Zmode;
6291 /* This is a special case that is used by combine to allow a
6292 comparison of a shifted byte load to be split into a zero-extend
6293 followed by a comparison of the shifted integer (only valid for
6294 equalities and unsigned inequalities). */
6295 if (GET_MODE (x) == SImode
6296 && GET_CODE (x) == ASHIFT
6297 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
6298 && GET_CODE (XEXP (x, 0)) == SUBREG
6299 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
6300 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
6301 && (op == EQ || op == NE
6302 || op == GEU || op == GTU || op == LTU || op == LEU)
6303 && GET_CODE (y) == CONST_INT)
6304 return CC_Zmode;
6306 /* A construct for a conditional compare: if the false arm contains
6307 0, then both conditions must be true, otherwise either condition
6308 must be true. Not all conditions are possible, so CCmode is
6309 returned if it can't be done. */
6310 if (GET_CODE (x) == IF_THEN_ELSE
6311 && (XEXP (x, 2) == const0_rtx
6312 || XEXP (x, 2) == const1_rtx)
6313 && COMPARISON_P (XEXP (x, 0))
6314 && COMPARISON_P (XEXP (x, 1)))
6315 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
6316 INTVAL (XEXP (x, 2)));
6318 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
6319 if (GET_CODE (x) == AND
6320 && COMPARISON_P (XEXP (x, 0))
6321 && COMPARISON_P (XEXP (x, 1)))
6322 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
6323 DOM_CC_X_AND_Y);
6325 if (GET_CODE (x) == IOR
6326 && COMPARISON_P (XEXP (x, 0))
6327 && COMPARISON_P (XEXP (x, 1)))
6328 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
6329 DOM_CC_X_OR_Y);
6331 /* An operation (on Thumb) where we want to test for a single bit.
6332 This is done by shifting that bit up into the top bit of a
6333 scratch register; we can then branch on the sign bit. */
6334 if (TARGET_THUMB
6335 && GET_MODE (x) == SImode
6336 && (op == EQ || op == NE)
6337 && (GET_CODE (x) == ZERO_EXTRACT))
6338 return CC_Nmode;
6340 /* An operation that sets the condition codes as a side-effect; the
6341 V flag is not set correctly, so we can only use comparisons where
6342 this doesn't matter. (For LT and GE we can use "mi" and "pl"
6343 instead.) */
6344 if (GET_MODE (x) == SImode
6345 && y == const0_rtx
6346 && (op == EQ || op == NE || op == LT || op == GE)
6347 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
6348 || GET_CODE (x) == AND || GET_CODE (x) == IOR
6349 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
6350 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
6351 || GET_CODE (x) == LSHIFTRT
6352 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
6353 || GET_CODE (x) == ROTATERT
6354 || (TARGET_ARM && GET_CODE (x) == ZERO_EXTRACT)))
6355 return CC_NOOVmode;
6357 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
6358 return CC_Zmode;
6360 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
6361 && GET_CODE (x) == PLUS
6362 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
6363 return CC_Cmode;
6365 return CCmode;
6368 /* X and Y are two things to compare using CODE. Emit the compare insn and
6369 return the rtx for register 0 in the proper mode. FP means this is a
6370 floating point compare: I don't think that it is needed on the arm. */
6372 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
6374 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
6375 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
6377 emit_insn (gen_rtx_SET (VOIDmode, cc_reg,
6378 gen_rtx_COMPARE (mode, x, y)));
6380 return cc_reg;
6383 /* Generate a sequence of insns that will generate the correct return
6384 address mask depending on the physical architecture that the program
6385 is running on. */
6387 arm_gen_return_addr_mask (void)
6389 rtx reg = gen_reg_rtx (Pmode);
6391 emit_insn (gen_return_addr_mask (reg));
6392 return reg;
6395 void
6396 arm_reload_in_hi (rtx *operands)
6398 rtx ref = operands[1];
6399 rtx base, scratch;
6400 HOST_WIDE_INT offset = 0;
6402 if (GET_CODE (ref) == SUBREG)
6404 offset = SUBREG_BYTE (ref);
6405 ref = SUBREG_REG (ref);
6408 if (GET_CODE (ref) == REG)
6410 /* We have a pseudo which has been spilt onto the stack; there
6411 are two cases here: the first where there is a simple
6412 stack-slot replacement and a second where the stack-slot is
6413 out of range, or is used as a subreg. */
6414 if (reg_equiv_mem[REGNO (ref)])
6416 ref = reg_equiv_mem[REGNO (ref)];
6417 base = find_replacement (&XEXP (ref, 0));
6419 else
6420 /* The slot is out of range, or was dressed up in a SUBREG. */
6421 base = reg_equiv_address[REGNO (ref)];
6423 else
6424 base = find_replacement (&XEXP (ref, 0));
6426 /* Handle the case where the address is too complex to be offset by 1. */
6427 if (GET_CODE (base) == MINUS
6428 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
6430 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
6432 emit_insn (gen_rtx_SET (VOIDmode, base_plus, base));
6433 base = base_plus;
6435 else if (GET_CODE (base) == PLUS)
6437 /* The addend must be CONST_INT, or we would have dealt with it above. */
6438 HOST_WIDE_INT hi, lo;
6440 offset += INTVAL (XEXP (base, 1));
6441 base = XEXP (base, 0);
6443 /* Rework the address into a legal sequence of insns. */
6444 /* Valid range for lo is -4095 -> 4095 */
6445 lo = (offset >= 0
6446 ? (offset & 0xfff)
6447 : -((-offset) & 0xfff));
6449 /* Corner case: if lo is the max offset then we would be out of range
6450 once we have added the additional 1 below, so bump the msb into the
6451 pre-loading insn(s). */
6452 if (lo == 4095)
6453 lo &= 0x7ff;
6455 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
6456 ^ (HOST_WIDE_INT) 0x80000000)
6457 - (HOST_WIDE_INT) 0x80000000);
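/* Worked example (illustrative): offset = 0x1234 gives lo = 0x234 and
   hi = 0x1000; offset = -0x1234 gives lo = -0x234 and hi = -0x1000.
   The XOR/subtract pair above simply sign-extends the low 32 bits of
   (offset - lo) into a HOST_WIDE_INT.  */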
6459 gcc_assert (hi + lo == offset);
6461 if (hi != 0)
6463 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
6465 /* Get the base address; addsi3 knows how to handle constants
6466 that require more than one insn. */
6467 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
6468 base = base_plus;
6469 offset = lo;
6473 /* Operands[2] may overlap operands[0] (though it won't overlap
6474 operands[1]); that's why we asked for a DImode reg -- so we can
6475 use the bit that does not overlap. */
6476 if (REGNO (operands[2]) == REGNO (operands[0]))
6477 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
6478 else
6479 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
6481 emit_insn (gen_zero_extendqisi2 (scratch,
6482 gen_rtx_MEM (QImode,
6483 plus_constant (base,
6484 offset))));
6485 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
6486 gen_rtx_MEM (QImode,
6487 plus_constant (base,
6488 offset + 1))));
6489 if (!BYTES_BIG_ENDIAN)
6490 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_SUBREG (SImode, operands[0], 0),
6491 gen_rtx_IOR (SImode,
6492 gen_rtx_ASHIFT
6493 (SImode,
6494 gen_rtx_SUBREG (SImode, operands[0], 0),
6495 GEN_INT (8)),
6496 scratch)));
6497 else
6498 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_SUBREG (SImode, operands[0], 0),
6499 gen_rtx_IOR (SImode,
6500 gen_rtx_ASHIFT (SImode, scratch,
6501 GEN_INT (8)),
6502 gen_rtx_SUBREG (SImode, operands[0],
6503 0))));
6506 /* Handle storing a half-word to memory during reload by synthesizing as two
6507 byte stores. Take care not to clobber the input values until after we
6508 have moved them somewhere safe. This code assumes that if the DImode
6509 scratch in operands[2] overlaps either the input value or output address
6510 in some way, then that value must die in this insn (we absolutely need
6511 two scratch registers for some corner cases). */
6512 void
6513 arm_reload_out_hi (rtx *operands)
6515 rtx ref = operands[0];
6516 rtx outval = operands[1];
6517 rtx base, scratch;
6518 HOST_WIDE_INT offset = 0;
6520 if (GET_CODE (ref) == SUBREG)
6522 offset = SUBREG_BYTE (ref);
6523 ref = SUBREG_REG (ref);
6526 if (GET_CODE (ref) == REG)
6528 /* We have a pseudo which has been spilt onto the stack; there
6529 are two cases here: the first where there is a simple
6530 stack-slot replacement and a second where the stack-slot is
6531 out of range, or is used as a subreg. */
6532 if (reg_equiv_mem[REGNO (ref)])
6534 ref = reg_equiv_mem[REGNO (ref)];
6535 base = find_replacement (&XEXP (ref, 0));
6537 else
6538 /* The slot is out of range, or was dressed up in a SUBREG. */
6539 base = reg_equiv_address[REGNO (ref)];
6541 else
6542 base = find_replacement (&XEXP (ref, 0));
6544 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
6546 /* Handle the case where the address is too complex to be offset by 1. */
6547 if (GET_CODE (base) == MINUS
6548 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
6550 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
6552 /* Be careful not to destroy OUTVAL. */
6553 if (reg_overlap_mentioned_p (base_plus, outval))
6555 /* Updating base_plus might destroy outval, see if we can
6556 swap the scratch and base_plus. */
6557 if (!reg_overlap_mentioned_p (scratch, outval))
6559 rtx tmp = scratch;
6560 scratch = base_plus;
6561 base_plus = tmp;
6563 else
6565 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
6567 /* Be conservative and copy OUTVAL into the scratch now,
6568 this should only be necessary if outval is a subreg
6569 of something larger than a word. */
6570 /* XXX Might this clobber base? I can't see how it can,
6571 since scratch is known to overlap with OUTVAL, and
6572 must be wider than a word. */
6573 emit_insn (gen_movhi (scratch_hi, outval));
6574 outval = scratch_hi;
6578 emit_insn (gen_rtx_SET (VOIDmode, base_plus, base));
6579 base = base_plus;
6581 else if (GET_CODE (base) == PLUS)
6583 /* The addend must be CONST_INT, or we would have dealt with it above. */
6584 HOST_WIDE_INT hi, lo;
6586 offset += INTVAL (XEXP (base, 1));
6587 base = XEXP (base, 0);
6589 /* Rework the address into a legal sequence of insns. */
6590 /* Valid range for lo is -4095 -> 4095 */
6591 lo = (offset >= 0
6592 ? (offset & 0xfff)
6593 : -((-offset) & 0xfff));
6595 /* Corner case, if lo is the max offset then we would be out of range
6596 once we have added the additional 1 below, so bump the msb into the
6597 pre-loading insn(s). */
6598 if (lo == 4095)
6599 lo &= 0x7ff;
6601 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
6602 ^ (HOST_WIDE_INT) 0x80000000)
6603 - (HOST_WIDE_INT) 0x80000000);
6605 gcc_assert (hi + lo == offset);
6607 if (hi != 0)
6609 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
6611 /* Be careful not to destroy OUTVAL. */
6612 if (reg_overlap_mentioned_p (base_plus, outval))
6614 /* Updating base_plus might destroy outval, see if we
6615 can swap the scratch and base_plus. */
6616 if (!reg_overlap_mentioned_p (scratch, outval))
6618 rtx tmp = scratch;
6619 scratch = base_plus;
6620 base_plus = tmp;
6622 else
6624 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
6626 /* Be conservative and copy outval into scratch now,
6627 this should only be necessary if outval is a
6628 subreg of something larger than a word. */
6629 /* XXX Might this clobber base? I can't see how it
6630 can, since scratch is known to overlap with
6631 outval. */
6632 emit_insn (gen_movhi (scratch_hi, outval));
6633 outval = scratch_hi;
6637 /* Get the base address; addsi3 knows how to handle constants
6638 that require more than one insn. */
6639 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
6640 base = base_plus;
6641 offset = lo;
6645 if (BYTES_BIG_ENDIAN)
6647 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
6648 plus_constant (base, offset + 1)),
6649 gen_lowpart (QImode, outval)));
6650 emit_insn (gen_lshrsi3 (scratch,
6651 gen_rtx_SUBREG (SImode, outval, 0),
6652 GEN_INT (8)));
6653 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
6654 gen_lowpart (QImode, scratch)));
6656 else
6658 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
6659 gen_lowpart (QImode, outval)));
6660 emit_insn (gen_lshrsi3 (scratch,
6661 gen_rtx_SUBREG (SImode, outval, 0),
6662 GEN_INT (8)));
6663 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
6664 plus_constant (base, offset + 1)),
6665 gen_lowpart (QImode, scratch)));
6669 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
6670 (padded to the size of a word) should be passed in a register. */
6672 static bool
6673 arm_must_pass_in_stack (enum machine_mode mode, tree type)
6675 if (TARGET_AAPCS_BASED)
6676 return must_pass_in_stack_var_size (mode, type);
6677 else
6678 return must_pass_in_stack_var_size_or_pad (mode, type);
6682 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
6683 Return true if an argument passed on the stack should be padded upwards,
6684 i.e. if the least-significant byte has useful data. */
6686 bool
6687 arm_pad_arg_upward (enum machine_mode mode, tree type)
6689 if (!TARGET_AAPCS_BASED)
6690 return DEFAULT_FUNCTION_ARG_PADDING(mode, type);
6692 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
6693 return false;
6695 return true;
6699 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
6700 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
6701 byte of the register has useful data, and return the opposite if the
6702 most significant byte does.
6703 For AAPCS, small aggregates and small complex types are always padded
6704 upwards. */
6706 bool
6707 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
6708 tree type, int first ATTRIBUTE_UNUSED)
6710 if (TARGET_AAPCS_BASED
6711 && BYTES_BIG_ENDIAN
6712 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
6713 && int_size_in_bytes (type) <= 4)
6714 return true;
6716 /* Otherwise, use default padding. */
6717 return !BYTES_BIG_ENDIAN;
6722 /* Print a symbolic form of X to the debug file, F. */
6723 static void
6724 arm_print_value (FILE *f, rtx x)
6726 switch (GET_CODE (x))
6728 case CONST_INT:
6729 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
6730 return;
6732 case CONST_DOUBLE:
6733 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
6734 return;
6736 case CONST_VECTOR:
6738 int i;
6740 fprintf (f, "<");
6741 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
6743 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
6744 if (i < (CONST_VECTOR_NUNITS (x) - 1))
6745 fputc (',', f);
6747 fprintf (f, ">");
6749 return;
6751 case CONST_STRING:
6752 fprintf (f, "\"%s\"", XSTR (x, 0));
6753 return;
6755 case SYMBOL_REF:
6756 fprintf (f, "`%s'", XSTR (x, 0));
6757 return;
6759 case LABEL_REF:
6760 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
6761 return;
6763 case CONST:
6764 arm_print_value (f, XEXP (x, 0));
6765 return;
6767 case PLUS:
6768 arm_print_value (f, XEXP (x, 0));
6769 fprintf (f, "+");
6770 arm_print_value (f, XEXP (x, 1));
6771 return;
6773 case PC:
6774 fprintf (f, "pc");
6775 return;
6777 default:
6778 fprintf (f, "????");
6779 return;
6783 /* Routines for manipulation of the constant pool. */
6785 /* Arm instructions cannot load a large constant directly into a
6786 register; they have to come from a pc relative load. The constant
6787 must therefore be placed in the addressable range of the pc
6788 relative load. Depending on the precise pc relative load
6789 instruction the range is somewhere between 256 bytes and 4k. This
6790 means that we often have to dump a constant inside a function, and
6791 generate code to branch around it.
6793 It is important to minimize this, since the branches will slow
6794 things down and make the code larger.
6796 Normally we can hide the table after an existing unconditional
6797 branch so that there is no interruption of the flow, but in the
6798 worst case the code looks like this:
6800 ldr rn, L1
6802 b L2
6803 align
6804 L1: .long value
6808 ldr rn, L3
6810 b L4
6811 align
6812 L3: .long value
6816 We fix this by performing a scan after scheduling, which notices
6817 which instructions need to have their operands fetched from the
6818 constant table and builds the table.
6820 The algorithm starts by building a table of all the constants that
6821 need fixing up and all the natural barriers in the function (places
6822 where a constant table can be dropped without breaking the flow).
6823 For each fixup we note how far the pc-relative replacement will be
6824 able to reach and the offset of the instruction into the function.
6826 Having built the table we then group the fixes together to form
6827 tables that are as large as possible (subject to addressing
6828 constraints) and emit each table of constants after the last
6829 barrier that is within range of all the instructions in the group.
6830 If a group does not contain a barrier, then we forcibly create one
6831 by inserting a jump instruction into the flow. Once the table has
6832 been inserted, the insns are then modified to reference the
6833 relevant entry in the pool.
6835 Possible enhancements to the algorithm (not implemented) are:
6837 1) For some processors and object formats, there may be benefit in
6838 aligning the pools to the start of cache lines; this alignment
6839 would need to be taken into account when calculating addressability
6840 of a pool. */
6842 /* These typedefs are located at the start of this file, so that
6843 they can be used in the prototypes there. This comment is to
6844 remind readers of that fact so that the following structures
6845 can be understood more easily.
6847 typedef struct minipool_node Mnode;
6848 typedef struct minipool_fixup Mfix; */
6850 struct minipool_node
6852 /* Doubly linked chain of entries. */
6853 Mnode * next;
6854 Mnode * prev;
6855 /* The maximum offset into the code at which this entry can be placed. While
6856 pushing fixes for forward references, all entries are sorted in order
6857 of increasing max_address. */
6858 HOST_WIDE_INT max_address;
6859 /* Similarly for an entry inserted for a backwards ref. */
6860 HOST_WIDE_INT min_address;
6861 /* The number of fixes referencing this entry. This can become zero
6862 if we "unpush" an entry. In this case we ignore the entry when we
6863 come to emit the code. */
6864 int refcount;
6865 /* The offset from the start of the minipool. */
6866 HOST_WIDE_INT offset;
6867 /* The value in the table. */
6868 rtx value;
6869 /* The mode of value. */
6870 enum machine_mode mode;
6871 /* The size of the value. With iWMMXt enabled
6872 sizes > 4 also imply an alignment of 8 bytes. */
6873 int fix_size;
6876 struct minipool_fixup
6878 Mfix * next;
6879 rtx insn;
6880 HOST_WIDE_INT address;
6881 rtx * loc;
6882 enum machine_mode mode;
6883 int fix_size;
6884 rtx value;
6885 Mnode * minipool;
6886 HOST_WIDE_INT forwards;
6887 HOST_WIDE_INT backwards;
6890 /* Fixes less than a word need padding out to a word boundary. */
6891 #define MINIPOOL_FIX_SIZE(mode) \
6892 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
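/* For example, MINIPOOL_FIX_SIZE (QImode) and MINIPOOL_FIX_SIZE (HImode)
   both evaluate to 4, while MINIPOOL_FIX_SIZE (DImode) and
   MINIPOOL_FIX_SIZE (DFmode) evaluate to 8.  */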
6894 static Mnode * minipool_vector_head;
6895 static Mnode * minipool_vector_tail;
6896 static rtx minipool_vector_label;
6898 /* The linked list of all minipool fixes required for this function. */
6899 Mfix * minipool_fix_head;
6900 Mfix * minipool_fix_tail;
6901 /* The fix entry for the current minipool, once it has been placed. */
6902 Mfix * minipool_barrier;
6904 /* Determines if INSN is the start of a jump table. Returns the end
6905 of the TABLE or NULL_RTX. */
6906 static rtx
6907 is_jump_table (rtx insn)
6909 rtx table;
6911 if (GET_CODE (insn) == JUMP_INSN
6912 && JUMP_LABEL (insn) != NULL
6913 && ((table = next_real_insn (JUMP_LABEL (insn)))
6914 == next_real_insn (insn))
6915 && table != NULL
6916 && GET_CODE (table) == JUMP_INSN
6917 && (GET_CODE (PATTERN (table)) == ADDR_VEC
6918 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
6919 return table;
6921 return NULL_RTX;
6924 #ifndef JUMP_TABLES_IN_TEXT_SECTION
6925 #define JUMP_TABLES_IN_TEXT_SECTION 0
6926 #endif
6928 static HOST_WIDE_INT
6929 get_jump_table_size (rtx insn)
6931 /* ADDR_VECs only take room if read-only data goes into the text
6932 section. */
6933 if (JUMP_TABLES_IN_TEXT_SECTION
6934 #if !defined(READONLY_DATA_SECTION) && !defined(READONLY_DATA_SECTION_ASM_OP)
6935 || 1
6936 #endif
6939 rtx body = PATTERN (insn);
6940 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
6942 return GET_MODE_SIZE (GET_MODE (body)) * XVECLEN (body, elt);
6945 return 0;
6948 /* Move a minipool fix MP from its current location to before MAX_MP.
6949 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
6950 constraints may need updating. */
6951 static Mnode *
6952 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
6953 HOST_WIDE_INT max_address)
6955 /* The code below assumes these are different. */
6956 gcc_assert (mp != max_mp);
6958 if (max_mp == NULL)
6960 if (max_address < mp->max_address)
6961 mp->max_address = max_address;
6963 else
6965 if (max_address > max_mp->max_address - mp->fix_size)
6966 mp->max_address = max_mp->max_address - mp->fix_size;
6967 else
6968 mp->max_address = max_address;
6970 /* Unlink MP from its current position. Since max_mp is non-null,
6971 mp->prev must be non-null. */
6972 mp->prev->next = mp->next;
6973 if (mp->next != NULL)
6974 mp->next->prev = mp->prev;
6975 else
6976 minipool_vector_tail = mp->prev;
6978 /* Re-insert it before MAX_MP. */
6979 mp->next = max_mp;
6980 mp->prev = max_mp->prev;
6981 max_mp->prev = mp;
6983 if (mp->prev != NULL)
6984 mp->prev->next = mp;
6985 else
6986 minipool_vector_head = mp;
6989 /* Save the new entry. */
6990 max_mp = mp;
6992 /* Scan over the preceding entries and adjust their addresses as
6993 required. */
6994 while (mp->prev != NULL
6995 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
6997 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
6998 mp = mp->prev;
7001 return max_mp;
7004 /* Add a constant to the minipool for a forward reference. Returns the
7005 node added or NULL if the constant will not fit in this pool. */
7006 static Mnode *
7007 add_minipool_forward_ref (Mfix *fix)
7009 /* If set, max_mp is the first pool_entry that has a lower
7010 constraint than the one we are trying to add. */
7011 Mnode * max_mp = NULL;
7012 HOST_WIDE_INT max_address = fix->address + fix->forwards;
7013 Mnode * mp;
7015 /* If this fix's address is greater than the address of the first
7016 entry, then we can't put the fix in this pool. We subtract the
7017 size of the current fix to ensure that if the table is fully
7018 packed we still have enough room to insert this value by shuffling
7019 the other fixes forwards. */
7020 if (minipool_vector_head &&
7021 fix->address >= minipool_vector_head->max_address - fix->fix_size)
7022 return NULL;
7024 /* Scan the pool to see if a constant with the same value has
7025 already been added. While we are doing this, also note the
7026 location where we must insert the constant if it doesn't already
7027 exist. */
7028 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
7030 if (GET_CODE (fix->value) == GET_CODE (mp->value)
7031 && fix->mode == mp->mode
7032 && (GET_CODE (fix->value) != CODE_LABEL
7033 || (CODE_LABEL_NUMBER (fix->value)
7034 == CODE_LABEL_NUMBER (mp->value)))
7035 && rtx_equal_p (fix->value, mp->value))
7037 /* More than one fix references this entry. */
7038 mp->refcount++;
7039 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
7042 /* Note the insertion point if necessary. */
7043 if (max_mp == NULL
7044 && mp->max_address > max_address)
7045 max_mp = mp;
7047 /* If we are inserting an 8-byte aligned quantity and
7048 we have not already found an insertion point, then
7049 make sure that all such 8-byte aligned quantities are
7050 placed at the start of the pool. */
7051 if (ARM_DOUBLEWORD_ALIGN
7052 && max_mp == NULL
7053 && fix->fix_size == 8
7054 && mp->fix_size != 8)
7056 max_mp = mp;
7057 max_address = mp->max_address;
7061 /* The value is not currently in the minipool, so we need to create
7062 a new entry for it. If MAX_MP is NULL, the entry will be put on
7063 the end of the list since the placement is less constrained than
7064 any existing entry. Otherwise, we insert the new fix before
7065 MAX_MP and, if necessary, adjust the constraints on the other
7066 entries. */
7067 mp = xmalloc (sizeof (* mp));
7068 mp->fix_size = fix->fix_size;
7069 mp->mode = fix->mode;
7070 mp->value = fix->value;
7071 mp->refcount = 1;
7072 /* Not yet required for a backwards ref. */
7073 mp->min_address = -65536;
7075 if (max_mp == NULL)
7077 mp->max_address = max_address;
7078 mp->next = NULL;
7079 mp->prev = minipool_vector_tail;
7081 if (mp->prev == NULL)
7083 minipool_vector_head = mp;
7084 minipool_vector_label = gen_label_rtx ();
7086 else
7087 mp->prev->next = mp;
7089 minipool_vector_tail = mp;
7091 else
7093 if (max_address > max_mp->max_address - mp->fix_size)
7094 mp->max_address = max_mp->max_address - mp->fix_size;
7095 else
7096 mp->max_address = max_address;
7098 mp->next = max_mp;
7099 mp->prev = max_mp->prev;
7100 max_mp->prev = mp;
7101 if (mp->prev != NULL)
7102 mp->prev->next = mp;
7103 else
7104 minipool_vector_head = mp;
7107 /* Save the new entry. */
7108 max_mp = mp;
7110 /* Scan over the preceding entries and adjust their addresses as
7111 required. */
7112 while (mp->prev != NULL
7113 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
7115 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
7116 mp = mp->prev;
7119 return max_mp;
7122 static Mnode *
7123 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
7124 HOST_WIDE_INT min_address)
7126 HOST_WIDE_INT offset;
7128 /* The code below assumes these are different. */
7129 gcc_assert (mp != min_mp);
7131 if (min_mp == NULL)
7133 if (min_address > mp->min_address)
7134 mp->min_address = min_address;
7136 else
7138 /* We will adjust this below if it is too loose. */
7139 mp->min_address = min_address;
7141 /* Unlink MP from its current position. Since min_mp is non-null,
7142 mp->next must be non-null. */
7143 mp->next->prev = mp->prev;
7144 if (mp->prev != NULL)
7145 mp->prev->next = mp->next;
7146 else
7147 minipool_vector_head = mp->next;
7149 /* Reinsert it after MIN_MP. */
7150 mp->prev = min_mp;
7151 mp->next = min_mp->next;
7152 min_mp->next = mp;
7153 if (mp->next != NULL)
7154 mp->next->prev = mp;
7155 else
7156 minipool_vector_tail = mp;
7159 min_mp = mp;
7161 offset = 0;
7162 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
7164 mp->offset = offset;
7165 if (mp->refcount > 0)
7166 offset += mp->fix_size;
7168 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
7169 mp->next->min_address = mp->min_address + mp->fix_size;
7172 return min_mp;
7175 /* Add a constant to the minipool for a backward reference. Returns the
7176 node added or NULL if the constant will not fit in this pool.
7178 Note that the code for insertion for a backwards reference can be
7179 somewhat confusing because the calculated offsets for each fix do
7180 not take into account the size of the pool (which is still under
7181 construction). */
7182 static Mnode *
7183 add_minipool_backward_ref (Mfix *fix)
7185 /* If set, min_mp is the last pool_entry that has a lower constraint
7186 than the one we are trying to add. */
7187 Mnode *min_mp = NULL;
7188 /* This can be negative, since it is only a constraint. */
7189 HOST_WIDE_INT min_address = fix->address - fix->backwards;
7190 Mnode *mp;
7192 /* If we can't reach the current pool from this insn, or if we can't
7193 insert this entry at the end of the pool without pushing other
7194 fixes out of range, then we don't try. This ensures that we
7195 can't fail later on. */
7196 if (min_address >= minipool_barrier->address
7197 || (minipool_vector_tail->min_address + fix->fix_size
7198 >= minipool_barrier->address))
7199 return NULL;
7201 /* Scan the pool to see if a constant with the same value has
7202 already been added. While we are doing this, also note the
7203 location where we must insert the constant if it doesn't already
7204 exist. */
7205 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
7207 if (GET_CODE (fix->value) == GET_CODE (mp->value)
7208 && fix->mode == mp->mode
7209 && (GET_CODE (fix->value) != CODE_LABEL
7210 || (CODE_LABEL_NUMBER (fix->value)
7211 == CODE_LABEL_NUMBER (mp->value)))
7212 && rtx_equal_p (fix->value, mp->value)
7213 /* Check that there is enough slack to move this entry to the
7214 end of the table (this is conservative). */
7215 && (mp->max_address
7216 > (minipool_barrier->address
7217 + minipool_vector_tail->offset
7218 + minipool_vector_tail->fix_size)))
7220 mp->refcount++;
7221 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
7224 if (min_mp != NULL)
7225 mp->min_address += fix->fix_size;
7226 else
7228 /* Note the insertion point if necessary. */
7229 if (mp->min_address < min_address)
7231 /* For now, we do not allow the insertion of 8-byte alignment
7232 requiring nodes anywhere but at the start of the pool. */
7233 if (ARM_DOUBLEWORD_ALIGN
7234 && fix->fix_size == 8 && mp->fix_size != 8)
7235 return NULL;
7236 else
7237 min_mp = mp;
7239 else if (mp->max_address
7240 < minipool_barrier->address + mp->offset + fix->fix_size)
7242 /* Inserting before this entry would push the fix beyond
7243 its maximum address (which can happen if we have
7244 re-located a forwards fix); force the new fix to come
7245 after it. */
7246 min_mp = mp;
7247 min_address = mp->min_address + fix->fix_size;
7249 /* If we are inserting an 8-byte aligned quantity and
7250 we have not already found an insertion point, then
7251 make sure that all such 8-byte aligned quantities are
7252 placed at the start of the pool. */
7253 else if (ARM_DOUBLEWORD_ALIGN
7254 && min_mp == NULL
7255 && fix->fix_size == 8
7256 && mp->fix_size < 8)
7258 min_mp = mp;
7259 min_address = mp->min_address + fix->fix_size;
7264 /* We need to create a new entry. */
7265 mp = xmalloc (sizeof (* mp));
7266 mp->fix_size = fix->fix_size;
7267 mp->mode = fix->mode;
7268 mp->value = fix->value;
7269 mp->refcount = 1;
7270 mp->max_address = minipool_barrier->address + 65536;
7272 mp->min_address = min_address;
7274 if (min_mp == NULL)
7276 mp->prev = NULL;
7277 mp->next = minipool_vector_head;
7279 if (mp->next == NULL)
7281 minipool_vector_tail = mp;
7282 minipool_vector_label = gen_label_rtx ();
7284 else
7285 mp->next->prev = mp;
7287 minipool_vector_head = mp;
7289 else
7291 mp->next = min_mp->next;
7292 mp->prev = min_mp;
7293 min_mp->next = mp;
7295 if (mp->next != NULL)
7296 mp->next->prev = mp;
7297 else
7298 minipool_vector_tail = mp;
7301 /* Save the new entry. */
7302 min_mp = mp;
7304 if (mp->prev)
7305 mp = mp->prev;
7306 else
7307 mp->offset = 0;
7309 /* Scan over the following entries and adjust their offsets. */
7310 while (mp->next != NULL)
7312 if (mp->next->min_address < mp->min_address + mp->fix_size)
7313 mp->next->min_address = mp->min_address + mp->fix_size;
7315 if (mp->refcount)
7316 mp->next->offset = mp->offset + mp->fix_size;
7317 else
7318 mp->next->offset = mp->offset;
7320 mp = mp->next;
7323 return min_mp;
7326 static void
7327 assign_minipool_offsets (Mfix *barrier)
7329 HOST_WIDE_INT offset = 0;
7330 Mnode *mp;
7332 minipool_barrier = barrier;
7334 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
7336 mp->offset = offset;
7338 if (mp->refcount > 0)
7339 offset += mp->fix_size;
7343 /* Output the literal table */
7344 static void
7345 dump_minipool (rtx scan)
7347 Mnode * mp;
7348 Mnode * nmp;
7349 int align64 = 0;
7351 if (ARM_DOUBLEWORD_ALIGN)
7352 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
7353 if (mp->refcount > 0 && mp->fix_size == 8)
7355 align64 = 1;
7356 break;
7359 if (dump_file)
7360 fprintf (dump_file,
7361 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
7362 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
7364 scan = emit_label_after (gen_label_rtx (), scan);
7365 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
7366 scan = emit_label_after (minipool_vector_label, scan);
7368 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
7370 if (mp->refcount > 0)
7372 if (dump_file)
7374 fprintf (dump_file,
7375 ";; Offset %u, min %ld, max %ld ",
7376 (unsigned) mp->offset, (unsigned long) mp->min_address,
7377 (unsigned long) mp->max_address);
7378 arm_print_value (dump_file, mp->value);
7379 fputc ('\n', dump_file);
7382 switch (mp->fix_size)
7384 #ifdef HAVE_consttable_1
7385 case 1:
7386 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
7387 break;
7389 #endif
7390 #ifdef HAVE_consttable_2
7391 case 2:
7392 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
7393 break;
7395 #endif
7396 #ifdef HAVE_consttable_4
7397 case 4:
7398 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
7399 break;
7401 #endif
7402 #ifdef HAVE_consttable_8
7403 case 8:
7404 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
7405 break;
7407 #endif
7408 default:
7409 gcc_unreachable ();
7413 nmp = mp->next;
7414 free (mp);
7417 minipool_vector_head = minipool_vector_tail = NULL;
7418 scan = emit_insn_after (gen_consttable_end (), scan);
7419 scan = emit_barrier_after (scan);
7422 /* Return the cost of forcibly inserting a barrier after INSN. */
7423 static int
7424 arm_barrier_cost (rtx insn)
7426 /* Basing the location of the pool on the loop depth is preferable,
7427 but at the moment, the basic block information seems to be
7428 corrupted by this stage of the compilation. */
7429 int base_cost = 50;
7430 rtx next = next_nonnote_insn (insn);
7432 if (next != NULL && GET_CODE (next) == CODE_LABEL)
7433 base_cost -= 20;
7435 switch (GET_CODE (insn))
7437 case CODE_LABEL:
7438 /* It will always be better to place the table before the label, rather
7439 than after it. */
7440 return 50;
7442 case INSN:
7443 case CALL_INSN:
7444 return base_cost;
7446 case JUMP_INSN:
7447 return base_cost - 10;
7449 default:
7450 return base_cost + 10;
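/* Illustrative: with the weights above, a JUMP_INSN that is immediately
   followed by a CODE_LABEL scores 50 - 20 - 10 = 20, making it a
   comparatively cheap place to force a barrier.  */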
7454 /* Find the best place in the insn stream in the range
7455 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
7456 Create the barrier by inserting a jump and add a new fix entry for
7457 it. */
7458 static Mfix *
7459 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
7461 HOST_WIDE_INT count = 0;
7462 rtx barrier;
7463 rtx from = fix->insn;
7464 rtx selected = from;
7465 int selected_cost;
7466 HOST_WIDE_INT selected_address;
7467 Mfix * new_fix;
7468 HOST_WIDE_INT max_count = max_address - fix->address;
7469 rtx label = gen_label_rtx ();
7471 selected_cost = arm_barrier_cost (from);
7472 selected_address = fix->address;
7474 while (from && count < max_count)
7476 rtx tmp;
7477 int new_cost;
7479 /* This code shouldn't have been called if there was a natural barrier
7480 within range. */
7481 gcc_assert (GET_CODE (from) != BARRIER);
7483 /* Count the length of this insn. */
7484 count += get_attr_length (from);
7486 /* If there is a jump table, add its length. */
7487 tmp = is_jump_table (from);
7488 if (tmp != NULL)
7490 count += get_jump_table_size (tmp);
7492 /* Jump tables aren't in a basic block, so base the cost on
7493 the dispatch insn. If we select this location, we will
7494 still put the pool after the table. */
7495 new_cost = arm_barrier_cost (from);
7497 if (count < max_count && new_cost <= selected_cost)
7499 selected = tmp;
7500 selected_cost = new_cost;
7501 selected_address = fix->address + count;
7504 /* Continue after the dispatch table. */
7505 from = NEXT_INSN (tmp);
7506 continue;
7509 new_cost = arm_barrier_cost (from);
7511 if (count < max_count && new_cost <= selected_cost)
7513 selected = from;
7514 selected_cost = new_cost;
7515 selected_address = fix->address + count;
7518 from = NEXT_INSN (from);
7521 /* Create a new JUMP_INSN that branches around a barrier. */
7522 from = emit_jump_insn_after (gen_jump (label), selected);
7523 JUMP_LABEL (from) = label;
7524 barrier = emit_barrier_after (from);
7525 emit_label_after (label, barrier);
7527 /* Create a minipool barrier entry for the new barrier. */
7528 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
7529 new_fix->insn = barrier;
7530 new_fix->address = selected_address;
7531 new_fix->next = fix->next;
7532 fix->next = new_fix;
7534 return new_fix;
7537 /* Record that there is a natural barrier in the insn stream at
7538 ADDRESS. */
7539 static void
7540 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
7542 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
7544 fix->insn = insn;
7545 fix->address = address;
7547 fix->next = NULL;
7548 if (minipool_fix_head != NULL)
7549 minipool_fix_tail->next = fix;
7550 else
7551 minipool_fix_head = fix;
7553 minipool_fix_tail = fix;
7556 /* Record INSN, which will need fixing up to load a value from the
7557 minipool. ADDRESS is the offset of the insn since the start of the
7558 function; LOC is a pointer to the part of the insn which requires
7559 fixing; VALUE is the constant that must be loaded, which is of type
7560 MODE. */
7561 static void
7562 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
7563 enum machine_mode mode, rtx value)
7565 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
7567 #ifdef AOF_ASSEMBLER
7568 /* PIC symbol references need to be converted into offsets into the
7569 based area. */
7570 /* XXX This shouldn't be done here. */
7571 if (flag_pic && GET_CODE (value) == SYMBOL_REF)
7572 value = aof_pic_entry (value);
7573 #endif /* AOF_ASSEMBLER */
7575 fix->insn = insn;
7576 fix->address = address;
7577 fix->loc = loc;
7578 fix->mode = mode;
7579 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
7580 fix->value = value;
7581 fix->forwards = get_attr_pool_range (insn);
7582 fix->backwards = get_attr_neg_pool_range (insn);
7583 fix->minipool = NULL;
7585 /* If an insn doesn't have a range defined for it, then it isn't
7586 expecting to be reworked by this code. Better to stop now than
7587 to generate duff assembly code. */
7588 gcc_assert (fix->forwards || fix->backwards);
7590 /* With AAPCS/iWMMXt enabled, the pool is aligned to an 8-byte boundary.
7591 So there might be an empty word before the start of the pool.
7592 Hence we reduce the forward range by 4 to allow for this
7593 possibility. */
7594 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size == 8)
7595 fix->forwards -= 4;
7597 if (dump_file)
7599 fprintf (dump_file,
7600 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
7601 GET_MODE_NAME (mode),
7602 INSN_UID (insn), (unsigned long) address,
7603 -1 * (long)fix->backwards, (long)fix->forwards);
7604 arm_print_value (dump_file, fix->value);
7605 fprintf (dump_file, "\n");
7608 /* Add it to the chain of fixes. */
7609 fix->next = NULL;
7611 if (minipool_fix_head != NULL)
7612 minipool_fix_tail->next = fix;
7613 else
7614 minipool_fix_head = fix;
7616 minipool_fix_tail = fix;
7619 /* Return the cost of synthesizing a 64-bit constant VAL inline.
7620 Returns the number of insns needed, or 99 if we don't know how to
7621 do it. */
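/* Illustrative only: a 64-bit value whose high and low words are both
   valid ARM immediates (e.g. 0x0000000500000003) should cost 2, one insn
   per word; each half that needs multi-insn synthesis raises the total
   accordingly.  */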
7623 arm_const_double_inline_cost (rtx val)
7625 rtx lowpart, highpart;
7626 enum machine_mode mode;
7628 mode = GET_MODE (val);
7630 if (mode == VOIDmode)
7631 mode = DImode;
7633 gcc_assert (GET_MODE_SIZE (mode) == 8);
7635 lowpart = gen_lowpart (SImode, val);
7636 highpart = gen_highpart_mode (SImode, mode, val);
7638 gcc_assert (GET_CODE (lowpart) == CONST_INT);
7639 gcc_assert (GET_CODE (highpart) == CONST_INT);
7641 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
7642 NULL_RTX, NULL_RTX, 0, 0)
7643 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
7644 NULL_RTX, NULL_RTX, 0, 0));
7647 /* Return true if it is worthwhile to split a 64-bit constant into two
7648 32-bit operations. This is the case if optimizing for size, or
7649 if we have load delay slots, or if one 32-bit part can be done with
7650 a single data operation. */
7651 bool
7652 arm_const_double_by_parts (rtx val)
7654 enum machine_mode mode = GET_MODE (val);
7655 rtx part;
7657 if (optimize_size || arm_ld_sched)
7658 return true;
7660 if (mode == VOIDmode)
7661 mode = DImode;
7663 part = gen_highpart_mode (SImode, mode, val);
7665 gcc_assert (GET_CODE (part) == CONST_INT);
7667 if (const_ok_for_arm (INTVAL (part))
7668 || const_ok_for_arm (~INTVAL (part)))
7669 return true;
7671 part = gen_lowpart (SImode, val);
7673 gcc_assert (GET_CODE (part) == CONST_INT);
7675 if (const_ok_for_arm (INTVAL (part))
7676 || const_ok_for_arm (~INTVAL (part)))
7677 return true;
7679 return false;
7682 /* Scan INSN and note any of its operands that need fixing.
7683 If DO_PUSHES is false we do not actually push any of the fixups
7684 needed. The function returns TRUE if any fixups were needed/pushed.
7685 This is used by arm_memory_load_p() which needs to know about loads
7686 of constants that will be converted into minipool loads. */
7687 static bool
7688 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
7690 bool result = false;
7691 int opno;
7693 extract_insn (insn);
7695 if (!constrain_operands (1))
7696 fatal_insn_not_found (insn);
7698 if (recog_data.n_alternatives == 0)
7699 return false;
7701 /* Fill in recog_op_alt with information about the constraints of
7702 this insn. */
7703 preprocess_constraints ();
7705 for (opno = 0; opno < recog_data.n_operands; opno++)
7707 /* Things we need to fix can only occur in inputs. */
7708 if (recog_data.operand_type[opno] != OP_IN)
7709 continue;
7711 /* If this alternative is a memory reference, then any mention
7712 of constants in this alternative is really to fool reload
7713 into allowing us to accept one there. We need to fix them up
7714 now so that we output the right code. */
7715 if (recog_op_alt[opno][which_alternative].memory_ok)
7717 rtx op = recog_data.operand[opno];
7719 if (CONSTANT_P (op))
7721 if (do_pushes)
7722 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
7723 recog_data.operand_mode[opno], op);
7724 result = true;
7726 else if (GET_CODE (op) == MEM
7727 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
7728 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
7730 if (do_pushes)
7732 rtx cop = avoid_constant_pool_reference (op);
7734 /* Casting the address of something to a mode narrower
7735 than a word can cause avoid_constant_pool_reference()
7736 to return the pool reference itself. That's no good to
7737 us here. Let's just hope that we can use the
7738 constant pool value directly. */
7739 if (op == cop)
7740 cop = get_pool_constant (XEXP (op, 0));
7742 push_minipool_fix (insn, address,
7743 recog_data.operand_loc[opno],
7744 recog_data.operand_mode[opno], cop);
7747 result = true;
7752 return result;
7755 /* GCC puts the pool in the wrong place for ARM, since we can only
7756 load addresses a limited distance around the pc. We do some
7757 special munging to move the constant pool values to the correct
7758 point in the code. */
7759 static void
7760 arm_reorg (void)
7762 rtx insn;
7763 HOST_WIDE_INT address = 0;
7764 Mfix * fix;
7766 minipool_fix_head = minipool_fix_tail = NULL;
7768 /* The first insn must always be a note, or the code below won't
7769 scan it properly. */
7770 insn = get_insns ();
7771 gcc_assert (GET_CODE (insn) == NOTE);
7773 /* Scan all the insns and record the operands that will need fixing. */
7774 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
7776 if (TARGET_CIRRUS_FIX_INVALID_INSNS
7777 && (arm_cirrus_insn_p (insn)
7778 || GET_CODE (insn) == JUMP_INSN
7779 || arm_memory_load_p (insn)))
7780 cirrus_reorg (insn);
7782 if (GET_CODE (insn) == BARRIER)
7783 push_minipool_barrier (insn, address);
7784 else if (INSN_P (insn))
7786 rtx table;
7788 note_invalid_constants (insn, address, true);
7789 address += get_attr_length (insn);
7791 /* If the insn is a vector jump, add the size of the table
7792 and skip the table. */
7793 if ((table = is_jump_table (insn)) != NULL)
7795 address += get_jump_table_size (table);
7796 insn = table;
7801 fix = minipool_fix_head;
7803 /* Now scan the fixups and perform the required changes. */
7804 while (fix)
7806 Mfix * ftmp;
7807 Mfix * fdel;
7808 Mfix * last_added_fix;
7809 Mfix * last_barrier = NULL;
7810 Mfix * this_fix;
7812 /* Skip any further barriers before the next fix. */
7813 while (fix && GET_CODE (fix->insn) == BARRIER)
7814 fix = fix->next;
7816 /* No more fixes. */
7817 if (fix == NULL)
7818 break;
7820 last_added_fix = NULL;
7822 for (ftmp = fix; ftmp; ftmp = ftmp->next)
7824 if (GET_CODE (ftmp->insn) == BARRIER)
7826 if (ftmp->address >= minipool_vector_head->max_address)
7827 break;
7829 last_barrier = ftmp;
7831 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
7832 break;
7834 last_added_fix = ftmp; /* Keep track of the last fix added. */
7837 /* If we found a barrier, drop back to that; any fixes that we
7838 could have reached but come after the barrier will now go in
7839 the next mini-pool. */
7840 if (last_barrier != NULL)
7842 /* Reduce the refcount for those fixes that won't go into this
7843 pool after all. */
7844 for (fdel = last_barrier->next;
7845 fdel && fdel != ftmp;
7846 fdel = fdel->next)
7848 fdel->minipool->refcount--;
7849 fdel->minipool = NULL;
7852 ftmp = last_barrier;
7854 else
7856 /* ftmp is the first fix that we can't fit into this pool and
7857 there are no natural barriers that we could use. Insert a
7858 new barrier in the code somewhere between the previous
7859 fix and this one, and arrange to jump around it. */
7860 HOST_WIDE_INT max_address;
7862 /* The last item on the list of fixes must be a barrier, so
7863 we can never run off the end of the list of fixes without
7864 last_barrier being set. */
7865 gcc_assert (ftmp);
7867 max_address = minipool_vector_head->max_address;
7868 /* Check that there isn't another fix that is in range that
7869 we couldn't fit into this pool because the pool was
7870 already too large: we need to put the pool before such an
7871 instruction. */
7872 if (ftmp->address < max_address)
7873 max_address = ftmp->address;
7875 last_barrier = create_fix_barrier (last_added_fix, max_address);
7878 assign_minipool_offsets (last_barrier);
7880 while (ftmp)
7882 if (GET_CODE (ftmp->insn) != BARRIER
7883 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
7884 == NULL))
7885 break;
7887 ftmp = ftmp->next;
7890 /* Scan over the fixes we have identified for this pool, fixing them
7891 up and adding the constants to the pool itself. */
7892 for (this_fix = fix; this_fix && ftmp != this_fix;
7893 this_fix = this_fix->next)
7894 if (GET_CODE (this_fix->insn) != BARRIER)
7896 rtx addr
7897 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
7898 minipool_vector_label),
7899 this_fix->minipool->offset);
7900 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
7903 dump_minipool (last_barrier->insn);
7904 fix = ftmp;
7907 /* From now on we must synthesize any constants that we can't handle
7908 directly. This can happen if the RTL gets split during final
7909 instruction generation. */
7910 after_arm_reorg = 1;
7912 /* Free the minipool memory. */
7913 obstack_free (&minipool_obstack, minipool_startobj);
7916 /* Routines to output assembly language. */
7918 /* If the rtx is the correct value then return the string of the number.
7919 In this way we can ensure that valid double constants are generated even
7920 when cross compiling. */
7921 const char *
7922 fp_immediate_constant (rtx x)
7924 REAL_VALUE_TYPE r;
7925 int i;
7927 if (!fp_consts_inited)
7928 init_fp_table ();
7930 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7931 for (i = 0; i < 8; i++)
7932 if (REAL_VALUES_EQUAL (r, values_fp[i]))
7933 return strings_fp[i];
7935 gcc_unreachable ();
7938 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
7939 static const char *
7940 fp_const_from_val (REAL_VALUE_TYPE *r)
7942 int i;
7944 if (!fp_consts_inited)
7945 init_fp_table ();
7947 for (i = 0; i < 8; i++)
7948 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
7949 return strings_fp[i];
7951 gcc_unreachable ();
7954 /* Output the operands of a LDM/STM instruction to STREAM.
7955 MASK is the ARM register set mask of which only bits 0-15 are important.
7956 REG is the base register, either the frame pointer or the stack pointer;
7957 INSTR is the possibly suffixed load or store instruction. */
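/* For instance (illustrative), a MASK of 0x4070 selects r4, r5, r6 and lr,
   so the braced register list printed below would be {r4, r5, r6, lr}.  */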
7959 static void
7960 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
7961 unsigned long mask)
7963 unsigned i;
7964 bool not_first = FALSE;
7966 fputc ('\t', stream);
7967 asm_fprintf (stream, instr, reg);
7968 fputs (", {", stream);
7970 for (i = 0; i <= LAST_ARM_REGNUM; i++)
7971 if (mask & (1 << i))
7973 if (not_first)
7974 fprintf (stream, ", ");
7976 asm_fprintf (stream, "%r", i);
7977 not_first = TRUE;
7980 fprintf (stream, "}\n");
7984 /* Output a FLDMX instruction to STREAM.
7985 BASE is the register containing the address.
7986 REG and COUNT specify the register range.
7987 Extra registers may be added to avoid hardware bugs. */
7989 static void
7990 arm_output_fldmx (FILE * stream, unsigned int base, int reg, int count)
7992 int i;
7994 /* Work around the ARM10 VFPr1 bug. */
7995 if (count == 2 && !arm_arch6)
7997 if (reg == 15)
7998 reg--;
7999 count++;
8002 fputc ('\t', stream);
8003 asm_fprintf (stream, "fldmfdx\t%r!, {", base);
8005 for (i = reg; i < reg + count; i++)
8007 if (i > reg)
8008 fputs (", ", stream);
8009 asm_fprintf (stream, "d%d", i);
8011 fputs ("}\n", stream);
8016 /* Output the assembly for a store multiple. */
8018 const char *
8019 vfp_output_fstmx (rtx * operands)
8021 char pattern[100];
8022 int p;
8023 int base;
8024 int i;
8026 strcpy (pattern, "fstmfdx\t%m0!, {%P1");
8027 p = strlen (pattern);
8029 gcc_assert (GET_CODE (operands[1]) == REG);
8031 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
8032 for (i = 1; i < XVECLEN (operands[2], 0); i++)
8034 p += sprintf (&pattern[p], ", d%d", base + i);
8036 strcpy (&pattern[p], "}");
8038 output_asm_insn (pattern, operands);
8039 return "";
8043 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
8044 number of bytes pushed. */
8046 static int
8047 vfp_emit_fstmx (int base_reg, int count)
8049 rtx par;
8050 rtx dwarf;
8051 rtx tmp, reg;
8052 int i;
8054 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
8055 register pairs are stored by a store multiple insn. We avoid this
8056 by pushing an extra pair. */
8057 if (count == 2 && !arm_arch6)
8059 if (base_reg == LAST_VFP_REGNUM - 3)
8060 base_reg -= 2;
8061 count++;
8064 /* ??? The frame layout is implementation defined. We describe
8065 standard format 1 (equivalent to a FSTMD insn and unused pad word).
8066 We really need some way of representing the whole block so that the
8067 unwinder can figure it out at runtime. */
8068 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
8069 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
8071 reg = gen_rtx_REG (DFmode, base_reg);
8072 base_reg += 2;
8074 XVECEXP (par, 0, 0)
8075 = gen_rtx_SET (VOIDmode,
8076 gen_rtx_MEM (BLKmode,
8077 gen_rtx_PRE_DEC (BLKmode, stack_pointer_rtx)),
8078 gen_rtx_UNSPEC (BLKmode,
8079 gen_rtvec (1, reg),
8080 UNSPEC_PUSH_MULT));
8082 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8083 gen_rtx_PLUS (SImode, stack_pointer_rtx,
8084 GEN_INT (-(count * 8 + 4))));
8085 RTX_FRAME_RELATED_P (tmp) = 1;
8086 XVECEXP (dwarf, 0, 0) = tmp;
8088 tmp = gen_rtx_SET (VOIDmode,
8089 gen_rtx_MEM (DFmode, stack_pointer_rtx),
8090 reg);
8091 RTX_FRAME_RELATED_P (tmp) = 1;
8092 XVECEXP (dwarf, 0, 1) = tmp;
8094 for (i = 1; i < count; i++)
8096 reg = gen_rtx_REG (DFmode, base_reg);
8097 base_reg += 2;
8098 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
8100 tmp = gen_rtx_SET (VOIDmode,
8101 gen_rtx_MEM (DFmode,
8102 gen_rtx_PLUS (SImode,
8103 stack_pointer_rtx,
8104 GEN_INT (i * 8))),
8105 reg);
8106 RTX_FRAME_RELATED_P (tmp) = 1;
8107 XVECEXP (dwarf, 0, i + 1) = tmp;
8110 par = emit_insn (par);
8111 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
8112 REG_NOTES (par));
8113 RTX_FRAME_RELATED_P (par) = 1;
8115 return count * 8 + 4;
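/* An illustrative, standalone sketch (not part of GCC, guarded out of
   the build) of the size computation above.  Each saved register pair
   takes 8 bytes plus one 4 byte FSTMX pad word, and a block of exactly
   two pairs is grown by one on pre-ARMv6 cores to avoid the ARM10
   VFPr1 erratum.  The helper name is hypothetical.  */
#if 0
static int
fstmx_block_size (int count, int have_armv6)
{
  if (count == 2 && !have_armv6)
    count++;			/* Never store exactly two pairs.  */
  return count * 8 + 4;		/* 8 bytes per pair plus the pad word.  */
}

/* fstmx_block_size (2, 0) == 28, fstmx_block_size (2, 1) == 20.  */
#endif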
8119 /* Output a 'call' insn. */
8120 const char *
8121 output_call (rtx *operands)
8123 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
8125 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
8126 if (REGNO (operands[0]) == LR_REGNUM)
8128 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
8129 output_asm_insn ("mov%?\t%0, %|lr", operands);
8132 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
8134 if (TARGET_INTERWORK || arm_arch4t)
8135 output_asm_insn ("bx%?\t%0", operands);
8136 else
8137 output_asm_insn ("mov%?\t%|pc, %0", operands);
8139 return "";
8142 /* Output a 'call' insn that is a reference in memory. */
8143 const char *
8144 output_call_mem (rtx *operands)
8146 if (TARGET_INTERWORK && !arm_arch5)
8148 output_asm_insn ("ldr%?\t%|ip, %0", operands);
8149 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
8150 output_asm_insn ("bx%?\t%|ip", operands);
8152 else if (regno_use_in (LR_REGNUM, operands[0]))
8154 /* LR is used in the memory address. We load the address in the
8155 first instruction. It's safe to use IP as the target of the
8156 load since the call will kill it anyway. */
8157 output_asm_insn ("ldr%?\t%|ip, %0", operands);
8158 if (arm_arch5)
8159 output_asm_insn ("blx%?\t%|ip", operands);
8160 else
8162 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
8163 if (arm_arch4t)
8164 output_asm_insn ("bx%?\t%|ip", operands);
8165 else
8166 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
8169 else
8171 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
8172 output_asm_insn ("ldr%?\t%|pc, %0", operands);
8175 return "";
8179 /* Output a move from arm registers to an fpa register.
8180 OPERANDS[0] is an fpa register.
8181 OPERANDS[1] is the first of the three arm registers holding the value. */
8182 const char *
8183 output_mov_long_double_fpa_from_arm (rtx *operands)
8185 int arm_reg0 = REGNO (operands[1]);
8186 rtx ops[3];
8188 gcc_assert (arm_reg0 != IP_REGNUM);
8190 ops[0] = gen_rtx_REG (SImode, arm_reg0);
8191 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
8192 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
8194 output_asm_insn ("stm%?fd\t%|sp!, {%0, %1, %2}", ops);
8195 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
8197 return "";
8200 /* Output a move from an fpa register to arm registers.
8201 OPERANDS[0] is the first of the three arm registers receiving the value.
8202 OPERANDS[1] is an fpa register. */
8203 const char *
8204 output_mov_long_double_arm_from_fpa (rtx *operands)
8206 int arm_reg0 = REGNO (operands[0]);
8207 rtx ops[3];
8209 gcc_assert (arm_reg0 != IP_REGNUM);
8211 ops[0] = gen_rtx_REG (SImode, arm_reg0);
8212 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
8213 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
8215 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
8216 output_asm_insn ("ldm%?fd\t%|sp!, {%0, %1, %2}", ops);
8217 return "";
8220 /* Output a move of a long double from arm registers to arm registers.
8221 OPERANDS[0] is the destination.
8222 OPERANDS[1] is the source. */
8223 const char *
8224 output_mov_long_double_arm_from_arm (rtx *operands)
8226 /* We have to be careful here because the two might overlap. */
8227 int dest_start = REGNO (operands[0]);
8228 int src_start = REGNO (operands[1]);
8229 rtx ops[2];
8230 int i;
8232 if (dest_start < src_start)
8234 for (i = 0; i < 3; i++)
8236 ops[0] = gen_rtx_REG (SImode, dest_start + i);
8237 ops[1] = gen_rtx_REG (SImode, src_start + i);
8238 output_asm_insn ("mov%?\t%0, %1", ops);
8241 else
8243 for (i = 2; i >= 0; i--)
8245 ops[0] = gen_rtx_REG (SImode, dest_start + i);
8246 ops[1] = gen_rtx_REG (SImode, src_start + i);
8247 output_asm_insn ("mov%?\t%0, %1", ops);
8251 return "";
8255 /* Output a move from arm registers to an fpa register.
8256 OPERANDS[0] is an fpa register.
8257 OPERANDS[1] is the first register of an arm register pair. */
8258 const char *
8259 output_mov_double_fpa_from_arm (rtx *operands)
8261 int arm_reg0 = REGNO (operands[1]);
8262 rtx ops[2];
8264 gcc_assert (arm_reg0 != IP_REGNUM);
8266 ops[0] = gen_rtx_REG (SImode, arm_reg0);
8267 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
8268 output_asm_insn ("stm%?fd\t%|sp!, {%0, %1}", ops);
8269 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
8270 return "";
8273 /* Output a move from an fpa register to arm registers.
8274 OPERANDS[0] is the first register of an arm register pair.
8275 OPERANDS[1] is an fpa register. */
8276 const char *
8277 output_mov_double_arm_from_fpa (rtx *operands)
8279 int arm_reg0 = REGNO (operands[0]);
8280 rtx ops[2];
8282 gcc_assert (arm_reg0 != IP_REGNUM);
8284 ops[0] = gen_rtx_REG (SImode, arm_reg0);
8285 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
8286 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
8287 output_asm_insn ("ldm%?fd\t%|sp!, {%0, %1}", ops);
8288 return "";
8291 /* Output a move between double words.
8292 It must be REG<-MEM or MEM<-REG (the constraints route the other
8293 combinations elsewhere) and all MEMs must be offsettable addresses. */
8294 const char *
8295 output_move_double (rtx *operands)
8297 enum rtx_code code0 = GET_CODE (operands[0]);
8298 enum rtx_code code1 = GET_CODE (operands[1]);
8299 rtx otherops[3];
8301 if (code0 == REG)
8303 int reg0 = REGNO (operands[0]);
8305 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
8307 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
8309 switch (GET_CODE (XEXP (operands[1], 0)))
8311 case REG:
8312 output_asm_insn ("ldm%?ia\t%m1, %M0", operands);
8313 break;
8315 case PRE_INC:
8316 gcc_assert (TARGET_LDRD);
8317 output_asm_insn ("ldr%?d\t%0, [%m1, #8]!", operands);
8318 break;
8320 case PRE_DEC:
8321 output_asm_insn ("ldm%?db\t%m1!, %M0", operands);
8322 break;
8324 case POST_INC:
8325 output_asm_insn ("ldm%?ia\t%m1!, %M0", operands);
8326 break;
8328 case POST_DEC:
8329 gcc_assert (TARGET_LDRD);
8330 output_asm_insn ("ldr%?d\t%0, [%m1], #-8", operands);
8331 break;
8333 case PRE_MODIFY:
8334 case POST_MODIFY:
8335 otherops[0] = operands[0];
8336 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
8337 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
8339 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
8341 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
8343 /* Registers overlap so split out the increment. */
8344 output_asm_insn ("add%?\t%1, %1, %2", otherops);
8345 output_asm_insn ("ldr%?d\t%0, [%1] @split", otherops);
8347 else
8348 output_asm_insn ("ldr%?d\t%0, [%1, %2]!", otherops);
8350 else
8352 /* We only allow constant increments, so this is safe. */
8353 output_asm_insn ("ldr%?d\t%0, [%1], %2", otherops);
8355 break;
8357 case LABEL_REF:
8358 case CONST:
8359 output_asm_insn ("adr%?\t%0, %1", operands);
8360 output_asm_insn ("ldm%?ia\t%0, %M0", operands);
8361 break;
8363 default:
8364 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
8365 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
8367 otherops[0] = operands[0];
8368 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
8369 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
8371 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
8373 if (GET_CODE (otherops[2]) == CONST_INT)
8375 switch ((int) INTVAL (otherops[2]))
8377 case -8:
8378 output_asm_insn ("ldm%?db\t%1, %M0", otherops);
8379 return "";
8380 case -4:
8381 output_asm_insn ("ldm%?da\t%1, %M0", otherops);
8382 return "";
8383 case 4:
8384 output_asm_insn ("ldm%?ib\t%1, %M0", otherops);
8385 return "";
8388 if (TARGET_LDRD
8389 && (GET_CODE (otherops[2]) == REG
8390 || (GET_CODE (otherops[2]) == CONST_INT
8391 && INTVAL (otherops[2]) > -256
8392 && INTVAL (otherops[2]) < 256)))
8394 if (reg_overlap_mentioned_p (otherops[0],
8395 otherops[2]))
8397 /* Swap base and index registers over to
8398 avoid a conflict. */
8399 otherops[1] = XEXP (XEXP (operands[1], 0), 1);
8400 otherops[2] = XEXP (XEXP (operands[1], 0), 0);
8403 /* If both registers conflict, it will usually
8404 have been fixed by a splitter. */
8405 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
8407 output_asm_insn ("add%?\t%1, %1, %2", otherops);
8408 output_asm_insn ("ldr%?d\t%0, [%1]",
8409 otherops);
8411 else
8412 output_asm_insn ("ldr%?d\t%0, [%1, %2]", otherops);
8413 return "";
8416 if (GET_CODE (otherops[2]) == CONST_INT)
8418 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
8419 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
8420 else
8421 output_asm_insn ("add%?\t%0, %1, %2", otherops);
8423 else
8424 output_asm_insn ("add%?\t%0, %1, %2", otherops);
8426 else
8427 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
8429 return "ldm%?ia\t%0, %M0";
8431 else
8433 otherops[1] = adjust_address (operands[1], SImode, 4);
8434 /* Take care of overlapping base/data reg. */
8435 if (reg_mentioned_p (operands[0], operands[1]))
8437 output_asm_insn ("ldr%?\t%0, %1", otherops);
8438 output_asm_insn ("ldr%?\t%0, %1", operands);
8440 else
8442 output_asm_insn ("ldr%?\t%0, %1", operands);
8443 output_asm_insn ("ldr%?\t%0, %1", otherops);
8448 else
8450 /* Constraints should ensure this. */
8451 gcc_assert (code0 == MEM && code1 == REG);
8452 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
8454 switch (GET_CODE (XEXP (operands[0], 0)))
8456 case REG:
8457 output_asm_insn ("stm%?ia\t%m0, %M1", operands);
8458 break;
8460 case PRE_INC:
8461 gcc_assert (TARGET_LDRD);
8462 output_asm_insn ("str%?d\t%1, [%m0, #8]!", operands);
8463 break;
8465 case PRE_DEC:
8466 output_asm_insn ("stm%?db\t%m0!, %M1", operands);
8467 break;
8469 case POST_INC:
8470 output_asm_insn ("stm%?ia\t%m0!, %M1", operands);
8471 break;
8473 case POST_DEC:
8474 gcc_assert (TARGET_LDRD);
8475 output_asm_insn ("str%?d\t%1, [%m0], #-8", operands);
8476 break;
8478 case PRE_MODIFY:
8479 case POST_MODIFY:
8480 otherops[0] = operands[1];
8481 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
8482 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
8484 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
8485 output_asm_insn ("str%?d\t%0, [%1, %2]!", otherops);
8486 else
8487 output_asm_insn ("str%?d\t%0, [%1], %2", otherops);
8488 break;
8490 case PLUS:
8491 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
8492 if (GET_CODE (otherops[2]) == CONST_INT)
8494 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
8496 case -8:
8497 output_asm_insn ("stm%?db\t%m0, %M1", operands);
8498 return "";
8500 case -4:
8501 output_asm_insn ("stm%?da\t%m0, %M1", operands);
8502 return "";
8504 case 4:
8505 output_asm_insn ("stm%?ib\t%m0, %M1", operands);
8506 return "";
8509 if (TARGET_LDRD
8510 && (GET_CODE (otherops[2]) == REG
8511 || (GET_CODE (otherops[2]) == CONST_INT
8512 && INTVAL (otherops[2]) > -256
8513 && INTVAL (otherops[2]) < 256)))
8515 otherops[0] = operands[1];
8516 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
8517 output_asm_insn ("str%?d\t%0, [%1, %2]", otherops);
8518 return "";
8520 /* Fall through */
8522 default:
8523 otherops[0] = adjust_address (operands[0], SImode, 4);
8524 otherops[1] = gen_rtx_REG (SImode, 1 + REGNO (operands[1]));
8525 output_asm_insn ("str%?\t%1, %0", operands);
8526 output_asm_insn ("str%?\t%1, %0", otherops);
8530 return "";
8533 /* Output an ADD r, s, #n where n may be too big for one instruction.
8534 If n is zero and the source and destination registers are the same, output nothing. */
8535 const char *
8536 output_add_immediate (rtx *operands)
8538 HOST_WIDE_INT n = INTVAL (operands[2]);
8540 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
8542 if (n < 0)
8543 output_multi_immediate (operands,
8544 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
8545 -n);
8546 else
8547 output_multi_immediate (operands,
8548 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
8552 return "";
8555 /* Output a multiple immediate operation.
8556 OPERANDS is the vector of operands referred to in the output patterns.
8557 INSTR1 is the output pattern to use for the first constant.
8558 INSTR2 is the output pattern to use for subsequent constants.
8559 IMMED_OP is the index of the constant slot in OPERANDS.
8560 N is the constant value. */
8561 static const char *
8562 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
8563 int immed_op, HOST_WIDE_INT n)
8565 #if HOST_BITS_PER_WIDE_INT > 32
8566 n &= 0xffffffff;
8567 #endif
8569 if (n == 0)
8571 /* Quick and easy output. */
8572 operands[immed_op] = const0_rtx;
8573 output_asm_insn (instr1, operands);
8575 else
8577 int i;
8578 const char * instr = instr1;
8580 /* Note that n is never zero here (which would give no output). */
8581 for (i = 0; i < 32; i += 2)
8583 if (n & (3 << i))
8585 operands[immed_op] = GEN_INT (n & (255 << i));
8586 output_asm_insn (instr, operands);
8587 instr = instr2;
8588 i += 6;
8593 return "";
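/* An illustrative, standalone sketch (not part of GCC, guarded out of
   the build) of the splitting loop above.  An ARM data-processing
   immediate is an 8-bit value at an even bit position, so any 32-bit
   constant can be built from at most four such chunks; the loop below
   peels them off in turn.  The helper name is hypothetical.  */
#if 0
#include <stdio.h>

static void
split_arm_immediate (unsigned long n)
{
  int i;

  n &= 0xffffffff;
  for (i = 0; i < 32; i += 2)
    if (n & (3ul << i))
      {
	/* Emit the 8-bit chunk that starts at bit I.  */
	printf ("#0x%lx\n", n & (255ul << i));
	i += 6;			/* The rest of this chunk is done.  */
      }
}

/* split_arm_immediate (0x00ff00ff) prints #0xff and #0xff0000,
   i.e. the constant is built with two add operations.  */
#endif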
8596 /* Return the appropriate ARM instruction for the operation code.
8597 The returned result should not be overwritten. OP is the rtx of the
8598 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
8599 was shifted. */
8600 const char *
8601 arithmetic_instr (rtx op, int shift_first_arg)
8603 switch (GET_CODE (op))
8605 case PLUS:
8606 return "add";
8608 case MINUS:
8609 return shift_first_arg ? "rsb" : "sub";
8611 case IOR:
8612 return "orr";
8614 case XOR:
8615 return "eor";
8617 case AND:
8618 return "and";
8620 default:
8621 gcc_unreachable ();
8625 /* Ensure valid constant shifts and return the appropriate shift mnemonic
8626 for the operation code. The returned result should not be overwritten.
8627 OP is the rtx of the shift.
8628 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
8629 constant amount if the shift is by a constant. */
8630 static const char *
8631 shift_op (rtx op, HOST_WIDE_INT *amountp)
8633 const char * mnem;
8634 enum rtx_code code = GET_CODE (op);
8636 switch (GET_CODE (XEXP (op, 1)))
8638 case REG:
8639 case SUBREG:
8640 *amountp = -1;
8641 break;
8643 case CONST_INT:
8644 *amountp = INTVAL (XEXP (op, 1));
8645 break;
8647 default:
8648 gcc_unreachable ();
8651 switch (code)
8653 case ASHIFT:
8654 mnem = "asl";
8655 break;
8657 case ASHIFTRT:
8658 mnem = "asr";
8659 break;
8661 case LSHIFTRT:
8662 mnem = "lsr";
8663 break;
8665 case ROTATE:
8666 gcc_assert (*amountp != -1);
8667 *amountp = 32 - *amountp;
8669 /* Fall through. */
8671 case ROTATERT:
8672 mnem = "ror";
8673 break;
8675 case MULT:
8676 /* We never have to worry about the amount being other than a
8677 power of 2, since this case can never be reloaded from a reg. */
8678 gcc_assert (*amountp != -1);
8679 *amountp = int_log2 (*amountp);
8680 return "asl";
8682 default:
8683 gcc_unreachable ();
8686 if (*amountp != -1)
8688 /* This is not 100% correct, but follows from the desire to merge
8689 multiplication by a power of 2 with the recognizer for a
8690 shift. >=32 is not a valid shift for "asl", so we must try and
8691 output a shift that produces the correct arithmetical result.
8692 Using lsr #32 is identical except for the fact that the carry bit
8693 is not set correctly if we set the flags; but we never use the
8694 carry bit from such an operation, so we can ignore that. */
8695 if (code == ROTATERT)
8696 /* Rotate is just modulo 32. */
8697 *amountp &= 31;
8698 else if (*amountp != (*amountp & 31))
8700 if (code == ASHIFT)
8701 mnem = "lsr";
8702 *amountp = 32;
8705 /* Shifts of 0 are no-ops. */
8706 if (*amountp == 0)
8707 return NULL;
8710 return mnem;
8713 /* Obtain the shift from the POWER of two. */
8715 static HOST_WIDE_INT
8716 int_log2 (HOST_WIDE_INT power)
8718 HOST_WIDE_INT shift = 0;
8720 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
8722 gcc_assert (shift <= 31);
8723 shift++;
8726 return shift;
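/* An illustrative, standalone sketch (not part of GCC, guarded out of
   the build) of int_log2 above, with a couple of worked values.  Like
   the original it assumes POWER really is a non-zero power of two; the
   helper name is hypothetical.  */
#if 0
static int
log2_of_power (unsigned long power)
{
  int shift = 0;

  while (((1ul << shift) & power) == 0)
    shift++;			/* Terminates because POWER has one bit set.  */
  return shift;
}

/* log2_of_power (1) == 0, log2_of_power (8) == 3,
   log2_of_power (0x80000000) == 31.  */
#endif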
8729 /* Output a .ascii pseudo-op, keeping track of lengths. This is
8730 because /bin/as is horribly restrictive. The judgement about
8731 whether or not each character is 'printable' (and can be output as
8732 is) or not (and must be printed with an octal escape) must be made
8733 with reference to the *host* character set -- the situation is
8734 similar to that discussed in the comments above pp_c_char in
8735 c-pretty-print.c. */
8737 #define MAX_ASCII_LEN 51
8739 void
8740 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
8742 int i;
8743 int len_so_far = 0;
8745 fputs ("\t.ascii\t\"", stream);
8747 for (i = 0; i < len; i++)
8749 int c = p[i];
8751 if (len_so_far >= MAX_ASCII_LEN)
8753 fputs ("\"\n\t.ascii\t\"", stream);
8754 len_so_far = 0;
8757 if (ISPRINT (c))
8759 if (c == '\\' || c == '\"')
8761 putc ('\\', stream);
8762 len_so_far++;
8764 putc (c, stream);
8765 len_so_far++;
8767 else
8769 fprintf (stream, "\\%03o", c);
8770 len_so_far += 4;
8774 fputs ("\"\n", stream);
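/* An illustrative, standalone sketch (not part of GCC, guarded out of
   the build) of the escaping rules above, minus the 51-character line
   splitting.  Printable characters are emitted as-is, backslash and
   double quote get a leading backslash, and everything else becomes a
   three-digit octal escape, so the bytes 'a', '"', 'b', '\n' come out as
	.ascii	"a\"b\012"
   The helper name is hypothetical.  */
#if 0
#include <ctype.h>
#include <stdio.h>

static void
emit_ascii (FILE *stream, const unsigned char *p, int len)
{
  int i;

  fputs ("\t.ascii\t\"", stream);
  for (i = 0; i < len; i++)
    {
      int c = p[i];

      if (isprint (c))
	{
	  if (c == '\\' || c == '\"')
	    putc ('\\', stream);
	  putc (c, stream);
	}
      else
	fprintf (stream, "\\%03o", c);
    }
  fputs ("\"\n", stream);
}
#endif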
8777 /* Compute the register save mask for registers 0 through 12
8778 inclusive. This code is used by arm_compute_save_reg_mask. */
8780 static unsigned long
8781 arm_compute_save_reg0_reg12_mask (void)
8783 unsigned long func_type = arm_current_func_type ();
8784 unsigned long save_reg_mask = 0;
8785 unsigned int reg;
8787 if (IS_INTERRUPT (func_type))
8789 unsigned int max_reg;
8790 /* Interrupt functions must not corrupt any registers,
8791 even call clobbered ones. If this is a leaf function
8792 we can just examine the registers used by the RTL, but
8793 otherwise we have to assume that whatever function is
8794 called might clobber anything, and so we have to save
8795 all the call-clobbered registers as well. */
8796 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
8797 /* FIQ handlers have registers r8 - r12 banked, so
8798 we only need to check r0 - r7. Normal ISRs only
8799 bank r13 and r14, so we must check up to r12.
8800 r13 is the stack pointer which is always preserved,
8801 so we do not need to consider it here. */
8802 max_reg = 7;
8803 else
8804 max_reg = 12;
8806 for (reg = 0; reg <= max_reg; reg++)
8807 if (regs_ever_live[reg]
8808 || (! current_function_is_leaf && call_used_regs [reg]))
8809 save_reg_mask |= (1 << reg);
8811 /* Also save the pic base register if necessary. */
8812 if (flag_pic
8813 && !TARGET_SINGLE_PIC_BASE
8814 && current_function_uses_pic_offset_table)
8815 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
8817 else
8819 /* In the normal case we only need to save those registers
8820 which are call saved and which are used by this function. */
8821 for (reg = 0; reg <= 10; reg++)
8822 if (regs_ever_live[reg] && ! call_used_regs [reg])
8823 save_reg_mask |= (1 << reg);
8825 /* Handle the frame pointer as a special case. */
8826 if (! TARGET_APCS_FRAME
8827 && ! frame_pointer_needed
8828 && regs_ever_live[HARD_FRAME_POINTER_REGNUM]
8829 && ! call_used_regs[HARD_FRAME_POINTER_REGNUM])
8830 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
8832 /* If we aren't loading the PIC register,
8833 don't stack it even though it may be live. */
8834 if (flag_pic
8835 && !TARGET_SINGLE_PIC_BASE
8836 && (regs_ever_live[PIC_OFFSET_TABLE_REGNUM]
8837 || current_function_uses_pic_offset_table))
8838 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
8841 /* Save registers so the exception handler can modify them. */
8842 if (current_function_calls_eh_return)
8844 unsigned int i;
8846 for (i = 0; ; i++)
8848 reg = EH_RETURN_DATA_REGNO (i);
8849 if (reg == INVALID_REGNUM)
8850 break;
8851 save_reg_mask |= 1 << reg;
8855 return save_reg_mask;
8858 /* Compute a bit mask of which registers need to be
8859 saved on the stack for the current function. */
8861 static unsigned long
8862 arm_compute_save_reg_mask (void)
8864 unsigned int save_reg_mask = 0;
8865 unsigned long func_type = arm_current_func_type ();
8867 if (IS_NAKED (func_type))
8868 /* This should never really happen. */
8869 return 0;
8871 /* If we are creating a stack frame, then we must save the frame pointer,
8872 IP (which will hold the old stack pointer), LR and the PC. */
8873 if (frame_pointer_needed)
8874 save_reg_mask |=
8875 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
8876 | (1 << IP_REGNUM)
8877 | (1 << LR_REGNUM)
8878 | (1 << PC_REGNUM);
8880 /* Volatile functions do not return, so there
8881 is no need to save any other registers. */
8882 if (IS_VOLATILE (func_type))
8883 return save_reg_mask;
8885 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
8887 /* Decide if we need to save the link register.
8888 Interrupt routines have their own banked link register,
8889 so they never need to save it.
8890 Otherwise if we do not use the link register we do not need to save
8891 it. If we are pushing other registers onto the stack however, we
8892 can save an instruction in the epilogue by pushing the link register
8893 now and then popping it back into the PC. This incurs extra memory
8894 accesses though, so we only do it when optimizing for size, and only
8895 if we know that we will not need a fancy return sequence. */
8896 if (regs_ever_live [LR_REGNUM]
8897 || (save_reg_mask
8898 && optimize_size
8899 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
8900 && !current_function_calls_eh_return))
8901 save_reg_mask |= 1 << LR_REGNUM;
8903 if (cfun->machine->lr_save_eliminated)
8904 save_reg_mask &= ~ (1 << LR_REGNUM);
8906 if (TARGET_REALLY_IWMMXT
8907 && ((bit_count (save_reg_mask)
8908 + ARM_NUM_INTS (current_function_pretend_args_size)) % 2) != 0)
8910 unsigned int reg;
8912 /* The total number of registers that are going to be pushed
8913 onto the stack is odd. We need to ensure that the stack
8914 is 64-bit aligned before we start to save iWMMXt registers,
8915 and also before we start to create locals. (A local variable
8916 might be a double or long long which we will load/store using
8917 an iWMMXt instruction). Therefore we need to push another
8918 ARM register, so that the stack will be 64-bit aligned. We
8919 try to avoid using the arg registers (r0 - r3) as they might be
8920 used to pass values in a tail call. */
8921 for (reg = 4; reg <= 12; reg++)
8922 if ((save_reg_mask & (1 << reg)) == 0)
8923 break;
8925 if (reg <= 12)
8926 save_reg_mask |= (1 << reg);
8927 else
8929 cfun->machine->sibcall_blocked = 1;
8930 save_reg_mask |= (1 << 3);
8934 return save_reg_mask;
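/* An illustrative, standalone sketch (not part of GCC, guarded out of
   the build) of the alignment test above.  Every saved core register
   and every pretend-argument word is 4 bytes, so an odd total leaves
   the stack only 32-bit aligned and one extra register must be pushed
   as padding before the iWMMXt saves.  The helper name is
   hypothetical.  */
#if 0
static int
needs_padding_register (int saved_core_regs, int pretend_arg_words)
{
  return ((saved_core_regs + pretend_arg_words) % 2) != 0;
}

/* needs_padding_register (3, 0) == 1: push a fourth register.
   needs_padding_register (4, 2) == 0: already 64-bit aligned.  */
#endif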
8938 /* Compute a bit mask of which registers need to be
8939 saved on the stack for the current function. */
8940 static unsigned long
8941 thumb_compute_save_reg_mask (void)
8943 unsigned long mask;
8944 unsigned reg;
8946 mask = 0;
8947 for (reg = 0; reg < 12; reg ++)
8948 if (regs_ever_live[reg] && !call_used_regs[reg])
8949 mask |= 1 << reg;
8951 if (flag_pic && !TARGET_SINGLE_PIC_BASE)
8952 mask |= (1 << PIC_OFFSET_TABLE_REGNUM);
8954 if (TARGET_SINGLE_PIC_BASE)
8955 mask &= ~(1 << arm_pic_register);
8957 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
8958 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
8959 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
8961 /* LR will also be pushed if any lo regs are pushed. */
8962 if (mask & 0xff || thumb_force_lr_save ())
8963 mask |= (1 << LR_REGNUM);
8965 /* Make sure we have a low work register if we need one.
8966 We will need one if we are going to push a high register,
8967 but we are not currently intending to push a low register. */
8968 if ((mask & 0xff) == 0
8969 && ((mask & 0x0f00) || TARGET_BACKTRACE))
8971 /* Use thumb_find_work_register to choose which register
8972 we will use. If the register is live then we will
8973 have to push it. Use LAST_LO_REGNUM as our fallback
8974 choice for the register to select. */
8975 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
8977 if (! call_used_regs[reg])
8978 mask |= 1 << reg;
8981 return mask;
8985 /* Return the number of bytes required to save VFP registers. */
8986 static int
8987 arm_get_vfp_saved_size (void)
8989 unsigned int regno;
8990 int count;
8991 int saved;
8993 saved = 0;
8994 /* Space for saved VFP registers. */
8995 if (TARGET_HARD_FLOAT && TARGET_VFP)
8997 count = 0;
8998 for (regno = FIRST_VFP_REGNUM;
8999 regno < LAST_VFP_REGNUM;
9000 regno += 2)
9002 if ((!regs_ever_live[regno] || call_used_regs[regno])
9003 && (!regs_ever_live[regno + 1] || call_used_regs[regno + 1]))
9005 if (count > 0)
9007 /* Workaround ARM10 VFPr1 bug. */
9008 if (count == 2 && !arm_arch6)
9009 count++;
9010 saved += count * 8 + 4;
9012 count = 0;
9014 else
9015 count++;
9017 if (count > 0)
9019 if (count == 2 && !arm_arch6)
9020 count++;
9021 saved += count * 8 + 4;
9024 return saved;
9028 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
9029 everything bar the final return instruction. */
9030 const char *
9031 output_return_instruction (rtx operand, int really_return, int reverse)
9033 char conditional[10];
9034 char instr[100];
9035 unsigned reg;
9036 unsigned long live_regs_mask;
9037 unsigned long func_type;
9038 arm_stack_offsets *offsets;
9040 func_type = arm_current_func_type ();
9042 if (IS_NAKED (func_type))
9043 return "";
9045 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
9047 /* If this function was declared non-returning, and we have
9048 found a tail call, then we have to trust that the called
9049 function won't return. */
9050 if (really_return)
9052 rtx ops[2];
9054 /* Otherwise, trap an attempted return by aborting. */
9055 ops[0] = operand;
9056 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
9057 : "abort");
9058 assemble_external_libcall (ops[1]);
9059 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
9062 return "";
9065 gcc_assert (!current_function_calls_alloca || really_return);
9067 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
9069 return_used_this_function = 1;
9071 live_regs_mask = arm_compute_save_reg_mask ();
9073 if (live_regs_mask)
9075 const char * return_reg;
9077 /* If we do not have any special requirements for function exit
9078 (e.g. interworking, or ISR) then we can load the return address
9079 directly into the PC. Otherwise we must load it into LR. */
9080 if (really_return
9081 && ! TARGET_INTERWORK)
9082 return_reg = reg_names[PC_REGNUM];
9083 else
9084 return_reg = reg_names[LR_REGNUM];
9086 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
9088 /* There are three possible reasons for the IP register
9089 being saved. 1) a stack frame was created, in which case
9090 IP contains the old stack pointer, or 2) an ISR routine
9091 corrupted it, or 3) it was saved to align the stack on
9092 iWMMXt. In case 1, restore IP into SP, otherwise just
9093 restore IP. */
9094 if (frame_pointer_needed)
9096 live_regs_mask &= ~ (1 << IP_REGNUM);
9097 live_regs_mask |= (1 << SP_REGNUM);
9099 else
9100 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
9103 /* On some ARM architectures it is faster to use LDR rather than
9104 LDM to load a single register. On other architectures, the
9105 cost is the same. In 26 bit mode, or for exception handlers,
9106 we have to use LDM to load the PC so that the CPSR is also
9107 restored. */
9108 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
9109 if (live_regs_mask == (1U << reg))
9110 break;
9112 if (reg <= LAST_ARM_REGNUM
9113 && (reg != LR_REGNUM
9114 || ! really_return
9115 || ! IS_INTERRUPT (func_type)))
9117 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
9118 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
9120 else
9122 char *p;
9123 int first = 1;
9125 /* Generate the load multiple instruction to restore the
9126 registers. Note we can get here, even if
9127 frame_pointer_needed is true, but only if sp already
9128 points to the base of the saved core registers. */
9129 if (live_regs_mask & (1 << SP_REGNUM))
9131 unsigned HOST_WIDE_INT stack_adjust;
9133 offsets = arm_get_frame_offsets ();
9134 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
9135 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
9137 if (stack_adjust && arm_arch5)
9138 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
9139 else
9141 /* If we can't use ldmib (SA110 bug),
9142 then try to pop r3 instead. */
9143 if (stack_adjust)
9144 live_regs_mask |= 1 << 3;
9145 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
9148 else
9149 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
9151 p = instr + strlen (instr);
9153 for (reg = 0; reg <= SP_REGNUM; reg++)
9154 if (live_regs_mask & (1 << reg))
9156 int l = strlen (reg_names[reg]);
9158 if (first)
9159 first = 0;
9160 else
9162 memcpy (p, ", ", 2);
9163 p += 2;
9166 memcpy (p, "%|", 2);
9167 memcpy (p + 2, reg_names[reg], l);
9168 p += l + 2;
9171 if (live_regs_mask & (1 << LR_REGNUM))
9173 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
9174 /* If returning from an interrupt, restore the CPSR. */
9175 if (IS_INTERRUPT (func_type))
9176 strcat (p, "^");
9178 else
9179 strcpy (p, "}");
9182 output_asm_insn (instr, & operand);
9184 /* See if we need to generate an extra instruction to
9185 perform the actual function return. */
9186 if (really_return
9187 && func_type != ARM_FT_INTERWORKED
9188 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
9190 /* The return has already been handled
9191 by loading the LR into the PC. */
9192 really_return = 0;
9196 if (really_return)
9198 switch ((int) ARM_FUNC_TYPE (func_type))
9200 case ARM_FT_ISR:
9201 case ARM_FT_FIQ:
9202 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
9203 break;
9205 case ARM_FT_INTERWORKED:
9206 sprintf (instr, "bx%s\t%%|lr", conditional);
9207 break;
9209 case ARM_FT_EXCEPTION:
9210 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
9211 break;
9213 default:
9214 /* Use bx if it's available. */
9215 if (arm_arch5 || arm_arch4t)
9216 sprintf (instr, "bx%s\t%%|lr", conditional);
9217 else
9218 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
9219 break;
9222 output_asm_insn (instr, & operand);
9225 return "";
9228 /* Write the function name into the code section, directly preceding
9229 the function prologue.
9231 Code will be output similar to this:
9232 t0
9233 .ascii "arm_poke_function_name", 0
9234 .align
9235 t1
9236 .word 0xff000000 + (t1 - t0)
9237 arm_poke_function_name
9238 mov ip, sp
9239 stmfd sp!, {fp, ip, lr, pc}
9240 sub fp, ip, #4
9242 When performing a stack backtrace, code can inspect the value
9243 of 'pc' stored at 'fp' + 0. If the trace function then looks
9244 at location pc - 12 and the top 8 bits are set, then we know
9245 that there is a function name embedded immediately preceding this
9246 location, and that its length is ((pc[-3]) & 0x00ffffff).
9248 We assume that pc is declared as a pointer to an unsigned long.
9250 It is of no benefit to output the function name if we are assembling
9251 a leaf function. These function types will not contain a stack
9252 backtrace structure, so it is not possible to determine the
9253 function name. */
9254 void
9255 arm_poke_function_name (FILE *stream, const char *name)
9257 unsigned long alignlength;
9258 unsigned long length;
9259 rtx x;
9261 length = strlen (name) + 1;
9262 alignlength = ROUND_UP_WORD (length);
9264 ASM_OUTPUT_ASCII (stream, name, length);
9265 ASM_OUTPUT_ALIGN (stream, 2);
9266 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
9267 assemble_aligned_integer (UNITS_PER_WORD, x);
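/* An illustrative, standalone sketch (not part of GCC, guarded out of
   the build) of how a backtrace routine can recover the name emitted
   above, following the layout described in the comment: the marker
   word sits at pc - 12 and the NUL-padded name immediately precedes
   it.  PC is the saved program counter read from the stack frame; the
   helper name is hypothetical.  */
#if 0
static const char *
recover_function_name (const unsigned long *pc)
{
  unsigned long marker = pc[-3];	/* The word at pc - 12.  */

  if ((marker & 0xff000000) != 0xff000000)
    return 0;				/* No embedded name here.  */

  /* The low 24 bits hold the padded length of the name, which ends
     just before the marker word.  */
  return (const char *) &pc[-3] - (marker & 0x00ffffff);
}
#endif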
9270 /* Place some comments into the assembler stream
9271 describing the current function. */
9272 static void
9273 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
9275 unsigned long func_type;
9277 if (!TARGET_ARM)
9279 thumb_output_function_prologue (f, frame_size);
9280 return;
9283 /* Sanity check. */
9284 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
9286 func_type = arm_current_func_type ();
9288 switch ((int) ARM_FUNC_TYPE (func_type))
9290 default:
9291 case ARM_FT_NORMAL:
9292 break;
9293 case ARM_FT_INTERWORKED:
9294 asm_fprintf (f, "\t%@ Function supports interworking.\n");
9295 break;
9296 case ARM_FT_ISR:
9297 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
9298 break;
9299 case ARM_FT_FIQ:
9300 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
9301 break;
9302 case ARM_FT_EXCEPTION:
9303 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
9304 break;
9307 if (IS_NAKED (func_type))
9308 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
9310 if (IS_VOLATILE (func_type))
9311 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
9313 if (IS_NESTED (func_type))
9314 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
9316 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
9317 current_function_args_size,
9318 current_function_pretend_args_size, frame_size);
9320 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
9321 frame_pointer_needed,
9322 cfun->machine->uses_anonymous_args);
9324 if (cfun->machine->lr_save_eliminated)
9325 asm_fprintf (f, "\t%@ link register save eliminated.\n");
9327 if (current_function_calls_eh_return)
9328 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
9330 #ifdef AOF_ASSEMBLER
9331 if (flag_pic)
9332 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, PIC_OFFSET_TABLE_REGNUM);
9333 #endif
9335 return_used_this_function = 0;
9338 const char *
9339 arm_output_epilogue (rtx sibling)
9341 int reg;
9342 unsigned long saved_regs_mask;
9343 unsigned long func_type;
9344 /* Floats_offset is the offset from the "virtual" frame. In an APCS
9345 frame that is $fp + 4 for a non-variadic function. */
9346 int floats_offset = 0;
9347 rtx operands[3];
9348 FILE * f = asm_out_file;
9349 unsigned int lrm_count = 0;
9350 int really_return = (sibling == NULL);
9351 int start_reg;
9352 arm_stack_offsets *offsets;
9354 /* If we have already generated the return instruction
9355 then it is futile to generate anything else. */
9356 if (use_return_insn (FALSE, sibling) && return_used_this_function)
9357 return "";
9359 func_type = arm_current_func_type ();
9361 if (IS_NAKED (func_type))
9362 /* Naked functions don't have epilogues. */
9363 return "";
9365 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
9367 rtx op;
9369 /* A volatile function should never return. Call abort. */
9370 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
9371 assemble_external_libcall (op);
9372 output_asm_insn ("bl\t%a0", &op);
9374 return "";
9377 /* If we are throwing an exception, then we really must be doing a
9378 return, so we can't tail-call. */
9379 gcc_assert (!current_function_calls_eh_return || really_return);
9381 offsets = arm_get_frame_offsets ();
9382 saved_regs_mask = arm_compute_save_reg_mask ();
9384 if (TARGET_IWMMXT)
9385 lrm_count = bit_count (saved_regs_mask);
9387 floats_offset = offsets->saved_args;
9388 /* Compute how far away the floats will be. */
9389 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
9390 if (saved_regs_mask & (1 << reg))
9391 floats_offset += 4;
9393 if (frame_pointer_needed)
9395 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
9396 int vfp_offset = offsets->frame;
9398 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
9400 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
9401 if (regs_ever_live[reg] && !call_used_regs[reg])
9403 floats_offset += 12;
9404 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
9405 reg, FP_REGNUM, floats_offset - vfp_offset);
9408 else
9410 start_reg = LAST_FPA_REGNUM;
9412 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
9414 if (regs_ever_live[reg] && !call_used_regs[reg])
9416 floats_offset += 12;
9418 /* We can't unstack more than four registers at once. */
9419 if (start_reg - reg == 3)
9421 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
9422 reg, FP_REGNUM, floats_offset - vfp_offset);
9423 start_reg = reg - 1;
9426 else
9428 if (reg != start_reg)
9429 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
9430 reg + 1, start_reg - reg,
9431 FP_REGNUM, floats_offset - vfp_offset);
9432 start_reg = reg - 1;
9436 /* Just in case the last register checked also needs unstacking. */
9437 if (reg != start_reg)
9438 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
9439 reg + 1, start_reg - reg,
9440 FP_REGNUM, floats_offset - vfp_offset);
9443 if (TARGET_HARD_FLOAT && TARGET_VFP)
9445 int saved_size;
9447 /* The fldmx insn does not have base+offset addressing modes,
9448 so we use IP to hold the address. */
9449 saved_size = arm_get_vfp_saved_size ();
9451 if (saved_size > 0)
9453 floats_offset += saved_size;
9454 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
9455 FP_REGNUM, floats_offset - vfp_offset);
9457 start_reg = FIRST_VFP_REGNUM;
9458 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
9460 if ((!regs_ever_live[reg] || call_used_regs[reg])
9461 && (!regs_ever_live[reg + 1] || call_used_regs[reg + 1]))
9463 if (start_reg != reg)
9464 arm_output_fldmx (f, IP_REGNUM,
9465 (start_reg - FIRST_VFP_REGNUM) / 2,
9466 (reg - start_reg) / 2);
9467 start_reg = reg + 2;
9470 if (start_reg != reg)
9471 arm_output_fldmx (f, IP_REGNUM,
9472 (start_reg - FIRST_VFP_REGNUM) / 2,
9473 (reg - start_reg) / 2);
9476 if (TARGET_IWMMXT)
9478 /* The frame pointer is guaranteed to be non-double-word aligned.
9479 This is because it is set to (old_stack_pointer - 4) and the
9480 old_stack_pointer was double word aligned. Thus the offset to
9481 the iWMMXt registers to be loaded must also be non-double-word
9482 sized, so that the resultant address *is* double-word aligned.
9483 We can ignore floats_offset since that was already included in
9484 the live_regs_mask. */
9485 lrm_count += (lrm_count % 2 ? 2 : 1);
9487 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
9488 if (regs_ever_live[reg] && !call_used_regs[reg])
9490 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
9491 reg, FP_REGNUM, lrm_count * 4);
9492 lrm_count += 2;
9496 /* saved_regs_mask should contain the IP, which at the time of stack
9497 frame generation actually contains the old stack pointer. So a
9498 quick way to unwind the stack is just pop the IP register directly
9499 into the stack pointer. */
9500 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
9501 saved_regs_mask &= ~ (1 << IP_REGNUM);
9502 saved_regs_mask |= (1 << SP_REGNUM);
9504 /* There are two registers left in saved_regs_mask - LR and PC. We
9505 only need to restore the LR register (the return address), but to
9506 save time we can load it directly into the PC, unless we need a
9507 special function exit sequence, or we are not really returning. */
9508 if (really_return
9509 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
9510 && !current_function_calls_eh_return)
9511 /* Delete the LR from the register mask, so that the LR on
9512 the stack is loaded into the PC in the register mask. */
9513 saved_regs_mask &= ~ (1 << LR_REGNUM);
9514 else
9515 saved_regs_mask &= ~ (1 << PC_REGNUM);
9517 /* We must use SP as the base register, because SP is one of the
9518 registers being restored. If an interrupt or page fault
9519 happens in the ldm instruction, the SP might or might not
9520 have been restored. That would be bad, as then SP will no
9521 longer indicate the safe area of stack, and we can get stack
9522 corruption. Using SP as the base register means that it will
9523 be reset correctly to the original value, should an interrupt
9524 occur. If the stack pointer already points at the right
9525 place, then omit the subtraction. */
9526 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
9527 || current_function_calls_alloca)
9528 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
9529 4 * bit_count (saved_regs_mask));
9530 print_multi_reg (f, "ldmfd\t%r", SP_REGNUM, saved_regs_mask);
9532 if (IS_INTERRUPT (func_type))
9533 /* Interrupt handlers will have pushed the
9534 IP onto the stack, so restore it now. */
9535 print_multi_reg (f, "ldmfd\t%r!", SP_REGNUM, 1 << IP_REGNUM);
9537 else
9539 /* Restore stack pointer if necessary. */
9540 if (offsets->outgoing_args != offsets->saved_regs)
9542 operands[0] = operands[1] = stack_pointer_rtx;
9543 operands[2] = GEN_INT (offsets->outgoing_args - offsets->saved_regs);
9544 output_add_immediate (operands);
9547 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
9549 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
9550 if (regs_ever_live[reg] && !call_used_regs[reg])
9551 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
9552 reg, SP_REGNUM);
9554 else
9556 start_reg = FIRST_FPA_REGNUM;
9558 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
9560 if (regs_ever_live[reg] && !call_used_regs[reg])
9562 if (reg - start_reg == 3)
9564 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
9565 start_reg, SP_REGNUM);
9566 start_reg = reg + 1;
9569 else
9571 if (reg != start_reg)
9572 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
9573 start_reg, reg - start_reg,
9574 SP_REGNUM);
9576 start_reg = reg + 1;
9580 /* Just in case the last register checked also needs unstacking. */
9581 if (reg != start_reg)
9582 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
9583 start_reg, reg - start_reg, SP_REGNUM);
9586 if (TARGET_HARD_FLOAT && TARGET_VFP)
9588 start_reg = FIRST_VFP_REGNUM;
9589 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
9591 if ((!regs_ever_live[reg] || call_used_regs[reg])
9592 && (!regs_ever_live[reg + 1] || call_used_regs[reg + 1]))
9594 if (start_reg != reg)
9595 arm_output_fldmx (f, SP_REGNUM,
9596 (start_reg - FIRST_VFP_REGNUM) / 2,
9597 (reg - start_reg) / 2);
9598 start_reg = reg + 2;
9601 if (start_reg != reg)
9602 arm_output_fldmx (f, SP_REGNUM,
9603 (start_reg - FIRST_VFP_REGNUM) / 2,
9604 (reg - start_reg) / 2);
9606 if (TARGET_IWMMXT)
9607 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
9608 if (regs_ever_live[reg] && !call_used_regs[reg])
9609 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
9611 /* If we can, restore the LR into the PC. */
9612 if (ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
9613 && really_return
9614 && current_function_pretend_args_size == 0
9615 && saved_regs_mask & (1 << LR_REGNUM)
9616 && !current_function_calls_eh_return)
9618 saved_regs_mask &= ~ (1 << LR_REGNUM);
9619 saved_regs_mask |= (1 << PC_REGNUM);
9622 /* Load the registers off the stack. If we only have one register
9623 to load use the LDR instruction - it is faster. */
9624 if (saved_regs_mask == (1 << LR_REGNUM))
9626 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
9628 else if (saved_regs_mask)
9630 if (saved_regs_mask & (1 << SP_REGNUM))
9631 /* Note - write back to the stack register is not enabled
9632 (i.e. "ldmfd sp!..."). We know that the stack pointer is
9633 in the list of registers and if we add writeback the
9634 instruction becomes UNPREDICTABLE. */
9635 print_multi_reg (f, "ldmfd\t%r", SP_REGNUM, saved_regs_mask);
9636 else
9637 print_multi_reg (f, "ldmfd\t%r!", SP_REGNUM, saved_regs_mask);
9640 if (current_function_pretend_args_size)
9642 /* Unwind the pre-pushed regs. */
9643 operands[0] = operands[1] = stack_pointer_rtx;
9644 operands[2] = GEN_INT (current_function_pretend_args_size);
9645 output_add_immediate (operands);
9649 /* We may have already restored PC directly from the stack. */
9650 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
9651 return "";
9653 /* Stack adjustment for exception handler. */
9654 if (current_function_calls_eh_return)
9655 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
9656 ARM_EH_STACKADJ_REGNUM);
9658 /* Generate the return instruction. */
9659 switch ((int) ARM_FUNC_TYPE (func_type))
9661 case ARM_FT_ISR:
9662 case ARM_FT_FIQ:
9663 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
9664 break;
9666 case ARM_FT_EXCEPTION:
9667 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
9668 break;
9670 case ARM_FT_INTERWORKED:
9671 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
9672 break;
9674 default:
9675 if (arm_arch5 || arm_arch4t)
9676 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
9677 else
9678 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
9679 break;
9682 return "";
9685 static void
9686 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
9687 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
9689 arm_stack_offsets *offsets;
9691 if (TARGET_THUMB)
9693 int regno;
9695 /* Emit any call-via-reg trampolines that are needed for v4t support
9696 of call_reg and call_value_reg type insns. */
9697 for (regno = 0; regno < LR_REGNUM; regno++)
9699 rtx label = cfun->machine->call_via[regno];
9701 if (label != NULL)
9703 function_section (current_function_decl);
9704 targetm.asm_out.internal_label (asm_out_file, "L",
9705 CODE_LABEL_NUMBER (label));
9706 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
9710 /* ??? Probably not safe to set this here, since it assumes that a
9711 function will be emitted as assembly immediately after we generate
9712 RTL for it. This does not happen for inline functions. */
9713 return_used_this_function = 0;
9715 else
9717 /* We need to take into account any stack-frame rounding. */
9718 offsets = arm_get_frame_offsets ();
9720 gcc_assert (!use_return_insn (FALSE, NULL)
9721 || !return_used_this_function
9722 || offsets->saved_regs == offsets->outgoing_args
9723 || frame_pointer_needed);
9725 /* Reset the ARM-specific per-function variables. */
9726 after_arm_reorg = 0;
9730 /* Generate and emit an insn that we will recognize as a push_multi.
9731 Unfortunately, since this insn does not reflect very well the actual
9732 semantics of the operation, we need to annotate the insn for the benefit
9733 of DWARF2 frame unwind information. */
9734 static rtx
9735 emit_multi_reg_push (unsigned long mask)
9737 int num_regs = 0;
9738 int num_dwarf_regs;
9739 int i, j;
9740 rtx par;
9741 rtx dwarf;
9742 int dwarf_par_index;
9743 rtx tmp, reg;
9745 for (i = 0; i <= LAST_ARM_REGNUM; i++)
9746 if (mask & (1 << i))
9747 num_regs++;
9749 gcc_assert (num_regs && num_regs <= 16);
9751 /* We don't record the PC in the dwarf frame information. */
9752 num_dwarf_regs = num_regs;
9753 if (mask & (1 << PC_REGNUM))
9754 num_dwarf_regs--;
9756 /* For the body of the insn we are going to generate an UNSPEC in
9757 parallel with several USEs. This allows the insn to be recognized
9758 by the push_multi pattern in the arm.md file. The insn looks
9759 something like this:
9761 (parallel [
9762 (set (mem:BLK (pre_dec:BLK (reg:SI sp)))
9763 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
9764 (use (reg:SI 11 fp))
9765 (use (reg:SI 12 ip))
9766 (use (reg:SI 14 lr))
9767 (use (reg:SI 15 pc))
9770 For the frame note however, we try to be more explicit and actually
9771 show each register being stored into the stack frame, plus a (single)
9772 decrement of the stack pointer. We do it this way in order to be
9773 friendly to the stack unwinding code, which only wants to see a single
9774 stack decrement per instruction. The RTL we generate for the note looks
9775 something like this:
9777 (sequence [
9778 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
9779 (set (mem:SI (reg:SI sp)) (reg:SI r4))
9780 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI fp))
9781 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI ip))
9782 (set (mem:SI (plus:SI (reg:SI sp) (const_int 12))) (reg:SI lr))
9785 This sequence is used both by the code to support stack unwinding for
9786 exceptions handlers and the code to generate dwarf2 frame debugging. */
9788 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
9789 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
9790 dwarf_par_index = 1;
9792 for (i = 0; i <= LAST_ARM_REGNUM; i++)
9794 if (mask & (1 << i))
9796 reg = gen_rtx_REG (SImode, i);
9798 XVECEXP (par, 0, 0)
9799 = gen_rtx_SET (VOIDmode,
9800 gen_rtx_MEM (BLKmode,
9801 gen_rtx_PRE_DEC (BLKmode,
9802 stack_pointer_rtx)),
9803 gen_rtx_UNSPEC (BLKmode,
9804 gen_rtvec (1, reg),
9805 UNSPEC_PUSH_MULT));
9807 if (i != PC_REGNUM)
9809 tmp = gen_rtx_SET (VOIDmode,
9810 gen_rtx_MEM (SImode, stack_pointer_rtx),
9811 reg);
9812 RTX_FRAME_RELATED_P (tmp) = 1;
9813 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
9814 dwarf_par_index++;
9817 break;
9821 for (j = 1, i++; j < num_regs; i++)
9823 if (mask & (1 << i))
9825 reg = gen_rtx_REG (SImode, i);
9827 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
9829 if (i != PC_REGNUM)
9831 tmp = gen_rtx_SET (VOIDmode,
9832 gen_rtx_MEM (SImode,
9833 plus_constant (stack_pointer_rtx,
9834 4 * j)),
9835 reg);
9836 RTX_FRAME_RELATED_P (tmp) = 1;
9837 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
9840 j++;
9844 par = emit_insn (par);
9846 tmp = gen_rtx_SET (SImode,
9847 stack_pointer_rtx,
9848 gen_rtx_PLUS (SImode,
9849 stack_pointer_rtx,
9850 GEN_INT (-4 * num_regs)));
9851 RTX_FRAME_RELATED_P (tmp) = 1;
9852 XVECEXP (dwarf, 0, 0) = tmp;
9854 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
9855 REG_NOTES (par));
9856 return par;
9859 /* Calculate the size of the return value that is passed in registers. */
9860 static int
9861 arm_size_return_regs (void)
9863 enum machine_mode mode;
9865 if (current_function_return_rtx != 0)
9866 mode = GET_MODE (current_function_return_rtx);
9867 else
9868 mode = DECL_MODE (DECL_RESULT (current_function_decl));
9870 return GET_MODE_SIZE (mode);
9873 static rtx
9874 emit_sfm (int base_reg, int count)
9876 rtx par;
9877 rtx dwarf;
9878 rtx tmp, reg;
9879 int i;
9881 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
9882 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
9884 reg = gen_rtx_REG (XFmode, base_reg++);
9886 XVECEXP (par, 0, 0)
9887 = gen_rtx_SET (VOIDmode,
9888 gen_rtx_MEM (BLKmode,
9889 gen_rtx_PRE_DEC (BLKmode, stack_pointer_rtx)),
9890 gen_rtx_UNSPEC (BLKmode,
9891 gen_rtvec (1, reg),
9892 UNSPEC_PUSH_MULT));
9893 tmp = gen_rtx_SET (VOIDmode,
9894 gen_rtx_MEM (XFmode, stack_pointer_rtx), reg);
9895 RTX_FRAME_RELATED_P (tmp) = 1;
9896 XVECEXP (dwarf, 0, 1) = tmp;
9898 for (i = 1; i < count; i++)
9900 reg = gen_rtx_REG (XFmode, base_reg++);
9901 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
9903 tmp = gen_rtx_SET (VOIDmode,
9904 gen_rtx_MEM (XFmode,
9905 plus_constant (stack_pointer_rtx,
9906 i * 12)),
9907 reg);
9908 RTX_FRAME_RELATED_P (tmp) = 1;
9909 XVECEXP (dwarf, 0, i + 1) = tmp;
9912 tmp = gen_rtx_SET (VOIDmode,
9913 stack_pointer_rtx,
9914 gen_rtx_PLUS (SImode,
9915 stack_pointer_rtx,
9916 GEN_INT (-12 * count)));
9917 RTX_FRAME_RELATED_P (tmp) = 1;
9918 XVECEXP (dwarf, 0, 0) = tmp;
9920 par = emit_insn (par);
9921 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
9922 REG_NOTES (par));
9923 return par;
9927 /* Return true if the current function needs to save/restore LR. */
9929 static bool
9930 thumb_force_lr_save (void)
9932 return !cfun->machine->lr_save_eliminated
9933 && (!leaf_function_p ()
9934 || thumb_far_jump_used_p ()
9935 || regs_ever_live [LR_REGNUM]);
9939 /* Compute the distance from register FROM to register TO.
9940 These can be the arg pointer (26), the soft frame pointer (25),
9941 the stack pointer (13) or the hard frame pointer (11).
9942 In thumb mode r7 is used as the soft frame pointer, if needed.
9943 Typical stack layout looks like this:
9945 old stack pointer -> | |
9946 ----
9947 | | \
9948 | | saved arguments for
9949 | | vararg functions
9950 | | /
9952 hard FP & arg pointer -> | | \
9953 | | stack
9954 | | frame
9955 | | /
9957 | | \
9958 | | call saved
9959 | | registers
9960 soft frame pointer -> | | /
9962 | | \
9963 | | local
9964 | | variables
9965 | | /
9967 | | \
9968 | | outgoing
9969 | | arguments
9970 current stack pointer -> | | /
9973 For a given function some or all of these stack components
9974 may not be needed, giving rise to the possibility of
9975 eliminating some of the registers.
9977 The values returned by this function must reflect the behavior
9978 of arm_expand_prologue() and arm_compute_save_reg_mask().
9980 The sign of the number returned reflects the direction of stack
9981 growth, so the values are positive for all eliminations except
9982 from the soft frame pointer to the hard frame pointer.
9984 SFP may point just inside the local variables block to ensure correct
9985 alignment. */
9988 /* Calculate stack offsets. These are used to calculate register elimination
9989 offsets and in prologue/epilogue code. */
9991 static arm_stack_offsets *
9992 arm_get_frame_offsets (void)
9994 struct arm_stack_offsets *offsets;
9995 unsigned long func_type;
9996 int leaf;
9997 int saved;
9998 HOST_WIDE_INT frame_size;
10000 offsets = &cfun->machine->stack_offsets;
10002 /* We need to know if we are a leaf function. Unfortunately, it
10003 is possible to be called after start_sequence has been called,
10004 which causes get_insns to return the insns for the sequence,
10005 not the function, which will cause leaf_function_p to return
10006 the incorrect result.
10007 We work around this by caching the offsets: we only need
10008 to know about leaf functions once reload has completed, and the
10009 frame size cannot be changed after that time, so we can safely
10010 use the cached value. */
10012 if (reload_completed)
10013 return offsets;
10015 /* Initially this is the size of the local variables. It will be translated
10016 into an offset once we have determined the size of preceding data. */
10017 frame_size = ROUND_UP_WORD (get_frame_size ());
10019 leaf = leaf_function_p ();
10021 /* Space for variadic functions. */
10022 offsets->saved_args = current_function_pretend_args_size;
10024 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0);
10026 if (TARGET_ARM)
10028 unsigned int regno;
10030 saved = bit_count (arm_compute_save_reg_mask ()) * 4;
10032 /* We know that SP will be doubleword aligned on entry, and we must
10033 preserve that condition at any subroutine call. We also require the
10034 soft frame pointer to be doubleword aligned. */
10036 if (TARGET_REALLY_IWMMXT)
10038 /* Check for the call-saved iWMMXt registers. */
10039 for (regno = FIRST_IWMMXT_REGNUM;
10040 regno <= LAST_IWMMXT_REGNUM;
10041 regno++)
10042 if (regs_ever_live [regno] && ! call_used_regs [regno])
10043 saved += 8;
10046 func_type = arm_current_func_type ();
10047 if (! IS_VOLATILE (func_type))
10049 /* Space for saved FPA registers. */
10050 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
10051 if (regs_ever_live[regno] && ! call_used_regs[regno])
10052 saved += 12;
10054 /* Space for saved VFP registers. */
10055 if (TARGET_HARD_FLOAT && TARGET_VFP)
10056 saved += arm_get_vfp_saved_size ();
10059 else /* TARGET_THUMB */
10061 saved = bit_count (thumb_compute_save_reg_mask ()) * 4;
10062 if (TARGET_BACKTRACE)
10063 saved += 16;
10066 /* Saved registers include the stack frame. */
10067 offsets->saved_regs = offsets->saved_args + saved;
10068 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
10069 /* A leaf function does not need any stack alignment if it has nothing
10070 on the stack. */
10071 if (leaf && frame_size == 0)
10073 offsets->outgoing_args = offsets->soft_frame;
10074 return offsets;
10077 /* Ensure SFP has the correct alignment. */
10078 if (ARM_DOUBLEWORD_ALIGN
10079 && (offsets->soft_frame & 7))
10080 offsets->soft_frame += 4;
10082 offsets->outgoing_args = offsets->soft_frame + frame_size
10083 + current_function_outgoing_args_size;
10085 if (ARM_DOUBLEWORD_ALIGN)
10087 /* Ensure SP remains doubleword aligned. */
10088 if (offsets->outgoing_args & 7)
10089 offsets->outgoing_args += 4;
10090 gcc_assert (!(offsets->outgoing_args & 7));
10093 return offsets;
10097 /* Calculate the relative offsets for the different stack pointers. Positive
10098 offsets are in the direction of stack growth. */
10100 HOST_WIDE_INT
10101 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
10103 arm_stack_offsets *offsets;
10105 offsets = arm_get_frame_offsets ();
10107 /* OK, now we have enough information to compute the distances.
10108 There must be an entry in these switch tables for each pair
10109 of registers in ELIMINABLE_REGS, even if some of the entries
10110 seem to be redundant or useless. */
10111 switch (from)
10113 case ARG_POINTER_REGNUM:
10114 switch (to)
10116 case THUMB_HARD_FRAME_POINTER_REGNUM:
10117 return 0;
10119 case FRAME_POINTER_REGNUM:
10120 /* This is the reverse of the soft frame pointer
10121 to hard frame pointer elimination below. */
10122 return offsets->soft_frame - offsets->saved_args;
10124 case ARM_HARD_FRAME_POINTER_REGNUM:
10125 /* If there is no stack frame then the hard
10126 frame pointer and the arg pointer coincide. */
10127 if (offsets->frame == offsets->saved_regs)
10128 return 0;
10129 /* FIXME: Not sure about this. Maybe we should always return 0 ? */
10130 return (frame_pointer_needed
10131 && cfun->static_chain_decl != NULL
10132 && ! cfun->machine->uses_anonymous_args) ? 4 : 0;
10134 case STACK_POINTER_REGNUM:
10135 /* If nothing has been pushed on the stack at all
10136 then this will return -4. This *is* correct! */
10137 return offsets->outgoing_args - (offsets->saved_args + 4);
10139 default:
10140 gcc_unreachable ();
10142 gcc_unreachable ();
10144 case FRAME_POINTER_REGNUM:
10145 switch (to)
10147 case THUMB_HARD_FRAME_POINTER_REGNUM:
10148 return 0;
10150 case ARM_HARD_FRAME_POINTER_REGNUM:
10151 /* The hard frame pointer points to the top entry in the
10152 stack frame. The soft frame pointer to the bottom entry
10153 in the stack frame. If there is no stack frame at all,
10154 then they are identical. */
10156 return offsets->frame - offsets->soft_frame;
10158 case STACK_POINTER_REGNUM:
10159 return offsets->outgoing_args - offsets->soft_frame;
10161 default:
10162 gcc_unreachable ();
10164 gcc_unreachable ();
10166 default:
10167 /* You cannot eliminate from the stack pointer.
10168 In theory you could eliminate from the hard frame
10169 pointer to the stack pointer, but this will never
10170 happen, since if a stack frame is not needed the
10171 hard frame pointer will never be used. */
10172 gcc_unreachable ();
10177 /* Generate the prologue instructions for entry into an ARM function. */
10178 void
10179 arm_expand_prologue (void)
10181 int reg;
10182 rtx amount;
10183 rtx insn;
10184 rtx ip_rtx;
10185 unsigned long live_regs_mask;
10186 unsigned long func_type;
10187 int fp_offset = 0;
10188 int saved_pretend_args = 0;
10189 int saved_regs = 0;
10190 unsigned HOST_WIDE_INT args_to_push;
10191 arm_stack_offsets *offsets;
10193 func_type = arm_current_func_type ();
10195 /* Naked functions don't have prologues. */
10196 if (IS_NAKED (func_type))
10197 return;
10199 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
10200 args_to_push = current_function_pretend_args_size;
10202 /* Compute which registers we will have to save onto the stack. */
10203 live_regs_mask = arm_compute_save_reg_mask ();
10205 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
10207 if (frame_pointer_needed)
10209 if (IS_INTERRUPT (func_type))
10211 /* Interrupt functions must not corrupt any registers.
10212 Creating a frame pointer, however, corrupts the IP
10213 register, so we must push it first. */
10214 insn = emit_multi_reg_push (1 << IP_REGNUM);
10216 /* Do not set RTX_FRAME_RELATED_P on this insn.
10217 The dwarf stack unwinding code only wants to see one
10218 stack decrement per function, and this is not it. If
10219 this instruction is labeled as being part of the frame
10220 creation sequence then dwarf2out_frame_debug_expr will
10221 die when it encounters the assignment of IP to FP
10222 later on, since the use of SP here establishes SP as
10223 the CFA register and not IP.
10225 Anyway this instruction is not really part of the stack
10226 frame creation although it is part of the prologue. */
10228 else if (IS_NESTED (func_type))
10230 /* The Static chain register is the same as the IP register
10231 used as a scratch register during stack frame creation.
10232 To get around this we need to find somewhere to store IP
10233 whilst the frame is being created. We try the following
10234 places in order:
10236 1. The last argument register.
10237 2. A slot on the stack above the frame. (This only
10238 works if the function is not a varargs function).
10239 3. Register r3, after pushing the argument registers
10240 onto the stack.
10242 Note - we only need to tell the dwarf2 backend about the SP
10243 adjustment in the second variant; the static chain register
10244 doesn't need to be unwound, as it doesn't contain a value
10245 inherited from the caller. */
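/* Roughly speaking (illustrative only, not the exact emitted rtl), the
   three cases above correspond to prologue fragments such as:

     1.  mov   r3, ip
     2.  str   ip, [sp, #-4]!
     3.  sub   sp, sp, #<pretend args>   (or a push of the arg regs)
         mov   r3, ip

   IP is recovered again further down, once the new frame pointer has
   been established.  */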
10247 if (regs_ever_live[3] == 0)
10249 insn = gen_rtx_REG (SImode, 3);
10250 insn = gen_rtx_SET (SImode, insn, ip_rtx);
10251 insn = emit_insn (insn);
10253 else if (args_to_push == 0)
10255 rtx dwarf;
10256 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
10257 insn = gen_rtx_MEM (SImode, insn);
10258 insn = gen_rtx_SET (VOIDmode, insn, ip_rtx);
10259 insn = emit_insn (insn);
10261 fp_offset = 4;
10263 /* Just tell the dwarf backend that we adjusted SP. */
10264 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10265 gen_rtx_PLUS (SImode, stack_pointer_rtx,
10266 GEN_INT (-fp_offset)));
10267 RTX_FRAME_RELATED_P (insn) = 1;
10268 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
10269 dwarf, REG_NOTES (insn));
10271 else
10273 /* Store the args on the stack. */
10274 if (cfun->machine->uses_anonymous_args)
10275 insn = emit_multi_reg_push
10276 ((0xf0 >> (args_to_push / 4)) & 0xf);
10277 else
10278 insn = emit_insn
10279 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10280 GEN_INT (- args_to_push)));
10282 RTX_FRAME_RELATED_P (insn) = 1;
10284 saved_pretend_args = 1;
10285 fp_offset = args_to_push;
10286 args_to_push = 0;
10288 /* Now reuse r3 to preserve IP. */
10289 insn = gen_rtx_REG (SImode, 3);
10290 insn = gen_rtx_SET (SImode, insn, ip_rtx);
10291 (void) emit_insn (insn);
10295 if (fp_offset)
10297 insn = gen_rtx_PLUS (SImode, stack_pointer_rtx, GEN_INT (fp_offset));
10298 insn = gen_rtx_SET (SImode, ip_rtx, insn);
10300 else
10301 insn = gen_movsi (ip_rtx, stack_pointer_rtx);
10303 insn = emit_insn (insn);
10304 RTX_FRAME_RELATED_P (insn) = 1;
10307 if (args_to_push)
10309 /* Push the argument registers, or reserve space for them. */
10310 if (cfun->machine->uses_anonymous_args)
10311 insn = emit_multi_reg_push
10312 ((0xf0 >> (args_to_push / 4)) & 0xf);
10313 else
10314 insn = emit_insn
10315 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10316 GEN_INT (- args_to_push)));
10317 RTX_FRAME_RELATED_P (insn) = 1;
10320 /* If this is an interrupt service routine, the link register is
10321 going to be pushed, and we are not creating a stack frame (which
10322 would involve an extra push of IP and a pop in the epilogue),
10323 then subtracting four from LR now means that the function return
10324 can be done with a single instruction. */
10325 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
10326 && (live_regs_mask & (1 << LR_REGNUM)) != 0
10327 && ! frame_pointer_needed)
10328 emit_insn (gen_rtx_SET (SImode,
10329 gen_rtx_REG (SImode, LR_REGNUM),
10330 gen_rtx_PLUS (SImode,
10331 gen_rtx_REG (SImode, LR_REGNUM),
10332 GEN_INT (-4))));
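/* Illustratively, because LR has already been adjusted here, the
   epilogue of such an ISR can return with a single instruction of the
   form "ldmfd sp!, {..., pc}^" rather than needing a separate
   "sub lr, lr, #4" before the return.  */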
10334 if (live_regs_mask)
10336 insn = emit_multi_reg_push (live_regs_mask);
10337 saved_regs += bit_count (live_regs_mask) * 4;
10338 RTX_FRAME_RELATED_P (insn) = 1;
10341 if (TARGET_IWMMXT)
10342 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
10343 if (regs_ever_live[reg] && ! call_used_regs [reg])
10345 insn = gen_rtx_PRE_DEC (V2SImode, stack_pointer_rtx);
10346 insn = gen_rtx_MEM (V2SImode, insn);
10347 insn = emit_insn (gen_rtx_SET (VOIDmode, insn,
10348 gen_rtx_REG (V2SImode, reg)));
10349 RTX_FRAME_RELATED_P (insn) = 1;
10350 saved_regs += 8;
10353 if (! IS_VOLATILE (func_type))
10355 int start_reg;
10357 /* Save any floating point call-saved registers used by this
10358 function. */
10359 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
10361 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
10362 if (regs_ever_live[reg] && !call_used_regs[reg])
10364 insn = gen_rtx_PRE_DEC (XFmode, stack_pointer_rtx);
10365 insn = gen_rtx_MEM (XFmode, insn);
10366 insn = emit_insn (gen_rtx_SET (VOIDmode, insn,
10367 gen_rtx_REG (XFmode, reg)));
10368 RTX_FRAME_RELATED_P (insn) = 1;
10369 saved_regs += 12;
10372 else
10374 start_reg = LAST_FPA_REGNUM;
10376 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
10378 if (regs_ever_live[reg] && !call_used_regs[reg])
10380 if (start_reg - reg == 3)
10382 insn = emit_sfm (reg, 4);
10383 RTX_FRAME_RELATED_P (insn) = 1;
10384 saved_regs += 48;
10385 start_reg = reg - 1;
10388 else
10390 if (start_reg != reg)
10392 insn = emit_sfm (reg + 1, start_reg - reg);
10393 RTX_FRAME_RELATED_P (insn) = 1;
10394 saved_regs += (start_reg - reg) * 12;
10396 start_reg = reg - 1;
10400 if (start_reg != reg)
10402 insn = emit_sfm (reg + 1, start_reg - reg);
10403 saved_regs += (start_reg - reg) * 12;
10404 RTX_FRAME_RELATED_P (insn) = 1;
10407 if (TARGET_HARD_FLOAT && TARGET_VFP)
10409 start_reg = FIRST_VFP_REGNUM;
10411 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
10413 if ((!regs_ever_live[reg] || call_used_regs[reg])
10414 && (!regs_ever_live[reg + 1] || call_used_regs[reg + 1]))
10416 if (start_reg != reg)
10417 saved_regs += vfp_emit_fstmx (start_reg,
10418 (reg - start_reg) / 2);
10419 start_reg = reg + 2;
10422 if (start_reg != reg)
10423 saved_regs += vfp_emit_fstmx (start_reg,
10424 (reg - start_reg) / 2);
10428 if (frame_pointer_needed)
10430 /* Create the new frame pointer. */
10431 insn = GEN_INT (-(4 + args_to_push + fp_offset));
10432 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
10433 RTX_FRAME_RELATED_P (insn) = 1;
10435 if (IS_NESTED (func_type))
10437 /* Recover the static chain register. */
10438 if (regs_ever_live [3] == 0
10439 || saved_pretend_args)
10440 insn = gen_rtx_REG (SImode, 3);
10441 else /* if (current_function_pretend_args_size == 0) */
10443 insn = gen_rtx_PLUS (SImode, hard_frame_pointer_rtx,
10444 GEN_INT (4));
10445 insn = gen_rtx_MEM (SImode, insn);
10448 emit_insn (gen_rtx_SET (SImode, ip_rtx, insn));
10449 /* Add a USE to stop propagate_one_insn() from barfing. */
10450 emit_insn (gen_prologue_use (ip_rtx));
10454 offsets = arm_get_frame_offsets ();
10455 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
10457 /* This add can produce multiple insns for a large constant, so we
10458 need to get tricky. */
10459 rtx last = get_last_insn ();
10461 amount = GEN_INT (offsets->saved_args + saved_regs
10462 - offsets->outgoing_args);
10464 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10465 amount));
10468 last = last ? NEXT_INSN (last) : get_insns ();
10469 RTX_FRAME_RELATED_P (last) = 1;
10471 while (last != insn);
10473 /* If the frame pointer is needed, emit a special barrier that
10474 will prevent the scheduler from moving stores to the frame
10475 before the stack adjustment. */
10476 if (frame_pointer_needed)
10477 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
10478 hard_frame_pointer_rtx));
10482 if (flag_pic)
10483 arm_load_pic_register (INVALID_REGNUM);
10485 /* If we are profiling, make sure no instructions are scheduled before
10486 the call to mcount. Similarly if the user has requested no
10487 scheduling in the prolog. */
10488 if (current_function_profile || !TARGET_SCHED_PROLOG)
10489 emit_insn (gen_blockage ());
10491 /* If the link register is being kept alive, with the return address in it,
10492 then make sure that it does not get reused by the ce2 pass. */
10493 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
10495 emit_insn (gen_prologue_use (gen_rtx_REG (SImode, LR_REGNUM)));
10496 cfun->machine->lr_save_eliminated = 1;
10500 /* If CODE is 'd', then X is a condition operand and the instruction
10501 should only be executed if the condition is true.
10502 If CODE is 'D', then X is a condition operand and the instruction
10503 should only be executed if the condition is false: however, if the mode
10504 of the comparison is CCFPEmode, then always execute the instruction -- we
10505 do this because in these circumstances !GE does not necessarily imply LT;
10506 in these cases the instruction pattern will take care to make sure that
10507 an instruction containing %d will follow, thereby undoing the effects of
10508 doing this instruction unconditionally.
10509 If CODE is 'N' then X is a floating point operand that must be negated
10510 before output.
10511 If CODE is 'B' then output a bitwise inverted value of X (a const int).
10512 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
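/* As a hedged illustration of how these codes appear in output
   templates (the template text here is hypothetical): something like
   "add%? %0, %1, %2" uses '?' to print the current condition code when
   the ccfsm has made the insn conditional, while "%m0" would print the
   base register of a memory operand and "%M0" an ldm/stm style register
   range such as {r4-r7}.  */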
10513 void
10514 arm_print_operand (FILE *stream, rtx x, int code)
10516 switch (code)
10518 case '@':
10519 fputs (ASM_COMMENT_START, stream);
10520 return;
10522 case '_':
10523 fputs (user_label_prefix, stream);
10524 return;
10526 case '|':
10527 fputs (REGISTER_PREFIX, stream);
10528 return;
10530 case '?':
10531 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
10533 if (TARGET_THUMB)
10535 output_operand_lossage ("predicated Thumb instruction");
10536 break;
10538 if (current_insn_predicate != NULL)
10540 output_operand_lossage
10541 ("predicated instruction in conditional sequence");
10542 break;
10545 fputs (arm_condition_codes[arm_current_cc], stream);
10547 else if (current_insn_predicate)
10549 enum arm_cond_code code;
10551 if (TARGET_THUMB)
10553 output_operand_lossage ("predicated Thumb instruction");
10554 break;
10557 code = get_arm_condition_code (current_insn_predicate);
10558 fputs (arm_condition_codes[code], stream);
10560 return;
10562 case 'N':
10564 REAL_VALUE_TYPE r;
10565 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
10566 r = REAL_VALUE_NEGATE (r);
10567 fprintf (stream, "%s", fp_const_from_val (&r));
10569 return;
10571 case 'B':
10572 if (GET_CODE (x) == CONST_INT)
10574 HOST_WIDE_INT val;
10575 val = ARM_SIGN_EXTEND (~INTVAL (x));
10576 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
10578 else
10580 putc ('~', stream);
10581 output_addr_const (stream, x);
10583 return;
10585 case 'i':
10586 fprintf (stream, "%s", arithmetic_instr (x, 1));
10587 return;
10589 /* Truncate Cirrus shift counts. */
10590 case 's':
10591 if (GET_CODE (x) == CONST_INT)
10593 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
10594 return;
10596 arm_print_operand (stream, x, 0);
10597 return;
10599 case 'I':
10600 fprintf (stream, "%s", arithmetic_instr (x, 0));
10601 return;
10603 case 'S':
10605 HOST_WIDE_INT val;
10606 const char * shift = shift_op (x, &val);
10608 if (shift)
10610 fprintf (stream, ", %s ", shift_op (x, &val));
10611 if (val == -1)
10612 arm_print_operand (stream, XEXP (x, 1), 0);
10613 else
10614 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
10617 return;
10619 /* An explanation of the 'Q', 'R' and 'H' register operands:
10621 In a pair of registers containing a DI or DF value the 'Q'
10622 operand returns the register number of the register containing
10623 the least significant part of the value. The 'R' operand returns
10624 the register number of the register containing the most
10625 significant part of the value.
10627 The 'H' operand returns the higher of the two register numbers.
10628 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
10629 same as the 'Q' operand, since the most significant part of the
10630 value is held in the lower number register. The reverse is true
10631 on systems where WORDS_BIG_ENDIAN is false.
10633 The purpose of these operands is to distinguish between cases
10634 where the endian-ness of the values is important (for example
10635 when they are added together), and cases where the endian-ness
10636 is irrelevant, but the order of register operations is important.
10637 For example when loading a value from memory into a register
10638 pair, the endian-ness does not matter. Provided that the value
10639 from the lower memory address is put into the lower numbered
10640 register, and the value from the higher address is put into the
10641 higher numbered register, the load will work regardless of whether
10642 the value being loaded is big-wordian or little-wordian. The
10643 order of the two register loads can matter however, if the address
10644 of the memory location is actually held in one of the registers
10645 being overwritten by the load. */
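/* Illustrative example: for a DImode value held in r0/r1 on a target
   where WORDS_BIG_ENDIAN is false, '%Q' prints r0 (least significant
   word), '%R' prints r1 (most significant word) and '%H' prints r1
   (the higher register number).  */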
10646 case 'Q':
10647 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
10649 output_operand_lossage ("invalid operand for code '%c'", code);
10650 return;
10653 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
10654 return;
10656 case 'R':
10657 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
10659 output_operand_lossage ("invalid operand for code '%c'", code);
10660 return;
10663 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
10664 return;
10666 case 'H':
10667 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
10669 output_operand_lossage ("invalid operand for code '%c'", code);
10670 return;
10673 asm_fprintf (stream, "%r", REGNO (x) + 1);
10674 return;
10676 case 'm':
10677 asm_fprintf (stream, "%r",
10678 GET_CODE (XEXP (x, 0)) == REG
10679 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
10680 return;
10682 case 'M':
10683 asm_fprintf (stream, "{%r-%r}",
10684 REGNO (x),
10685 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
10686 return;
10688 case 'd':
10689 /* CONST_TRUE_RTX means always -- that's the default. */
10690 if (x == const_true_rtx)
10691 return;
10693 if (!COMPARISON_P (x))
10695 output_operand_lossage ("invalid operand for code '%c'", code);
10696 return;
10699 fputs (arm_condition_codes[get_arm_condition_code (x)],
10700 stream);
10701 return;
10703 case 'D':
10704 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
10705 want to do that. */
10706 if (x == const_true_rtx)
10708 output_operand_lossage ("instruction never executed");
10709 return;
10711 if (!COMPARISON_P (x))
10713 output_operand_lossage ("invalid operand for code '%c'", code);
10714 return;
10717 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
10718 (get_arm_condition_code (x))],
10719 stream);
10720 return;
10722 /* Cirrus registers can be accessed in a variety of ways:
10723 single floating point (f)
10724 double floating point (d)
10725 32bit integer (fx)
10726 64bit integer (dx). */
10727 case 'W': /* Cirrus register in F mode. */
10728 case 'X': /* Cirrus register in D mode. */
10729 case 'Y': /* Cirrus register in FX mode. */
10730 case 'Z': /* Cirrus register in DX mode. */
10731 gcc_assert (GET_CODE (x) == REG
10732 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
10734 fprintf (stream, "mv%s%s",
10735 code == 'W' ? "f"
10736 : code == 'X' ? "d"
10737 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
10739 return;
10741 /* Print cirrus register in the mode specified by the register's mode. */
10742 case 'V':
10744 int mode = GET_MODE (x);
10746 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
10748 output_operand_lossage ("invalid operand for code '%c'", code);
10749 return;
10752 fprintf (stream, "mv%s%s",
10753 mode == DFmode ? "d"
10754 : mode == SImode ? "fx"
10755 : mode == DImode ? "dx"
10756 : "f", reg_names[REGNO (x)] + 2);
10758 return;
10761 case 'U':
10762 if (GET_CODE (x) != REG
10763 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
10764 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
10765 /* Bad value for wCG register number. */
10767 output_operand_lossage ("invalid operand for code '%c'", code);
10768 return;
10771 else
10772 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
10773 return;
10775 /* Print an iWMMXt control register name. */
10776 case 'w':
10777 if (GET_CODE (x) != CONST_INT
10778 || INTVAL (x) < 0
10779 || INTVAL (x) >= 16)
10780 /* Bad value for wC register number. */
10782 output_operand_lossage ("invalid operand for code '%c'", code);
10783 return;
10786 else
10788 static const char * wc_reg_names [16] =
10790 "wCID", "wCon", "wCSSF", "wCASF",
10791 "wC4", "wC5", "wC6", "wC7",
10792 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
10793 "wC12", "wC13", "wC14", "wC15"
10796 fprintf (stream, wc_reg_names [INTVAL (x)]);
10798 return;
10800 /* Print a VFP double precision register name. */
10801 case 'P':
10803 int mode = GET_MODE (x);
10804 int num;
10806 if (mode != DImode && mode != DFmode)
10808 output_operand_lossage ("invalid operand for code '%c'", code);
10809 return;
10812 if (GET_CODE (x) != REG
10813 || !IS_VFP_REGNUM (REGNO (x)))
10815 output_operand_lossage ("invalid operand for code '%c'", code);
10816 return;
10819 num = REGNO(x) - FIRST_VFP_REGNUM;
10820 if (num & 1)
10822 output_operand_lossage ("invalid operand for code '%c'", code);
10823 return;
10826 fprintf (stream, "d%d", num >> 1);
10828 return;
10830 default:
10831 if (x == 0)
10833 output_operand_lossage ("missing operand");
10834 return;
10837 switch (GET_CODE (x))
10839 case REG:
10840 asm_fprintf (stream, "%r", REGNO (x));
10841 break;
10843 case MEM:
10844 output_memory_reference_mode = GET_MODE (x);
10845 output_address (XEXP (x, 0));
10846 break;
10848 case CONST_DOUBLE:
10849 fprintf (stream, "#%s", fp_immediate_constant (x));
10850 break;
10852 default:
10853 gcc_assert (GET_CODE (x) != NEG);
10854 fputc ('#', stream);
10855 output_addr_const (stream, x);
10856 break;
10861 #ifndef AOF_ASSEMBLER
10862 /* Target hook for assembling integer objects. The ARM version needs to
10863 handle word-sized values specially. */
10864 static bool
10865 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
10867 if (size == UNITS_PER_WORD && aligned_p)
10869 fputs ("\t.word\t", asm_out_file);
10870 output_addr_const (asm_out_file, x);
10872 /* Mark symbols as position independent. We only do this in the
10873 .text segment, not in the .data segment. */
10874 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
10875 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
10877 if (GET_CODE (x) == SYMBOL_REF
10878 && (CONSTANT_POOL_ADDRESS_P (x)
10879 || SYMBOL_REF_LOCAL_P (x)))
10880 fputs ("(GOTOFF)", asm_out_file);
10881 else if (GET_CODE (x) == LABEL_REF)
10882 fputs ("(GOTOFF)", asm_out_file);
10883 else
10884 fputs ("(GOT)", asm_out_file);
10886 fputc ('\n', asm_out_file);
10887 return true;
10890 if (arm_vector_mode_supported_p (GET_MODE (x)))
10892 int i, units;
10894 gcc_assert (GET_CODE (x) == CONST_VECTOR);
10896 units = CONST_VECTOR_NUNITS (x);
10898 switch (GET_MODE (x))
10900 case V2SImode: size = 4; break;
10901 case V4HImode: size = 2; break;
10902 case V8QImode: size = 1; break;
10903 default:
10904 gcc_unreachable ();
10907 for (i = 0; i < units; i++)
10909 rtx elt;
10911 elt = CONST_VECTOR_ELT (x, i);
10912 assemble_integer
10913 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
10916 return true;
10919 return default_assemble_integer (x, size, aligned_p);
10923 /* Add a function to the list of static constructors. */
10925 static void
10926 arm_elf_asm_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
10928 if (!TARGET_AAPCS_BASED)
10930 default_named_section_asm_out_constructor (symbol, priority);
10931 return;
10934 /* Put these in the .init_array section, using a special relocation. */
10935 ctors_section ();
10936 assemble_align (POINTER_SIZE);
10937 fputs ("\t.word\t", asm_out_file);
10938 output_addr_const (asm_out_file, symbol);
10939 fputs ("(target1)\n", asm_out_file);
10941 #endif
10943 /* A finite state machine takes care of noticing whether or not instructions
10944 can be conditionally executed, and thus decrease execution time and code
10945 size by deleting branch instructions. The fsm is controlled by
10946 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
10948 /* The states of the fsm controlling condition codes are:
10949 0: normal, do nothing special
10950 1: make ASM_OUTPUT_OPCODE not output this instruction
10951 2: make ASM_OUTPUT_OPCODE not output this instruction
10952 3: make instructions conditional
10953 4: make instructions conditional
10955 State transitions (state->state by whom under condition):
10956 0 -> 1 final_prescan_insn if the `target' is a label
10957 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
10958 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
10959 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
10960 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
10961 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
10962 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
10963 (the target insn is arm_target_insn).
10965 If the jump clobbers the conditions then we use states 2 and 4.
10967 A similar thing can be done with conditional return insns.
10969 XXX In case the `target' is an unconditional branch, this conditionalising
10970 of the instructions always reduces code size, but not always execution
10971 time. But then, I want to reduce the code size to somewhere near what
10972 /bin/cc produces. */
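/* As an illustrative example of what this fsm achieves, a sequence
   such as

        cmp     r0, #0
        beq     .L1
        add     r1, r1, #1
   .L1:

   can be emitted as

        cmp     r0, #0
        addne   r1, r1, #1

   eliminating both the branch and the label.  */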
10974 /* Returns the index of the ARM condition code string in
10975 `arm_condition_codes'. COMPARISON should be an rtx like
10976 `(eq (...) (...))'. */
10977 static enum arm_cond_code
10978 get_arm_condition_code (rtx comparison)
10980 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
10981 int code;
10982 enum rtx_code comp_code = GET_CODE (comparison);
10984 if (GET_MODE_CLASS (mode) != MODE_CC)
10985 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
10986 XEXP (comparison, 1));
10988 switch (mode)
10990 case CC_DNEmode: code = ARM_NE; goto dominance;
10991 case CC_DEQmode: code = ARM_EQ; goto dominance;
10992 case CC_DGEmode: code = ARM_GE; goto dominance;
10993 case CC_DGTmode: code = ARM_GT; goto dominance;
10994 case CC_DLEmode: code = ARM_LE; goto dominance;
10995 case CC_DLTmode: code = ARM_LT; goto dominance;
10996 case CC_DGEUmode: code = ARM_CS; goto dominance;
10997 case CC_DGTUmode: code = ARM_HI; goto dominance;
10998 case CC_DLEUmode: code = ARM_LS; goto dominance;
10999 case CC_DLTUmode: code = ARM_CC;
11001 dominance:
11002 gcc_assert (comp_code == EQ || comp_code == NE);
11004 if (comp_code == EQ)
11005 return ARM_INVERSE_CONDITION_CODE (code);
11006 return code;
11008 case CC_NOOVmode:
11009 switch (comp_code)
11011 case NE: return ARM_NE;
11012 case EQ: return ARM_EQ;
11013 case GE: return ARM_PL;
11014 case LT: return ARM_MI;
11015 default: gcc_unreachable ();
11018 case CC_Zmode:
11019 switch (comp_code)
11021 case NE: return ARM_NE;
11022 case EQ: return ARM_EQ;
11023 default: gcc_unreachable ();
11026 case CC_Nmode:
11027 switch (comp_code)
11029 case NE: return ARM_MI;
11030 case EQ: return ARM_PL;
11031 default: gcc_unreachable ();
11034 case CCFPEmode:
11035 case CCFPmode:
11036 /* These encodings assume that AC=1 in the FPA system control
11037 byte. This allows us to handle all cases except UNEQ and
11038 LTGT. */
11039 switch (comp_code)
11041 case GE: return ARM_GE;
11042 case GT: return ARM_GT;
11043 case LE: return ARM_LS;
11044 case LT: return ARM_MI;
11045 case NE: return ARM_NE;
11046 case EQ: return ARM_EQ;
11047 case ORDERED: return ARM_VC;
11048 case UNORDERED: return ARM_VS;
11049 case UNLT: return ARM_LT;
11050 case UNLE: return ARM_LE;
11051 case UNGT: return ARM_HI;
11052 case UNGE: return ARM_PL;
11053 /* UNEQ and LTGT do not have a representation. */
11054 case UNEQ: /* Fall through. */
11055 case LTGT: /* Fall through. */
11056 default: gcc_unreachable ();
11059 case CC_SWPmode:
11060 switch (comp_code)
11062 case NE: return ARM_NE;
11063 case EQ: return ARM_EQ;
11064 case GE: return ARM_LE;
11065 case GT: return ARM_LT;
11066 case LE: return ARM_GE;
11067 case LT: return ARM_GT;
11068 case GEU: return ARM_LS;
11069 case GTU: return ARM_CC;
11070 case LEU: return ARM_CS;
11071 case LTU: return ARM_HI;
11072 default: gcc_unreachable ();
11075 case CC_Cmode:
11076 switch (comp_code)
11078 case LTU: return ARM_CS;
11079 case GEU: return ARM_CC;
11080 default: gcc_unreachable ();
11083 case CCmode:
11084 switch (comp_code)
11086 case NE: return ARM_NE;
11087 case EQ: return ARM_EQ;
11088 case GE: return ARM_GE;
11089 case GT: return ARM_GT;
11090 case LE: return ARM_LE;
11091 case LT: return ARM_LT;
11092 case GEU: return ARM_CS;
11093 case GTU: return ARM_HI;
11094 case LEU: return ARM_LS;
11095 case LTU: return ARM_CC;
11096 default: gcc_unreachable ();
11099 default: gcc_unreachable ();
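/* For instance (illustrative): a comparison such as
   (ge (reg:CC_NOOV cc) (const_int 0)) maps to ARM_PL above, because in
   CC_NOOVmode the overflow flag cannot be relied upon, so GE degenerates
   into a simple sign test.  */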
11103 void
11104 arm_final_prescan_insn (rtx insn)
11106 /* BODY will hold the body of INSN. */
11107 rtx body = PATTERN (insn);
11109 /* This will be 1 if trying to repeat the trick, and things need to be
11110 reversed if it appears to fail. */
11111 int reverse = 0;
11113 /* JUMP_CLOBBERS will be one if the condition codes are clobbered when
11114 the branch is taken, even if the rtl suggests otherwise. It also
11115 means that we have to grub around within the jump expression to find
11116 out what the conditions are when the jump isn't taken. */
11117 int jump_clobbers = 0;
11119 /* If we start with a return insn, we only succeed if we find another one. */
11120 int seeking_return = 0;
11122 /* START_INSN will hold the insn from where we start looking. This is the
11123 first insn after the following code_label if REVERSE is true. */
11124 rtx start_insn = insn;
11126 /* If in state 4, check if the target branch is reached, in order to
11127 change back to state 0. */
11128 if (arm_ccfsm_state == 4)
11130 if (insn == arm_target_insn)
11132 arm_target_insn = NULL;
11133 arm_ccfsm_state = 0;
11135 return;
11138 /* If in state 3, it is possible to repeat the trick, if this insn is an
11139 unconditional branch to a label, and immediately following this branch
11140 is the previous target label which is only used once, and the label this
11141 branch jumps to is not too far off. */
11142 if (arm_ccfsm_state == 3)
11144 if (simplejump_p (insn))
11146 start_insn = next_nonnote_insn (start_insn);
11147 if (GET_CODE (start_insn) == BARRIER)
11149 /* XXX Isn't this always a barrier? */
11150 start_insn = next_nonnote_insn (start_insn);
11152 if (GET_CODE (start_insn) == CODE_LABEL
11153 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
11154 && LABEL_NUSES (start_insn) == 1)
11155 reverse = TRUE;
11156 else
11157 return;
11159 else if (GET_CODE (body) == RETURN)
11161 start_insn = next_nonnote_insn (start_insn);
11162 if (GET_CODE (start_insn) == BARRIER)
11163 start_insn = next_nonnote_insn (start_insn);
11164 if (GET_CODE (start_insn) == CODE_LABEL
11165 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
11166 && LABEL_NUSES (start_insn) == 1)
11168 reverse = TRUE;
11169 seeking_return = 1;
11171 else
11172 return;
11174 else
11175 return;
11178 gcc_assert (!arm_ccfsm_state || reverse);
11179 if (GET_CODE (insn) != JUMP_INSN)
11180 return;
11182 /* This jump might be paralleled with a clobber of the condition codes;
11183 the jump should always come first. */
11184 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
11185 body = XVECEXP (body, 0, 0);
11187 if (reverse
11188 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
11189 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
11191 int insns_skipped;
11192 int fail = FALSE, succeed = FALSE;
11193 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
11194 int then_not_else = TRUE;
11195 rtx this_insn = start_insn, label = 0;
11197 /* If the jump cannot be done with one instruction, we cannot
11198 conditionally execute the instruction in the inverse case. */
11199 if (get_attr_conds (insn) == CONDS_JUMP_CLOB)
11201 jump_clobbers = 1;
11202 return;
11205 /* Register the insn jumped to. */
11206 if (reverse)
11208 if (!seeking_return)
11209 label = XEXP (SET_SRC (body), 0);
11211 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
11212 label = XEXP (XEXP (SET_SRC (body), 1), 0);
11213 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
11215 label = XEXP (XEXP (SET_SRC (body), 2), 0);
11216 then_not_else = FALSE;
11218 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
11219 seeking_return = 1;
11220 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
11222 seeking_return = 1;
11223 then_not_else = FALSE;
11225 else
11226 gcc_unreachable ();
11228 /* See how many insns this branch skips, and what kind of insns. If all
11229 insns are okay, and the label or unconditional branch to the same
11230 label is not too far away, succeed. */
11231 for (insns_skipped = 0;
11232 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
11234 rtx scanbody;
11236 this_insn = next_nonnote_insn (this_insn);
11237 if (!this_insn)
11238 break;
11240 switch (GET_CODE (this_insn))
11242 case CODE_LABEL:
11243 /* Succeed if it is the target label, otherwise fail since
11244 control falls in from somewhere else. */
11245 if (this_insn == label)
11247 if (jump_clobbers)
11249 arm_ccfsm_state = 2;
11250 this_insn = next_nonnote_insn (this_insn);
11252 else
11253 arm_ccfsm_state = 1;
11254 succeed = TRUE;
11256 else
11257 fail = TRUE;
11258 break;
11260 case BARRIER:
11261 /* Succeed if the following insn is the target label.
11262 Otherwise fail.
11263 If return insns are used then the last insn in a function
11264 will be a barrier. */
11265 this_insn = next_nonnote_insn (this_insn);
11266 if (this_insn && this_insn == label)
11268 if (jump_clobbers)
11270 arm_ccfsm_state = 2;
11271 this_insn = next_nonnote_insn (this_insn);
11273 else
11274 arm_ccfsm_state = 1;
11275 succeed = TRUE;
11277 else
11278 fail = TRUE;
11279 break;
11281 case CALL_INSN:
11282 /* The AAPCS says that conditional calls should not be
11283 used since they make interworking inefficient (the
11284 linker can't transform BL<cond> into BLX). That's
11285 only a problem if the machine has BLX. */
11286 if (arm_arch5)
11288 fail = TRUE;
11289 break;
11292 /* Succeed if the following insn is the target label, or
11293 if the following two insns are a barrier and the
11294 target label. */
11295 this_insn = next_nonnote_insn (this_insn);
11296 if (this_insn && GET_CODE (this_insn) == BARRIER)
11297 this_insn = next_nonnote_insn (this_insn);
11299 if (this_insn && this_insn == label
11300 && insns_skipped < max_insns_skipped)
11302 if (jump_clobbers)
11304 arm_ccfsm_state = 2;
11305 this_insn = next_nonnote_insn (this_insn);
11307 else
11308 arm_ccfsm_state = 1;
11309 succeed = TRUE;
11311 else
11312 fail = TRUE;
11313 break;
11315 case JUMP_INSN:
11316 /* If this is an unconditional branch to the same label, succeed.
11317 If it is to another label, do nothing. If it is conditional,
11318 fail. */
11319 /* XXX Probably, the tests for SET and the PC are
11320 unnecessary. */
11322 scanbody = PATTERN (this_insn);
11323 if (GET_CODE (scanbody) == SET
11324 && GET_CODE (SET_DEST (scanbody)) == PC)
11326 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
11327 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
11329 arm_ccfsm_state = 2;
11330 succeed = TRUE;
11332 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
11333 fail = TRUE;
11335 /* Fail if a conditional return is undesirable (e.g. on a
11336 StrongARM), but still allow this if optimizing for size. */
11337 else if (GET_CODE (scanbody) == RETURN
11338 && !use_return_insn (TRUE, NULL)
11339 && !optimize_size)
11340 fail = TRUE;
11341 else if (GET_CODE (scanbody) == RETURN
11342 && seeking_return)
11344 arm_ccfsm_state = 2;
11345 succeed = TRUE;
11347 else if (GET_CODE (scanbody) == PARALLEL)
11349 switch (get_attr_conds (this_insn))
11351 case CONDS_NOCOND:
11352 break;
11353 default:
11354 fail = TRUE;
11355 break;
11358 else
11359 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
11361 break;
11363 case INSN:
11364 /* Instructions using or affecting the condition codes make it
11365 fail. */
11366 scanbody = PATTERN (this_insn);
11367 if (!(GET_CODE (scanbody) == SET
11368 || GET_CODE (scanbody) == PARALLEL)
11369 || get_attr_conds (this_insn) != CONDS_NOCOND)
11370 fail = TRUE;
11372 /* A conditional Cirrus instruction must be followed by
11373 a non-Cirrus instruction. However, since we
11374 conditionalize instructions in this function and by
11375 the time we get here we can't add instructions
11376 (nops), because shorten_branches() has already been
11377 called, we will disable conditionalizing Cirrus
11378 instructions to be safe. */
11379 if (GET_CODE (scanbody) != USE
11380 && GET_CODE (scanbody) != CLOBBER
11381 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
11382 fail = TRUE;
11383 break;
11385 default:
11386 break;
11389 if (succeed)
11391 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
11392 arm_target_label = CODE_LABEL_NUMBER (label);
11393 else
11395 gcc_assert (seeking_return || arm_ccfsm_state == 2);
11397 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
11399 this_insn = next_nonnote_insn (this_insn);
11400 gcc_assert (!this_insn
11401 || (GET_CODE (this_insn) != BARRIER
11402 && GET_CODE (this_insn) != CODE_LABEL));
11404 if (!this_insn)
11406 /* Oh, dear! We ran off the end... give up. */
11407 recog (PATTERN (insn), insn, NULL);
11408 arm_ccfsm_state = 0;
11409 arm_target_insn = NULL;
11410 return;
11412 arm_target_insn = this_insn;
11414 if (jump_clobbers)
11416 gcc_assert (!reverse);
11417 arm_current_cc =
11418 get_arm_condition_code (XEXP (XEXP (XEXP (SET_SRC (body),
11419 0), 0), 1));
11420 if (GET_CODE (XEXP (XEXP (SET_SRC (body), 0), 0)) == AND)
11421 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
11422 if (GET_CODE (XEXP (SET_SRC (body), 0)) == NE)
11423 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
11425 else
11427 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
11428 what it was. */
11429 if (!reverse)
11430 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body),
11431 0));
11434 if (reverse || then_not_else)
11435 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
11438 /* Restore recog_data (getting the attributes of other insns can
11439 destroy this array, but final.c assumes that it remains intact
11440 across this call; since the insn has been recognized already we
11441 call recog direct). */
11442 recog (PATTERN (insn), insn, NULL);
11446 /* Returns true if REGNO is a valid register
11447 for holding a quantity of type MODE. */
11449 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
11451 if (GET_MODE_CLASS (mode) == MODE_CC)
11452 return regno == CC_REGNUM || regno == VFPCC_REGNUM;
11454 if (TARGET_THUMB)
11455 /* For the Thumb we only allow values bigger than SImode in
11456 registers 0 - 6, so that there is always a second low
11457 register available to hold the upper part of the value.
11458 We probably ought to ensure that the register is the
11459 start of an even numbered register pair. */
11460 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
11462 if (IS_CIRRUS_REGNUM (regno))
11463 /* We have outlawed SI values in Cirrus registers because they
11464 reside in the lower 32 bits, but SF values reside in the
11465 upper 32 bits. This causes gcc all sorts of grief. We can't
11466 even split the registers into pairs because Cirrus SI values
11467 get sign extended to 64bits-- aldyh. */
11468 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
11470 if (IS_VFP_REGNUM (regno))
11472 if (mode == SFmode || mode == SImode)
11473 return TRUE;
11475 /* DFmode values are only valid in even register pairs. */
11476 if (mode == DFmode)
11477 return ((regno - FIRST_VFP_REGNUM) & 1) == 0;
11478 return FALSE;
11481 if (IS_IWMMXT_GR_REGNUM (regno))
11482 return mode == SImode;
11484 if (IS_IWMMXT_REGNUM (regno))
11485 return VALID_IWMMXT_REG_MODE (mode);
11487 /* We allow any value to be stored in the general registers.
11488 Restrict doubleword quantities to even register pairs so that we can
11489 use ldrd. */
11490 if (regno <= LAST_ARM_REGNUM)
11491 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
11493 if ( regno == FRAME_POINTER_REGNUM
11494 || regno == ARG_POINTER_REGNUM)
11495 /* We only allow integers in the fake hard registers. */
11496 return GET_MODE_CLASS (mode) == MODE_INT;
11498 /* The only registers left are the FPA registers
11499 which we only allow to hold FP values. */
11500 return GET_MODE_CLASS (mode) == MODE_FLOAT
11501 && regno >= FIRST_FPA_REGNUM
11502 && regno <= LAST_FPA_REGNUM;
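/* Illustrative examples of the rules above: a DImode value may live in
   r0, but when TARGET_LDRD it may not start in an odd core register
   such as r1, so that ldrd/strd can always be used; SImode is rejected
   for the Cirrus registers altogether.  */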
11506 arm_regno_class (int regno)
11508 if (TARGET_THUMB)
11510 if (regno == STACK_POINTER_REGNUM)
11511 return STACK_REG;
11512 if (regno == CC_REGNUM)
11513 return CC_REG;
11514 if (regno < 8)
11515 return LO_REGS;
11516 return HI_REGS;
11519 if ( regno <= LAST_ARM_REGNUM
11520 || regno == FRAME_POINTER_REGNUM
11521 || regno == ARG_POINTER_REGNUM)
11522 return GENERAL_REGS;
11524 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
11525 return NO_REGS;
11527 if (IS_CIRRUS_REGNUM (regno))
11528 return CIRRUS_REGS;
11530 if (IS_VFP_REGNUM (regno))
11531 return VFP_REGS;
11533 if (IS_IWMMXT_REGNUM (regno))
11534 return IWMMXT_REGS;
11536 if (IS_IWMMXT_GR_REGNUM (regno))
11537 return IWMMXT_GR_REGS;
11539 return FPA_REGS;
11542 /* Handle a special case when computing the offset
11543 of an argument from the frame pointer. */
11545 arm_debugger_arg_offset (int value, rtx addr)
11547 rtx insn;
11549 /* We are only interested if dbxout_parms() failed to compute the offset. */
11550 if (value != 0)
11551 return 0;
11553 /* We can only cope with the case where the address is held in a register. */
11554 if (GET_CODE (addr) != REG)
11555 return 0;
11557 /* If we are using the frame pointer to point at the argument, then
11558 an offset of 0 is correct. */
11559 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
11560 return 0;
11562 /* If we are using the stack pointer to point at the
11563 argument, then an offset of 0 is correct. */
11564 if ((TARGET_THUMB || !frame_pointer_needed)
11565 && REGNO (addr) == SP_REGNUM)
11566 return 0;
11568 /* Oh dear. The argument is pointed to by a register rather
11569 than being held in a register, or being stored at a known
11570 offset from the frame pointer. Since GDB only understands
11571 those two kinds of argument we must translate the address
11572 held in the register into an offset from the frame pointer.
11573 We do this by searching through the insns for the function
11574 looking to see where this register gets its value. If the
11575 register is initialized from the frame pointer plus an offset
11576 then we are in luck and we can continue, otherwise we give up.
11578 This code is exercised by producing debugging information
11579 for a function with arguments like this:
11581 double func (double a, double b, int c, double d) {return d;}
11583 Without this code the stab for parameter 'd' will be set to
11584 an offset of 0 from the frame pointer, rather than 8. */
11586 /* The if() statement says:
11588 If the insn is a normal instruction
11589 and if the insn is setting the value in a register
11590 and if the register being set is the register holding the address of the argument
11591 and if the address is computed by an addition
11592 that involves adding to a register
11593 which is the frame pointer
11594 a constant integer
11596 then... */
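/* In rtl terms this amounts (approximately) to looking for an insn of
   the form
     (set (reg Rn) (plus (reg hard-frame-pointer) (const_int OFFSET)))
   where Rn is the register that holds the argument's address.  */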
11598 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
11600 if ( GET_CODE (insn) == INSN
11601 && GET_CODE (PATTERN (insn)) == SET
11602 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
11603 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
11604 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
11605 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
11606 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
11609 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
11611 break;
11615 if (value == 0)
11617 debug_rtx (addr);
11618 warning (0, "unable to compute real location of stacked parameter");
11619 value = 8; /* XXX magic hack */
11622 return value;
11625 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
11626 do \
11628 if ((MASK) & insn_flags) \
11629 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), \
11630 BUILT_IN_MD, NULL, NULL_TREE); \
11632 while (0)
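/* For example (illustrative), a call such as
     def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void,
                   ARM_BUILTIN_WZERO);
   registers the builtin only when FL_IWMMXT is present in insn_flags,
   so the iWMMXt builtins are simply absent on cores without the
   extension.  */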
11634 struct builtin_description
11636 const unsigned int mask;
11637 const enum insn_code icode;
11638 const char * const name;
11639 const enum arm_builtins code;
11640 const enum rtx_code comparison;
11641 const unsigned int flag;
11644 static const struct builtin_description bdesc_2arg[] =
11646 #define IWMMXT_BUILTIN(code, string, builtin) \
11647 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
11648 ARM_BUILTIN_##builtin, 0, 0 },
11650 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
11651 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
11652 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
11653 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
11654 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
11655 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
11656 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
11657 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
11658 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
11659 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
11660 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
11661 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
11662 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
11663 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
11664 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
11665 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
11666 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
11667 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
11668 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
11669 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
11670 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
11671 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
11672 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
11673 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
11674 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
11675 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
11676 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
11677 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
11678 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
11679 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
11680 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
11681 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
11682 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
11683 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
11684 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
11685 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
11686 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
11687 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
11688 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
11689 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
11690 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
11691 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
11692 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
11693 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
11694 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
11695 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
11696 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
11697 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
11698 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
11699 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
11700 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
11701 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
11702 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
11703 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
11704 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
11705 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
11706 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
11707 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
11709 #define IWMMXT_BUILTIN2(code, builtin) \
11710 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, 0, 0 },
11712 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
11713 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
11714 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
11715 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
11716 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
11717 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
11718 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
11719 IWMMXT_BUILTIN2 (ashlv4hi3, WSLLHI)
11720 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
11721 IWMMXT_BUILTIN2 (ashlv2si3, WSLLWI)
11722 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
11723 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
11724 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
11725 IWMMXT_BUILTIN2 (lshrv4hi3, WSRLHI)
11726 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
11727 IWMMXT_BUILTIN2 (lshrv2si3, WSRLWI)
11728 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
11729 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
11730 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
11731 IWMMXT_BUILTIN2 (ashrv4hi3, WSRAHI)
11732 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
11733 IWMMXT_BUILTIN2 (ashrv2si3, WSRAWI)
11734 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
11735 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
11736 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
11737 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
11738 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
11739 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
11740 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
11741 IWMMXT_BUILTIN2 (rordi3, WRORDI)
11742 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
11743 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
11746 static const struct builtin_description bdesc_1arg[] =
11748 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
11749 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
11750 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
11751 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
11752 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
11753 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
11754 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
11755 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
11756 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
11757 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
11758 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
11759 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
11760 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
11761 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
11762 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
11763 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
11764 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
11765 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
11768 /* Set up all the iWMMXt builtins. This is
11769 not called if TARGET_IWMMXT is zero. */
11771 static void
11772 arm_init_iwmmxt_builtins (void)
11774 const struct builtin_description * d;
11775 size_t i;
11776 tree endlink = void_list_node;
11778 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
11779 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
11780 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
11782 tree int_ftype_int
11783 = build_function_type (integer_type_node,
11784 tree_cons (NULL_TREE, integer_type_node, endlink));
11785 tree v8qi_ftype_v8qi_v8qi_int
11786 = build_function_type (V8QI_type_node,
11787 tree_cons (NULL_TREE, V8QI_type_node,
11788 tree_cons (NULL_TREE, V8QI_type_node,
11789 tree_cons (NULL_TREE,
11790 integer_type_node,
11791 endlink))));
11792 tree v4hi_ftype_v4hi_int
11793 = build_function_type (V4HI_type_node,
11794 tree_cons (NULL_TREE, V4HI_type_node,
11795 tree_cons (NULL_TREE, integer_type_node,
11796 endlink)));
11797 tree v2si_ftype_v2si_int
11798 = build_function_type (V2SI_type_node,
11799 tree_cons (NULL_TREE, V2SI_type_node,
11800 tree_cons (NULL_TREE, integer_type_node,
11801 endlink)));
11802 tree v2si_ftype_di_di
11803 = build_function_type (V2SI_type_node,
11804 tree_cons (NULL_TREE, long_long_integer_type_node,
11805 tree_cons (NULL_TREE, long_long_integer_type_node,
11806 endlink)));
11807 tree di_ftype_di_int
11808 = build_function_type (long_long_integer_type_node,
11809 tree_cons (NULL_TREE, long_long_integer_type_node,
11810 tree_cons (NULL_TREE, integer_type_node,
11811 endlink)));
11812 tree di_ftype_di_int_int
11813 = build_function_type (long_long_integer_type_node,
11814 tree_cons (NULL_TREE, long_long_integer_type_node,
11815 tree_cons (NULL_TREE, integer_type_node,
11816 tree_cons (NULL_TREE,
11817 integer_type_node,
11818 endlink))));
11819 tree int_ftype_v8qi
11820 = build_function_type (integer_type_node,
11821 tree_cons (NULL_TREE, V8QI_type_node,
11822 endlink));
11823 tree int_ftype_v4hi
11824 = build_function_type (integer_type_node,
11825 tree_cons (NULL_TREE, V4HI_type_node,
11826 endlink));
11827 tree int_ftype_v2si
11828 = build_function_type (integer_type_node,
11829 tree_cons (NULL_TREE, V2SI_type_node,
11830 endlink));
11831 tree int_ftype_v8qi_int
11832 = build_function_type (integer_type_node,
11833 tree_cons (NULL_TREE, V8QI_type_node,
11834 tree_cons (NULL_TREE, integer_type_node,
11835 endlink)));
11836 tree int_ftype_v4hi_int
11837 = build_function_type (integer_type_node,
11838 tree_cons (NULL_TREE, V4HI_type_node,
11839 tree_cons (NULL_TREE, integer_type_node,
11840 endlink)));
11841 tree int_ftype_v2si_int
11842 = build_function_type (integer_type_node,
11843 tree_cons (NULL_TREE, V2SI_type_node,
11844 tree_cons (NULL_TREE, integer_type_node,
11845 endlink)));
11846 tree v8qi_ftype_v8qi_int_int
11847 = build_function_type (V8QI_type_node,
11848 tree_cons (NULL_TREE, V8QI_type_node,
11849 tree_cons (NULL_TREE, integer_type_node,
11850 tree_cons (NULL_TREE,
11851 integer_type_node,
11852 endlink))));
11853 tree v4hi_ftype_v4hi_int_int
11854 = build_function_type (V4HI_type_node,
11855 tree_cons (NULL_TREE, V4HI_type_node,
11856 tree_cons (NULL_TREE, integer_type_node,
11857 tree_cons (NULL_TREE,
11858 integer_type_node,
11859 endlink))));
11860 tree v2si_ftype_v2si_int_int
11861 = build_function_type (V2SI_type_node,
11862 tree_cons (NULL_TREE, V2SI_type_node,
11863 tree_cons (NULL_TREE, integer_type_node,
11864 tree_cons (NULL_TREE,
11865 integer_type_node,
11866 endlink))));
11867 /* Miscellaneous. */
11868 tree v8qi_ftype_v4hi_v4hi
11869 = build_function_type (V8QI_type_node,
11870 tree_cons (NULL_TREE, V4HI_type_node,
11871 tree_cons (NULL_TREE, V4HI_type_node,
11872 endlink)));
11873 tree v4hi_ftype_v2si_v2si
11874 = build_function_type (V4HI_type_node,
11875 tree_cons (NULL_TREE, V2SI_type_node,
11876 tree_cons (NULL_TREE, V2SI_type_node,
11877 endlink)));
11878 tree v2si_ftype_v4hi_v4hi
11879 = build_function_type (V2SI_type_node,
11880 tree_cons (NULL_TREE, V4HI_type_node,
11881 tree_cons (NULL_TREE, V4HI_type_node,
11882 endlink)));
11883 tree v2si_ftype_v8qi_v8qi
11884 = build_function_type (V2SI_type_node,
11885 tree_cons (NULL_TREE, V8QI_type_node,
11886 tree_cons (NULL_TREE, V8QI_type_node,
11887 endlink)));
11888 tree v4hi_ftype_v4hi_di
11889 = build_function_type (V4HI_type_node,
11890 tree_cons (NULL_TREE, V4HI_type_node,
11891 tree_cons (NULL_TREE,
11892 long_long_integer_type_node,
11893 endlink)));
11894 tree v2si_ftype_v2si_di
11895 = build_function_type (V2SI_type_node,
11896 tree_cons (NULL_TREE, V2SI_type_node,
11897 tree_cons (NULL_TREE,
11898 long_long_integer_type_node,
11899 endlink)));
11900 tree void_ftype_int_int
11901 = build_function_type (void_type_node,
11902 tree_cons (NULL_TREE, integer_type_node,
11903 tree_cons (NULL_TREE, integer_type_node,
11904 endlink)));
11905 tree di_ftype_void
11906 = build_function_type (long_long_unsigned_type_node, endlink);
11907 tree di_ftype_v8qi
11908 = build_function_type (long_long_integer_type_node,
11909 tree_cons (NULL_TREE, V8QI_type_node,
11910 endlink));
11911 tree di_ftype_v4hi
11912 = build_function_type (long_long_integer_type_node,
11913 tree_cons (NULL_TREE, V4HI_type_node,
11914 endlink));
11915 tree di_ftype_v2si
11916 = build_function_type (long_long_integer_type_node,
11917 tree_cons (NULL_TREE, V2SI_type_node,
11918 endlink));
11919 tree v2si_ftype_v4hi
11920 = build_function_type (V2SI_type_node,
11921 tree_cons (NULL_TREE, V4HI_type_node,
11922 endlink));
11923 tree v4hi_ftype_v8qi
11924 = build_function_type (V4HI_type_node,
11925 tree_cons (NULL_TREE, V8QI_type_node,
11926 endlink));
11928 tree di_ftype_di_v4hi_v4hi
11929 = build_function_type (long_long_unsigned_type_node,
11930 tree_cons (NULL_TREE,
11931 long_long_unsigned_type_node,
11932 tree_cons (NULL_TREE, V4HI_type_node,
11933 tree_cons (NULL_TREE,
11934 V4HI_type_node,
11935 endlink))));
11937 tree di_ftype_v4hi_v4hi
11938 = build_function_type (long_long_unsigned_type_node,
11939 tree_cons (NULL_TREE, V4HI_type_node,
11940 tree_cons (NULL_TREE, V4HI_type_node,
11941 endlink)));
11943 /* Normal vector binops. */
11944 tree v8qi_ftype_v8qi_v8qi
11945 = build_function_type (V8QI_type_node,
11946 tree_cons (NULL_TREE, V8QI_type_node,
11947 tree_cons (NULL_TREE, V8QI_type_node,
11948 endlink)));
11949 tree v4hi_ftype_v4hi_v4hi
11950 = build_function_type (V4HI_type_node,
11951 tree_cons (NULL_TREE, V4HI_type_node,
11952 tree_cons (NULL_TREE, V4HI_type_node,
11953 endlink)));
11954 tree v2si_ftype_v2si_v2si
11955 = build_function_type (V2SI_type_node,
11956 tree_cons (NULL_TREE, V2SI_type_node,
11957 tree_cons (NULL_TREE, V2SI_type_node,
11958 endlink)));
11959 tree di_ftype_di_di
11960 = build_function_type (long_long_unsigned_type_node,
11961 tree_cons (NULL_TREE, long_long_unsigned_type_node,
11962 tree_cons (NULL_TREE,
11963 long_long_unsigned_type_node,
11964 endlink)));
11966 /* Add all builtins that are more or less simple operations on two
11967 operands. */
11968 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
11970 /* Use one of the operands; the target can have a different mode for
11971 mask-generating compares. */
11972 enum machine_mode mode;
11973 tree type;
11975 if (d->name == 0)
11976 continue;
11978 mode = insn_data[d->icode].operand[1].mode;
11980 switch (mode)
11982 case V8QImode:
11983 type = v8qi_ftype_v8qi_v8qi;
11984 break;
11985 case V4HImode:
11986 type = v4hi_ftype_v4hi_v4hi;
11987 break;
11988 case V2SImode:
11989 type = v2si_ftype_v2si_v2si;
11990 break;
11991 case DImode:
11992 type = di_ftype_di_di;
11993 break;
11995 default:
11996 gcc_unreachable ();
11999 def_mbuiltin (d->mask, d->name, type, d->code);
12002 /* Add the remaining MMX insns with somewhat more complicated types. */
12003 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
12004 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
12005 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
12007 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
12008 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
12009 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
12010 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
12011 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
12012 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
12014 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
12015 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
12016 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
12017 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
12018 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
12019 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
12021 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
12022 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
12023 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
12024 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
12025 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
12026 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
12028 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
12029 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
12030 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
12031 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
12032 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
12033 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
12035 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
12037 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
12038 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
12039 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
12040 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
12042 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
12043 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
12044 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
12045 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
12046 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
12047 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
12048 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
12049 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
12050 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
12052 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
12053 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
12054 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
12056 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
12057 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
12058 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
12060 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
12061 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
12062 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
12063 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
12064 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
12065 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
12067 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
12068 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
12069 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
12070 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
12071 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
12072 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
12073 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
12074 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
12075 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
12076 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
12077 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
12078 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
12080 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
12081 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
12082 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
12083 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
12085 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
12086 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
12087 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
12088 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
12089 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
12090 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
12091 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
12094 static void
12095 arm_init_builtins (void)
12097 if (TARGET_REALLY_IWMMXT)
12098 arm_init_iwmmxt_builtins ();
12101 /* Errors in the source file can cause expand_expr to return const0_rtx
12102 where we expect a vector. To avoid crashing, use one of the vector
12103 clear instructions. */
12105 static rtx
12106 safe_vector_operand (rtx x, enum machine_mode mode)
12108 if (x != const0_rtx)
12109 return x;
12110 x = gen_reg_rtx (mode);
12112 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
12113 : gen_rtx_SUBREG (DImode, x, 0)));
12114 return x;
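/* For example, if an earlier error left the operand for a V8QImode
   argument as const0_rtx, safe_vector_operand substitutes a fresh pseudo
   cleared through the iwmmxt_clrdi pattern (applied to a DImode subreg of
   the vector register), so expansion can continue and only the real
   diagnostic is reported.  */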
12117 /* Subroutine of arm_expand_builtin to take care of binop insns. */
12119 static rtx
12120 arm_expand_binop_builtin (enum insn_code icode,
12121 tree arglist, rtx target)
12123 rtx pat;
12124 tree arg0 = TREE_VALUE (arglist);
12125 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12126 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12127 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12128 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12129 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12130 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12132 if (VECTOR_MODE_P (mode0))
12133 op0 = safe_vector_operand (op0, mode0);
12134 if (VECTOR_MODE_P (mode1))
12135 op1 = safe_vector_operand (op1, mode1);
12137 if (! target
12138 || GET_MODE (target) != tmode
12139 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12140 target = gen_reg_rtx (tmode);
12142 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
12144 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12145 op0 = copy_to_mode_reg (mode0, op0);
12146 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12147 op1 = copy_to_mode_reg (mode1, op1);
12149 pat = GEN_FCN (icode) (target, op0, op1);
12150 if (! pat)
12151 return 0;
12152 emit_insn (pat);
12153 return target;
12156 /* Subroutine of arm_expand_builtin to take care of unop insns. */
12158 static rtx
12159 arm_expand_unop_builtin (enum insn_code icode,
12160 tree arglist, rtx target, int do_load)
12162 rtx pat;
12163 tree arg0 = TREE_VALUE (arglist);
12164 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12165 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12166 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12168 if (! target
12169 || GET_MODE (target) != tmode
12170 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12171 target = gen_reg_rtx (tmode);
12172 if (do_load)
12173 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12174 else
12176 if (VECTOR_MODE_P (mode0))
12177 op0 = safe_vector_operand (op0, mode0);
12179 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12180 op0 = copy_to_mode_reg (mode0, op0);
12183 pat = GEN_FCN (icode) (target, op0);
12184 if (! pat)
12185 return 0;
12186 emit_insn (pat);
12187 return target;
12190 /* Expand an expression EXP that calls a built-in function,
12191 with result going to TARGET if that's convenient
12192 (and in mode MODE if that's convenient).
12193 SUBTARGET may be used as the target for computing one of EXP's operands.
12194 IGNORE is nonzero if the value is to be ignored. */
12196 static rtx
12197 arm_expand_builtin (tree exp,
12198 rtx target,
12199 rtx subtarget ATTRIBUTE_UNUSED,
12200 enum machine_mode mode ATTRIBUTE_UNUSED,
12201 int ignore ATTRIBUTE_UNUSED)
12203 const struct builtin_description * d;
12204 enum insn_code icode;
12205 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
12206 tree arglist = TREE_OPERAND (exp, 1);
12207 tree arg0;
12208 tree arg1;
12209 tree arg2;
12210 rtx op0;
12211 rtx op1;
12212 rtx op2;
12213 rtx pat;
12214 int fcode = DECL_FUNCTION_CODE (fndecl);
12215 size_t i;
12216 enum machine_mode tmode;
12217 enum machine_mode mode0;
12218 enum machine_mode mode1;
12219 enum machine_mode mode2;
12221 switch (fcode)
12223 case ARM_BUILTIN_TEXTRMSB:
12224 case ARM_BUILTIN_TEXTRMUB:
12225 case ARM_BUILTIN_TEXTRMSH:
12226 case ARM_BUILTIN_TEXTRMUH:
12227 case ARM_BUILTIN_TEXTRMSW:
12228 case ARM_BUILTIN_TEXTRMUW:
12229 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
12230 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
12231 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
12232 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
12233 : CODE_FOR_iwmmxt_textrmw);
12235 arg0 = TREE_VALUE (arglist);
12236 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12237 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12238 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12239 tmode = insn_data[icode].operand[0].mode;
12240 mode0 = insn_data[icode].operand[1].mode;
12241 mode1 = insn_data[icode].operand[2].mode;
12243 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12244 op0 = copy_to_mode_reg (mode0, op0);
12245 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12247 /* @@@ better error message */
12248 error ("selector must be an immediate");
12249 return gen_reg_rtx (tmode);
12251 if (target == 0
12252 || GET_MODE (target) != tmode
12253 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12254 target = gen_reg_rtx (tmode);
12255 pat = GEN_FCN (icode) (target, op0, op1);
12256 if (! pat)
12257 return 0;
12258 emit_insn (pat);
12259 return target;
12261 case ARM_BUILTIN_TINSRB:
12262 case ARM_BUILTIN_TINSRH:
12263 case ARM_BUILTIN_TINSRW:
12264 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
12265 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
12266 : CODE_FOR_iwmmxt_tinsrw);
12267 arg0 = TREE_VALUE (arglist);
12268 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12269 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12270 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12271 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12272 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12273 tmode = insn_data[icode].operand[0].mode;
12274 mode0 = insn_data[icode].operand[1].mode;
12275 mode1 = insn_data[icode].operand[2].mode;
12276 mode2 = insn_data[icode].operand[3].mode;
12278 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12279 op0 = copy_to_mode_reg (mode0, op0);
12280 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12281 op1 = copy_to_mode_reg (mode1, op1);
12282 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12284 /* @@@ better error message */
12285 error ("selector must be an immediate");
12286 return const0_rtx;
12288 if (target == 0
12289 || GET_MODE (target) != tmode
12290 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12291 target = gen_reg_rtx (tmode);
12292 pat = GEN_FCN (icode) (target, op0, op1, op2);
12293 if (! pat)
12294 return 0;
12295 emit_insn (pat);
12296 return target;
12298 case ARM_BUILTIN_SETWCX:
12299 arg0 = TREE_VALUE (arglist);
12300 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12301 op0 = force_reg (SImode, expand_expr (arg0, NULL_RTX, VOIDmode, 0));
12302 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12303 emit_insn (gen_iwmmxt_tmcr (op1, op0));
12304 return 0;
12306 case ARM_BUILTIN_GETWCX:
12307 arg0 = TREE_VALUE (arglist);
12308 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12309 target = gen_reg_rtx (SImode);
12310 emit_insn (gen_iwmmxt_tmrc (target, op0));
12311 return target;
12313 case ARM_BUILTIN_WSHUFH:
12314 icode = CODE_FOR_iwmmxt_wshufh;
12315 arg0 = TREE_VALUE (arglist);
12316 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12317 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12318 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12319 tmode = insn_data[icode].operand[0].mode;
12320 mode1 = insn_data[icode].operand[1].mode;
12321 mode2 = insn_data[icode].operand[2].mode;
12323 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
12324 op0 = copy_to_mode_reg (mode1, op0);
12325 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
12327 /* @@@ better error message */
12328 error ("mask must be an immediate");
12329 return const0_rtx;
12331 if (target == 0
12332 || GET_MODE (target) != tmode
12333 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12334 target = gen_reg_rtx (tmode);
12335 pat = GEN_FCN (icode) (target, op0, op1);
12336 if (! pat)
12337 return 0;
12338 emit_insn (pat);
12339 return target;
12341 case ARM_BUILTIN_WSADB:
12342 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, arglist, target);
12343 case ARM_BUILTIN_WSADH:
12344 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, arglist, target);
12345 case ARM_BUILTIN_WSADBZ:
12346 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, arglist, target);
12347 case ARM_BUILTIN_WSADHZ:
12348 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, arglist, target);
12350 /* Several three-argument builtins. */
12351 case ARM_BUILTIN_WMACS:
12352 case ARM_BUILTIN_WMACU:
12353 case ARM_BUILTIN_WALIGN:
12354 case ARM_BUILTIN_TMIA:
12355 case ARM_BUILTIN_TMIAPH:
12356 case ARM_BUILTIN_TMIATT:
12357 case ARM_BUILTIN_TMIATB:
12358 case ARM_BUILTIN_TMIABT:
12359 case ARM_BUILTIN_TMIABB:
12360 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
12361 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
12362 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
12363 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
12364 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
12365 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
12366 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
12367 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
12368 : CODE_FOR_iwmmxt_walign);
12369 arg0 = TREE_VALUE (arglist);
12370 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12371 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12372 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12373 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12374 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12375 tmode = insn_data[icode].operand[0].mode;
12376 mode0 = insn_data[icode].operand[1].mode;
12377 mode1 = insn_data[icode].operand[2].mode;
12378 mode2 = insn_data[icode].operand[3].mode;
12380 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12381 op0 = copy_to_mode_reg (mode0, op0);
12382 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12383 op1 = copy_to_mode_reg (mode1, op1);
12384 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12385 op2 = copy_to_mode_reg (mode2, op2);
12386 if (target == 0
12387 || GET_MODE (target) != tmode
12388 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12389 target = gen_reg_rtx (tmode);
12390 pat = GEN_FCN (icode) (target, op0, op1, op2);
12391 if (! pat)
12392 return 0;
12393 emit_insn (pat);
12394 return target;
12396 case ARM_BUILTIN_WZERO:
12397 target = gen_reg_rtx (DImode);
12398 emit_insn (gen_iwmmxt_clrdi (target));
12399 return target;
12401 default:
12402 break;
12405 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12406 if (d->code == (const enum arm_builtins) fcode)
12407 return arm_expand_binop_builtin (d->icode, arglist, target);
12409 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
12410 if (d->code == (const enum arm_builtins) fcode)
12411 return arm_expand_unop_builtin (d->icode, arglist, target, 0);
12413 /* @@@ Should really do something sensible here. */
12414 return NULL_RTX;
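/* A worked illustration (hypothetical source line, real code path): a call
   such as

     int v = __builtin_arm_getwcx (3);

   arrives here with fcode == ARM_BUILTIN_GETWCX; the argument is expanded
   to an rtx, a fresh SImode pseudo becomes the target, and one
   iwmmxt_tmrc insn is emitted to read the control register.  Builtins not
   handled by the switch fall through to the bdesc_2arg and bdesc_1arg
   tables and are expanded by the generic binop/unop helpers above.  */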
12417 /* Return the number (counting from 0) of
12418 the least significant set bit in MASK. */
12420 inline static int
12421 number_of_first_bit_set (unsigned mask)
12423 int bit;
12425 for (bit = 0;
12426 (mask & (1 << bit)) == 0;
12427 ++bit)
12428 continue;
12430 return bit;
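/* For example, number_of_first_bit_set (0x18) finds bits 0-2 clear and
   stops at bit 3, returning 3; a mask with bit 0 set returns 0.  Callers
   must guarantee MASK is nonzero, otherwise the loop never terminates.  */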
12433 /* Emit code to push or pop registers to or from the stack. F is the
12434 assembly file. MASK is the registers to push or pop. PUSH is
12435 nonzero if we should push, and zero if we should pop. For debugging
12436 output, if pushing, adjust CFA_OFFSET by the amount of space added
12437 to the stack. REAL_REGS should have the same number of bits set as
12438 MASK, and will be used instead (in the same order) to describe which
12439 registers were saved - this is used to mark the save slots when we
12440 push high registers after moving them to low registers. */
12441 static void
12442 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
12443 unsigned long real_regs)
12445 int regno;
12446 int lo_mask = mask & 0xFF;
12447 int pushed_words = 0;
12449 gcc_assert (mask);
12451 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
12453 /* Special case. Do not generate a POP PC instruction here; do it in
12454 thumb_exit(). */
12455 thumb_exit (f, -1);
12456 return;
12459 fprintf (f, "\t%s\t{", push ? "push" : "pop");
12461 /* Look at the low registers first. */
12462 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
12464 if (lo_mask & 1)
12466 asm_fprintf (f, "%r", regno);
12468 if ((lo_mask & ~1) != 0)
12469 fprintf (f, ", ");
12471 pushed_words++;
12475 if (push && (mask & (1 << LR_REGNUM)))
12477 /* Catch pushing the LR. */
12478 if (mask & 0xFF)
12479 fprintf (f, ", ");
12481 asm_fprintf (f, "%r", LR_REGNUM);
12483 pushed_words++;
12485 else if (!push && (mask & (1 << PC_REGNUM)))
12487 /* Catch popping the PC. */
12488 if (TARGET_INTERWORK || TARGET_BACKTRACE
12489 || current_function_calls_eh_return)
12491 /* The PC is never popped directly; instead
12492 it is popped into r3 and then BX is used. */
12493 fprintf (f, "}\n");
12495 thumb_exit (f, -1);
12497 return;
12499 else
12501 if (mask & 0xFF)
12502 fprintf (f, ", ");
12504 asm_fprintf (f, "%r", PC_REGNUM);
12508 fprintf (f, "}\n");
12510 if (push && pushed_words && dwarf2out_do_frame ())
12512 char *l = dwarf2out_cfi_label ();
12513 int pushed_mask = real_regs;
12515 *cfa_offset += pushed_words * 4;
12516 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
12518 pushed_words = 0;
12519 pushed_mask = real_regs;
12520 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
12522 if (pushed_mask & 1)
12523 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
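/* A concrete illustration (hypothetical mask): calling
   thumb_pushpop (f, 0x40f0, 1, &offset, 0x40f0), i.e. with bits 4-7 and
   bit 14 set, emits

     push {r4, r5, r6, r7, lr}

   and, when dwarf2out_do_frame () is true, advances *cfa_offset by 20 and
   records a save slot for each of the five registers pushed.  */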
12528 /* Generate code to return from a thumb function.
12529 If 'reg_containing_return_addr' is -1, then the return address is
12530 actually on the stack, at the stack pointer. */
12531 static void
12532 thumb_exit (FILE *f, int reg_containing_return_addr)
12534 unsigned regs_available_for_popping;
12535 unsigned regs_to_pop;
12536 int pops_needed;
12537 unsigned available;
12538 unsigned required;
12539 int mode;
12540 int size;
12541 int restore_a4 = FALSE;
12543 /* Compute the registers we need to pop. */
12544 regs_to_pop = 0;
12545 pops_needed = 0;
12547 if (reg_containing_return_addr == -1)
12549 regs_to_pop |= 1 << LR_REGNUM;
12550 ++pops_needed;
12553 if (TARGET_BACKTRACE)
12555 /* Restore the (ARM) frame pointer and stack pointer. */
12556 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
12557 pops_needed += 2;
12560 /* If there is nothing to pop then just emit the BX instruction and
12561 return. */
12562 if (pops_needed == 0)
12564 if (current_function_calls_eh_return)
12565 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
12567 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
12568 return;
12570 /* Otherwise if we are not supporting interworking and we have not created
12571 a backtrace structure and the function was not entered in ARM mode then
12572 just pop the return address straight into the PC. */
12573 else if (!TARGET_INTERWORK
12574 && !TARGET_BACKTRACE
12575 && !is_called_in_ARM_mode (current_function_decl)
12576 && !current_function_calls_eh_return)
12578 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
12579 return;
12582 /* Find out how many of the (return) argument registers we can corrupt. */
12583 regs_available_for_popping = 0;
12585 /* If returning via __builtin_eh_return, the bottom three registers
12586 all contain information needed for the return. */
12587 if (current_function_calls_eh_return)
12588 size = 12;
12589 else
12591 /* Deduce the registers used from the function's
12592 return value. This is more reliable than examining
12593 regs_ever_live[] because that will be set if the register is
12594 ever used in the function, not just if the register is used
12595 to hold a return value. */
12597 if (current_function_return_rtx != 0)
12598 mode = GET_MODE (current_function_return_rtx);
12599 else
12600 mode = DECL_MODE (DECL_RESULT (current_function_decl));
12602 size = GET_MODE_SIZE (mode);
12604 if (size == 0)
12606 /* In a void function we can use any argument register.
12607 In a function that returns a structure on the stack
12608 we can use the second and third argument registers. */
12609 if (mode == VOIDmode)
12610 regs_available_for_popping =
12611 (1 << ARG_REGISTER (1))
12612 | (1 << ARG_REGISTER (2))
12613 | (1 << ARG_REGISTER (3));
12614 else
12615 regs_available_for_popping =
12616 (1 << ARG_REGISTER (2))
12617 | (1 << ARG_REGISTER (3));
12619 else if (size <= 4)
12620 regs_available_for_popping =
12621 (1 << ARG_REGISTER (2))
12622 | (1 << ARG_REGISTER (3));
12623 else if (size <= 8)
12624 regs_available_for_popping =
12625 (1 << ARG_REGISTER (3));
12628 /* Match registers to be popped with registers into which we pop them. */
12629 for (available = regs_available_for_popping,
12630 required = regs_to_pop;
12631 required != 0 && available != 0;
12632 available &= ~(available & - available),
12633 required &= ~(required & - required))
12634 -- pops_needed;
12636 /* If we have any popping registers left over, remove them. */
12637 if (available > 0)
12638 regs_available_for_popping &= ~available;
12640 /* Otherwise if we need another popping register we can use
12641 the fourth argument register. */
12642 else if (pops_needed)
12644 /* If we have not found any free argument registers and
12645 reg a4 contains the return address, we must move it. */
12646 if (regs_available_for_popping == 0
12647 && reg_containing_return_addr == LAST_ARG_REGNUM)
12649 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
12650 reg_containing_return_addr = LR_REGNUM;
12652 else if (size > 12)
12654 /* Register a4 is being used to hold part of the return value,
12655 but we have dire need of a free, low register. */
12656 restore_a4 = TRUE;
12658 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
12661 if (reg_containing_return_addr != LAST_ARG_REGNUM)
12663 /* The fourth argument register is available. */
12664 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
12666 --pops_needed;
12670 /* Pop as many registers as we can. */
12671 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
12672 regs_available_for_popping);
12674 /* Process the registers we popped. */
12675 if (reg_containing_return_addr == -1)
12677 /* The return address was popped into the lowest numbered register. */
12678 regs_to_pop &= ~(1 << LR_REGNUM);
12680 reg_containing_return_addr =
12681 number_of_first_bit_set (regs_available_for_popping);
12683 /* Remove this register from the mask of available registers, so that
12684 the return address will not be corrupted by further pops. */
12685 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
12688 /* If we popped other registers then handle them here. */
12689 if (regs_available_for_popping)
12691 int frame_pointer;
12693 /* Work out which register currently contains the frame pointer. */
12694 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
12696 /* Move it into the correct place. */
12697 asm_fprintf (f, "\tmov\t%r, %r\n",
12698 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
12700 /* (Temporarily) remove it from the mask of popped registers. */
12701 regs_available_for_popping &= ~(1 << frame_pointer);
12702 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
12704 if (regs_available_for_popping)
12706 int stack_pointer;
12708 /* We popped the stack pointer as well;
12709 find the register that contains it. */
12710 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
12712 /* Move it into the stack register. */
12713 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
12715 /* At this point we have popped all necessary registers, so
12716 do not worry about restoring regs_available_for_popping
12717 to its correct value:
12719 assert (pops_needed == 0)
12720 assert (regs_available_for_popping == (1 << frame_pointer))
12721 assert (regs_to_pop == (1 << STACK_POINTER)) */
12723 else
12725 /* Since we have just moved the popped value into the frame
12726 pointer, the popping register is available for reuse, and
12727 we know that we still have the stack pointer left to pop. */
12728 regs_available_for_popping |= (1 << frame_pointer);
12732 /* If we still have registers left on the stack, but we no longer have
12733 any registers into which we can pop them, then we must move the return
12734 address into the link register and make available the register that
12735 contained it. */
12736 if (regs_available_for_popping == 0 && pops_needed > 0)
12738 regs_available_for_popping |= 1 << reg_containing_return_addr;
12740 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
12741 reg_containing_return_addr);
12743 reg_containing_return_addr = LR_REGNUM;
12746 /* If we have registers left on the stack then pop some more.
12747 We know that at most we will want to pop FP and SP. */
12748 if (pops_needed > 0)
12750 int popped_into;
12751 int move_to;
12753 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
12754 regs_available_for_popping);
12756 /* We have popped either FP or SP.
12757 Move whichever one it is into the correct register. */
12758 popped_into = number_of_first_bit_set (regs_available_for_popping);
12759 move_to = number_of_first_bit_set (regs_to_pop);
12761 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
12763 regs_to_pop &= ~(1 << move_to);
12765 --pops_needed;
12768 /* If we still have not popped everything then we must have only
12769 had one register available to us and we are now popping the SP. */
12770 if (pops_needed > 0)
12772 int popped_into;
12774 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
12775 regs_available_for_popping);
12777 popped_into = number_of_first_bit_set (regs_available_for_popping);
12779 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
12781 /* assert (regs_to_pop == (1 << STACK_POINTER))
12782 assert (pops_needed == 1) */
12786 /* If necessary restore the a4 register. */
12787 if (restore_a4)
12789 if (reg_containing_return_addr != LR_REGNUM)
12791 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
12792 reg_containing_return_addr = LR_REGNUM;
12795 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
12798 if (current_function_calls_eh_return)
12799 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
12801 /* Return to caller. */
12802 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
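/* A minimal sketch of the common case (void return, no backtrace
   structure, interworking enabled, return address still on the stack):
   the return address is popped into the lowest free argument register and
   control returns with an interworking branch, roughly

     pop {r0}
     bx  r0

   whereas without interworking the fast path near the top simply emits
   pop {pc}.  The rest of the function handles the awkward cases where the
   frame pointer and stack pointer must also be recovered through low
   registers.  */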
12806 void
12807 thumb_final_prescan_insn (rtx insn)
12809 if (flag_print_asm_name)
12810 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
12811 INSN_ADDRESSES (INSN_UID (insn)));
12815 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
12817 unsigned HOST_WIDE_INT mask = 0xff;
12818 int i;
12820 if (val == 0) /* XXX */
12821 return 0;
12823 for (i = 0; i < 25; i++)
12824 if ((val & (mask << i)) == val)
12825 return 1;
12827 return 0;
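/* In other words, VAL must be an 8-bit value shifted left by 0 to 24 bits.
   For example 0x1fe0 (0xff << 5) and 0x2d00 (0x2d << 8) are accepted,
   while 0x101 is rejected because no 8-bit window covers both set bits.  */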
12830 /* Returns nonzero if the current function contains,
12831 or might contain a far jump. */
12832 static int
12833 thumb_far_jump_used_p (void)
12835 rtx insn;
12837 /* This test is only important for leaf functions. */
12838 /* assert (!leaf_function_p ()); */
12840 /* If we have already decided that far jumps may be used,
12841 do not bother checking again, and always return true even if
12842 it turns out that they are not being used. Once we have made
12843 the decision that far jumps are present (and that hence the link
12844 register will be pushed onto the stack) we cannot go back on it. */
12845 if (cfun->machine->far_jump_used)
12846 return 1;
12848 /* If this function is not being called from the prologue/epilogue
12849 generation code then it must be being called from the
12850 INITIAL_ELIMINATION_OFFSET macro. */
12851 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
12853 /* In this case we know that we are being asked about the elimination
12854 of the arg pointer register. If that register is not being used,
12855 then there are no arguments on the stack, and we do not have to
12856 worry that a far jump might force the prologue to push the link
12857 register, changing the stack offsets. In this case we can just
12858 return false, since the presence of far jumps in the function will
12859 not affect stack offsets.
12861 If the arg pointer is live (or if it was live, but has now been
12862 eliminated and so set to dead) then we do have to test to see if
12863 the function might contain a far jump. This test can lead to some
12864 false negatives, since before reload is completed, the length of
12865 branch instructions is not known, so gcc defaults to returning their
12866 longest length, which in turn sets the far jump attribute to true.
12868 A false negative will not result in bad code being generated, but it
12869 will result in a needless push and pop of the link register. We
12870 hope that this does not occur too often.
12872 If we need doubleword stack alignment this could affect the other
12873 elimination offsets so we can't risk getting it wrong. */
12874 if (regs_ever_live [ARG_POINTER_REGNUM])
12875 cfun->machine->arg_pointer_live = 1;
12876 else if (!cfun->machine->arg_pointer_live)
12877 return 0;
12880 /* Check to see if the function contains a branch
12881 insn with the far jump attribute set. */
12882 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
12884 if (GET_CODE (insn) == JUMP_INSN
12885 /* Ignore tablejump patterns. */
12886 && GET_CODE (PATTERN (insn)) != ADDR_VEC
12887 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
12888 && get_attr_far_jump (insn) == FAR_JUMP_YES
12891 /* Record the fact that we have decided that
12892 the function does use far jumps. */
12893 cfun->machine->far_jump_used = 1;
12894 return 1;
12898 return 0;
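/* Background (informal): Thumb branch instructions have a short reach, so
   a jump whose target may be out of range (flagged by the far_jump insn
   attribute) is implemented with a BL-based sequence, which clobbers LR.
   That is why finding even one such jump forces LR to be saved in the
   prologue, and why the result recorded above must remain sticky once
   set.  */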
12901 /* Return nonzero if FUNC must be entered in ARM mode. */
12903 is_called_in_ARM_mode (tree func)
12905 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
12907 /* Ignore the problem of functions whose address is taken. */
12908 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
12909 return TRUE;
12911 #ifdef ARM_PE
12912 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
12913 #else
12914 return FALSE;
12915 #endif
12918 /* The bits which aren't usefully expanded as rtl. */
12919 const char *
12920 thumb_unexpanded_epilogue (void)
12922 int regno;
12923 unsigned long live_regs_mask = 0;
12924 int high_regs_pushed = 0;
12925 int had_to_push_lr;
12926 int size;
12928 if (return_used_this_function)
12929 return "";
12931 if (IS_NAKED (arm_current_func_type ()))
12932 return "";
12934 live_regs_mask = thumb_compute_save_reg_mask ();
12935 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
12937 /* Deduce the registers used from the function's return value.
12938 This is more reliable than examining regs_ever_live[] because that
12939 will be set if the register is ever used in the function, not just if
12940 the register is used to hold a return value. */
12941 size = arm_size_return_regs ();
12943 /* The prologue may have pushed some high registers to use as
12944 work registers. E.g. the testsuite file:
12945 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
12946 compiles to produce:
12947 push {r4, r5, r6, r7, lr}
12948 mov r7, r9
12949 mov r6, r8
12950 push {r6, r7}
12951 as part of the prologue. We have to undo that pushing here. */
12953 if (high_regs_pushed)
12955 unsigned long mask = live_regs_mask & 0xff;
12956 int next_hi_reg;
12958 /* The available low registers depend on the size of the value we are
12959 returning. */
12960 if (size <= 12)
12961 mask |= 1 << 3;
12962 if (size <= 8)
12963 mask |= 1 << 2;
12965 if (mask == 0)
12966 /* Oh dear! We have no low registers into which we can pop
12967 high registers! */
12968 internal_error
12969 ("no low registers available for popping high registers");
12971 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
12972 if (live_regs_mask & (1 << next_hi_reg))
12973 break;
12975 while (high_regs_pushed)
12977 /* Find lo register(s) into which the high register(s) can
12978 be popped. */
12979 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
12981 if (mask & (1 << regno))
12982 high_regs_pushed--;
12983 if (high_regs_pushed == 0)
12984 break;
12987 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
12989 /* Pop the values into the low register(s). */
12990 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
12992 /* Move the value(s) into the high registers. */
12993 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
12995 if (mask & (1 << regno))
12997 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
12998 regno);
13000 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
13001 if (live_regs_mask & (1 << next_hi_reg))
13002 break;
13006 live_regs_mask &= ~0x0f00;
13009 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
13010 live_regs_mask &= 0xff;
13012 if (current_function_pretend_args_size == 0 || TARGET_BACKTRACE)
13014 /* Pop the return address into the PC. */
13015 if (had_to_push_lr)
13016 live_regs_mask |= 1 << PC_REGNUM;
13018 /* Either no argument registers were pushed or a backtrace
13019 structure was created which includes an adjusted stack
13020 pointer, so just pop everything. */
13021 if (live_regs_mask)
13022 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
13023 live_regs_mask);
13025 /* We have either just popped the return address into the
13026 PC or it was kept in LR for the entire function. */
13027 if (!had_to_push_lr)
13028 thumb_exit (asm_out_file, LR_REGNUM);
13030 else
13032 /* Pop everything but the return address. */
13033 if (live_regs_mask)
13034 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
13035 live_regs_mask);
13037 if (had_to_push_lr)
13039 if (size > 12)
13041 /* We have no free low regs, so save one. */
13042 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
13043 LAST_ARG_REGNUM);
13046 /* Get the return address into a temporary register. */
13047 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
13048 1 << LAST_ARG_REGNUM);
13050 if (size > 12)
13052 /* Move the return address to lr. */
13053 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
13054 LAST_ARG_REGNUM);
13055 /* Restore the low register. */
13056 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
13057 IP_REGNUM);
13058 regno = LR_REGNUM;
13060 else
13061 regno = LAST_ARG_REGNUM;
13063 else
13064 regno = LR_REGNUM;
13066 /* Remove the argument registers that were pushed onto the stack. */
13067 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
13068 SP_REGNUM, SP_REGNUM,
13069 current_function_pretend_args_size);
13071 thumb_exit (asm_out_file, regno);
13074 return "";
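/* A sketch of the high-register restore above (hypothetical function that
   parked r8 and r9 in low registers during the prologue): the matching
   epilogue ends up along the lines of

     pop {r2, r3}
     mov r8, r2
     mov r9, r3
     pop {r4, r5, r6, r7, pc}

   i.e. the saved values are popped into whichever low registers are free
   given the return-value size, moved back up, and only then is the
   ordinary pop of the low registers and return address issued.  */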
13077 /* Functions to save and restore machine-specific function data. */
13078 static struct machine_function *
13079 arm_init_machine_status (void)
13081 struct machine_function *machine;
13082 machine = (machine_function *) ggc_alloc_cleared (sizeof (machine_function));
13084 #if ARM_FT_UNKNOWN != 0
13085 machine->func_type = ARM_FT_UNKNOWN;
13086 #endif
13087 return machine;
13090 /* Return an RTX indicating where the return address to the
13091 calling function can be found. */
13093 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
13095 if (count != 0)
13096 return NULL_RTX;
13098 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
13101 /* Do anything needed before RTL is emitted for each function. */
13102 void
13103 arm_init_expanders (void)
13105 /* Arrange to initialize and mark the machine per-function status. */
13106 init_machine_status = arm_init_machine_status;
13108 /* This is to stop the combine pass optimizing away the alignment
13109 adjustment of va_arg. */
13110 /* ??? It is claimed that this should not be necessary. */
13111 if (cfun)
13112 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
13116 /* Like arm_compute_initial_elimination_offset. Simpler because
13117 THUMB_HARD_FRAME_POINTER isn't actually the ABI-specified frame pointer. */
13119 HOST_WIDE_INT
13120 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
13122 arm_stack_offsets *offsets;
13124 offsets = arm_get_frame_offsets ();
13126 switch (from)
13128 case ARG_POINTER_REGNUM:
13129 switch (to)
13131 case STACK_POINTER_REGNUM:
13132 return offsets->outgoing_args - offsets->saved_args;
13134 case FRAME_POINTER_REGNUM:
13135 return offsets->soft_frame - offsets->saved_args;
13137 case THUMB_HARD_FRAME_POINTER_REGNUM:
13138 case ARM_HARD_FRAME_POINTER_REGNUM:
13139 return offsets->saved_regs - offsets->saved_args;
13141 default:
13142 gcc_unreachable ();
13144 break;
13146 case FRAME_POINTER_REGNUM:
13147 switch (to)
13149 case STACK_POINTER_REGNUM:
13150 return offsets->outgoing_args - offsets->soft_frame;
13152 case THUMB_HARD_FRAME_POINTER_REGNUM:
13153 case ARM_HARD_FRAME_POINTER_REGNUM:
13154 return offsets->saved_regs - offsets->soft_frame;
13156 default:
13157 gcc_unreachable ();
13159 break;
13161 default:
13162 gcc_unreachable ();
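/* A small worked example (made-up frame layout): with saved_args = 0,
   saved_regs = 24, soft_frame = 24 and outgoing_args = 40, eliminating
   ARG_POINTER_REGNUM gives 40 (to the stack pointer), 24 (to the soft
   frame pointer) and 24 (to the hard frame pointer), while eliminating
   FRAME_POINTER_REGNUM to STACK_POINTER_REGNUM gives 40 - 24 = 16.  */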
13167 /* Generate the rest of a function's prologue. */
13168 void
13169 thumb_expand_prologue (void)
13171 rtx insn, dwarf;
13173 HOST_WIDE_INT amount;
13174 arm_stack_offsets *offsets;
13175 unsigned long func_type;
13176 int regno;
13177 unsigned long live_regs_mask;
13179 func_type = arm_current_func_type ();
13181 /* Naked functions don't have prologues. */
13182 if (IS_NAKED (func_type))
13183 return;
13185 if (IS_INTERRUPT (func_type))
13187 error ("interrupt Service Routines cannot be coded in Thumb mode");
13188 return;
13191 live_regs_mask = thumb_compute_save_reg_mask ();
13192 /* Load the pic register before setting the frame pointer,
13193 so we can use r7 as a temporary work register. */
13194 if (flag_pic)
13195 arm_load_pic_register (thumb_find_work_register (live_regs_mask));
13197 offsets = arm_get_frame_offsets ();
13199 if (frame_pointer_needed)
13201 insn = emit_insn (gen_movsi (hard_frame_pointer_rtx,
13202 stack_pointer_rtx));
13203 RTX_FRAME_RELATED_P (insn) = 1;
13205 else if (CALLER_INTERWORKING_SLOT_SIZE > 0)
13206 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
13207 stack_pointer_rtx);
13209 amount = offsets->outgoing_args - offsets->saved_regs;
13210 if (amount)
13212 if (amount < 512)
13214 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
13215 GEN_INT (- amount)));
13216 RTX_FRAME_RELATED_P (insn) = 1;
13218 else
13220 rtx reg;
13222 /* The stack decrement is too big for an immediate value in a single
13223 insn. In theory we could issue multiple subtracts, but after
13224 three of them it becomes more space efficient to place the full
13225 value in the constant pool and load into a register. (Also the
13226 ARM debugger really likes to see only one stack decrement per
13227 function). So instead we look for a scratch register into which
13228 we can load the decrement, and then we subtract this from the
13229 stack pointer. Unfortunately on the thumb the only available
13230 scratch registers are the argument registers, and we cannot use
13231 these as they may hold arguments to the function. Instead we
13232 attempt to locate a call preserved register which is used by this
13233 function. If we can find one, then we know that it will have
13234 been pushed at the start of the prologue and so we can corrupt
13235 it now. */
13236 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
13237 if (live_regs_mask & (1 << regno)
13238 && !(frame_pointer_needed
13239 && (regno == THUMB_HARD_FRAME_POINTER_REGNUM)))
13240 break;
13242 if (regno > LAST_LO_REGNUM) /* Very unlikely. */
13244 rtx spare = gen_rtx_REG (SImode, IP_REGNUM);
13246 /* Choose an arbitrary, non-argument low register. */
13247 reg = gen_rtx_REG (SImode, LAST_LO_REGNUM);
13249 /* Save it by copying it into a high, scratch register. */
13250 emit_insn (gen_movsi (spare, reg));
13251 /* Add a USE to stop propagate_one_insn() from barfing. */
13252 emit_insn (gen_prologue_use (spare));
13254 /* Decrement the stack. */
13255 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
13256 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
13257 stack_pointer_rtx, reg));
13258 RTX_FRAME_RELATED_P (insn) = 1;
13259 dwarf = gen_rtx_SET (SImode, stack_pointer_rtx,
13260 plus_constant (stack_pointer_rtx,
13261 -amount));
13262 RTX_FRAME_RELATED_P (dwarf) = 1;
13263 REG_NOTES (insn)
13264 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
13265 REG_NOTES (insn));
13267 /* Restore the low register's original value. */
13268 emit_insn (gen_movsi (reg, spare));
13270 /* Emit a USE of the restored scratch register, so that flow
13271 analysis will not consider the restore redundant. The
13272 register won't be used again in this function and isn't
13273 restored by the epilogue. */
13274 emit_insn (gen_prologue_use (reg));
13276 else
13278 reg = gen_rtx_REG (SImode, regno);
13280 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
13282 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
13283 stack_pointer_rtx, reg));
13284 RTX_FRAME_RELATED_P (insn) = 1;
13285 dwarf = gen_rtx_SET (SImode, stack_pointer_rtx,
13286 plus_constant (stack_pointer_rtx,
13287 -amount));
13288 RTX_FRAME_RELATED_P (dwarf) = 1;
13289 REG_NOTES (insn)
13290 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
13291 REG_NOTES (insn));
13294 /* If the frame pointer is needed, emit a special barrier that
13295 will prevent the scheduler from moving stores to the frame
13296 before the stack adjustment. */
13297 if (frame_pointer_needed)
13298 emit_insn (gen_stack_tie (stack_pointer_rtx,
13299 hard_frame_pointer_rtx));
13302 if (current_function_profile || !TARGET_SCHED_PROLOG)
13303 emit_insn (gen_blockage ());
13305 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
13306 if (live_regs_mask & 0xff)
13307 cfun->machine->lr_save_eliminated = 0;
13309 /* If the link register is being kept alive, with the return address in it,
13310 then make sure that it does not get reused by the ce2 pass. */
13311 if (cfun->machine->lr_save_eliminated)
13312 emit_insn (gen_prologue_use (gen_rtx_REG (SImode, LR_REGNUM)));
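/* For orientation, the RTL emitted above corresponds to a Thumb prologue
   of roughly this shape for a small frame (illustrative numbers):

     mov  r7, sp          (only if the frame pointer is needed)
     sub  sp, #24         (single-insn form, amount < 512)

   A larger frame instead loads the decrement into a spare call-saved low
   register (or temporarily parks a low register in IP) and subtracts that
   register from SP, attaching a REG_FRAME_RELATED_EXPR note so the
   unwinder still sees a simple constant adjustment.  */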
13316 void
13317 thumb_expand_epilogue (void)
13319 HOST_WIDE_INT amount;
13320 arm_stack_offsets *offsets;
13321 int regno;
13323 /* Naked functions don't have epilogues. */
13324 if (IS_NAKED (arm_current_func_type ()))
13325 return;
13327 offsets = arm_get_frame_offsets ();
13328 amount = offsets->outgoing_args - offsets->saved_regs;
13330 if (frame_pointer_needed)
13331 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
13332 else if (amount)
13334 if (amount < 512)
13335 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
13336 GEN_INT (amount)));
13337 else
13339 /* r3 is always free in the epilogue. */
13340 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
13342 emit_insn (gen_movsi (reg, GEN_INT (amount)));
13343 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
13347 /* Emit a USE (stack_pointer_rtx), so that
13348 the stack adjustment will not be deleted. */
13349 emit_insn (gen_prologue_use (stack_pointer_rtx));
13351 if (current_function_profile || !TARGET_SCHED_PROLOG)
13352 emit_insn (gen_blockage ());
13354 /* Emit a clobber for each register that will be restored in the epilogue,
13355 so that flow2 will get register lifetimes correct. */
13356 for (regno = 0; regno < 13; regno++)
13357 if (regs_ever_live[regno] && !call_used_regs[regno])
13358 emit_insn (gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, regno)));
13360 if (! regs_ever_live[LR_REGNUM])
13361 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, LR_REGNUM)));
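/* The matching epilogue RTL is simpler: either copy the frame pointer
   back into SP, or add the frame size back, e.g.

     add  sp, #24         (amount < 512; larger amounts go through r3,
                           which is always free at this point)

   followed by the USE/CLOBBER bookkeeping that stops later passes from
   deleting the adjustment or getting register lifetimes wrong.  */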
13364 static void
13365 thumb_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
13367 unsigned long live_regs_mask = 0;
13368 unsigned long l_mask;
13369 unsigned high_regs_pushed = 0;
13370 int cfa_offset = 0;
13371 int regno;
13373 if (IS_NAKED (arm_current_func_type ()))
13374 return;
13376 if (is_called_in_ARM_mode (current_function_decl))
13378 const char * name;
13380 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
13381 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
13382 == SYMBOL_REF);
13383 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
13385 /* Generate code sequence to switch us into Thumb mode. */
13386 /* The .code 32 directive has already been emitted by
13387 ASM_DECLARE_FUNCTION_NAME. */
13388 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
13389 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
13391 /* Generate a label, so that the debugger will notice the
13392 change in instruction sets. This label is also used by
13393 the assembler to bypass the ARM code when this function
13394 is called from a Thumb encoded function elsewhere in the
13395 same file. Hence the definition of STUB_NAME here must
13396 agree with the definition in gas/config/tc-arm.c. */
13398 #define STUB_NAME ".real_start_of"
13400 fprintf (f, "\t.code\t16\n");
13401 #ifdef ARM_PE
13402 if (arm_dllexport_name_p (name))
13403 name = arm_strip_name_encoding (name);
13404 #endif
13405 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
13406 fprintf (f, "\t.thumb_func\n");
13407 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
13410 if (current_function_pretend_args_size)
13412 if (cfun->machine->uses_anonymous_args)
13414 int num_pushes;
13416 fprintf (f, "\tpush\t{");
13418 num_pushes = ARM_NUM_INTS (current_function_pretend_args_size);
13420 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
13421 regno <= LAST_ARG_REGNUM;
13422 regno++)
13423 asm_fprintf (f, "%r%s", regno,
13424 regno == LAST_ARG_REGNUM ? "" : ", ");
13426 fprintf (f, "}\n");
13428 else
13429 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
13430 SP_REGNUM, SP_REGNUM,
13431 current_function_pretend_args_size);
13433 /* We don't need to record the stores for unwinding (would it
13434 help the debugger any if we did?), but record the change in
13435 the stack pointer. */
13436 if (dwarf2out_do_frame ())
13438 char *l = dwarf2out_cfi_label ();
13440 cfa_offset = cfa_offset + current_function_pretend_args_size;
13441 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
13445 /* Get the registers we are going to push. */
13446 live_regs_mask = thumb_compute_save_reg_mask ();
13447 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
13448 l_mask = live_regs_mask & 0x40ff;
13449 /* Then count how many other high registers will need to be pushed. */
13450 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
13452 if (TARGET_BACKTRACE)
13454 unsigned offset;
13455 unsigned work_register;
13457 /* We have been asked to create a stack backtrace structure.
13458 The code looks like this:
13460 0 .align 2
13461 0 func:
13462 0 sub SP, #16 Reserve space for 4 registers.
13463 2 push {R7} Push low registers.
13464 4 add R7, SP, #20 Get the stack pointer before the push.
13465 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
13466 8 mov R7, PC Get hold of the start of this code plus 12.
13467 10 str R7, [SP, #16] Store it.
13468 12 mov R7, FP Get hold of the current frame pointer.
13469 14 str R7, [SP, #4] Store it.
13470 16 mov R7, LR Get hold of the current return address.
13471 18 str R7, [SP, #12] Store it.
13472 20 add R7, SP, #16 Point at the start of the backtrace structure.
13473 22 mov FP, R7 Put this value into the frame pointer. */
13475 work_register = thumb_find_work_register (live_regs_mask);
13477 asm_fprintf
13478 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
13479 SP_REGNUM, SP_REGNUM);
13481 if (dwarf2out_do_frame ())
13483 char *l = dwarf2out_cfi_label ();
13485 cfa_offset = cfa_offset + 16;
13486 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
13489 if (l_mask)
13491 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
13492 offset = bit_count (l_mask);
13494 else
13495 offset = 0;
13497 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
13498 offset + 16 + current_function_pretend_args_size);
13500 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
13501 offset + 4);
13503 /* Make sure that the instruction fetching the PC is in the right place
13504 to calculate "start of backtrace creation code + 12". */
13505 if (l_mask)
13507 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
13508 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
13509 offset + 12);
13510 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
13511 ARM_HARD_FRAME_POINTER_REGNUM);
13512 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
13513 offset);
13515 else
13517 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
13518 ARM_HARD_FRAME_POINTER_REGNUM);
13519 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
13520 offset);
13521 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
13522 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
13523 offset + 12);
13526 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
13527 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
13528 offset + 8);
13529 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
13530 offset + 12);
13531 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
13532 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
13534 /* Optimisation: If we are not pushing any low registers but we are going
13535 to push some high registers then delay our first push. This will just
13536 be a push of LR and we can combine it with the push of the first high
13537 register. */
13538 else if ((l_mask & 0xff) != 0
13539 || (high_regs_pushed == 0 && l_mask))
13540 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
13542 if (high_regs_pushed)
13544 unsigned pushable_regs;
13545 unsigned next_hi_reg;
13547 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
13548 if (live_regs_mask & (1 << next_hi_reg))
13549 break;
13551 pushable_regs = l_mask & 0xff;
13553 if (pushable_regs == 0)
13554 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
13556 while (high_regs_pushed > 0)
13558 unsigned long real_regs_mask = 0;
13560 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
13562 if (pushable_regs & (1 << regno))
13564 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
13566 high_regs_pushed --;
13567 real_regs_mask |= (1 << next_hi_reg);
13569 if (high_regs_pushed)
13571 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
13572 next_hi_reg --)
13573 if (live_regs_mask & (1 << next_hi_reg))
13574 break;
13576 else
13578 pushable_regs &= ~((1 << regno) - 1);
13579 break;
13584 /* If we had to find a work register and we have not yet
13585 saved the LR then add it to the list of regs to push. */
13586 if (l_mask == (1 << LR_REGNUM))
13588 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
13589 1, &cfa_offset,
13590 real_regs_mask | (1 << LR_REGNUM));
13591 l_mask = 0;
13593 else
13594 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
13599 /* Handle the case of a double word load into a low register from
13600 a computed memory address. The computed address may involve a
13601 register which is overwritten by the load. */
13602 const char *
13603 thumb_load_double_from_address (rtx *operands)
13605 rtx addr;
13606 rtx base;
13607 rtx offset;
13608 rtx arg1;
13609 rtx arg2;
13611 gcc_assert (GET_CODE (operands[0]) == REG);
13612 gcc_assert (GET_CODE (operands[1]) == MEM);
13614 /* Get the memory address. */
13615 addr = XEXP (operands[1], 0);
13617 /* Work out how the memory address is computed. */
13618 switch (GET_CODE (addr))
13620 case REG:
13621 operands[2] = gen_rtx_MEM (SImode,
13622 plus_constant (XEXP (operands[1], 0), 4));
13624 if (REGNO (operands[0]) == REGNO (addr))
13626 output_asm_insn ("ldr\t%H0, %2", operands);
13627 output_asm_insn ("ldr\t%0, %1", operands);
13629 else
13631 output_asm_insn ("ldr\t%0, %1", operands);
13632 output_asm_insn ("ldr\t%H0, %2", operands);
13634 break;
13636 case CONST:
13637 /* Compute <address> + 4 for the high order load. */
13638 operands[2] = gen_rtx_MEM (SImode,
13639 plus_constant (XEXP (operands[1], 0), 4));
13641 output_asm_insn ("ldr\t%0, %1", operands);
13642 output_asm_insn ("ldr\t%H0, %2", operands);
13643 break;
13645 case PLUS:
13646 arg1 = XEXP (addr, 0);
13647 arg2 = XEXP (addr, 1);
13649 if (CONSTANT_P (arg1))
13650 base = arg2, offset = arg1;
13651 else
13652 base = arg1, offset = arg2;
13654 gcc_assert (GET_CODE (base) == REG);
13656 /* Catch the case of <address> = <reg> + <reg> */
13657 if (GET_CODE (offset) == REG)
13659 int reg_offset = REGNO (offset);
13660 int reg_base = REGNO (base);
13661 int reg_dest = REGNO (operands[0]);
13663 /* Add the base and offset registers together into the
13664 higher destination register. */
13665 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
13666 reg_dest + 1, reg_base, reg_offset);
13668 /* Load the lower destination register from the address in
13669 the higher destination register. */
13670 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
13671 reg_dest, reg_dest + 1);
13673 /* Load the higher destination register from its own address
13674 plus 4. */
13675 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
13676 reg_dest + 1, reg_dest + 1);
13678 else
13680 /* Compute <address> + 4 for the high order load. */
13681 operands[2] = gen_rtx_MEM (SImode,
13682 plus_constant (XEXP (operands[1], 0), 4));
13684 /* If the computed address is held in the low order register
13685 then load the high order register first, otherwise always
13686 load the low order register first. */
13687 if (REGNO (operands[0]) == REGNO (base))
13689 output_asm_insn ("ldr\t%H0, %2", operands);
13690 output_asm_insn ("ldr\t%0, %1", operands);
13692 else
13694 output_asm_insn ("ldr\t%0, %1", operands);
13695 output_asm_insn ("ldr\t%H0, %2", operands);
13698 break;
13700 case LABEL_REF:
13701 /* With no registers to worry about we can just load the value
13702 directly. */
13703 operands[2] = gen_rtx_MEM (SImode,
13704 plus_constant (XEXP (operands[1], 0), 4));
13706 output_asm_insn ("ldr\t%H0, %2", operands);
13707 output_asm_insn ("ldr\t%0, %1", operands);
13708 break;
13710 default:
13711 gcc_unreachable ();
13714 return "";
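/* Output a two or three word block move for the Thumb.  The scratch
   registers in operands[4..6] are first sorted into ascending order,
   since LDMIA/STMIA register lists must be ascending; the copy itself
   is then a single LDMIA from the source followed by an STMIA to the
   destination, both with write-back.  */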
13717 const char *
13718 thumb_output_move_mem_multiple (int n, rtx *operands)
13720 rtx tmp;
13722 switch (n)
13724 case 2:
13725 if (REGNO (operands[4]) > REGNO (operands[5]))
13727 tmp = operands[4];
13728 operands[4] = operands[5];
13729 operands[5] = tmp;
13731 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
13732 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
13733 break;
13735 case 3:
13736 if (REGNO (operands[4]) > REGNO (operands[5]))
13738 tmp = operands[4];
13739 operands[4] = operands[5];
13740 operands[5] = tmp;
13742 if (REGNO (operands[5]) > REGNO (operands[6]))
13744 tmp = operands[5];
13745 operands[5] = operands[6];
13746 operands[6] = tmp;
13748 if (REGNO (operands[4]) > REGNO (operands[5]))
13750 tmp = operands[4];
13751 operands[4] = operands[5];
13752 operands[5] = tmp;
13755 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
13756 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
13757 break;
13759 default:
13760 gcc_unreachable ();
13763 return "";
13766 /* Output a call-via instruction for thumb state. */
13767 const char *
13768 thumb_call_via_reg (rtx reg)
13770 int regno = REGNO (reg);
13771 rtx *labelp;
13773 gcc_assert (regno < LR_REGNUM);
13775 /* If we are in the normal text section we can use a single instance
13776 per compilation unit. If we are doing function sections, then we need
13777 an entry per section, since we can't rely on reachability. */
13778 if (in_text_section ())
13780 thumb_call_reg_needed = 1;
13782 if (thumb_call_via_label[regno] == NULL)
13783 thumb_call_via_label[regno] = gen_label_rtx ();
13784 labelp = thumb_call_via_label + regno;
13786 else
13788 if (cfun->machine->call_via[regno] == NULL)
13789 cfun->machine->call_via[regno] = gen_label_rtx ();
13790 labelp = cfun->machine->call_via + regno;
13793 output_asm_insn ("bl\t%a0", labelp);
13794 return "";
13797 /* Routines for generating rtl. */
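/* Expand a block copy of LEN bytes (operands[2]) for the Thumb by
   emitting 12-byte and 8-byte block-move patterns while possible,
   then word, half-word and byte moves for any remainder.  */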
13798 void
13799 thumb_expand_movmemqi (rtx *operands)
13801 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
13802 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
13803 HOST_WIDE_INT len = INTVAL (operands[2]);
13804 HOST_WIDE_INT offset = 0;
13806 while (len >= 12)
13808 emit_insn (gen_movmem12b (out, in, out, in));
13809 len -= 12;
13812 if (len >= 8)
13814 emit_insn (gen_movmem8b (out, in, out, in));
13815 len -= 8;
13818 if (len >= 4)
13820 rtx reg = gen_reg_rtx (SImode);
13821 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
13822 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
13823 len -= 4;
13824 offset += 4;
13827 if (len >= 2)
13829 rtx reg = gen_reg_rtx (HImode);
13830 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
13831 plus_constant (in, offset))));
13832 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
13833 reg));
13834 len -= 2;
13835 offset += 2;
13838 if (len)
13840 rtx reg = gen_reg_rtx (QImode);
13841 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
13842 plus_constant (in, offset))));
13843 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
13844 reg));
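/* Handle storing a half-word to memory during reload; this is done
   via the thumb_movhi_clobber pattern, with operands[2] supplying
   the scratch register.  */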
13848 void
13849 thumb_reload_out_hi (rtx *operands)
13851 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
13854 /* Handle reading a half-word from memory during reload. */
13855 void
13856 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
13858 gcc_unreachable ();
13861 /* Return the length of a function name prefix
13862 that starts with the character 'c'. */
13863 static int
13864 arm_get_strip_length (int c)
13866 switch (c)
13868 ARM_NAME_ENCODING_LENGTHS
13869 default: return 0;
13873 /* Return a pointer to a function's name with any
13874 and all prefix encodings stripped from it. */
13875 const char *
13876 arm_strip_name_encoding (const char *name)
13878 int skip;
13880 while ((skip = arm_get_strip_length (* name)))
13881 name += skip;
13883 return name;
13886 /* If there is a '*' anywhere in the name's prefix, then
13887 emit the stripped name verbatim, otherwise prepend an
13888 underscore if leading underscores are being used. */
13889 void
13890 arm_asm_output_labelref (FILE *stream, const char *name)
13892 int skip;
13893 int verbatim = 0;
13895 while ((skip = arm_get_strip_length (* name)))
13897 verbatim |= (*name == '*');
13898 name += skip;
13901 if (verbatim)
13902 fputs (name, stream);
13903 else
13904 asm_fprintf (stream, "%U%s", name);
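/* At the end of the file, output any Thumb call-via stubs requested
   by thumb_call_via_reg for the normal text section: one "bx rN"
   instruction behind each recorded label.  */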
13907 static void
13908 arm_file_end (void)
13910 int regno;
13912 if (! thumb_call_reg_needed)
13913 return;
13915 text_section ();
13916 asm_fprintf (asm_out_file, "\t.code 16\n");
13917 ASM_OUTPUT_ALIGN (asm_out_file, 1);
13919 for (regno = 0; regno < LR_REGNUM; regno++)
13921 rtx label = thumb_call_via_label[regno];
13923 if (label != 0)
13925 targetm.asm_out.internal_label (asm_out_file, "L",
13926 CODE_LABEL_NUMBER (label));
13927 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
13932 rtx aof_pic_label;
13934 #ifdef AOF_ASSEMBLER
13935 /* Special functions only needed when producing AOF syntax assembler. */
13937 struct pic_chain
13939 struct pic_chain * next;
13940 const char * symname;
13943 static struct pic_chain * aof_pic_chain = NULL;
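/* Return an address expression referring to the PIC constant-pool slot
   for symbol X, appending a new entry to the chain (and creating the
   x$adcons base label) if X has not been seen before.  */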
13945 rtx
13946 aof_pic_entry (rtx x)
13948 struct pic_chain ** chainp;
13949 int offset;
13951 if (aof_pic_label == NULL_RTX)
13953 aof_pic_label = gen_rtx_SYMBOL_REF (Pmode, "x$adcons");
13956 for (offset = 0, chainp = &aof_pic_chain; *chainp;
13957 offset += 4, chainp = &(*chainp)->next)
13958 if ((*chainp)->symname == XSTR (x, 0))
13959 return plus_constant (aof_pic_label, offset);
13961 *chainp = (struct pic_chain *) xmalloc (sizeof (struct pic_chain));
13962 (*chainp)->next = NULL;
13963 (*chainp)->symname = XSTR (x, 0);
13964 return plus_constant (aof_pic_label, offset);
13967 void
13968 aof_dump_pic_table (FILE *f)
13970 struct pic_chain * chain;
13972 if (aof_pic_chain == NULL)
13973 return;
13975 asm_fprintf (f, "\tAREA |%r$$adcons|, BASED %r\n",
13976 PIC_OFFSET_TABLE_REGNUM,
13977 PIC_OFFSET_TABLE_REGNUM);
13978 fputs ("|x$adcons|\n", f);
13980 for (chain = aof_pic_chain; chain; chain = chain->next)
13982 fputs ("\tDCD\t", f);
13983 assemble_name (f, chain->symname);
13984 fputs ("\n", f);
13988 int arm_text_section_count = 1;
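/* Return the AREA directive that introduces a new code section; each
   call uses a fresh C$$code<N> name, and PIC code is marked REENTRANT.  */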
13990 char *
13991 aof_text_section (void)
13993 static char buf[100];
13994 sprintf (buf, "\tAREA |C$$code%d|, CODE, READONLY",
13995 arm_text_section_count++);
13996 if (flag_pic)
13997 strcat (buf, ", PIC, REENTRANT");
13998 return buf;
14001 static int arm_data_section_count = 1;
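/* Likewise, return the AREA directive that introduces a new data
   section.  */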
14003 char *
14004 aof_data_section (void)
14006 static char buf[100];
14007 sprintf (buf, "\tAREA |C$$data%d|, DATA", arm_data_section_count++);
14008 return buf;
14011 /* The AOF assembler is religiously strict about declarations of
14012 imported and exported symbols, so that it is impossible to declare
14013 a function as imported near the beginning of the file, and then to
14014 export it later on. It is, however, possible to delay the decision
14015 until all the functions in the file have been compiled. To get
14016 around this, we maintain a list of the imports and exports, and
14017 delete from it any that are subsequently defined. At the end of
14018 compilation we spit the remainder of the list out before the END
14019 directive. */
14021 struct import
14023 struct import * next;
14024 const char * name;
14027 static struct import * imports_list = NULL;
14029 void
14030 aof_add_import (const char *name)
14032 struct import * new;
14034 for (new = imports_list; new; new = new->next)
14035 if (new->name == name)
14036 return;
14038 new = (struct import *) xmalloc (sizeof (struct import));
14039 new->next = imports_list;
14040 imports_list = new;
14041 new->name = name;
14044 void
14045 aof_delete_import (const char *name)
14047 struct import ** old;
14049 for (old = &imports_list; *old; old = & (*old)->next)
14051 if ((*old)->name == name)
14053 *old = (*old)->next;
14054 return;
14059 int arm_main_function = 0;
14061 static void
14062 aof_dump_imports (FILE *f)
14064 /* The AOF assembler needs this to cause the startup code to be extracted
14065 from the library. Bringing in __main causes the whole thing to work
14066 automagically. */
14067 if (arm_main_function)
14069 text_section ();
14070 fputs ("\tIMPORT __main\n", f);
14071 fputs ("\tDCD __main\n", f);
14074 /* Now dump the remaining imports. */
14075 while (imports_list)
14077 fprintf (f, "\tIMPORT\t");
14078 assemble_name (f, imports_list->name);
14079 fputc ('\n', f);
14080 imports_list = imports_list->next;
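/* Globalize a label in the default way, but also note whether "main"
   is being defined so that aof_dump_imports can pull in __main.  */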
14084 static void
14085 aof_globalize_label (FILE *stream, const char *name)
14087 default_globalize_label (stream, name);
14088 if (! strcmp (name, "main"))
14089 arm_main_function = 1;
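/* Emit the standard AOF register-name (RN and FN) definitions at the
   start of the output file and switch to the text section.  */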
14092 static void
14093 aof_file_start (void)
14095 fputs ("__r0\tRN\t0\n", asm_out_file);
14096 fputs ("__a1\tRN\t0\n", asm_out_file);
14097 fputs ("__a2\tRN\t1\n", asm_out_file);
14098 fputs ("__a3\tRN\t2\n", asm_out_file);
14099 fputs ("__a4\tRN\t3\n", asm_out_file);
14100 fputs ("__v1\tRN\t4\n", asm_out_file);
14101 fputs ("__v2\tRN\t5\n", asm_out_file);
14102 fputs ("__v3\tRN\t6\n", asm_out_file);
14103 fputs ("__v4\tRN\t7\n", asm_out_file);
14104 fputs ("__v5\tRN\t8\n", asm_out_file);
14105 fputs ("__v6\tRN\t9\n", asm_out_file);
14106 fputs ("__sl\tRN\t10\n", asm_out_file);
14107 fputs ("__fp\tRN\t11\n", asm_out_file);
14108 fputs ("__ip\tRN\t12\n", asm_out_file);
14109 fputs ("__sp\tRN\t13\n", asm_out_file);
14110 fputs ("__lr\tRN\t14\n", asm_out_file);
14111 fputs ("__pc\tRN\t15\n", asm_out_file);
14112 fputs ("__f0\tFN\t0\n", asm_out_file);
14113 fputs ("__f1\tFN\t1\n", asm_out_file);
14114 fputs ("__f2\tFN\t2\n", asm_out_file);
14115 fputs ("__f3\tFN\t3\n", asm_out_file);
14116 fputs ("__f4\tFN\t4\n", asm_out_file);
14117 fputs ("__f5\tFN\t5\n", asm_out_file);
14118 fputs ("__f6\tFN\t6\n", asm_out_file);
14119 fputs ("__f7\tFN\t7\n", asm_out_file);
14120 text_section ();
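/* Finish off an AOF assembler file: dump the PIC table if PIC is in
   use, emit any Thumb call-via stubs and outstanding imports, and
   close with the END directive.  */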
14123 static void
14124 aof_file_end (void)
14126 if (flag_pic)
14127 aof_dump_pic_table (asm_out_file);
14128 arm_file_end ();
14129 aof_dump_imports (asm_out_file);
14130 fputs ("\tEND\n", asm_out_file);
14132 #endif /* AOF_ASSEMBLER */
14134 #ifndef ARM_PE
14135 /* Symbols in the text segment can be accessed without indirecting via the
14136 constant pool; it may take an extra binary operation, but this is still
14137 faster than indirecting via memory. Don't do this when not optimizing,
14138 since we won't be calculating all of the offsets necessary to do this
14139 simplification. */
14141 static void
14142 arm_encode_section_info (tree decl, rtx rtl, int first)
14144 /* This doesn't work with AOF syntax, since the string table may be in
14145 a different AREA. */
14146 #ifndef AOF_ASSEMBLER
14147 if (optimize > 0 && TREE_CONSTANT (decl))
14148 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
14149 #endif
14151 /* If we are referencing a function that is weak then encode a long call
14152 flag in the function name, otherwise if the function is static or
14153 known to be defined in this file then encode a short call flag. */
14154 if (first && DECL_P (decl))
14156 if (TREE_CODE (decl) == FUNCTION_DECL && DECL_WEAK (decl))
14157 arm_encode_call_attribute (decl, LONG_CALL_FLAG_CHAR);
14158 else if (! TREE_PUBLIC (decl))
14159 arm_encode_call_attribute (decl, SHORT_CALL_FLAG_CHAR);
14162 #endif /* !ARM_PE */
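/* Output an internal label.  If it is the label that the conditional
   execution state machine is waiting for, reset that state first.  */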
14164 static void
14165 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
14167 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
14168 && !strcmp (prefix, "L"))
14170 arm_ccfsm_state = 0;
14171 arm_target_insn = NULL;
14173 default_internal_label (stream, prefix, labelno);
14176 /* Output code to add DELTA to the first argument, and then jump
14177 to FUNCTION. Used for C++ multiple inheritance. */
14178 static void
14179 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
14180 HOST_WIDE_INT delta,
14181 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
14182 tree function)
14184 static int thunk_label = 0;
14185 char label[256];
14186 int mi_delta = delta;
14187 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
14188 int shift = 0;
14189 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
14190 ? 1 : 0);
14191 if (mi_delta < 0)
14192 mi_delta = - mi_delta;
14193 if (TARGET_THUMB)
14195 int labelno = thunk_label++;
14196 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
14197 fputs ("\tldr\tr12, ", file);
14198 assemble_name (file, label);
14199 fputc ('\n', file);
14201 while (mi_delta != 0)
14203 if ((mi_delta & (3 << shift)) == 0)
14204 shift += 2;
14205 else
14207 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
14208 mi_op, this_regno, this_regno,
14209 mi_delta & (0xff << shift));
14210 mi_delta &= ~(0xff << shift);
14211 shift += 8;
14214 if (TARGET_THUMB)
14216 fprintf (file, "\tbx\tr12\n");
14217 ASM_OUTPUT_ALIGN (file, 2);
14218 assemble_name (file, label);
14219 fputs (":\n", file);
14220 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
14222 else
14224 fputs ("\tb\t", file);
14225 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
14226 if (NEED_PLT_RELOC)
14227 fputs ("(PLT)", file);
14228 fputc ('\n', file);
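/* Output the CONST_VECTOR X as a single hexadecimal literal, printing
   the elements from the highest-numbered to the lowest, each with a
   width determined by the vector mode.  Returns 1 on success.  */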
14232 int
14233 arm_emit_vector_const (FILE *file, rtx x)
14235 int i;
14236 const char * pattern;
14238 gcc_assert (GET_CODE (x) == CONST_VECTOR);
14240 switch (GET_MODE (x))
14242 case V2SImode: pattern = "%08x"; break;
14243 case V4HImode: pattern = "%04x"; break;
14244 case V8QImode: pattern = "%02x"; break;
14245 default: gcc_unreachable ();
14248 fprintf (file, "0x");
14249 for (i = CONST_VECTOR_NUNITS (x); i--;)
14251 rtx element;
14253 element = CONST_VECTOR_ELT (x, i);
14254 fprintf (file, pattern, INTVAL (element));
14257 return 1;
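/* Output the assembly for a load into an iWMMXt GR register.  Unless
   the address is a register plus an out-of-range constant offset we
   can simply emit WLDRW; otherwise spill the base register, LDR the
   value into it, transfer it to the wCGR with TMCR, then restore the
   base register.  */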
14260 const char *
14261 arm_output_load_gr (rtx *operands)
14263 rtx reg;
14264 rtx offset;
14265 rtx wcgr;
14266 rtx sum;
14268 if (GET_CODE (operands [1]) != MEM
14269 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
14270 || GET_CODE (reg = XEXP (sum, 0)) != REG
14271 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
14272 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
14273 return "wldrw%?\t%0, %1";
14275 /* Fix up an out-of-range load of a GR register. */
14276 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
14277 wcgr = operands[0];
14278 operands[0] = reg;
14279 output_asm_insn ("ldr%?\t%0, %1", operands);
14281 operands[0] = wcgr;
14282 operands[1] = reg;
14283 output_asm_insn ("tmcr%?\t%0, %1", operands);
14284 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
14286 return "";
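/* Worker function for TARGET_STRUCT_VALUE_RTX: return the register in
   which the address used to return a structure value is passed.  */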
14289 static rtx
14290 arm_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
14291 int incoming ATTRIBUTE_UNUSED)
14293 #if 0
14294 /* FIXME: The ARM backend has special code to handle structure
14295 returns, and will reserve its own hidden first argument. So
14296 if this macro is enabled a *second* hidden argument will be
14297 reserved, which will break binary compatibility with old
14298 toolchains and also thunk handling. One day this should be
14299 fixed. */
14300 return 0;
14301 #else
14302 /* Register in which the address to store a structure value
14303 is passed to a function. */
14304 return gen_rtx_REG (Pmode, ARG_REGISTER (1));
14305 #endif
14308 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
14310 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
14311 named arg and all anonymous args onto the stack.
14312 XXX I know the prologue shouldn't be pushing registers, but it is faster
14313 that way. */
14315 static void
14316 arm_setup_incoming_varargs (CUMULATIVE_ARGS *cum,
14317 enum machine_mode mode ATTRIBUTE_UNUSED,
14318 tree type ATTRIBUTE_UNUSED,
14319 int *pretend_size,
14320 int second_time ATTRIBUTE_UNUSED)
14322 cfun->machine->uses_anonymous_args = 1;
14323 if (cum->nregs < NUM_ARG_REGS)
14324 *pretend_size = (NUM_ARG_REGS - cum->nregs) * UNITS_PER_WORD;
14327 /* Return nonzero if the CONSUMER instruction (a store) does not need
14328 PRODUCER's value to calculate the address. */
14330 int
14331 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
14333 rtx value = PATTERN (producer);
14334 rtx addr = PATTERN (consumer);
14336 if (GET_CODE (value) == COND_EXEC)
14337 value = COND_EXEC_CODE (value);
14338 if (GET_CODE (value) == PARALLEL)
14339 value = XVECEXP (value, 0, 0);
14340 value = XEXP (value, 0);
14341 if (GET_CODE (addr) == COND_EXEC)
14342 addr = COND_EXEC_CODE (addr);
14343 if (GET_CODE (addr) == PARALLEL)
14344 addr = XVECEXP (addr, 0, 0);
14345 addr = XEXP (addr, 0);
14347 return !reg_overlap_mentioned_p (value, addr);
14350 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
14351 have an early register shift value or amount dependency on the
14352 result of PRODUCER. */
14354 int
14355 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
14357 rtx value = PATTERN (producer);
14358 rtx op = PATTERN (consumer);
14359 rtx early_op;
14361 if (GET_CODE (value) == COND_EXEC)
14362 value = COND_EXEC_CODE (value);
14363 if (GET_CODE (value) == PARALLEL)
14364 value = XVECEXP (value, 0, 0);
14365 value = XEXP (value, 0);
14366 if (GET_CODE (op) == COND_EXEC)
14367 op = COND_EXEC_CODE (op);
14368 if (GET_CODE (op) == PARALLEL)
14369 op = XVECEXP (op, 0, 0);
14370 op = XEXP (op, 1);
14372 early_op = XEXP (op, 0);
14373 /* This is either an actual independent shift, or a shift applied to
14374 the first operand of another operation. We want the whole shift
14375 operation. */
14376 if (GET_CODE (early_op) == REG)
14377 early_op = op;
14379 return !reg_overlap_mentioned_p (value, early_op);
14382 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
14383 have an early register shift value dependency on the result of
14384 PRODUCER. */
14386 int
14387 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
14389 rtx value = PATTERN (producer);
14390 rtx op = PATTERN (consumer);
14391 rtx early_op;
14393 if (GET_CODE (value) == COND_EXEC)
14394 value = COND_EXEC_CODE (value);
14395 if (GET_CODE (value) == PARALLEL)
14396 value = XVECEXP (value, 0, 0);
14397 value = XEXP (value, 0);
14398 if (GET_CODE (op) == COND_EXEC)
14399 op = COND_EXEC_CODE (op);
14400 if (GET_CODE (op) == PARALLEL)
14401 op = XVECEXP (op, 0, 0);
14402 op = XEXP (op, 1);
14404 early_op = XEXP (op, 0);
14406 /* This is either an actual independent shift, or a shift applied to
14407 the first operand of another operation. We want the value being
14408 shifted, in either case. */
14409 if (GET_CODE (early_op) != REG)
14410 early_op = XEXP (early_op, 0);
14412 return !reg_overlap_mentioned_p (value, early_op);
14415 /* Return nonzero if the CONSUMER (a mul or mac op) does not
14416 have an early register mult dependency on the result of
14417 PRODUCER. */
14419 int
14420 arm_no_early_mul_dep (rtx producer, rtx consumer)
14422 rtx value = PATTERN (producer);
14423 rtx op = PATTERN (consumer);
14425 if (GET_CODE (value) == COND_EXEC)
14426 value = COND_EXEC_CODE (value);
14427 if (GET_CODE (value) == PARALLEL)
14428 value = XVECEXP (value, 0, 0);
14429 value = XEXP (value, 0);
14430 if (GET_CODE (op) == COND_EXEC)
14431 op = COND_EXEC_CODE (op);
14432 if (GET_CODE (op) == PARALLEL)
14433 op = XVECEXP (op, 0, 0);
14434 op = XEXP (op, 1);
14436 return (GET_CODE (op) == PLUS
14437 && !reg_overlap_mentioned_p (value, XEXP (op, 0)));
14441 /* We can't rely on the caller doing the proper promotion when
14442 using APCS or ATPCS. */
14444 static bool
14445 arm_promote_prototypes (tree t ATTRIBUTE_UNUSED)
14447 return !TARGET_AAPCS_BASED;
14451 /* AAPCS based ABIs use short enums by default. */
14453 static bool
14454 arm_default_short_enums (void)
14456 return TARGET_AAPCS_BASED;
14460 /* AAPCS requires that anonymous bitfields affect structure alignment. */
14462 static bool
14463 arm_align_anon_bitfield (void)
14465 return TARGET_AAPCS_BASED;
14469 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
14471 static tree
14472 arm_cxx_guard_type (void)
14474 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
14478 /* The EABI says to test the least significant bit of a guard variable. */
14480 static bool
14481 arm_cxx_guard_mask_bit (void)
14483 return TARGET_AAPCS_BASED;
14487 /* The EABI specifies that all array cookies are 8 bytes long. */
14489 static tree
14490 arm_get_cookie_size (tree type)
14492 tree size;
14494 if (!TARGET_AAPCS_BASED)
14495 return default_cxx_get_cookie_size (type);
14497 size = build_int_cst (sizetype, 8);
14498 return size;
14502 /* The EABI says that array cookies should also contain the element size. */
14504 static bool
14505 arm_cookie_has_size (void)
14507 return TARGET_AAPCS_BASED;
14511 /* The EABI says constructors and destructors should return a pointer to
14512 the object constructed/destroyed. */
14514 static bool
14515 arm_cxx_cdtor_returns_this (void)
14517 return TARGET_AAPCS_BASED;
14520 /* The EABI says that an inline function may never be the key
14521 method. */
14523 static bool
14524 arm_cxx_key_method_may_be_inline (void)
14526 return !TARGET_AAPCS_BASED;
14529 static void
14530 arm_cxx_determine_class_data_visibility (tree decl)
14532 if (!TARGET_AAPCS_BASED)
14533 return;
14535 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
14536 is exported. However, on systems without dynamic vague linkage,
14537 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
14538 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
14539 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
14540 else
14541 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
14542 DECL_VISIBILITY_SPECIFIED (decl) = 1;
14545 static bool
14546 arm_cxx_class_data_always_comdat (void)
14548 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
14549 vague linkage if the class has no key function. */
14550 return !TARGET_AAPCS_BASED;
14554 /* The EABI says __aeabi_atexit should be used to register static
14555 destructors. */
14557 static bool
14558 arm_cxx_use_aeabi_atexit (void)
14560 return TARGET_AAPCS_BASED;
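/* Set the function's return address to SOURCE.  If LR was not saved,
   move SOURCE straight into it; otherwise store SOURCE over the stack
   slot holding LR, using SCRATCH when the offset is out of range.  */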
14564 void
14565 arm_set_return_address (rtx source, rtx scratch)
14567 arm_stack_offsets *offsets;
14568 HOST_WIDE_INT delta;
14569 rtx addr;
14570 unsigned long saved_regs;
14572 saved_regs = arm_compute_save_reg_mask ();
14574 if ((saved_regs & (1 << LR_REGNUM)) == 0)
14575 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
14576 else
14578 if (frame_pointer_needed)
14579 addr = plus_constant(hard_frame_pointer_rtx, -4);
14580 else
14582 /* LR will be the first saved register. */
14583 offsets = arm_get_frame_offsets ();
14584 delta = offsets->outgoing_args - (offsets->frame + 4);
14587 if (delta >= 4096)
14589 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
14590 GEN_INT (delta & ~4095)));
14591 addr = scratch;
14592 delta &= 4095;
14594 else
14595 addr = stack_pointer_rtx;
14597 addr = plus_constant (addr, delta);
14599 emit_move_insn (gen_rtx_MEM (Pmode, addr), source);
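/* Thumb counterpart of arm_set_return_address: either move SOURCE into
   LR or store it over LR's stack slot, allowing for the backtrace
   structure when TARGET_BACKTRACE reserved one.  */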
14604 void
14605 thumb_set_return_address (rtx source, rtx scratch)
14607 arm_stack_offsets *offsets;
14608 HOST_WIDE_INT delta;
14609 int reg;
14610 rtx addr;
14611 unsigned long mask;
14613 emit_insn (gen_rtx_USE (VOIDmode, source));
14615 mask = thumb_compute_save_reg_mask ();
14616 if (mask & (1 << LR_REGNUM))
14618 offsets = arm_get_frame_offsets ();
14620 /* Find the saved regs. */
14621 if (frame_pointer_needed)
14623 delta = offsets->soft_frame - offsets->saved_args;
14624 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
14626 else
14628 delta = offsets->outgoing_args - offsets->saved_args;
14629 reg = SP_REGNUM;
14631 /* Allow for the stack frame. */
14632 if (TARGET_BACKTRACE)
14633 delta -= 16;
14634 /* The link register is always the first saved register. */
14635 delta -= 4;
14637 /* Construct the address. */
14638 addr = gen_rtx_REG (SImode, reg);
14639 if ((reg != SP_REGNUM && delta >= 128)
14640 || delta >= 1024)
14642 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
14643 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
14644 addr = scratch;
14646 else
14647 addr = plus_constant (addr, delta);
14649 emit_move_insn (gen_rtx_MEM (Pmode, addr), source);
14651 else
14652 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
14655 /* Implements target hook vector_mode_supported_p. */
14656 bool
14657 arm_vector_mode_supported_p (enum machine_mode mode)
14659 if ((mode == V2SImode)
14660 || (mode == V4HImode)
14661 || (mode == V8QImode))
14662 return true;
14664 return false;
14667 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
14668 ARM insns and therefore guarantee that the shift count is modulo 256.
14669 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
14670 guarantee no particular behavior for out-of-range counts. */
14672 static unsigned HOST_WIDE_INT
14673 arm_shift_truncation_mask (enum machine_mode mode)
14675 return mode == SImode ? 255 : 0;
14679 /* Map internal gcc register numbers to DWARF2 register numbers. */
14681 unsigned int
14682 arm_dbx_register_number (unsigned int regno)
14684 if (regno < 16)
14685 return regno;
14687 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
14688 compatibility. The EABI defines them as registers 96-103. */
14689 if (IS_FPA_REGNUM (regno))
14690 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
14692 if (IS_VFP_REGNUM (regno))
14693 return 64 + regno - FIRST_VFP_REGNUM;
14695 if (IS_IWMMXT_GR_REGNUM (regno))
14696 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
14698 if (IS_IWMMXT_REGNUM (regno))
14699 return 112 + regno - FIRST_IWMMXT_REGNUM;
14701 gcc_unreachable ();