1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
4 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
5 and Martin Simmons (@harleqn.co.uk).
6 More major hacks by Richard Earnshaw (rearnsha@arm.com).
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published
12 by the Free Software Foundation; either version 2, or (at your
13 option) any later version.
15 GCC is distributed in the hope that it will be useful, but WITHOUT
16 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
17 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
18 License for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING. If not, write to
22 the Free Software Foundation, 59 Temple Place - Suite 330,
23 Boston, MA 02111-1307, USA. */
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "obstack.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "real.h"
35 #include "insn-config.h"
36 #include "conditions.h"
37 #include "output.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "reload.h"
41 #include "function.h"
42 #include "expr.h"
43 #include "optabs.h"
44 #include "toplev.h"
45 #include "recog.h"
46 #include "ggc.h"
47 #include "except.h"
48 #include "c-pragma.h"
49 #include "integrate.h"
50 #include "tm_p.h"
51 #include "target.h"
52 #include "target-def.h"
53 #include "debug.h"
54 #include "langhooks.h"
56 /* Forward definitions of types. */
57 typedef struct minipool_node Mnode;
58 typedef struct minipool_fixup Mfix;
60 const struct attribute_spec arm_attribute_table[];
62 /* Forward function declarations. */
63 static arm_stack_offsets *arm_get_frame_offsets (void);
64 static void arm_add_gc_roots (void);
65 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
66 HOST_WIDE_INT, rtx, rtx, int, int);
67 static unsigned bit_count (unsigned long);
68 static int arm_address_register_rtx_p (rtx, int);
69 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
70 static int thumb_base_register_rtx_p (rtx, enum machine_mode, int);
71 inline static int thumb_index_register_rtx_p (rtx, int);
72 static int thumb_far_jump_used_p (void);
73 static bool thumb_force_lr_save (void);
74 static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
75 static rtx emit_sfm (int, int);
76 static int arm_size_return_regs (void);
77 #ifndef AOF_ASSEMBLER
78 static bool arm_assemble_integer (rtx, unsigned int, int);
79 #endif
80 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
81 static arm_cc get_arm_condition_code (rtx);
82 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
83 static rtx is_jump_table (rtx);
84 static const char *output_multi_immediate (rtx *, const char *, const char *,
85 int, HOST_WIDE_INT);
86 static const char *shift_op (rtx, HOST_WIDE_INT *);
87 static struct machine_function *arm_init_machine_status (void);
88 static void thumb_exit (FILE *, int);
89 static rtx is_jump_table (rtx);
90 static HOST_WIDE_INT get_jump_table_size (rtx);
91 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
92 static Mnode *add_minipool_forward_ref (Mfix *);
93 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
94 static Mnode *add_minipool_backward_ref (Mfix *);
95 static void assign_minipool_offsets (Mfix *);
96 static void arm_print_value (FILE *, rtx);
97 static void dump_minipool (rtx);
98 static int arm_barrier_cost (rtx);
99 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
100 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
101 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
102 rtx);
103 static void arm_reorg (void);
104 static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
105 static int current_file_function_operand (rtx);
106 static unsigned long arm_compute_save_reg0_reg12_mask (void);
107 static unsigned long arm_compute_save_reg_mask (void);
108 static unsigned long arm_isr_value (tree);
109 static unsigned long arm_compute_func_type (void);
110 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
111 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
112 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
113 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
114 #endif
115 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
116 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
117 static void thumb_output_function_prologue (FILE *, HOST_WIDE_INT);
118 static int arm_comp_type_attributes (tree, tree);
119 static void arm_set_default_type_attributes (tree);
120 static int arm_adjust_cost (rtx, rtx, rtx, int);
121 static int count_insns_for_constant (HOST_WIDE_INT, int);
122 static int arm_get_strip_length (int);
123 static bool arm_function_ok_for_sibcall (tree, tree);
124 static void arm_internal_label (FILE *, const char *, unsigned long);
125 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
126 tree);
127 static int arm_rtx_costs_1 (rtx, enum rtx_code, enum rtx_code);
128 static bool arm_size_rtx_costs (rtx, int, int, int *);
129 static bool arm_slowmul_rtx_costs (rtx, int, int, int *);
130 static bool arm_fastmul_rtx_costs (rtx, int, int, int *);
131 static bool arm_xscale_rtx_costs (rtx, int, int, int *);
132 static bool arm_9e_rtx_costs (rtx, int, int, int *);
133 static int arm_address_cost (rtx);
134 static bool arm_memory_load_p (rtx);
135 static bool arm_cirrus_insn_p (rtx);
136 static void cirrus_reorg (rtx);
137 static void arm_init_builtins (void);
138 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
139 static void arm_init_iwmmxt_builtins (void);
140 static rtx safe_vector_operand (rtx, enum machine_mode);
141 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
142 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
143 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
144 static void emit_constant_insn (rtx cond, rtx pattern);
145 static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
146 tree, bool);
148 #ifdef OBJECT_FORMAT_ELF
149 static void arm_elf_asm_constructor (rtx, int);
150 #endif
151 #ifndef ARM_PE
152 static void arm_encode_section_info (tree, rtx, int);
153 #endif
155 static void arm_file_end (void);
157 #ifdef AOF_ASSEMBLER
158 static void aof_globalize_label (FILE *, const char *);
159 static void aof_dump_imports (FILE *);
160 static void aof_dump_pic_table (FILE *);
161 static void aof_file_start (void);
162 static void aof_file_end (void);
163 #endif
164 static rtx arm_struct_value_rtx (tree, int);
165 static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
166 tree, int *, int);
167 static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
168 enum machine_mode, tree, bool);
169 static bool arm_promote_prototypes (tree);
170 static bool arm_default_short_enums (void);
171 static bool arm_align_anon_bitfield (void);
172 static bool arm_return_in_msb (tree);
173 static bool arm_must_pass_in_stack (enum machine_mode, tree);
175 static tree arm_cxx_guard_type (void);
176 static bool arm_cxx_guard_mask_bit (void);
177 static tree arm_get_cookie_size (tree);
178 static bool arm_cookie_has_size (void);
179 static bool arm_cxx_cdtor_returns_this (void);
180 static bool arm_cxx_key_method_may_be_inline (void);
181 static void arm_cxx_determine_class_data_visibility (tree);
182 static bool arm_cxx_class_data_always_comdat (void);
183 static bool arm_cxx_use_aeabi_atexit (void);
184 static void arm_init_libfuncs (void);
185 static bool arm_handle_option (size_t, const char *, int);
186 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
188 /* Initialize the GCC target structure. */
189 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
190 #undef TARGET_MERGE_DECL_ATTRIBUTES
191 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
192 #endif
194 #undef TARGET_ATTRIBUTE_TABLE
195 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
197 #undef TARGET_ASM_FILE_END
198 #define TARGET_ASM_FILE_END arm_file_end
200 #ifdef AOF_ASSEMBLER
201 #undef TARGET_ASM_BYTE_OP
202 #define TARGET_ASM_BYTE_OP "\tDCB\t"
203 #undef TARGET_ASM_ALIGNED_HI_OP
204 #define TARGET_ASM_ALIGNED_HI_OP "\tDCW\t"
205 #undef TARGET_ASM_ALIGNED_SI_OP
206 #define TARGET_ASM_ALIGNED_SI_OP "\tDCD\t"
207 #undef TARGET_ASM_GLOBALIZE_LABEL
208 #define TARGET_ASM_GLOBALIZE_LABEL aof_globalize_label
209 #undef TARGET_ASM_FILE_START
210 #define TARGET_ASM_FILE_START aof_file_start
211 #undef TARGET_ASM_FILE_END
212 #define TARGET_ASM_FILE_END aof_file_end
213 #else
214 #undef TARGET_ASM_ALIGNED_SI_OP
215 #define TARGET_ASM_ALIGNED_SI_OP NULL
216 #undef TARGET_ASM_INTEGER
217 #define TARGET_ASM_INTEGER arm_assemble_integer
218 #endif
220 #undef TARGET_ASM_FUNCTION_PROLOGUE
221 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
223 #undef TARGET_ASM_FUNCTION_EPILOGUE
224 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
226 #undef TARGET_DEFAULT_TARGET_FLAGS
227 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
228 #undef TARGET_HANDLE_OPTION
229 #define TARGET_HANDLE_OPTION arm_handle_option
231 #undef TARGET_COMP_TYPE_ATTRIBUTES
232 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
234 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
235 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
237 #undef TARGET_SCHED_ADJUST_COST
238 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
240 #undef TARGET_ENCODE_SECTION_INFO
241 #ifdef ARM_PE
242 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
243 #else
244 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
245 #endif
247 #undef TARGET_STRIP_NAME_ENCODING
248 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
250 #undef TARGET_ASM_INTERNAL_LABEL
251 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
253 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
254 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
256 #undef TARGET_ASM_OUTPUT_MI_THUNK
257 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
258 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
259 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
261 /* This will be overridden in arm_override_options. */
262 #undef TARGET_RTX_COSTS
263 #define TARGET_RTX_COSTS arm_slowmul_rtx_costs
264 #undef TARGET_ADDRESS_COST
265 #define TARGET_ADDRESS_COST arm_address_cost
267 #undef TARGET_SHIFT_TRUNCATION_MASK
268 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
269 #undef TARGET_VECTOR_MODE_SUPPORTED_P
270 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
272 #undef TARGET_MACHINE_DEPENDENT_REORG
273 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
275 #undef TARGET_INIT_BUILTINS
276 #define TARGET_INIT_BUILTINS arm_init_builtins
277 #undef TARGET_EXPAND_BUILTIN
278 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
280 #undef TARGET_INIT_LIBFUNCS
281 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
283 #undef TARGET_PROMOTE_FUNCTION_ARGS
284 #define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true
285 #undef TARGET_PROMOTE_FUNCTION_RETURN
286 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
287 #undef TARGET_PROMOTE_PROTOTYPES
288 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
289 #undef TARGET_PASS_BY_REFERENCE
290 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
291 #undef TARGET_ARG_PARTIAL_BYTES
292 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
294 #undef TARGET_STRUCT_VALUE_RTX
295 #define TARGET_STRUCT_VALUE_RTX arm_struct_value_rtx
297 #undef TARGET_SETUP_INCOMING_VARARGS
298 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
300 #undef TARGET_DEFAULT_SHORT_ENUMS
301 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
303 #undef TARGET_ALIGN_ANON_BITFIELD
304 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
306 #undef TARGET_CXX_GUARD_TYPE
307 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
309 #undef TARGET_CXX_GUARD_MASK_BIT
310 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
312 #undef TARGET_CXX_GET_COOKIE_SIZE
313 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
315 #undef TARGET_CXX_COOKIE_HAS_SIZE
316 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
318 #undef TARGET_CXX_CDTOR_RETURNS_THIS
319 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
321 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
322 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
324 #undef TARGET_CXX_USE_AEABI_ATEXIT
325 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
327 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
328 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
329 arm_cxx_determine_class_data_visibility
331 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
332 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
334 #undef TARGET_RETURN_IN_MSB
335 #define TARGET_RETURN_IN_MSB arm_return_in_msb
337 #undef TARGET_MUST_PASS_IN_STACK
338 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
340 struct gcc_target targetm = TARGET_INITIALIZER;
342 /* Obstack for minipool constant handling. */
343 static struct obstack minipool_obstack;
344 static char * minipool_startobj;
346 /* The maximum number of insns skipped which
347 will be conditionalised if possible. */
348 static int max_insns_skipped = 5;
350 extern FILE * asm_out_file;
352 /* True if we are currently building a constant table. */
353 int making_const_table;
355 /* Define the information needed to generate branch insns. This is
356 stored from the compare operation. */
357 rtx arm_compare_op0, arm_compare_op1;
359 /* The processor for which instructions should be scheduled. */
360 enum processor_type arm_tune = arm_none;
362 /* Which floating point model to use. */
363 enum arm_fp_model arm_fp_model;
365 /* Which floating point hardware is available. */
366 enum fputype arm_fpu_arch;
368 /* Which floating point hardware to schedule for. */
369 enum fputype arm_fpu_tune;
371 /* Whether to use floating point hardware. */
372 enum float_abi_type arm_float_abi;
374 /* Which ABI to use. */
375 enum arm_abi_type arm_abi;
377 /* Set by the -mfpu=... option. */
378 static const char * target_fpu_name = NULL;
380 /* Set by the -mfpe=... option. */
381 static const char * target_fpe_name = NULL;
383 /* Set by the -mfloat-abi=... option. */
384 static const char * target_float_abi_name = NULL;
386 /* Set by the -mabi=... option. */
387 static const char * target_abi_name = NULL;
389 /* Used to parse -mstructure_size_boundary command line option. */
390 static const char * structure_size_string = NULL;
391 int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;
393 /* Used for Thumb call_via trampolines. */
394 rtx thumb_call_via_label[14];
395 static int thumb_call_reg_needed;
397 /* Bit values used to identify processor capabilities. */
398 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
399 #define FL_ARCH3M (1 << 1) /* Extended multiply */
400 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
401 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
402 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
403 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
404 #define FL_THUMB (1 << 6) /* Thumb aware */
405 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
406 #define FL_STRONG (1 << 8) /* StrongARM */
407 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
408 #define FL_XSCALE (1 << 10) /* XScale */
409 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
410 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
411 media instructions. */
412 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
413 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
414 Note: ARM6 & 7 derivatives only. */
416 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
418 #define FL_FOR_ARCH2 0
419 #define FL_FOR_ARCH3 FL_MODE32
420 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
421 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
422 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
423 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
424 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
425 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
426 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
427 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
428 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
429 #define FL_FOR_ARCH6J FL_FOR_ARCH6
430 #define FL_FOR_ARCH6K FL_FOR_ARCH6
431 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
432 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6
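/* Editorial note (illustrative, not part of the original file): each
   FL_FOR_ARCHn macro folds in the flags of the architectures it extends,
   so, for example, FL_FOR_ARCH5TE expands to the union

     FL_FOR_ARCH5TE == FL_MODE32 | FL_ARCH3M | FL_ARCH4
                       | FL_ARCH5 | FL_ARCH5E | FL_THUMB

   i.e. an ARMv5TE part is assumed to implement everything from ARMv3's
   32-bit mode up through the v5 DSP extensions and Thumb.  */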
434 /* The bits in this mask specify which
435 instructions we are allowed to generate. */
436 static unsigned long insn_flags = 0;
438 /* The bits in this mask specify which instruction scheduling options should
439 be used. */
440 static unsigned long tune_flags = 0;
442 /* The following are used in the arm.md file as equivalents to bits
443 in the above two flag variables. */
445 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
446 int arm_arch3m = 0;
448 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
449 int arm_arch4 = 0;
451 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
452 int arm_arch4t = 0;
454 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
455 int arm_arch5 = 0;
457 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
458 int arm_arch5e = 0;
460 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
461 int arm_arch6 = 0;
463 /* Nonzero if this chip can benefit from load scheduling. */
464 int arm_ld_sched = 0;
466 /* Nonzero if this chip is a StrongARM. */
467 int arm_tune_strongarm = 0;
469 /* Nonzero if this chip is a Cirrus variant. */
470 int arm_arch_cirrus = 0;
472 /* Nonzero if this chip supports Intel Wireless MMX technology. */
473 int arm_arch_iwmmxt = 0;
475 /* Nonzero if this chip is an XScale. */
476 int arm_arch_xscale = 0;
478 /* Nonzero if tuning for XScale */
479 int arm_tune_xscale = 0;
481 /* Nonzero if we want to tune for stores that access the write-buffer.
482 This typically means an ARM6 or ARM7 with MMU or MPU. */
483 int arm_tune_wbuf = 0;
485 /* Nonzero if generating Thumb instructions. */
486 int thumb_code = 0;
488 /* Nonzero if we should define __THUMB_INTERWORK__ in the
489 preprocessor.
490 XXX This is a bit of a hack, it's intended to help work around
491 problems in GLD which doesn't understand that armv5t code is
492 interworking clean. */
493 int arm_cpp_interwork = 0;
495 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we
496 must report the mode of the memory reference from PRINT_OPERAND to
497 PRINT_OPERAND_ADDRESS. */
498 enum machine_mode output_memory_reference_mode;
500 /* The register number to be used for the PIC offset register. */
501 static const char * arm_pic_register_string = NULL;
502 int arm_pic_register = INVALID_REGNUM;
504 /* Set to 1 when a return insn is output, this means that the epilogue
505 is not needed. */
506 int return_used_this_function;
508 /* Set to 1 after arm_reorg has started. Reset to start at the start of
509 the next function. */
510 static int after_arm_reorg = 0;
512 /* The maximum number of insns to be used when loading a constant. */
513 static int arm_constant_limit = 3;
515 /* For an explanation of these variables, see final_prescan_insn below. */
516 int arm_ccfsm_state;
517 enum arm_cond_code arm_current_cc;
518 rtx arm_target_insn;
519 int arm_target_label;
521 /* The condition codes of the ARM, and the inverse function. */
522 static const char * const arm_condition_codes[] =
524 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
525 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
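/* Editorial note (not part of the original file): the codes are laid out
   in complementary pairs, so the inverse of a condition is found by
   flipping the low bit of its index, e.g.

     arm_condition_codes[ARM_EQ ^ 1]  is "ne"
     arm_condition_codes[ARM_GE ^ 1]  is "lt"

   (the XOR-with-1 inverse lookup assumed here is how arm.h usually
   expresses it).  */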
528 #define streq(string1, string2) (strcmp (string1, string2) == 0)
530 /* Initialization code. */
532 struct processors
534 const char *const name;
535 enum processor_type core;
536 const char *arch;
537 const unsigned long flags;
538 bool (* rtx_costs) (rtx, int, int, int *);
541 /* Not all of these give usefully different compilation alternatives,
542 but there is no simple way of generalizing them. */
543 static const struct processors all_cores[] =
545 /* ARM Cores */
546 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
547 {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, arm_##COSTS##_rtx_costs},
548 #include "arm-cores.def"
549 #undef ARM_CORE
550 {NULL, arm_none, NULL, 0, NULL}
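/* Illustrative sketch (the arm-cores.def entry shown is an assumption,
   not quoted from that file): an entry such as

     ARM_CORE("arm926ej-s", arm926ejs, 5TEJ, FL_LDSCHED, 9e)

   would expand through the ARM_CORE macro above into

     {"arm926ej-s", arm_none, "5TEJ",
      FL_LDSCHED | FL_FOR_ARCH5TEJ, arm_9e_rtx_costs},

   i.e. the architecture column is both stringized (for arm_arch_name)
   and token-pasted onto FL_FOR_ARCH to pick up the baseline flags.  */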
553 static const struct processors all_architectures[] =
555 /* ARM Architectures */
556 /* We don't specify rtx_costs here as it will be figured out
557 from the core. */
559 {"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
560 {"armv2a", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
561 {"armv3", arm6, "3", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
562 {"armv3m", arm7m, "3M", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
563 {"armv4", arm7tdmi, "4", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
564 /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
565 implementations that support it, so we will leave it out for now. */
566 {"armv4t", arm7tdmi, "4T", FL_CO_PROC | FL_FOR_ARCH4T, NULL},
567 {"armv5", arm10tdmi, "5", FL_CO_PROC | FL_FOR_ARCH5, NULL},
568 {"armv5t", arm10tdmi, "5T", FL_CO_PROC | FL_FOR_ARCH5T, NULL},
569 {"armv5e", arm1026ejs, "5E", FL_CO_PROC | FL_FOR_ARCH5E, NULL},
570 {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
571 {"armv6", arm1136js, "6", FL_CO_PROC | FL_FOR_ARCH6, NULL},
572 {"armv6j", arm1136js, "6J", FL_CO_PROC | FL_FOR_ARCH6J, NULL},
573 {"armv6k", mpcore, "6K", FL_CO_PROC | FL_FOR_ARCH6K, NULL},
574 {"armv6z", arm1176jzs, "6Z", FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
575 {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
576 {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
577 {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
578 {NULL, arm_none, NULL, 0 , NULL}
581 struct arm_cpu_select
583 const char * string;
584 const char * name;
585 const struct processors * processors;
588 /* This is a magic structure. The 'string' field is magically filled in
589 with a pointer to the value specified by the user on the command line
590 assuming that the user has specified such a value. */
592 static struct arm_cpu_select arm_select[] =
594 /* string name processors */
595 { NULL, "-mcpu=", all_cores },
596 { NULL, "-march=", all_architectures },
597 { NULL, "-mtune=", all_cores }
600 /* Defines representing the indexes into the above table. */
601 #define ARM_OPT_SET_CPU 0
602 #define ARM_OPT_SET_ARCH 1
603 #define ARM_OPT_SET_TUNE 2
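/* Illustrative note (not part of the original file): arm_handle_option
   below stores each -mcpu=/-march=/-mtune= argument into the matching
   slot of this table; for "-march=armv5te" it effectively does

     arm_select[ARM_OPT_SET_ARCH].string = "armv5te";

   and arm_override_options later looks that string up in
   all_architectures to set insn_flags and arm_arch_name.  */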
605 /* The name of the preprocessor macro to define for this architecture. */
607 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
609 struct fpu_desc
611 const char * name;
612 enum fputype fpu;
616 /* Available values for -mfpu=. */
618 static const struct fpu_desc all_fpus[] =
620 {"fpa", FPUTYPE_FPA},
621 {"fpe2", FPUTYPE_FPA_EMU2},
622 {"fpe3", FPUTYPE_FPA_EMU3},
623 {"maverick", FPUTYPE_MAVERICK},
624 {"vfp", FPUTYPE_VFP}
628 /* Floating point models used by the different hardware.
629 See fputype in arm.h. */
631 static const enum fputype fp_model_for_fpu[] =
633 /* No FP hardware. */
634 ARM_FP_MODEL_UNKNOWN, /* FPUTYPE_NONE */
635 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA */
636 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU2 */
637 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU3 */
638 ARM_FP_MODEL_MAVERICK, /* FPUTYPE_MAVERICK */
639 ARM_FP_MODEL_VFP /* FPUTYPE_VFP */
643 struct float_abi
645 const char * name;
646 enum float_abi_type abi_type;
650 /* Available values for -mfloat-abi=. */
652 static const struct float_abi all_float_abis[] =
654 {"soft", ARM_FLOAT_ABI_SOFT},
655 {"softfp", ARM_FLOAT_ABI_SOFTFP},
656 {"hard", ARM_FLOAT_ABI_HARD}
660 struct abi_name
662 const char *name;
663 enum arm_abi_type abi_type;
667 /* Available values for -mabi=. */
669 static const struct abi_name arm_all_abis[] =
671 {"apcs-gnu", ARM_ABI_APCS},
672 {"atpcs", ARM_ABI_ATPCS},
673 {"aapcs", ARM_ABI_AAPCS},
674 {"iwmmxt", ARM_ABI_IWMMXT}
677 /* Return the number of bits set in VALUE. */
678 static unsigned
679 bit_count (unsigned long value)
681 unsigned long count = 0;
683 while (value)
685 count++;
686 value &= value - 1; /* Clear the least-significant set bit. */
689 return count;
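/* Worked example (editorial, not part of the original file): the loop
   above relies on the "value &= value - 1" trick, which clears the
   lowest set bit on each iteration, so the iteration count equals the
   number of set bits.  For value == 0x29 (binary 101001):

     0x29 & 0x28 -> 0x28   (count = 1)
     0x28 & 0x27 -> 0x20   (count = 2)
     0x20 & 0x1f -> 0x00   (count = 3)

   giving bit_count (0x29) == 3.  */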
692 /* Set up library functions unique to ARM. */
694 static void
695 arm_init_libfuncs (void)
697 /* There are no special library functions unless we are using the
698 ARM BPABI. */
699 if (!TARGET_BPABI)
700 return;
702 /* The functions below are described in Section 4 of the "Run-Time
703 ABI for the ARM architecture", Version 1.0. */
705 /* Double-precision floating-point arithmetic. Table 2. */
706 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
707 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
708 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
709 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
710 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
712 /* Double-precision comparisons. Table 3. */
713 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
714 set_optab_libfunc (ne_optab, DFmode, NULL);
715 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
716 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
717 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
718 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
719 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
721 /* Single-precision floating-point arithmetic. Table 4. */
722 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
723 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
724 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
725 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
726 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
728 /* Single-precision comparisons. Table 5. */
729 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
730 set_optab_libfunc (ne_optab, SFmode, NULL);
731 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
732 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
733 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
734 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
735 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
737 /* Floating-point to integer conversions. Table 6. */
738 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
739 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
740 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
741 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
742 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
743 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
744 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
745 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
747 /* Conversions between floating types. Table 7. */
748 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
749 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
751 /* Integer to floating-point conversions. Table 8. */
752 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
753 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
754 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
755 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
756 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
757 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
758 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
759 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
761 /* Long long. Table 9. */
762 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
763 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
764 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
765 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
766 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
767 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
768 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
769 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
771 /* Integer (32/32->32) division. \S 4.3.1. */
772 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
773 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
775 /* The divmod functions are designed so that they can be used for
776 plain division, even though they return both the quotient and the
777 remainder. The quotient is returned in the usual location (i.e.,
778 r0 for SImode, {r0, r1} for DImode), just as would be expected
779 for an ordinary division routine. Because the AAPCS calling
780 conventions specify that all of { r0, r1, r2, r3 } are
781 call-clobbered registers, there is no need to tell the compiler
782 explicitly that those registers are clobbered by these
783 routines. */
784 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
785 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
786 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idivmod");
787 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidivmod");
789 /* We don't have mod libcalls. Fortunately gcc knows how to use the
790 divmod libcalls instead. */
791 set_optab_libfunc (smod_optab, DImode, NULL);
792 set_optab_libfunc (umod_optab, DImode, NULL);
793 set_optab_libfunc (smod_optab, SImode, NULL);
794 set_optab_libfunc (umod_optab, SImode, NULL);
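/* Illustrative sketch (editorial, not part of this file; the prototype
   below paraphrases the Run-time ABI document): __aeabi_idivmod returns
   the quotient in r0 and the remainder in r1, roughly

     typedef struct { int quot; int rem; } idiv_return;
     idiv_return __aeabi_idivmod (int numerator, int denominator);
       -- pair returned directly in {r0, r1}

   Because the quotient lands in r0, the normal return register for an
   int, the same helper also serves as the plain signed-division libcall,
   and the remainder in r1 is what lets the smod/umod optabs just above
   be cleared in favour of the divmod calls.  */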
797 /* Implement TARGET_HANDLE_OPTION. */
799 static bool
800 arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
802 switch (code)
804 case OPT_mabi_:
805 target_abi_name = arg;
806 return true;
808 case OPT_march_:
809 arm_select[1].string = arg;
810 return true;
812 case OPT_mcpu_:
813 arm_select[0].string = arg;
814 return true;
816 case OPT_mfloat_abi_:
817 target_float_abi_name = arg;
818 return true;
820 case OPT_mfp_:
821 case OPT_mfpe_:
822 target_fpe_name = arg;
823 return true;
825 case OPT_mfpu_:
826 target_fpu_name = arg;
827 return true;
829 case OPT_mhard_float:
830 target_float_abi_name = "hard";
831 return true;
833 case OPT_mpic_register_:
834 arm_pic_register_string = arg;
835 return true;
837 case OPT_msoft_float:
838 target_float_abi_name = "soft";
839 return true;
841 case OPT_mstructure_size_boundary_:
842 structure_size_string = arg;
843 return true;
845 case OPT_mtune_:
846 arm_select[2].string = arg;
847 return true;
849 default:
850 return true;
854 /* Fix up any incompatible options that the user has specified.
855 This has now turned into a maze. */
856 void
857 arm_override_options (void)
859 unsigned i;
860 enum processor_type target_arch_cpu = arm_none;
862 /* Set up the flags based on the cpu/architecture selected by the user. */
863 for (i = ARRAY_SIZE (arm_select); i--;)
865 struct arm_cpu_select * ptr = arm_select + i;
867 if (ptr->string != NULL && ptr->string[0] != '\0')
869 const struct processors * sel;
871 for (sel = ptr->processors; sel->name != NULL; sel++)
872 if (streq (ptr->string, sel->name))
874 /* Set the architecture define. */
875 if (i != ARM_OPT_SET_TUNE)
876 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
878 /* Determine the processor core for which we should
879 tune code-generation. */
880 if (/* -mcpu= is a sensible default. */
881 i == ARM_OPT_SET_CPU
882 /* -mtune= overrides -mcpu= and -march=. */
883 || i == ARM_OPT_SET_TUNE)
884 arm_tune = (enum processor_type) (sel - ptr->processors);
886 /* Remember the CPU associated with this architecture.
887 If no other option is used to set the CPU type,
888 we'll use this to guess the most suitable tuning
889 options. */
890 if (i == ARM_OPT_SET_ARCH)
891 target_arch_cpu = sel->core;
893 if (i != ARM_OPT_SET_TUNE)
895 /* If we have been given an architecture and a processor
896 make sure that they are compatible. We only generate
897 a warning though, and we prefer the CPU over the
898 architecture. */
899 if (insn_flags != 0 && (insn_flags ^ sel->flags))
900 warning (0, "switch -mcpu=%s conflicts with -march= switch",
901 ptr->string);
903 insn_flags = sel->flags;
906 break;
909 if (sel->name == NULL)
910 error ("bad value (%s) for %s switch", ptr->string, ptr->name);
914 /* Guess the tuning options from the architecture if necessary. */
915 if (arm_tune == arm_none)
916 arm_tune = target_arch_cpu;
918 /* If the user did not specify a processor, choose one for them. */
919 if (insn_flags == 0)
921 const struct processors * sel;
922 unsigned int sought;
923 enum processor_type cpu;
925 cpu = TARGET_CPU_DEFAULT;
926 if (cpu == arm_none)
928 #ifdef SUBTARGET_CPU_DEFAULT
929 /* Use the subtarget default CPU if none was specified by
930 configure. */
931 cpu = SUBTARGET_CPU_DEFAULT;
932 #endif
933 /* Default to ARM6. */
934 if (cpu == arm_none)
935 cpu = arm6;
937 sel = &all_cores[cpu];
939 insn_flags = sel->flags;
941 /* Now check to see if the user has specified some command line
942 switches that require certain abilities from the cpu. */
943 sought = 0;
945 if (TARGET_INTERWORK || TARGET_THUMB)
947 sought |= (FL_THUMB | FL_MODE32);
949 /* There are no ARM processors that support both APCS-26 and
950 interworking. Therefore we force FL_MODE26 to be removed
951 from insn_flags here (if it was set), so that the search
952 below will always be able to find a compatible processor. */
953 insn_flags &= ~FL_MODE26;
956 if (sought != 0 && ((sought & insn_flags) != sought))
958 /* Try to locate a CPU type that supports all of the abilities
959 of the default CPU, plus the extra abilities requested by
960 the user. */
961 for (sel = all_cores; sel->name != NULL; sel++)
962 if ((sel->flags & sought) == (sought | insn_flags))
963 break;
965 if (sel->name == NULL)
967 unsigned current_bit_count = 0;
968 const struct processors * best_fit = NULL;
970 /* Ideally we would like to issue an error message here
971 saying that it was not possible to find a CPU compatible
972 with the default CPU, but which also supports the command
973 line options specified by the programmer, and so they
974 ought to use the -mcpu=<name> command line option to
975 override the default CPU type.
977 If we cannot find a cpu that has both the
978 characteristics of the default cpu and the given
979 command line options we scan the array again looking
980 for a best match. */
981 for (sel = all_cores; sel->name != NULL; sel++)
982 if ((sel->flags & sought) == sought)
984 unsigned count;
986 count = bit_count (sel->flags & insn_flags);
988 if (count >= current_bit_count)
990 best_fit = sel;
991 current_bit_count = count;
995 gcc_assert (best_fit);
996 sel = best_fit;
999 insn_flags = sel->flags;
1001 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
1002 if (arm_tune == arm_none)
1003 arm_tune = (enum processor_type) (sel - all_cores);
1006 /* The processor for which we should tune should now have been
1007 chosen. */
1008 gcc_assert (arm_tune != arm_none);
1010 tune_flags = all_cores[(int)arm_tune].flags;
1011 if (optimize_size)
1012 targetm.rtx_costs = arm_size_rtx_costs;
1013 else
1014 targetm.rtx_costs = all_cores[(int)arm_tune].rtx_costs;
1016 /* Make sure that the processor choice does not conflict with any of the
1017 other command line choices. */
1018 if (TARGET_INTERWORK && !(insn_flags & FL_THUMB))
1020 warning (0, "target CPU does not support interworking" );
1021 target_flags &= ~MASK_INTERWORK;
1024 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1026 warning (0, "target CPU does not support THUMB instructions");
1027 target_flags &= ~MASK_THUMB;
1030 if (TARGET_APCS_FRAME && TARGET_THUMB)
1032 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1033 target_flags &= ~MASK_APCS_FRAME;
1036 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1037 from here where no function is being compiled currently. */
1038 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1039 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1041 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1042 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1044 if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
1045 warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");
1047 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1049 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1050 target_flags |= MASK_APCS_FRAME;
1053 if (TARGET_POKE_FUNCTION_NAME)
1054 target_flags |= MASK_APCS_FRAME;
1056 if (TARGET_APCS_REENT && flag_pic)
1057 error ("-fpic and -mapcs-reent are incompatible");
1059 if (TARGET_APCS_REENT)
1060 warning (0, "APCS reentrant code not supported. Ignored");
1062 /* If this target is normally configured to use APCS frames, warn if they
1063 are turned off and debugging is turned on. */
1064 if (TARGET_ARM
1065 && write_symbols != NO_DEBUG
1066 && !TARGET_APCS_FRAME
1067 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1068 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1070 /* If stack checking is disabled, we can use r10 as the PIC register,
1071 which keeps r9 available. */
1072 if (flag_pic)
1073 arm_pic_register = TARGET_APCS_STACK ? 9 : 10;
1075 if (TARGET_APCS_FLOAT)
1076 warning (0, "passing floating point arguments in fp regs not yet supported");
1078 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1079 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1080 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1081 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1082 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1083 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1084 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1085 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1086 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1088 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1089 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1090 thumb_code = (TARGET_ARM == 0);
1091 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1092 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1093 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1095 /* V5 code we generate is completely interworking capable, so we turn off
1096 TARGET_INTERWORK here to avoid many tests later on. */
1098 /* XXX However, we must pass the right pre-processor defines to CPP
1099 or GLD can get confused. This is a hack. */
1100 if (TARGET_INTERWORK)
1101 arm_cpp_interwork = 1;
1103 if (arm_arch5)
1104 target_flags &= ~MASK_INTERWORK;
1106 if (target_abi_name)
1108 for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
1110 if (streq (arm_all_abis[i].name, target_abi_name))
1112 arm_abi = arm_all_abis[i].abi_type;
1113 break;
1116 if (i == ARRAY_SIZE (arm_all_abis))
1117 error ("invalid ABI option: -mabi=%s", target_abi_name);
1119 else
1120 arm_abi = ARM_DEFAULT_ABI;
1122 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1123 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1125 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1126 error ("iwmmxt abi requires an iwmmxt capable cpu");
1128 arm_fp_model = ARM_FP_MODEL_UNKNOWN;
1129 if (target_fpu_name == NULL && target_fpe_name != NULL)
1131 if (streq (target_fpe_name, "2"))
1132 target_fpu_name = "fpe2";
1133 else if (streq (target_fpe_name, "3"))
1134 target_fpu_name = "fpe3";
1135 else
1136 error ("invalid floating point emulation option: -mfpe=%s",
1137 target_fpe_name);
1139 if (target_fpu_name != NULL)
1141 /* The user specified a FPU. */
1142 for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
1144 if (streq (all_fpus[i].name, target_fpu_name))
1146 arm_fpu_arch = all_fpus[i].fpu;
1147 arm_fpu_tune = arm_fpu_arch;
1148 arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
1149 break;
1152 if (arm_fp_model == ARM_FP_MODEL_UNKNOWN)
1153 error ("invalid floating point option: -mfpu=%s", target_fpu_name);
1155 else
1157 #ifdef FPUTYPE_DEFAULT
1158 /* Use the default if it is specified for this platform. */
1159 arm_fpu_arch = FPUTYPE_DEFAULT;
1160 arm_fpu_tune = FPUTYPE_DEFAULT;
1161 #else
1162 /* Pick one based on CPU type. */
1163 /* ??? Some targets assume FPA is the default.
1164 if ((insn_flags & FL_VFP) != 0)
1165 arm_fpu_arch = FPUTYPE_VFP;
1166 else
1167 */
1168 if (arm_arch_cirrus)
1169 arm_fpu_arch = FPUTYPE_MAVERICK;
1170 else
1171 arm_fpu_arch = FPUTYPE_FPA_EMU2;
1172 #endif
1173 if (tune_flags & FL_CO_PROC && arm_fpu_arch == FPUTYPE_FPA_EMU2)
1174 arm_fpu_tune = FPUTYPE_FPA;
1175 else
1176 arm_fpu_tune = arm_fpu_arch;
1177 arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
1178 gcc_assert (arm_fp_model != ARM_FP_MODEL_UNKNOWN);
1181 if (target_float_abi_name != NULL)
1183 /* The user specified a FP ABI. */
1184 for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
1186 if (streq (all_float_abis[i].name, target_float_abi_name))
1188 arm_float_abi = all_float_abis[i].abi_type;
1189 break;
1192 if (i == ARRAY_SIZE (all_float_abis))
1193 error ("invalid floating point abi: -mfloat-abi=%s",
1194 target_float_abi_name);
1196 else
1197 arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
1199 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1200 sorry ("-mfloat-abi=hard and VFP");
1202 /* If soft-float is specified then don't use FPU. */
1203 if (TARGET_SOFT_FLOAT)
1204 arm_fpu_arch = FPUTYPE_NONE;
1206 /* For arm2/3 there is no need to do any scheduling if there is only
1207 a floating point emulator, or we are doing software floating-point. */
1208 if ((TARGET_SOFT_FLOAT
1209 || arm_fpu_tune == FPUTYPE_FPA_EMU2
1210 || arm_fpu_tune == FPUTYPE_FPA_EMU3)
1211 && (tune_flags & FL_MODE32) == 0)
1212 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1214 /* Override the default structure alignment for AAPCS ABI. */
1215 if (arm_abi == ARM_ABI_AAPCS)
1216 arm_structure_size_boundary = 8;
1218 if (structure_size_string != NULL)
1220 int size = strtol (structure_size_string, NULL, 0);
1222 if (size == 8 || size == 32
1223 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1224 arm_structure_size_boundary = size;
1225 else
1226 warning (0, "structure size boundary can only be set to %s",
1227 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
1230 if (arm_pic_register_string != NULL)
1232 int pic_register = decode_reg_name (arm_pic_register_string);
1234 if (!flag_pic)
1235 warning (0, "-mpic-register= is useless without -fpic");
1237 /* Prevent the user from choosing an obviously stupid PIC register. */
1238 else if (pic_register < 0 || call_used_regs[pic_register]
1239 || pic_register == HARD_FRAME_POINTER_REGNUM
1240 || pic_register == STACK_POINTER_REGNUM
1241 || pic_register >= PC_REGNUM)
1242 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1243 else
1244 arm_pic_register = pic_register;
1247 if (TARGET_THUMB && flag_schedule_insns)
1249 /* Don't warn since it's on by default in -O2. */
1250 flag_schedule_insns = 0;
1253 if (optimize_size)
1255 /* There's some dispute as to whether this should be 1 or 2. However,
1256 experiments seem to show that in pathological cases a setting of
1257 1 degrades less severely than a setting of 2. This could change if
1258 other parts of the compiler change their behavior. */
1259 arm_constant_limit = 1;
1261 /* If optimizing for size, bump the number of instructions that we
1262 are prepared to conditionally execute (even on a StrongARM). */
1263 max_insns_skipped = 6;
1265 else
1267 /* For processors with load scheduling, it never costs more than
1268 2 cycles to load a constant, and the load scheduler may well
1269 reduce that to 1. */
1270 if (arm_ld_sched)
1271 arm_constant_limit = 1;
1273 /* On XScale the longer latency of a load makes it more difficult
1274 to achieve a good schedule, so it's faster to synthesize
1275 constants that can be done in two insns. */
1276 if (arm_tune_xscale)
1277 arm_constant_limit = 2;
1279 /* StrongARM has early execution of branches, so a sequence
1280 that is worth skipping is shorter. */
1281 if (arm_tune_strongarm)
1282 max_insns_skipped = 3;
1285 /* Register global variables with the garbage collector. */
1286 arm_add_gc_roots ();
1289 static void
1290 arm_add_gc_roots (void)
1292 gcc_obstack_init(&minipool_obstack);
1293 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
1296 /* A table of known ARM exception types.
1297 For use with the interrupt function attribute. */
1299 typedef struct
1301 const char *const arg;
1302 const unsigned long return_value;
1304 isr_attribute_arg;
1306 static const isr_attribute_arg isr_attribute_args [] =
1308 { "IRQ", ARM_FT_ISR },
1309 { "irq", ARM_FT_ISR },
1310 { "FIQ", ARM_FT_FIQ },
1311 { "fiq", ARM_FT_FIQ },
1312 { "ABORT", ARM_FT_ISR },
1313 { "abort", ARM_FT_ISR },
1314 { "ABORT", ARM_FT_ISR },
1315 { "abort", ARM_FT_ISR },
1316 { "UNDEF", ARM_FT_EXCEPTION },
1317 { "undef", ARM_FT_EXCEPTION },
1318 { "SWI", ARM_FT_EXCEPTION },
1319 { "swi", ARM_FT_EXCEPTION },
1320 { NULL, ARM_FT_NORMAL }
1323 /* Returns the (interrupt) function type of the current
1324 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1326 static unsigned long
1327 arm_isr_value (tree argument)
1329 const isr_attribute_arg * ptr;
1330 const char * arg;
1332 /* No argument - default to IRQ. */
1333 if (argument == NULL_TREE)
1334 return ARM_FT_ISR;
1336 /* Get the value of the argument. */
1337 if (TREE_VALUE (argument) == NULL_TREE
1338 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
1339 return ARM_FT_UNKNOWN;
1341 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
1343 /* Check it against the list of known arguments. */
1344 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
1345 if (streq (arg, ptr->arg))
1346 return ptr->return_value;
1348 /* An unrecognized interrupt type. */
1349 return ARM_FT_UNKNOWN;
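/* Usage example (editorial, not part of the original file): the strings
   in isr_attribute_args above are the arguments accepted by the "isr" /
   "interrupt" function attributes, e.g.

     void uart_handler (void) __attribute__ ((interrupt ("IRQ")));
     void fault_handler (void) __attribute__ ((isr ("UNDEF")));

   An attribute with no argument, __attribute__ ((interrupt)), defaults
   to ARM_FT_ISR as handled at the top of arm_isr_value.  */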
1352 /* Computes the type of the current function. */
1354 static unsigned long
1355 arm_compute_func_type (void)
1357 unsigned long type = ARM_FT_UNKNOWN;
1358 tree a;
1359 tree attr;
1361 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
1363 /* Decide if the current function is volatile. Such functions
1364 never return, and many memory cycles can be saved by not storing
1365 register values that will never be needed again. This optimization
1366 was added to speed up context switching in a kernel application. */
1367 if (optimize > 0
1368 && TREE_NOTHROW (current_function_decl)
1369 && TREE_THIS_VOLATILE (current_function_decl))
1370 type |= ARM_FT_VOLATILE;
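/* Editorial example (not from the original file): a function defined as

     void panic (const char *msg) __attribute__ ((noreturn, nothrow));

   has TREE_THIS_VOLATILE and TREE_NOTHROW set on its FUNCTION_DECL, so
   when optimizing its body is typed ARM_FT_VOLATILE here: control never
   returns (and never unwinds), so its prologue need not preserve the
   call-saved registers it clobbers.  */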
1372 if (cfun->static_chain_decl != NULL)
1373 type |= ARM_FT_NESTED;
1375 attr = DECL_ATTRIBUTES (current_function_decl);
1377 a = lookup_attribute ("naked", attr);
1378 if (a != NULL_TREE)
1379 type |= ARM_FT_NAKED;
1381 a = lookup_attribute ("isr", attr);
1382 if (a == NULL_TREE)
1383 a = lookup_attribute ("interrupt", attr);
1385 if (a == NULL_TREE)
1386 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
1387 else
1388 type |= arm_isr_value (TREE_VALUE (a));
1390 return type;
1393 /* Returns the type of the current function. */
1395 unsigned long
1396 arm_current_func_type (void)
1398 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
1399 cfun->machine->func_type = arm_compute_func_type ();
1401 return cfun->machine->func_type;
1404 /* Return 1 if it is possible to return using a single instruction.
1405 If SIBLING is non-null, this is a test for a return before a sibling
1406 call. SIBLING is the call insn, so we can examine its register usage. */
1409 use_return_insn (int iscond, rtx sibling)
1411 int regno;
1412 unsigned int func_type;
1413 unsigned long saved_int_regs;
1414 unsigned HOST_WIDE_INT stack_adjust;
1415 arm_stack_offsets *offsets;
1417 /* Never use a return instruction before reload has run. */
1418 if (!reload_completed)
1419 return 0;
1421 func_type = arm_current_func_type ();
1423 /* Naked functions and volatile functions need special
1424 consideration. */
1425 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED))
1426 return 0;
1428 /* So do interrupt functions that use the frame pointer. */
1429 if (IS_INTERRUPT (func_type) && frame_pointer_needed)
1430 return 0;
1432 offsets = arm_get_frame_offsets ();
1433 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
1435 /* As do variadic functions. */
1436 if (current_function_pretend_args_size
1437 || cfun->machine->uses_anonymous_args
1438 /* Or if the function calls __builtin_eh_return () */
1439 || current_function_calls_eh_return
1440 /* Or if the function calls alloca */
1441 || current_function_calls_alloca
1442 /* Or if there is a stack adjustment. However, if the stack pointer
1443 is saved on the stack, we can use a pre-incrementing stack load. */
1444 || !(stack_adjust == 0 || (frame_pointer_needed && stack_adjust == 4)))
1445 return 0;
1447 saved_int_regs = arm_compute_save_reg_mask ();
1449 /* Unfortunately, the insn
1451 ldmib sp, {..., sp, ...}
1453 triggers a bug on most SA-110 based devices, such that the stack
1454 pointer won't be correctly restored if the instruction takes a
1455 page fault. We work around this problem by popping r3 along with
1456 the other registers, since that is never slower than executing
1457 another instruction.
1459 We test for !arm_arch5 here, because code for any architecture
1460 less than this could potentially be run on one of the buggy
1461 chips. */
1462 if (stack_adjust == 4 && !arm_arch5)
1464 /* Validate that r3 is a call-clobbered register (always true in
1465 the default abi) ... */
1466 if (!call_used_regs[3])
1467 return 0;
1469 /* ... that it isn't being used for a return value ... */
1470 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
1471 return 0;
1473 /* ... or for a tail-call argument ... */
1474 if (sibling)
1476 gcc_assert (GET_CODE (sibling) == CALL_INSN);
1478 if (find_regno_fusage (sibling, USE, 3))
1479 return 0;
1482 /* ... and that there are no call-saved registers in r0-r2
1483 (always true in the default ABI). */
1484 if (saved_int_regs & 0x7)
1485 return 0;
1488 /* Can't be done if interworking with Thumb, and any registers have been
1489 stacked. */
1490 if (TARGET_INTERWORK && saved_int_regs != 0)
1491 return 0;
1493 /* On StrongARM, conditional returns are expensive if they aren't
1494 taken and multiple registers have been stacked. */
1495 if (iscond && arm_tune_strongarm)
1497 /* Conditional return when just the LR is stored is a simple
1498 conditional-load instruction, that's not expensive. */
1499 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
1500 return 0;
1502 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
1503 return 0;
1506 /* If there are saved registers but the LR isn't saved, then we need
1507 two instructions for the return. */
1508 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
1509 return 0;
1511 /* Can't be done if any of the FPA regs are pushed,
1512 since this also requires an insn. */
1513 if (TARGET_HARD_FLOAT && TARGET_FPA)
1514 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
1515 if (regs_ever_live[regno] && !call_used_regs[regno])
1516 return 0;
1518 /* Likewise VFP regs. */
1519 if (TARGET_HARD_FLOAT && TARGET_VFP)
1520 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
1521 if (regs_ever_live[regno] && !call_used_regs[regno])
1522 return 0;
1524 if (TARGET_REALLY_IWMMXT)
1525 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
1526 if (regs_ever_live[regno] && ! call_used_regs [regno])
1527 return 0;
1529 return 1;
1532 /* Return TRUE if int I is a valid immediate ARM constant. */
1535 const_ok_for_arm (HOST_WIDE_INT i)
1537 int lowbit;
1539 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
1540 be all zero, or all one. */
1541 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
1542 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
1543 != ((~(unsigned HOST_WIDE_INT) 0)
1544 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
1545 return FALSE;
1547 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
1549 /* Fast return for 0 and small values. We must do this for zero, since
1550 the code below can't handle that one case. */
1551 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
1552 return TRUE;
1554 /* Get the number of trailing zeros, rounded down to the nearest even
1555 number. */
1556 lowbit = (ffs ((int) i) - 1) & ~1;
1558 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
1559 return TRUE;
1560 else if (lowbit <= 4
1561 && ((i & ~0xc000003f) == 0
1562 || (i & ~0xf000000f) == 0
1563 || (i & ~0xfc000003) == 0))
1564 return TRUE;
1566 return FALSE;
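/* Worked examples (editorial, not part of the original file): an ARM
   data-processing immediate is an 8-bit value rotated right by an even
   amount, which is what the tests above encode.

     0x000000FF  valid    (8-bit value, no rotation)
     0x0000FF00  valid    (0xFF sitting in bits 8..15)
     0xF000000F  valid    (0xFF wrapped around the word, caught by the
                           (i & ~0xf000000f) == 0 test)
     0x00000102  invalid  (set bits span 9 positions, cannot fit in a
                           rotated 8-bit field)
*/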
1569 /* Return true if I is a valid constant for the operation CODE. */
1570 static int
1571 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
1573 if (const_ok_for_arm (i))
1574 return 1;
1576 switch (code)
1578 case PLUS:
1579 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
1581 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
1582 case XOR:
1583 case IOR:
1584 return 0;
1586 case AND:
1587 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
1589 default:
1590 gcc_unreachable ();
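/* Editorial examples (not part of the original file): a constant that is
   not directly encodable may still cost one instruction if the operation
   can absorb an adjusted constant:

     x + 0xFFFFFF00   can be emitted as   x - 0x100
                      (PLUS tests const_ok_for_arm (ARM_SIGN_EXTEND (-i)))
     x & 0xFFFFFF00   can be emitted as a BIC with 0xFF
                      (AND tests const_ok_for_arm (ARM_SIGN_EXTEND (~i)))

   MINUS, XOR and IOR get no such adjustment here, hence the 0 above.  */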
1594 /* Emit a sequence of insns to handle a large constant.
1595 CODE is the code of the operation required, it can be any of SET, PLUS,
1596 IOR, AND, XOR, MINUS;
1597 MODE is the mode in which the operation is being performed;
1598 VAL is the integer to operate on;
1599 SOURCE is the other operand (a register, or a null-pointer for SET);
1600 SUBTARGETS means it is safe to create scratch registers if that will
1601 either produce a simpler sequence, or we will want to cse the values.
1602 Return value is the number of insns emitted. */
1605 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
1606 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
1608 rtx cond;
1610 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
1611 cond = COND_EXEC_TEST (PATTERN (insn));
1612 else
1613 cond = NULL_RTX;
1615 if (subtargets || code == SET
1616 || (GET_CODE (target) == REG && GET_CODE (source) == REG
1617 && REGNO (target) != REGNO (source)))
1619 /* After arm_reorg has been called, we can't fix up expensive
1620 constants by pushing them into memory so we must synthesize
1621 them in-line, regardless of the cost. This is only likely to
1622 be more costly on chips that have load delay slots and we are
1623 compiling without running the scheduler (so no splitting
1624 occurred before the final instruction emission).
1626 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
1628 if (!after_arm_reorg
1629 && !cond
1630 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
1631 1, 0)
1632 > arm_constant_limit + (code != SET)))
1634 if (code == SET)
1636 /* Currently SET is the only monadic value for CODE, all
1637 the rest are dyadic. */
1638 emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (val)));
1639 return 1;
1641 else
1643 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
1645 emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (val)));
1646 /* For MINUS, the value is subtracted from, since we never
1647 have subtraction of a constant. */
1648 if (code == MINUS)
1649 emit_insn (gen_rtx_SET (VOIDmode, target,
1650 gen_rtx_MINUS (mode, temp, source)));
1651 else
1652 emit_insn (gen_rtx_SET (VOIDmode, target,
1653 gen_rtx_fmt_ee (code, mode, source, temp)));
1654 return 2;
1659 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
1663 static int
1664 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
1666 HOST_WIDE_INT temp1;
1667 int num_insns = 0;
1670 int end;
1672 if (i <= 0)
1673 i += 32;
1674 if (remainder & (3 << (i - 2)))
1676 end = i - 8;
1677 if (end < 0)
1678 end += 32;
1679 temp1 = remainder & ((0x0ff << end)
1680 | ((i < end) ? (0xff >> (32 - end)) : 0));
1681 remainder &= ~temp1;
1682 num_insns++;
1683 i -= 6;
1685 i -= 2;
1686 } while (remainder);
1687 return num_insns;
1690 /* Emit an instruction with the indicated PATTERN. If COND is
1691 non-NULL, conditionalize the execution of the instruction on COND
1692 being true. */
1694 static void
1695 emit_constant_insn (rtx cond, rtx pattern)
1697 if (cond)
1698 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
1699 emit_insn (pattern);
1702 /* As above, but extra parameter GENERATE which, if clear, suppresses
1703 RTL generation. */
1705 static int
1706 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
1707 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
1708 int generate)
1710 int can_invert = 0;
1711 int can_negate = 0;
1712 int can_negate_initial = 0;
1713 int can_shift = 0;
1714 int i;
1715 int num_bits_set = 0;
1716 int set_sign_bit_copies = 0;
1717 int clear_sign_bit_copies = 0;
1718 int clear_zero_bit_copies = 0;
1719 int set_zero_bit_copies = 0;
1720 int insns = 0;
1721 unsigned HOST_WIDE_INT temp1, temp2;
1722 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
1724 /* Find out which operations are safe for a given CODE. Also do a quick
1725 check for degenerate cases; these can occur when DImode operations
1726 are split. */
1727 switch (code)
1729 case SET:
1730 can_invert = 1;
1731 can_shift = 1;
1732 can_negate = 1;
1733 break;
1735 case PLUS:
1736 can_negate = 1;
1737 can_negate_initial = 1;
1738 break;
1740 case IOR:
1741 if (remainder == 0xffffffff)
1743 if (generate)
1744 emit_constant_insn (cond,
1745 gen_rtx_SET (VOIDmode, target,
1746 GEN_INT (ARM_SIGN_EXTEND (val))));
1747 return 1;
1749 if (remainder == 0)
1751 if (reload_completed && rtx_equal_p (target, source))
1752 return 0;
1753 if (generate)
1754 emit_constant_insn (cond,
1755 gen_rtx_SET (VOIDmode, target, source));
1756 return 1;
1758 break;
1760 case AND:
1761 if (remainder == 0)
1763 if (generate)
1764 emit_constant_insn (cond,
1765 gen_rtx_SET (VOIDmode, target, const0_rtx));
1766 return 1;
1768 if (remainder == 0xffffffff)
1770 if (reload_completed && rtx_equal_p (target, source))
1771 return 0;
1772 if (generate)
1773 emit_constant_insn (cond,
1774 gen_rtx_SET (VOIDmode, target, source));
1775 return 1;
1777 can_invert = 1;
1778 break;
1780 case XOR:
1781 if (remainder == 0)
1783 if (reload_completed && rtx_equal_p (target, source))
1784 return 0;
1785 if (generate)
1786 emit_constant_insn (cond,
1787 gen_rtx_SET (VOIDmode, target, source));
1788 return 1;
1791 /* We don't know how to handle other cases yet. */
1792 gcc_assert (remainder == 0xffffffff);
1794 if (generate)
1795 emit_constant_insn (cond,
1796 gen_rtx_SET (VOIDmode, target,
1797 gen_rtx_NOT (mode, source)));
1798 return 1;
1800 case MINUS:
1801 /* We treat MINUS as (val - source), since (source - val) is always
1802 passed as (source + (-val)). */
1803 if (remainder == 0)
1805 if (generate)
1806 emit_constant_insn (cond,
1807 gen_rtx_SET (VOIDmode, target,
1808 gen_rtx_NEG (mode, source)));
1809 return 1;
1811 if (const_ok_for_arm (val))
1813 if (generate)
1814 emit_constant_insn (cond,
1815 gen_rtx_SET (VOIDmode, target,
1816 gen_rtx_MINUS (mode, GEN_INT (val),
1817 source)));
1818 return 1;
1820 can_negate = 1;
1822 break;
1824 default:
1825 gcc_unreachable ();
1828 /* If we can do it in one insn get out quickly. */
1829 if (const_ok_for_arm (val)
1830 || (can_negate_initial && const_ok_for_arm (-val))
1831 || (can_invert && const_ok_for_arm (~val)))
1833 if (generate)
1834 emit_constant_insn (cond,
1835 gen_rtx_SET (VOIDmode, target,
1836 (source
1837 ? gen_rtx_fmt_ee (code, mode, source,
1838 GEN_INT (val))
1839 : GEN_INT (val))));
1840 return 1;
1843 /* Calculate a few attributes that may be useful for specific
1844 optimizations. */
1845 for (i = 31; i >= 0; i--)
1847 if ((remainder & (1 << i)) == 0)
1848 clear_sign_bit_copies++;
1849 else
1850 break;
1853 for (i = 31; i >= 0; i--)
1855 if ((remainder & (1 << i)) != 0)
1856 set_sign_bit_copies++;
1857 else
1858 break;
1861 for (i = 0; i <= 31; i++)
1863 if ((remainder & (1 << i)) == 0)
1864 clear_zero_bit_copies++;
1865 else
1866 break;
1869 for (i = 0; i <= 31; i++)
1871 if ((remainder & (1 << i)) != 0)
1872 set_zero_bit_copies++;
1873 else
1874 break;
1877 switch (code)
1879 case SET:
1880 /* See if we can do this by sign_extending a constant that is known
1881 to be negative. This is a good way of doing it, since the shift
1882 may well merge into a subsequent insn. */
1883 if (set_sign_bit_copies > 1)
1885 if (const_ok_for_arm
1886 (temp1 = ARM_SIGN_EXTEND (remainder
1887 << (set_sign_bit_copies - 1))))
1889 if (generate)
1891 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
1892 emit_constant_insn (cond,
1893 gen_rtx_SET (VOIDmode, new_src,
1894 GEN_INT (temp1)));
1895 emit_constant_insn (cond,
1896 gen_ashrsi3 (target, new_src,
1897 GEN_INT (set_sign_bit_copies - 1)));
1899 return 2;
1901 /* For an inverted constant, we will need to set the low bits;
1902 these will be shifted out of harm's way. */
1903 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
1904 if (const_ok_for_arm (~temp1))
1906 if (generate)
1908 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
1909 emit_constant_insn (cond,
1910 gen_rtx_SET (VOIDmode, new_src,
1911 GEN_INT (temp1)));
1912 emit_constant_insn (cond,
1913 gen_ashrsi3 (target, new_src,
1914 GEN_INT (set_sign_bit_copies - 1)));
1916 return 2;
1920 /* See if we can calculate the value as the difference between two
1921 valid immediates. */
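/* Illustrative example: 0x00ffff01 is not a valid ARM immediate, but it
   is the difference of two valid ones, so it can be built as roughly
       mov  rT, #0x01000000
       sub  rD, rT, #0x000000ff
   rather than ORRing three separate byte chunks together.  */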
1922 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
1924 int topshift = clear_sign_bit_copies & ~1;
1926 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
1927 & (0xff000000 >> topshift));
1929 /* If temp1 is zero, then that means the 9 most significant
1930 bits of remainder were 1 and we've caused it to overflow.
1931 When topshift is 0 we don't need to do anything since we
1932 can borrow from 'bit 32'. */
1933 if (temp1 == 0 && topshift != 0)
1934 temp1 = 0x80000000 >> (topshift - 1);
1936 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
1938 if (const_ok_for_arm (temp2))
1940 if (generate)
1942 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
1943 emit_constant_insn (cond,
1944 gen_rtx_SET (VOIDmode, new_src,
1945 GEN_INT (temp1)));
1946 emit_constant_insn (cond,
1947 gen_addsi3 (target, new_src,
1948 GEN_INT (-temp2)));
1951 return 2;
1955 /* See if we can generate this by setting the bottom (or the top)
1956 16 bits, and then shifting these into the other half of the
1957 word. We only look for the simplest cases; to do more would cost
1958 too much. Be careful, however, not to generate this when the
1959 alternative would take fewer insns. */
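/* Illustrative example: 0x01010101 repeats its low 16 bits in the high
   half, so once 0x0101 has been synthesized (two insns) the rest is a
   single
       orr  rD, rT, rT, lsl #16
   giving three insns in total instead of four.  */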
1960 if (val & 0xffff0000)
1962 temp1 = remainder & 0xffff0000;
1963 temp2 = remainder & 0x0000ffff;
1965 /* Overlaps outside this range are best done using other methods. */
1966 for (i = 9; i < 24; i++)
1968 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
1969 && !const_ok_for_arm (temp2))
1971 rtx new_src = (subtargets
1972 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
1973 : target);
1974 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
1975 source, subtargets, generate);
1976 source = new_src;
1977 if (generate)
1978 emit_constant_insn
1979 (cond,
1980 gen_rtx_SET
1981 (VOIDmode, target,
1982 gen_rtx_IOR (mode,
1983 gen_rtx_ASHIFT (mode, source,
1984 GEN_INT (i)),
1985 source)));
1986 return insns + 1;
1990 /* Don't duplicate cases already considered. */
1991 for (i = 17; i < 24; i++)
1993 if (((temp1 | (temp1 >> i)) == remainder)
1994 && !const_ok_for_arm (temp1))
1996 rtx new_src = (subtargets
1997 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
1998 : target);
1999 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
2000 source, subtargets, generate);
2001 source = new_src;
2002 if (generate)
2003 emit_constant_insn
2004 (cond,
2005 gen_rtx_SET (VOIDmode, target,
2006 gen_rtx_IOR
2007 (mode,
2008 gen_rtx_LSHIFTRT (mode, source,
2009 GEN_INT (i)),
2010 source)));
2011 return insns + 1;
2015 break;
2017 case IOR:
2018 case XOR:
2019 /* If we have IOR or XOR, and the constant can be loaded in a
2020 single instruction, and we can find a temporary to put it in,
2021 then this can be done in two instructions instead of 3-4. */
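/* Illustrative example: (x | 0xfffffffe) cannot use an immediate ORR,
   but ~0xfffffffe == 1 is a valid immediate, so roughly
       mvn  rT, #1
       orr  rD, rX, rT
   does the job in two insns.  */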
2022 if (subtargets
2023 /* TARGET can't be NULL if SUBTARGETS is 0 */
2024 || (reload_completed && !reg_mentioned_p (target, source)))
2026 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2028 if (generate)
2030 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2032 emit_constant_insn (cond,
2033 gen_rtx_SET (VOIDmode, sub,
2034 GEN_INT (val)));
2035 emit_constant_insn (cond,
2036 gen_rtx_SET (VOIDmode, target,
2037 gen_rtx_fmt_ee (code, mode,
2038 source, sub)));
2040 return 2;
2044 if (code == XOR)
2045 break;
2047 if (set_sign_bit_copies > 8
2048 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2050 if (generate)
2052 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2053 rtx shift = GEN_INT (set_sign_bit_copies);
2055 emit_constant_insn
2056 (cond,
2057 gen_rtx_SET (VOIDmode, sub,
2058 gen_rtx_NOT (mode,
2059 gen_rtx_ASHIFT (mode,
2060 source,
2061 shift))));
2062 emit_constant_insn
2063 (cond,
2064 gen_rtx_SET (VOIDmode, target,
2065 gen_rtx_NOT (mode,
2066 gen_rtx_LSHIFTRT (mode, sub,
2067 shift))));
2069 return 2;
2072 if (set_zero_bit_copies > 8
2073 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2075 if (generate)
2077 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2078 rtx shift = GEN_INT (set_zero_bit_copies);
2080 emit_constant_insn
2081 (cond,
2082 gen_rtx_SET (VOIDmode, sub,
2083 gen_rtx_NOT (mode,
2084 gen_rtx_LSHIFTRT (mode,
2085 source,
2086 shift))));
2087 emit_constant_insn
2088 (cond,
2089 gen_rtx_SET (VOIDmode, target,
2090 gen_rtx_NOT (mode,
2091 gen_rtx_ASHIFT (mode, sub,
2092 shift))));
2094 return 2;
2097 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2099 if (generate)
2101 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2102 emit_constant_insn (cond,
2103 gen_rtx_SET (VOIDmode, sub,
2104 gen_rtx_NOT (mode, source)));
2105 source = sub;
2106 if (subtargets)
2107 sub = gen_reg_rtx (mode);
2108 emit_constant_insn (cond,
2109 gen_rtx_SET (VOIDmode, sub,
2110 gen_rtx_AND (mode, source,
2111 GEN_INT (temp1))));
2112 emit_constant_insn (cond,
2113 gen_rtx_SET (VOIDmode, target,
2114 gen_rtx_NOT (mode, sub)));
2116 return 3;
2118 break;
2120 case AND:
2121 /* See if two shifts will do 2 or more insn's worth of work. */
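/* Illustrative example: (x & 0x0000ffff) has no valid immediate, but the
   mask keeps only the low 16 bits, so a left shift followed by a right
   shift (lsl #16; lsr #16) clears the top half in two insns.  */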
2122 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
2124 HOST_WIDE_INT shift_mask = ((0xffffffff
2125 << (32 - clear_sign_bit_copies))
2126 & 0xffffffff);
2128 if ((remainder | shift_mask) != 0xffffffff)
2130 if (generate)
2132 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2133 insns = arm_gen_constant (AND, mode, cond,
2134 remainder | shift_mask,
2135 new_src, source, subtargets, 1);
2136 source = new_src;
2138 else
2140 rtx targ = subtargets ? NULL_RTX : target;
2141 insns = arm_gen_constant (AND, mode, cond,
2142 remainder | shift_mask,
2143 targ, source, subtargets, 0);
2147 if (generate)
2149 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2150 rtx shift = GEN_INT (clear_sign_bit_copies);
2152 emit_insn (gen_ashlsi3 (new_src, source, shift));
2153 emit_insn (gen_lshrsi3 (target, new_src, shift));
2156 return insns + 2;
2159 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
2161 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
2163 if ((remainder | shift_mask) != 0xffffffff)
2165 if (generate)
2167 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2169 insns = arm_gen_constant (AND, mode, cond,
2170 remainder | shift_mask,
2171 new_src, source, subtargets, 1);
2172 source = new_src;
2174 else
2176 rtx targ = subtargets ? NULL_RTX : target;
2178 insns = arm_gen_constant (AND, mode, cond,
2179 remainder | shift_mask,
2180 targ, source, subtargets, 0);
2184 if (generate)
2186 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2187 rtx shift = GEN_INT (clear_zero_bit_copies);
2189 emit_insn (gen_lshrsi3 (new_src, source, shift));
2190 emit_insn (gen_ashlsi3 (target, new_src, shift));
2193 return insns + 2;
2196 break;
2198 default:
2199 break;
2202 for (i = 0; i < 32; i++)
2203 if (remainder & (1 << i))
2204 num_bits_set++;
2206 if (code == AND || (can_invert && num_bits_set > 16))
2207 remainder = (~remainder) & 0xffffffff;
2208 else if (code == PLUS && num_bits_set > 16)
2209 remainder = (-remainder) & 0xffffffff;
2210 else
2212 can_invert = 0;
2213 can_negate = 0;
2216 /* Now try and find a way of doing the job in either two or three
2217 instructions.
2218 We start by looking for the largest block of zeros that is aligned on
2219 a 2-bit boundary; we then fill up the temps, wrapping around to the
2220 top of the word when we drop off the bottom.
2221 In the worst case this code should produce no more than four insns. */
2223 int best_start = 0;
2224 int best_consecutive_zeros = 0;
2226 for (i = 0; i < 32; i += 2)
2228 int consecutive_zeros = 0;
2230 if (!(remainder & (3 << i)))
2232 while ((i < 32) && !(remainder & (3 << i)))
2234 consecutive_zeros += 2;
2235 i += 2;
2237 if (consecutive_zeros > best_consecutive_zeros)
2239 best_consecutive_zeros = consecutive_zeros;
2240 best_start = i - consecutive_zeros;
2242 i -= 2;
2246 /* So long as it won't require any more insns to do so, it's
2247 desirable to emit a small constant (in bits 0...9) in the last
2248 insn. This way there is more chance that it can be combined with
2249 a later addressing insn to form a pre-indexed load or store
2250 operation. Consider:
2252 *((volatile int *)0xe0000100) = 1;
2253 *((volatile int *)0xe0000110) = 2;
2255 We want this to wind up as:
2257 mov rA, #0xe0000000
2258 mov rB, #1
2259 str rB, [rA, #0x100]
2260 mov rB, #2
2261 str rB, [rA, #0x110]
2263 rather than having to synthesize both large constants from scratch.
2265 Therefore, we calculate how many insns would be required to emit
2266 the constant starting from `best_start', and also starting from
2267 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2268 yield a shorter sequence, we may as well use zero. */
2269 if (best_start != 0
2270 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2271 && (count_insns_for_constant (remainder, 0) <=
2272 count_insns_for_constant (remainder, best_start)))
2273 best_start = 0;
2275 /* Now start emitting the insns. */
2276 i = best_start;
2279 int end;
2281 if (i <= 0)
2282 i += 32;
2283 if (remainder & (3 << (i - 2)))
2285 end = i - 8;
2286 if (end < 0)
2287 end += 32;
2288 temp1 = remainder & ((0x0ff << end)
2289 | ((i < end) ? (0xff >> (32 - end)) : 0));
2290 remainder &= ~temp1;
2292 if (generate)
2294 rtx new_src, temp1_rtx;
2296 if (code == SET || code == MINUS)
2298 new_src = (subtargets ? gen_reg_rtx (mode) : target);
2299 if (can_invert && code != MINUS)
2300 temp1 = ~temp1;
2302 else
2304 if (remainder && subtargets)
2305 new_src = gen_reg_rtx (mode);
2306 else
2307 new_src = target;
2308 if (can_invert)
2309 temp1 = ~temp1;
2310 else if (can_negate)
2311 temp1 = -temp1;
2314 temp1 = trunc_int_for_mode (temp1, mode);
2315 temp1_rtx = GEN_INT (temp1);
2317 if (code == SET)
2319 else if (code == MINUS)
2320 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
2321 else
2322 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
2324 emit_constant_insn (cond,
2325 gen_rtx_SET (VOIDmode, new_src,
2326 temp1_rtx));
2327 source = new_src;
2330 if (code == SET)
2332 can_invert = 0;
2333 code = PLUS;
2335 else if (code == MINUS)
2336 code = PLUS;
2338 insns++;
2339 i -= 6;
2341 i -= 2;
2343 while (remainder);
2346 return insns;
2349 /* Canonicalize a comparison so that we are more likely to recognize it.
2350 This can be done for a few constant compares, where we can make the
2351 immediate value easier to load. */
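/* Illustrative example: (x > 4095) would need the invalid immediate 4095,
   but it is equivalent to (x >= 4096), and 4096 is a valid ARM immediate,
   so GT with 4095 is rewritten as GE with 4096.  */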
2353 enum rtx_code
2354 arm_canonicalize_comparison (enum rtx_code code, rtx * op1)
2356 unsigned HOST_WIDE_INT i = INTVAL (*op1);
2358 switch (code)
2360 case EQ:
2361 case NE:
2362 return code;
2364 case GT:
2365 case LE:
2366 if (i != ((((unsigned HOST_WIDE_INT) 1) << (HOST_BITS_PER_WIDE_INT - 1)) - 1)
2367 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
2369 *op1 = GEN_INT (i + 1);
2370 return code == GT ? GE : LT;
2372 break;
2374 case GE:
2375 case LT:
2376 if (i != (((unsigned HOST_WIDE_INT) 1) << (HOST_BITS_PER_WIDE_INT - 1))
2377 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
2379 *op1 = GEN_INT (i - 1);
2380 return code == GE ? GT : LE;
2382 break;
2384 case GTU:
2385 case LEU:
2386 if (i != ~((unsigned HOST_WIDE_INT) 0)
2387 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
2389 *op1 = GEN_INT (i + 1);
2390 return code == GTU ? GEU : LTU;
2392 break;
2394 case GEU:
2395 case LTU:
2396 if (i != 0
2397 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
2399 *op1 = GEN_INT (i - 1);
2400 return code == GEU ? GTU : LEU;
2402 break;
2404 default:
2405 gcc_unreachable ();
2408 return code;
2412 /* Define how to find the value returned by a function. */
2415 arm_function_value(tree type, tree func ATTRIBUTE_UNUSED)
2417 enum machine_mode mode;
2418 int unsignedp ATTRIBUTE_UNUSED;
2419 rtx r ATTRIBUTE_UNUSED;
2421 mode = TYPE_MODE (type);
2422 /* Promote integer types. */
2423 if (INTEGRAL_TYPE_P (type))
2424 PROMOTE_FUNCTION_MODE (mode, unsignedp, type);
2426 /* Promote small structs returned in a register to full-word size
2427 for big-endian AAPCS. */
2428 if (arm_return_in_msb (type))
2430 HOST_WIDE_INT size = int_size_in_bytes (type);
2431 if (size % UNITS_PER_WORD != 0)
2433 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
2434 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
2438 return LIBCALL_VALUE(mode);
2441 /* Determine the amount of memory needed to store the possible return
2442 registers of an untyped call. */
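/* For instance, following the arithmetic below, the base figure is 16
   bytes for the core registers; an FPA hard-float ABI raises that to
   16 + 12 = 28 bytes, and the iWMMXt ABI adds a further 8 bytes.  */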
2444 arm_apply_result_size (void)
2446 int size = 16;
2448 if (TARGET_ARM)
2450 if (TARGET_HARD_FLOAT_ABI)
2452 if (TARGET_FPA)
2453 size += 12;
2454 if (TARGET_MAVERICK)
2455 size += 8;
2457 if (TARGET_IWMMXT_ABI)
2458 size += 8;
2461 return size;
2464 /* Decide whether a type should be returned in memory (true)
2465 or in a register (false). This is called by the macro
2466 RETURN_IN_MEMORY. */
2468 arm_return_in_memory (tree type)
2470 HOST_WIDE_INT size;
2472 if (!AGGREGATE_TYPE_P (type)
2473 && TREE_CODE (type) != VECTOR_TYPE
2474 && !(TARGET_AAPCS_BASED && TREE_CODE (type) == COMPLEX_TYPE))
2475 /* All simple types are returned in registers.
2476 For AAPCS, complex types are treated the same as aggregates. */
2477 return 0;
2479 size = int_size_in_bytes (type);
2481 if (arm_abi != ARM_ABI_APCS)
2483 /* ATPCS and later return aggregate types in memory only if they are
2484 larger than a word (or are variable size). */
2485 return (size < 0 || size > UNITS_PER_WORD);
2488 /* To maximize backwards compatibility with previous versions of gcc,
2489 return vectors up to 4 words in registers. */
2490 if (TREE_CODE (type) == VECTOR_TYPE)
2491 return (size < 0 || size > (4 * UNITS_PER_WORD));
2493 /* For the arm-wince targets we choose to be compatible with Microsoft's
2494 ARM and Thumb compilers, which always return aggregates in memory. */
2495 #ifndef ARM_WINCE
2496 /* All structures/unions bigger than one word are returned in memory.
2497 Also catch the case where int_size_in_bytes returns -1. In this case
2498 the aggregate is either huge or of variable size, and in either case
2499 we will want to return it via memory and not in a register. */
2500 if (size < 0 || size > UNITS_PER_WORD)
2501 return 1;
2503 if (TREE_CODE (type) == RECORD_TYPE)
2505 tree field;
2507 /* For a struct the APCS says that we only return in a register
2508 if the type is 'integer like' and every addressable element
2509 has an offset of zero. For practical purposes this means
2510 that the structure can have at most one non bit-field element
2511 and that this element must be the first one in the structure. */
2513 /* Find the first field, ignoring non FIELD_DECL things which will
2514 have been created by C++. */
2515 for (field = TYPE_FIELDS (type);
2516 field && TREE_CODE (field) != FIELD_DECL;
2517 field = TREE_CHAIN (field))
2518 continue;
2520 if (field == NULL)
2521 return 0; /* An empty structure. Allowed by an extension to ANSI C. */
2523 /* Check that the first field is valid for returning in a register. */
2525 /* ... Floats are not allowed */
2526 if (FLOAT_TYPE_P (TREE_TYPE (field)))
2527 return 1;
2529 /* ... Aggregates that are not themselves valid for returning in
2530 a register are not allowed. */
2531 if (RETURN_IN_MEMORY (TREE_TYPE (field)))
2532 return 1;
2534 /* Now check the remaining fields, if any. Only bitfields are allowed,
2535 since they are not addressable. */
2536 for (field = TREE_CHAIN (field);
2537 field;
2538 field = TREE_CHAIN (field))
2540 if (TREE_CODE (field) != FIELD_DECL)
2541 continue;
2543 if (!DECL_BIT_FIELD_TYPE (field))
2544 return 1;
2547 return 0;
2550 if (TREE_CODE (type) == UNION_TYPE)
2552 tree field;
2554 /* Unions can be returned in registers if every element is
2555 integral, or can be returned in an integer register. */
2556 for (field = TYPE_FIELDS (type);
2557 field;
2558 field = TREE_CHAIN (field))
2560 if (TREE_CODE (field) != FIELD_DECL)
2561 continue;
2563 if (FLOAT_TYPE_P (TREE_TYPE (field)))
2564 return 1;
2566 if (RETURN_IN_MEMORY (TREE_TYPE (field)))
2567 return 1;
2570 return 0;
2572 #endif /* not ARM_WINCE */
2574 /* Return all other types in memory. */
2575 return 1;
2578 /* Indicate whether or not words of a double are in big-endian order. */
2581 arm_float_words_big_endian (void)
2583 if (TARGET_MAVERICK)
2584 return 0;
2586 /* For FPA, float words are always big-endian. For VFP, floats words
2587 follow the memory system mode. */
2589 if (TARGET_FPA)
2591 return 1;
2594 if (TARGET_VFP)
2595 return (TARGET_BIG_END ? 1 : 0);
2597 return 1;
2600 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2601 for a call to a function whose data type is FNTYPE.
2602 For a library call, FNTYPE is NULL. */
2603 void
2604 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
2605 rtx libname ATTRIBUTE_UNUSED,
2606 tree fndecl ATTRIBUTE_UNUSED)
2608 /* On the ARM, the offset starts at 0. */
2609 pcum->nregs = ((fntype && aggregate_value_p (TREE_TYPE (fntype), fntype)) ? 1 : 0);
2610 pcum->iwmmxt_nregs = 0;
2611 pcum->can_split = true;
2613 pcum->call_cookie = CALL_NORMAL;
2615 if (TARGET_LONG_CALLS)
2616 pcum->call_cookie = CALL_LONG;
2618 /* Check for long call/short call attributes. The attributes
2619 override any command line option. */
2620 if (fntype)
2622 if (lookup_attribute ("short_call", TYPE_ATTRIBUTES (fntype)))
2623 pcum->call_cookie = CALL_SHORT;
2624 else if (lookup_attribute ("long_call", TYPE_ATTRIBUTES (fntype)))
2625 pcum->call_cookie = CALL_LONG;
2628 /* Varargs vectors are treated the same as long long.
2629 named_count avoids having to change the way arm handles 'named' */
2630 pcum->named_count = 0;
2631 pcum->nargs = 0;
2633 if (TARGET_REALLY_IWMMXT && fntype)
2635 tree fn_arg;
2637 for (fn_arg = TYPE_ARG_TYPES (fntype);
2638 fn_arg;
2639 fn_arg = TREE_CHAIN (fn_arg))
2640 pcum->named_count += 1;
2642 if (! pcum->named_count)
2643 pcum->named_count = INT_MAX;
2648 /* Return true if mode/type need doubleword alignment. */
2649 bool
2650 arm_needs_doubleword_align (enum machine_mode mode, tree type)
2652 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
2653 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
2657 /* Determine where to put an argument to a function.
2658 Value is zero to push the argument on the stack,
2659 or a hard register in which to store the argument.
2661 MODE is the argument's machine mode.
2662 TYPE is the data type of the argument (as a tree).
2663 This is null for libcalls where that information may
2664 not be available.
2665 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2666 the preceding args and about the function being called.
2667 NAMED is nonzero if this argument is a named parameter
2668 (otherwise it is an extra parameter matching an ellipsis). */
2671 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
2672 tree type, int named)
2674 int nregs;
2676 /* Varargs vectors are treated the same as long long.
2677 named_count avoids having to change the way arm handles 'named' */
2678 if (TARGET_IWMMXT_ABI
2679 && arm_vector_mode_supported_p (mode)
2680 && pcum->named_count > pcum->nargs + 1)
2682 if (pcum->iwmmxt_nregs <= 9)
2683 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
2684 else
2686 pcum->can_split = false;
2687 return NULL_RTX;
2691 /* Put doubleword aligned quantities in even register pairs. */
2692 if (pcum->nregs & 1
2693 && ARM_DOUBLEWORD_ALIGN
2694 && arm_needs_doubleword_align (mode, type))
2695 pcum->nregs++;
2697 if (mode == VOIDmode)
2698 /* Compute operand 2 of the call insn. */
2699 return GEN_INT (pcum->call_cookie);
2701 /* Only allow splitting an arg between regs and memory if all preceding
2702 args were allocated to regs. For args passed by reference we only count
2703 the reference pointer. */
2704 if (pcum->can_split)
2705 nregs = 1;
2706 else
2707 nregs = ARM_NUM_REGS2 (mode, type);
2709 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
2710 return NULL_RTX;
2712 return gen_rtx_REG (mode, pcum->nregs);
2715 static int
2716 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
2717 tree type, bool named ATTRIBUTE_UNUSED)
2719 int nregs = pcum->nregs;
2721 if (arm_vector_mode_supported_p (mode))
2722 return 0;
2724 if (NUM_ARG_REGS > nregs
2725 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
2726 && pcum->can_split)
2727 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
2729 return 0;
2732 /* Variable sized types are passed by reference. This is a GCC
2733 extension to the ARM ABI. */
2735 static bool
2736 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2737 enum machine_mode mode ATTRIBUTE_UNUSED,
2738 tree type, bool named ATTRIBUTE_UNUSED)
2740 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
2743 /* Encode the current state of the #pragma [no_]long_calls. */
2744 typedef enum
2746 OFF, /* No #pragma [no_]long_calls is in effect. */
2747 LONG, /* #pragma long_calls is in effect. */
2748 SHORT /* #pragma no_long_calls is in effect. */
2749 } arm_pragma_enum;
2751 static arm_pragma_enum arm_pragma_long_calls = OFF;
2753 void
2754 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
2756 arm_pragma_long_calls = LONG;
2759 void
2760 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
2762 arm_pragma_long_calls = SHORT;
2765 void
2766 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
2768 arm_pragma_long_calls = OFF;
2771 /* Table of machine attributes. */
2772 const struct attribute_spec arm_attribute_table[] =
2774 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2775 /* Function calls made to this symbol must be done indirectly, because
2776 it may lie outside of the 26 bit addressing range of a normal function
2777 call. */
2778 { "long_call", 0, 0, false, true, true, NULL },
2779 /* Whereas these functions are always known to reside within the 26 bit
2780 addressing range. */
2781 { "short_call", 0, 0, false, true, true, NULL },
2782 /* Interrupt Service Routines have special prologue and epilogue requirements. */
2783 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
2784 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
2785 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute },
2786 #ifdef ARM_PE
2787 /* ARM/PE has three new attributes:
2788 interfacearm - ?
2789 dllexport - for exporting a function/variable that will live in a dll
2790 dllimport - for importing a function/variable from a dll
2792 Microsoft allows multiple declspecs in one __declspec, separating
2793 them with spaces. We do NOT support this. Instead, use __declspec
2794 multiple times.
2796 { "dllimport", 0, 0, true, false, false, NULL },
2797 { "dllexport", 0, 0, true, false, false, NULL },
2798 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute },
2799 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
2800 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2801 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2802 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
2803 #endif
2804 { NULL, 0, 0, false, false, false, NULL }
2807 /* Handle an attribute requiring a FUNCTION_DECL;
2808 arguments as in struct attribute_spec.handler. */
2809 static tree
2810 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
2811 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
2813 if (TREE_CODE (*node) != FUNCTION_DECL)
2815 warning (0, "%qs attribute only applies to functions",
2816 IDENTIFIER_POINTER (name));
2817 *no_add_attrs = true;
2820 return NULL_TREE;
2823 /* Handle an "interrupt" or "isr" attribute;
2824 arguments as in struct attribute_spec.handler. */
2825 static tree
2826 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
2827 bool *no_add_attrs)
2829 if (DECL_P (*node))
2831 if (TREE_CODE (*node) != FUNCTION_DECL)
2833 warning (0, "%qs attribute only applies to functions",
2834 IDENTIFIER_POINTER (name));
2835 *no_add_attrs = true;
2837 /* FIXME: the argument if any is checked for type attributes;
2838 should it be checked for decl ones? */
2840 else
2842 if (TREE_CODE (*node) == FUNCTION_TYPE
2843 || TREE_CODE (*node) == METHOD_TYPE)
2845 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
2847 warning (0, "%qs attribute ignored", IDENTIFIER_POINTER (name));
2848 *no_add_attrs = true;
2851 else if (TREE_CODE (*node) == POINTER_TYPE
2852 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
2853 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
2854 && arm_isr_value (args) != ARM_FT_UNKNOWN)
2856 *node = build_variant_type_copy (*node);
2857 TREE_TYPE (*node) = build_type_attribute_variant
2858 (TREE_TYPE (*node),
2859 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
2860 *no_add_attrs = true;
2862 else
2864 /* Possibly pass this attribute on from the type to a decl. */
2865 if (flags & ((int) ATTR_FLAG_DECL_NEXT
2866 | (int) ATTR_FLAG_FUNCTION_NEXT
2867 | (int) ATTR_FLAG_ARRAY_NEXT))
2869 *no_add_attrs = true;
2870 return tree_cons (name, args, NULL_TREE);
2872 else
2874 warning (0, "%qs attribute ignored", IDENTIFIER_POINTER (name));
2879 return NULL_TREE;
2882 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2883 /* Handle the "notshared" attribute. This attribute is another way of
2884 requesting hidden visibility. ARM's compiler supports
2885 "__declspec(notshared)"; we support the same thing via an
2886 attribute. */
2888 static tree
2889 arm_handle_notshared_attribute (tree *node,
2890 tree name ATTRIBUTE_UNUSED,
2891 tree args ATTRIBUTE_UNUSED,
2892 int flags ATTRIBUTE_UNUSED,
2893 bool *no_add_attrs)
2895 tree decl = TYPE_NAME (*node);
2897 if (decl)
2899 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
2900 DECL_VISIBILITY_SPECIFIED (decl) = 1;
2901 *no_add_attrs = false;
2903 return NULL_TREE;
2905 #endif
2907 /* Return 0 if the attributes for two types are incompatible, 1 if they
2908 are compatible, and 2 if they are nearly compatible (which causes a
2909 warning to be generated). */
2910 static int
2911 arm_comp_type_attributes (tree type1, tree type2)
2913 int l1, l2, s1, s2;
2915 /* Check for mismatch of non-default calling convention. */
2916 if (TREE_CODE (type1) != FUNCTION_TYPE)
2917 return 1;
2919 /* Check for mismatched call attributes. */
2920 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
2921 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
2922 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
2923 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
2925 /* Only bother to check if an attribute is defined. */
2926 if (l1 | l2 | s1 | s2)
2928 /* If one type has an attribute, the other must have the same attribute. */
2929 if ((l1 != l2) || (s1 != s2))
2930 return 0;
2932 /* Disallow mixed attributes. */
2933 if ((l1 & s2) || (l2 & s1))
2934 return 0;
2937 /* Check for mismatched ISR attribute. */
2938 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
2939 if (! l1)
2940 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
2941 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
2942 if (! l2)
2943 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
2944 if (l1 != l2)
2945 return 0;
2947 return 1;
2950 /* Encode long_call or short_call attribute by prefixing
2951 symbol name in DECL with a special character FLAG. */
2952 void
2953 arm_encode_call_attribute (tree decl, int flag)
2955 const char * str = XSTR (XEXP (DECL_RTL (decl), 0), 0);
2956 int len = strlen (str);
2957 char * newstr;
2959 /* Do not allow weak functions to be treated as short call. */
2960 if (DECL_WEAK (decl) && flag == SHORT_CALL_FLAG_CHAR)
2961 return;
2963 newstr = alloca (len + 2);
2964 newstr[0] = flag;
2965 strcpy (newstr + 1, str);
2967 newstr = (char *) ggc_alloc_string (newstr, len + 1);
2968 XSTR (XEXP (DECL_RTL (decl), 0), 0) = newstr;
2971 /* Assigns default attributes to newly defined type. This is used to
2972 set short_call/long_call attributes for function types of
2973 functions defined inside corresponding #pragma scopes. */
2974 static void
2975 arm_set_default_type_attributes (tree type)
2977 /* Add __attribute__ ((long_call)) to all functions, when
2978 inside #pragma long_calls or __attribute__ ((short_call)),
2979 when inside #pragma no_long_calls. */
2980 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
2982 tree type_attr_list, attr_name;
2983 type_attr_list = TYPE_ATTRIBUTES (type);
2985 if (arm_pragma_long_calls == LONG)
2986 attr_name = get_identifier ("long_call");
2987 else if (arm_pragma_long_calls == SHORT)
2988 attr_name = get_identifier ("short_call");
2989 else
2990 return;
2992 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
2993 TYPE_ATTRIBUTES (type) = type_attr_list;
2997 /* Return 1 if the operand is a SYMBOL_REF for a function known to be
2998 defined within the current compilation unit. If this cannot be
2999 determined, then 0 is returned. */
3000 static int
3001 current_file_function_operand (rtx sym_ref)
3003 /* This is a bit of a fib. A function will have a short call flag
3004 applied to its name if it has the short call attribute, or it has
3005 already been defined within the current compilation unit. */
3006 if (ENCODED_SHORT_CALL_ATTR_P (XSTR (sym_ref, 0)))
3007 return 1;
3009 /* The current function is always defined within the current compilation
3010 unit. If it is a weak definition, however, then this may not be the real
3011 definition of the function, and so we have to say no. */
3012 if (sym_ref == XEXP (DECL_RTL (current_function_decl), 0)
3013 && !DECL_WEAK (current_function_decl))
3014 return 1;
3016 /* We cannot make the determination - default to returning 0. */
3017 return 0;
3020 /* Return nonzero if a 32 bit "long_call" should be generated for
3021 this call. We generate a long_call if the function:
3023 a. has an __attribute__ ((long_call))
3024 or b. is within the scope of a #pragma long_calls
3025 or c. the -mlong-calls command line switch has been specified
3026 . and either:
3027 1. -ffunction-sections is in effect
3028 or 2. the current function has __attribute__ ((section))
3029 or 3. the target function has __attribute__ ((section))
3031 However we do not generate a long call if the function:
3033 d. has an __attribute__ ((short_call))
3034 or e. is inside the scope of a #pragma no_long_calls
3035 or f. is defined within the current compilation unit.
3037 This function will be called by C fragments contained in the machine
3038 description file. SYM_REF and CALL_COOKIE correspond to the matched
3039 rtl operands. CALL_SYMBOL is used to distinguish between
3040 two different callers of the function. It is set to 1 in the
3041 "call_symbol" and "call_symbol_value" patterns and to 0 in the "call"
3042 and "call_value" patterns. This is because of the difference in the
3043 SYM_REFs passed by these patterns. */
3045 arm_is_longcall_p (rtx sym_ref, int call_cookie, int call_symbol)
3047 if (!call_symbol)
3049 if (GET_CODE (sym_ref) != MEM)
3050 return 0;
3052 sym_ref = XEXP (sym_ref, 0);
3055 if (GET_CODE (sym_ref) != SYMBOL_REF)
3056 return 0;
3058 if (call_cookie & CALL_SHORT)
3059 return 0;
3061 if (TARGET_LONG_CALLS)
3063 if (flag_function_sections
3064 || DECL_SECTION_NAME (current_function_decl))
3065 /* c.3 is handled by the definition of the
3066 ARM_DECLARE_FUNCTION_SIZE macro. */
3067 return 1;
3070 if (current_file_function_operand (sym_ref))
3071 return 0;
3073 return (call_cookie & CALL_LONG)
3074 || ENCODED_LONG_CALL_ATTR_P (XSTR (sym_ref, 0))
3075 || TARGET_LONG_CALLS;
3078 /* Return nonzero if it is ok to make a tail-call to DECL. */
3079 static bool
3080 arm_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
3082 int call_type = TARGET_LONG_CALLS ? CALL_LONG : CALL_NORMAL;
3084 if (cfun->machine->sibcall_blocked)
3085 return false;
3087 /* Never tailcall something for which we have no decl, or if we
3088 are in Thumb mode. */
3089 if (decl == NULL || TARGET_THUMB)
3090 return false;
3092 /* Get the calling method. */
3093 if (lookup_attribute ("short_call", TYPE_ATTRIBUTES (TREE_TYPE (decl))))
3094 call_type = CALL_SHORT;
3095 else if (lookup_attribute ("long_call", TYPE_ATTRIBUTES (TREE_TYPE (decl))))
3096 call_type = CALL_LONG;
3098 /* Cannot tail-call to long calls, since these are out of range of
3099 a branch instruction. However, if not compiling PIC, we know
3100 we can reach the symbol if it is in this compilation unit. */
3101 if (call_type == CALL_LONG && (flag_pic || !TREE_ASM_WRITTEN (decl)))
3102 return false;
3104 /* If we are interworking and the function is not declared static
3105 then we can't tail-call it unless we know that it exists in this
3106 compilation unit (since it might be a Thumb routine). */
3107 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
3108 return false;
3110 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
3111 if (IS_INTERRUPT (arm_current_func_type ()))
3112 return false;
3114 /* Everything else is ok. */
3115 return true;
3119 /* Addressing mode support functions. */
3121 /* Return nonzero if X is a legitimate immediate operand when compiling
3122 for PIC. */
3124 legitimate_pic_operand_p (rtx x)
3126 if (CONSTANT_P (x)
3127 && flag_pic
3128 && (GET_CODE (x) == SYMBOL_REF
3129 || (GET_CODE (x) == CONST
3130 && GET_CODE (XEXP (x, 0)) == PLUS
3131 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)))
3132 return 0;
3134 return 1;
3138 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
3140 if (GET_CODE (orig) == SYMBOL_REF
3141 || GET_CODE (orig) == LABEL_REF)
3143 #ifndef AOF_ASSEMBLER
3144 rtx pic_ref, address;
3145 #endif
3146 rtx insn;
3147 int subregs = 0;
3149 if (reg == 0)
3151 gcc_assert (!no_new_pseudos);
3152 reg = gen_reg_rtx (Pmode);
3154 subregs = 1;
3157 #ifdef AOF_ASSEMBLER
3158 /* The AOF assembler can generate relocations for these directly, and
3159 understands that the PIC register has to be added into the offset. */
3160 insn = emit_insn (gen_pic_load_addr_based (reg, orig));
3161 #else
3162 if (subregs)
3163 address = gen_reg_rtx (Pmode);
3164 else
3165 address = reg;
3167 if (TARGET_ARM)
3168 emit_insn (gen_pic_load_addr_arm (address, orig));
3169 else
3170 emit_insn (gen_pic_load_addr_thumb (address, orig));
3172 if ((GET_CODE (orig) == LABEL_REF
3173 || (GET_CODE (orig) == SYMBOL_REF
3174 && SYMBOL_REF_LOCAL_P (orig)))
3175 && NEED_GOT_RELOC)
3176 pic_ref = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, address);
3177 else
3179 pic_ref = gen_const_mem (Pmode,
3180 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
3181 address));
3184 insn = emit_move_insn (reg, pic_ref);
3185 #endif
3186 current_function_uses_pic_offset_table = 1;
3187 /* Put a REG_EQUAL note on this insn, so that it can be optimized
3188 by loop. */
3189 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, orig,
3190 REG_NOTES (insn));
3191 return reg;
3193 else if (GET_CODE (orig) == CONST)
3195 rtx base, offset;
3197 if (GET_CODE (XEXP (orig, 0)) == PLUS
3198 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
3199 return orig;
3201 if (reg == 0)
3203 gcc_assert (!no_new_pseudos);
3204 reg = gen_reg_rtx (Pmode);
3207 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
3209 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
3210 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
3211 base == reg ? 0 : reg);
3213 if (GET_CODE (offset) == CONST_INT)
3215 /* The base register doesn't really matter, we only want to
3216 test the index for the appropriate mode. */
3217 if (!arm_legitimate_index_p (mode, offset, SET, 0))
3219 gcc_assert (!no_new_pseudos);
3220 offset = force_reg (Pmode, offset);
3223 if (GET_CODE (offset) == CONST_INT)
3224 return plus_constant (base, INTVAL (offset));
3227 if (GET_MODE_SIZE (mode) > 4
3228 && (GET_MODE_CLASS (mode) == MODE_INT
3229 || TARGET_SOFT_FLOAT))
3231 emit_insn (gen_addsi3 (reg, base, offset));
3232 return reg;
3235 return gen_rtx_PLUS (Pmode, base, offset);
3238 return orig;
3242 /* Find a spare low register to use during the prolog of a function. */
3244 static int
3245 thumb_find_work_register (unsigned long pushed_regs_mask)
3247 int reg;
3249 /* Check the argument registers first as these are call-used. The
3250 register allocation order means that sometimes r3 might be used
3251 but earlier argument registers might not, so check them all. */
3252 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
3253 if (!regs_ever_live[reg])
3254 return reg;
3256 /* Before going on to check the call-saved registers we can try a couple
3257 more ways of deducing that r3 is available. The first is when we are
3258 pushing anonymous arguments onto the stack and we have less than 4
3259 registers' worth of fixed arguments (*). In this case r3 will be part of
3260 the variable argument list and so we can be sure that it will be
3261 pushed right at the start of the function. Hence it will be available
3262 for the rest of the prologue.
3263 (*): i.e. current_function_pretend_args_size is greater than 0. */
3264 if (cfun->machine->uses_anonymous_args
3265 && current_function_pretend_args_size > 0)
3266 return LAST_ARG_REGNUM;
3268 /* The other case is when we have fixed arguments but less than 4 registers
3269 worth. In this case r3 might be used in the body of the function, but
3270 it is not being used to convey an argument into the function. In theory
3271 we could just check current_function_args_size to see how many bytes are
3272 being passed in argument registers, but it seems that it is unreliable.
3273 Sometimes it will have the value 0 when in fact arguments are being
3274 passed. (See testcase execute/20021111-1.c for an example). So we also
3275 check the args_info.nregs field as well. The problem with this field is
3276 that it makes no allowances for arguments that are passed to the
3277 function but which are not used. Hence we could miss an opportunity
3278 when a function has an unused argument in r3. But it is better to be
3279 safe than to be sorry. */
3280 if (! cfun->machine->uses_anonymous_args
3281 && current_function_args_size >= 0
3282 && current_function_args_size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
3283 && cfun->args_info.nregs < 4)
3284 return LAST_ARG_REGNUM;
3286 /* Otherwise look for a call-saved register that is going to be pushed. */
3287 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
3288 if (pushed_regs_mask & (1 << reg))
3289 return reg;
3291 /* Something went wrong - thumb_compute_save_reg_mask()
3292 should have arranged for a suitable register to be pushed. */
3293 gcc_unreachable ();
3297 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
3298 low register. */
3300 void
3301 arm_load_pic_register (unsigned int scratch)
3303 #ifndef AOF_ASSEMBLER
3304 rtx l1, pic_tmp, pic_tmp2, pic_rtx;
3305 rtx global_offset_table;
3307 if (current_function_uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
3308 return;
3310 gcc_assert (flag_pic);
3312 l1 = gen_label_rtx ();
3314 global_offset_table = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3315 /* On the ARM the PC register contains 'dot + 8' at the time of the
3316 addition, on the Thumb it is 'dot + 4'. */
3317 pic_tmp = plus_constant (gen_rtx_LABEL_REF (Pmode, l1), TARGET_ARM ? 8 : 4);
3318 if (GOT_PCREL)
3319 pic_tmp2 = gen_rtx_CONST (VOIDmode,
3320 gen_rtx_PLUS (Pmode, global_offset_table, pc_rtx));
3321 else
3322 pic_tmp2 = gen_rtx_CONST (VOIDmode, global_offset_table);
3324 pic_rtx = gen_rtx_CONST (Pmode, gen_rtx_MINUS (Pmode, pic_tmp2, pic_tmp));
3326 if (TARGET_ARM)
3328 emit_insn (gen_pic_load_addr_arm (pic_offset_table_rtx, pic_rtx));
3329 emit_insn (gen_pic_add_dot_plus_eight (pic_offset_table_rtx, l1));
3331 else
3333 if (REGNO (pic_offset_table_rtx) > LAST_LO_REGNUM)
3335 /* We will have pushed the pic register, so we should always be
3336 able to find a work register. */
3337 pic_tmp = gen_rtx_REG (SImode, scratch);
3338 emit_insn (gen_pic_load_addr_thumb (pic_tmp, pic_rtx));
3339 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
3341 else
3342 emit_insn (gen_pic_load_addr_thumb (pic_offset_table_rtx, pic_rtx));
3343 emit_insn (gen_pic_add_dot_plus_four (pic_offset_table_rtx, l1));
3346 /* Need to emit this whether or not we obey regdecls,
3347 since setjmp/longjmp can cause life info to screw up. */
3348 emit_insn (gen_rtx_USE (VOIDmode, pic_offset_table_rtx));
3349 #endif /* AOF_ASSEMBLER */
3353 /* Return nonzero if X is valid as an ARM state addressing register. */
3354 static int
3355 arm_address_register_rtx_p (rtx x, int strict_p)
3357 int regno;
3359 if (GET_CODE (x) != REG)
3360 return 0;
3362 regno = REGNO (x);
3364 if (strict_p)
3365 return ARM_REGNO_OK_FOR_BASE_P (regno);
3367 return (regno <= LAST_ARM_REGNUM
3368 || regno >= FIRST_PSEUDO_REGISTER
3369 || regno == FRAME_POINTER_REGNUM
3370 || regno == ARG_POINTER_REGNUM);
3373 /* Return nonzero if X is a valid ARM state address operand. */
3375 arm_legitimate_address_p (enum machine_mode mode, rtx x, RTX_CODE outer,
3376 int strict_p)
3378 bool use_ldrd;
3379 enum rtx_code code = GET_CODE (x);
3381 if (arm_address_register_rtx_p (x, strict_p))
3382 return 1;
3384 use_ldrd = (TARGET_LDRD
3385 && (mode == DImode
3386 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
3388 if (code == POST_INC || code == PRE_DEC
3389 || ((code == PRE_INC || code == POST_DEC)
3390 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
3391 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
3393 else if ((code == POST_MODIFY || code == PRE_MODIFY)
3394 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
3395 && GET_CODE (XEXP (x, 1)) == PLUS
3396 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
3398 rtx addend = XEXP (XEXP (x, 1), 1);
3400 /* Don't allow ldrd post increment by register because it's hard
3401 to fixup invalid register choices. */
3402 if (use_ldrd
3403 && GET_CODE (x) == POST_MODIFY
3404 && GET_CODE (addend) == REG)
3405 return 0;
3407 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
3408 && arm_legitimate_index_p (mode, addend, outer, strict_p));
3411 /* After reload constants split into minipools will have addresses
3412 from a LABEL_REF. */
3413 else if (reload_completed
3414 && (code == LABEL_REF
3415 || (code == CONST
3416 && GET_CODE (XEXP (x, 0)) == PLUS
3417 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
3418 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
3419 return 1;
3421 else if (mode == TImode)
3422 return 0;
3424 else if (code == PLUS)
3426 rtx xop0 = XEXP (x, 0);
3427 rtx xop1 = XEXP (x, 1);
3429 return ((arm_address_register_rtx_p (xop0, strict_p)
3430 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
3431 || (arm_address_register_rtx_p (xop1, strict_p)
3432 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
3435 #if 0
3436 /* Reload currently can't handle MINUS, so disable this for now */
3437 else if (GET_CODE (x) == MINUS)
3439 rtx xop0 = XEXP (x, 0);
3440 rtx xop1 = XEXP (x, 1);
3442 return (arm_address_register_rtx_p (xop0, strict_p)
3443 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
3445 #endif
3447 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
3448 && code == SYMBOL_REF
3449 && CONSTANT_POOL_ADDRESS_P (x)
3450 && ! (flag_pic
3451 && symbol_mentioned_p (get_pool_constant (x))))
3452 return 1;
3454 return 0;
3457 /* Return nonzero if INDEX is valid for an address index operand in
3458 ARM state. */
3459 static int
3460 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
3461 int strict_p)
3463 HOST_WIDE_INT range;
3464 enum rtx_code code = GET_CODE (index);
3466 /* Standard coprocessor addressing modes. */
3467 if (TARGET_HARD_FLOAT
3468 && (TARGET_FPA || TARGET_MAVERICK)
3469 && (GET_MODE_CLASS (mode) == MODE_FLOAT
3470 || (TARGET_MAVERICK && mode == DImode)))
3471 return (code == CONST_INT && INTVAL (index) < 1024
3472 && INTVAL (index) > -1024
3473 && (INTVAL (index) & 3) == 0);
3475 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
3476 return (code == CONST_INT
3477 && INTVAL (index) < 1024
3478 && INTVAL (index) > -1024
3479 && (INTVAL (index) & 3) == 0);
3481 if (arm_address_register_rtx_p (index, strict_p)
3482 && (GET_MODE_SIZE (mode) <= 4))
3483 return 1;
3485 if (mode == DImode || mode == DFmode)
3487 if (code == CONST_INT)
3489 HOST_WIDE_INT val = INTVAL (index);
3491 if (TARGET_LDRD)
3492 return val > -256 && val < 256;
3493 else
3494 return val > -4096 && val < 4092;
3497 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
3500 if (GET_MODE_SIZE (mode) <= 4
3501 && ! (arm_arch4
3502 && (mode == HImode
3503 || (mode == QImode && outer == SIGN_EXTEND))))
3505 if (code == MULT)
3507 rtx xiop0 = XEXP (index, 0);
3508 rtx xiop1 = XEXP (index, 1);
3510 return ((arm_address_register_rtx_p (xiop0, strict_p)
3511 && power_of_two_operand (xiop1, SImode))
3512 || (arm_address_register_rtx_p (xiop1, strict_p)
3513 && power_of_two_operand (xiop0, SImode)));
3515 else if (code == LSHIFTRT || code == ASHIFTRT
3516 || code == ASHIFT || code == ROTATERT)
3518 rtx op = XEXP (index, 1);
3520 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
3521 && GET_CODE (op) == CONST_INT
3522 && INTVAL (op) > 0
3523 && INTVAL (op) <= 31);
3527 /* For ARM v4 we may be doing a sign-extend operation during the
3528 load. */
3529 if (arm_arch4)
3531 if (mode == HImode || (outer == SIGN_EXTEND && mode == QImode))
3532 range = 256;
3533 else
3534 range = 4096;
3536 else
3537 range = (mode == HImode) ? 4095 : 4096;
3539 return (code == CONST_INT
3540 && INTVAL (index) < range
3541 && INTVAL (index) > -range);
3544 /* Return nonzero if X is valid as a Thumb state base register. */
3545 static int
3546 thumb_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
3548 int regno;
3550 if (GET_CODE (x) != REG)
3551 return 0;
3553 regno = REGNO (x);
3555 if (strict_p)
3556 return THUMB_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
3558 return (regno <= LAST_LO_REGNUM
3559 || regno > LAST_VIRTUAL_REGISTER
3560 || regno == FRAME_POINTER_REGNUM
3561 || (GET_MODE_SIZE (mode) >= 4
3562 && (regno == STACK_POINTER_REGNUM
3563 || regno >= FIRST_PSEUDO_REGISTER
3564 || x == hard_frame_pointer_rtx
3565 || x == arg_pointer_rtx)));
3568 /* Return nonzero if x is a legitimate index register. This is the case
3569 for any base register that can access a QImode object. */
3570 inline static int
3571 thumb_index_register_rtx_p (rtx x, int strict_p)
3573 return thumb_base_register_rtx_p (x, QImode, strict_p);
3576 /* Return nonzero if x is a legitimate Thumb-state address.
3578 The AP may be eliminated to either the SP or the FP, so we use the
3579 least common denominator, e.g. SImode, and offsets from 0 to 64.
3581 ??? Verify whether the above is the right approach.
3583 ??? Also, the FP may be eliminated to the SP, so perhaps that
3584 needs special handling also.
3586 ??? Look at how the mips16 port solves this problem. It probably uses
3587 better ways to solve some of these problems.
3589 Although it is not incorrect, we don't accept QImode and HImode
3590 addresses based on the frame pointer or arg pointer until the
3591 reload pass starts. This is so that eliminating such addresses
3592 into stack based ones won't produce impossible code. */
3594 thumb_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
3596 /* ??? Not clear if this is right. Experiment. */
3597 if (GET_MODE_SIZE (mode) < 4
3598 && !(reload_in_progress || reload_completed)
3599 && (reg_mentioned_p (frame_pointer_rtx, x)
3600 || reg_mentioned_p (arg_pointer_rtx, x)
3601 || reg_mentioned_p (virtual_incoming_args_rtx, x)
3602 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
3603 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
3604 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
3605 return 0;
3607 /* Accept any base register. SP only in SImode or larger. */
3608 else if (thumb_base_register_rtx_p (x, mode, strict_p))
3609 return 1;
3611 /* This is PC relative data before arm_reorg runs. */
3612 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
3613 && GET_CODE (x) == SYMBOL_REF
3614 && CONSTANT_POOL_ADDRESS_P (x) && ! flag_pic)
3615 return 1;
3617 /* This is PC relative data after arm_reorg runs. */
3618 else if (GET_MODE_SIZE (mode) >= 4 && reload_completed
3619 && (GET_CODE (x) == LABEL_REF
3620 || (GET_CODE (x) == CONST
3621 && GET_CODE (XEXP (x, 0)) == PLUS
3622 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
3623 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
3624 return 1;
3626 /* Post-inc indexing only supported for SImode and larger. */
3627 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
3628 && thumb_index_register_rtx_p (XEXP (x, 0), strict_p))
3629 return 1;
3631 else if (GET_CODE (x) == PLUS)
3633 /* REG+REG address can be any two index registers. */
3634 /* We disallow FRAME+REG addressing since we know that FRAME
3635 will be replaced with STACK, and SP relative addressing only
3636 permits SP+OFFSET. */
3637 if (GET_MODE_SIZE (mode) <= 4
3638 && XEXP (x, 0) != frame_pointer_rtx
3639 && XEXP (x, 1) != frame_pointer_rtx
3640 && thumb_index_register_rtx_p (XEXP (x, 0), strict_p)
3641 && thumb_index_register_rtx_p (XEXP (x, 1), strict_p))
3642 return 1;
3644 /* REG+const has 5-7 bit offset for non-SP registers. */
3645 else if ((thumb_index_register_rtx_p (XEXP (x, 0), strict_p)
3646 || XEXP (x, 0) == arg_pointer_rtx)
3647 && GET_CODE (XEXP (x, 1)) == CONST_INT
3648 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
3649 return 1;
3651 /* REG+const has 10 bit offset for SP, but only SImode and
3652 larger is supported. */
3653 /* ??? Should probably check for DI/DFmode overflow here
3654 just like GO_IF_LEGITIMATE_OFFSET does. */
3655 else if (GET_CODE (XEXP (x, 0)) == REG
3656 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
3657 && GET_MODE_SIZE (mode) >= 4
3658 && GET_CODE (XEXP (x, 1)) == CONST_INT
3659 && INTVAL (XEXP (x, 1)) >= 0
3660 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
3661 && (INTVAL (XEXP (x, 1)) & 3) == 0)
3662 return 1;
3664 else if (GET_CODE (XEXP (x, 0)) == REG
3665 && REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
3666 && GET_MODE_SIZE (mode) >= 4
3667 && GET_CODE (XEXP (x, 1)) == CONST_INT
3668 && (INTVAL (XEXP (x, 1)) & 3) == 0)
3669 return 1;
3672 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
3673 && GET_MODE_SIZE (mode) == 4
3674 && GET_CODE (x) == SYMBOL_REF
3675 && CONSTANT_POOL_ADDRESS_P (x)
3676 && !(flag_pic
3677 && symbol_mentioned_p (get_pool_constant (x))))
3678 return 1;
3680 return 0;
3683 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
3684 instruction of mode MODE. */
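/* For instance, per the checks below a QImode access allows offsets 0-31,
   HImode allows even offsets 0-62, and SImode (and larger) allows
   multiples of four up to 128 minus the access size, i.e. 0-124 for a
   word load.  */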
3686 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
3688 switch (GET_MODE_SIZE (mode))
3690 case 1:
3691 return val >= 0 && val < 32;
3693 case 2:
3694 return val >= 0 && val < 64 && (val & 1) == 0;
3696 default:
3697 return (val >= 0
3698 && (val + GET_MODE_SIZE (mode)) <= 128
3699 && (val & 3) == 0);
3703 /* Try machine-dependent ways of modifying an illegitimate address
3704 to be legitimate. If we find one, return the new, valid address. */
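/* Illustrative example: an SImode reference to (reg + 0x12345) is out of
   range for a 12-bit load offset, so it is rewritten as roughly
       add  rT, reg, #0x12000     @ 0x12000 is a valid immediate
       ...  [rT, #0x345]          @ remainder is now within range
   i.e. the high part of the offset is folded into a new base register.  */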
3706 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
3708 if (GET_CODE (x) == PLUS)
3710 rtx xop0 = XEXP (x, 0);
3711 rtx xop1 = XEXP (x, 1);
3713 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
3714 xop0 = force_reg (SImode, xop0);
3716 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
3717 xop1 = force_reg (SImode, xop1);
3719 if (ARM_BASE_REGISTER_RTX_P (xop0)
3720 && GET_CODE (xop1) == CONST_INT)
3722 HOST_WIDE_INT n, low_n;
3723 rtx base_reg, val;
3724 n = INTVAL (xop1);
3726 /* VFP addressing modes actually allow greater offsets, but for
3727 now we just stick with the lowest common denominator. */
3728 if (mode == DImode
3729 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
3731 low_n = n & 0x0f;
3732 n &= ~0x0f;
3733 if (low_n > 4)
3735 n += 16;
3736 low_n -= 16;
3739 else
3741 low_n = ((mode) == TImode ? 0
3742 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
3743 n -= low_n;
3746 base_reg = gen_reg_rtx (SImode);
3747 val = force_operand (gen_rtx_PLUS (SImode, xop0,
3748 GEN_INT (n)), NULL_RTX);
3749 emit_move_insn (base_reg, val);
3750 x = (low_n == 0 ? base_reg
3751 : gen_rtx_PLUS (SImode, base_reg, GEN_INT (low_n)));
3753 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
3754 x = gen_rtx_PLUS (SImode, xop0, xop1);
3757 /* XXX We don't allow MINUS any more -- see comment in
3758 arm_legitimate_address_p (). */
3759 else if (GET_CODE (x) == MINUS)
3761 rtx xop0 = XEXP (x, 0);
3762 rtx xop1 = XEXP (x, 1);
3764 if (CONSTANT_P (xop0))
3765 xop0 = force_reg (SImode, xop0);
3767 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
3768 xop1 = force_reg (SImode, xop1);
3770 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
3771 x = gen_rtx_MINUS (SImode, xop0, xop1);
3774 if (flag_pic)
3776 /* We need to find and carefully transform any SYMBOL and LABEL
3777 references; so go back to the original address expression. */
3778 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
3780 if (new_x != orig_x)
3781 x = new_x;
3784 return x;
3788 /* Try machine-dependent ways of modifying an illegitimate Thumb address
3789 to be legitimate. If we find one, return the new, valid address. */
3791 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
3793 if (GET_CODE (x) == PLUS
3794 && GET_CODE (XEXP (x, 1)) == CONST_INT
3795 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
3796 || INTVAL (XEXP (x, 1)) < 0))
3798 rtx xop0 = XEXP (x, 0);
3799 rtx xop1 = XEXP (x, 1);
3800 HOST_WIDE_INT offset = INTVAL (xop1);
3802 /* Try and fold the offset into a biasing of the base register and
3803 then offsetting that. Don't do this when optimizing for space
3804 since it can cause too many CSEs. */
3805 if (optimize_size && offset >= 0
3806 && offset < 256 + 31 * GET_MODE_SIZE (mode))
3808 HOST_WIDE_INT delta;
3810 if (offset >= 256)
3811 delta = offset - (256 - GET_MODE_SIZE (mode));
3812 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
3813 delta = 31 * GET_MODE_SIZE (mode);
3814 else
3815 delta = offset & (~31 * GET_MODE_SIZE (mode));
3817 xop0 = force_operand (plus_constant (xop0, offset - delta),
3818 NULL_RTX);
3819 x = plus_constant (xop0, delta);
3821 else if (offset < 0 && offset > -256)
3822 /* Small negative offsets are best done with a subtract before the
3823 dereference, since forcing these into a register normally takes two
3824 instructions. */
3825 x = force_operand (x, NULL_RTX);
3826 else
3828 /* For the remaining cases, force the constant into a register. */
3829 xop1 = force_reg (SImode, xop1);
3830 x = gen_rtx_PLUS (SImode, xop0, xop1);
3833 else if (GET_CODE (x) == PLUS
3834 && s_register_operand (XEXP (x, 1), SImode)
3835 && !s_register_operand (XEXP (x, 0), SImode))
3837 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
3839 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
3842 if (flag_pic)
3844 /* We need to find and carefully transform any SYMBOL and LABEL
3845 references; so go back to the original address expression. */
3846 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
3848 if (new_x != orig_x)
3849 x = new_x;
3852 return x;
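/* As a worked instance of the small-negative-offset branch above: an
   address such as (plus (reg) (const_int -4)) is handled by forcing the
   whole sum into a scratch register (a single subtract) and then
   dereferencing that register with a zero offset, rather than spending two
   instructions materializing the constant in a register first.  This is
   only a restatement of the code above, not an additional guarantee.  */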
3857 #define REG_OR_SUBREG_REG(X) \
3858 (GET_CODE (X) == REG \
3859 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
3861 #define REG_OR_SUBREG_RTX(X) \
3862 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
3864 #ifndef COSTS_N_INSNS
3865 #define COSTS_N_INSNS(N) ((N) * 4 - 2)
3866 #endif
3867 static inline int
3868 thumb_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
3870 enum machine_mode mode = GET_MODE (x);
3872 switch (code)
3874 case ASHIFT:
3875 case ASHIFTRT:
3876 case LSHIFTRT:
3877 case ROTATERT:
3878 case PLUS:
3879 case MINUS:
3880 case COMPARE:
3881 case NEG:
3882 case NOT:
3883 return COSTS_N_INSNS (1);
3885 case MULT:
3886 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
3888 int cycles = 0;
3889 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
3891 while (i)
3893 i >>= 2;
3894 cycles++;
3896 return COSTS_N_INSNS (2) + cycles;
3898 return COSTS_N_INSNS (1) + 16;
3900 case SET:
3901 return (COSTS_N_INSNS (1)
3902 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
3903 + (GET_CODE (SET_DEST (x)) == MEM)));
3905 case CONST_INT:
3906 if (outer == SET)
3908 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
3909 return 0;
3910 if (thumb_shiftable_const (INTVAL (x)))
3911 return COSTS_N_INSNS (2);
3912 return COSTS_N_INSNS (3);
3914 else if ((outer == PLUS || outer == COMPARE)
3915 && INTVAL (x) < 256 && INTVAL (x) > -256)
3916 return 0;
3917 else if (outer == AND
3918 && INTVAL (x) < 256 && INTVAL (x) >= -256)
3919 return COSTS_N_INSNS (1);
3920 else if (outer == ASHIFT || outer == ASHIFTRT
3921 || outer == LSHIFTRT)
3922 return 0;
3923 return COSTS_N_INSNS (2);
3925 case CONST:
3926 case CONST_DOUBLE:
3927 case LABEL_REF:
3928 case SYMBOL_REF:
3929 return COSTS_N_INSNS (3);
3931 case UDIV:
3932 case UMOD:
3933 case DIV:
3934 case MOD:
3935 return 100;
3937 case TRUNCATE:
3938 return 99;
3940 case AND:
3941 case XOR:
3942 case IOR:
3943 /* XXX guess. */
3944 return 8;
3946 case MEM:
3947 /* XXX another guess. */
3948 /* Memory costs quite a lot for the first word, but subsequent words
3949 load at the equivalent of a single insn each. */
3950 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
3951 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
3952 ? 4 : 0));
3954 case IF_THEN_ELSE:
3955 /* XXX a guess. */
3956 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
3957 return 14;
3958 return 2;
3960 case ZERO_EXTEND:
3961 /* XXX still guessing. */
3962 switch (GET_MODE (XEXP (x, 0)))
3964 case QImode:
3965 return (1 + (mode == DImode ? 4 : 0)
3966 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
3968 case HImode:
3969 return (4 + (mode == DImode ? 4 : 0)
3970 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
3972 case SImode:
3973 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
3975 default:
3976 return 99;
3979 default:
3980 return 99;
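/* The constant-multiply branch of the MULT case above charges one cycle per
   two significant bits of the constant; for example the constant 100 has
   seven significant bits, the loop runs four times, and the reported cost
   is COSTS_N_INSNS (2) + 4.  The stand-alone sketch below restates that
   count; it is illustrative only and is not used by the compiler.  */
static inline int
thumb_mul_const_cycles_sketch (unsigned HOST_WIDE_INT i)
{
  int cycles = 0;
  while (i)
    {
      i >>= 2;
      cycles++;
    }
  return cycles;
}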
3985 /* Worker routine for arm_rtx_costs. */
3986 static inline int
3987 arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer)
3989 enum machine_mode mode = GET_MODE (x);
3990 enum rtx_code subcode;
3991 int extra_cost;
3993 switch (code)
3995 case MEM:
3996 /* Memory costs quite a lot for the first word, but subsequent words
3997 load at the equivalent of a single insn each. */
3998 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
3999 + (GET_CODE (x) == SYMBOL_REF
4000 && CONSTANT_POOL_ADDRESS_P (x) ? 4 : 0));
4002 case DIV:
4003 case MOD:
4004 case UDIV:
4005 case UMOD:
4006 return optimize_size ? COSTS_N_INSNS (2) : 100;
4008 case ROTATE:
4009 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
4010 return 4;
4011 /* Fall through */
4012 case ROTATERT:
4013 if (mode != SImode)
4014 return 8;
4015 /* Fall through */
4016 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
4017 if (mode == DImode)
4018 return (8 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : 8)
4019 + ((GET_CODE (XEXP (x, 0)) == REG
4020 || (GET_CODE (XEXP (x, 0)) == SUBREG
4021 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
4022 ? 0 : 8));
4023 return (1 + ((GET_CODE (XEXP (x, 0)) == REG
4024 || (GET_CODE (XEXP (x, 0)) == SUBREG
4025 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
4026 ? 0 : 4)
4027 + ((GET_CODE (XEXP (x, 1)) == REG
4028 || (GET_CODE (XEXP (x, 1)) == SUBREG
4029 && GET_CODE (SUBREG_REG (XEXP (x, 1))) == REG)
4030 || (GET_CODE (XEXP (x, 1)) == CONST_INT))
4031 ? 0 : 4));
4033 case MINUS:
4034 if (mode == DImode)
4035 return (4 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 8)
4036 + ((REG_OR_SUBREG_REG (XEXP (x, 0))
4037 || (GET_CODE (XEXP (x, 0)) == CONST_INT
4038 && const_ok_for_arm (INTVAL (XEXP (x, 0)))))
4039 ? 0 : 8));
4041 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4042 return (2 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4043 || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
4044 && arm_const_double_rtx (XEXP (x, 1))))
4045 ? 0 : 8)
4046 + ((REG_OR_SUBREG_REG (XEXP (x, 0))
4047 || (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
4048 && arm_const_double_rtx (XEXP (x, 0))))
4049 ? 0 : 8));
4051 if (((GET_CODE (XEXP (x, 0)) == CONST_INT
4052 && const_ok_for_arm (INTVAL (XEXP (x, 0)))
4053 && REG_OR_SUBREG_REG (XEXP (x, 1))))
4054 || (((subcode = GET_CODE (XEXP (x, 1))) == ASHIFT
4055 || subcode == ASHIFTRT || subcode == LSHIFTRT
4056 || subcode == ROTATE || subcode == ROTATERT
4057 || (subcode == MULT
4058 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
4059 && ((INTVAL (XEXP (XEXP (x, 1), 1)) &
4060 (INTVAL (XEXP (XEXP (x, 1), 1)) - 1)) == 0)))
4061 && REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 0))
4062 && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 1))
4063 || GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
4064 && REG_OR_SUBREG_REG (XEXP (x, 0))))
4065 return 1;
4066 /* Fall through */
4068 case PLUS:
4069 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4070 return (2 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
4071 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4072 || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
4073 && arm_const_double_rtx (XEXP (x, 1))))
4074 ? 0 : 8));
4076 /* Fall through */
4077 case AND: case XOR: case IOR:
4078 extra_cost = 0;
4080 /* Normally the frame registers will be split into reg+const during
4081 reload, so it is a bad idea to combine them with other instructions,
4082 since then they might not be moved outside of loops. As a compromise
4083 we allow integration with ops that have a constant as their second
4084 operand. */
4085 if ((REG_OR_SUBREG_REG (XEXP (x, 0))
4086 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
4087 && GET_CODE (XEXP (x, 1)) != CONST_INT)
4088 || (REG_OR_SUBREG_REG (XEXP (x, 0))
4089 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))))
4090 extra_cost = 4;
4092 if (mode == DImode)
4093 return (4 + extra_cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
4094 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4095 || (GET_CODE (XEXP (x, 1)) == CONST_INT
4096 && const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
4097 ? 0 : 8));
4099 if (REG_OR_SUBREG_REG (XEXP (x, 0)))
4100 return (1 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : extra_cost)
4101 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4102 || (GET_CODE (XEXP (x, 1)) == CONST_INT
4103 && const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
4104 ? 0 : 4));
4106 else if (REG_OR_SUBREG_REG (XEXP (x, 1)))
4107 return (1 + extra_cost
4108 + ((((subcode = GET_CODE (XEXP (x, 0))) == ASHIFT
4109 || subcode == LSHIFTRT || subcode == ASHIFTRT
4110 || subcode == ROTATE || subcode == ROTATERT
4111 || (subcode == MULT
4112 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4113 && ((INTVAL (XEXP (XEXP (x, 0), 1)) &
4114 (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0)))
4115 && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 0)))
4116 && ((REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 1)))
4117 || GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT))
4118 ? 0 : 4));
4120 return 8;
4122 case MULT:
4123 /* This should have been handled by the CPU specific routines. */
4124 gcc_unreachable ();
4126 case TRUNCATE:
4127 if (arm_arch3m && mode == SImode
4128 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
4129 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
4130 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
4131 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
4132 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
4133 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
4134 return 8;
4135 return 99;
4137 case NEG:
4138 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4139 return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 6);
4140 /* Fall through */
4141 case NOT:
4142 if (mode == DImode)
4143 return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
4145 return 1 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
4147 case IF_THEN_ELSE:
4148 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
4149 return 14;
4150 return 2;
4152 case COMPARE:
4153 return 1;
4155 case ABS:
4156 return 4 + (mode == DImode ? 4 : 0);
4158 case SIGN_EXTEND:
4159 if (GET_MODE (XEXP (x, 0)) == QImode)
4160 return (4 + (mode == DImode ? 4 : 0)
4161 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4162 /* Fall through */
4163 case ZERO_EXTEND:
4164 switch (GET_MODE (XEXP (x, 0)))
4166 case QImode:
4167 return (1 + (mode == DImode ? 4 : 0)
4168 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4170 case HImode:
4171 return (4 + (mode == DImode ? 4 : 0)
4172 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4174 case SImode:
4175 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4177 case V8QImode:
4178 case V4HImode:
4179 case V2SImode:
4180 case V4QImode:
4181 case V2HImode:
4182 return 1;
4184 default:
4185 gcc_unreachable ();
4187 gcc_unreachable ();
4189 case CONST_INT:
4190 if (const_ok_for_arm (INTVAL (x)))
4191 return outer == SET ? 2 : -1;
4192 else if (outer == AND
4193 && const_ok_for_arm (~INTVAL (x)))
4194 return -1;
4195 else if ((outer == COMPARE
4196 || outer == PLUS || outer == MINUS)
4197 && const_ok_for_arm (-INTVAL (x)))
4198 return -1;
4199 else
4200 return 5;
4202 case CONST:
4203 case LABEL_REF:
4204 case SYMBOL_REF:
4205 return 6;
4207 case CONST_DOUBLE:
4208 if (arm_const_double_rtx (x))
4209 return outer == SET ? 2 : -1;
4210 else if ((outer == COMPARE || outer == PLUS)
4211 && neg_const_double_rtx_ok_for_fpa (x))
4212 return -1;
4213 return 7;
4215 default:
4216 return 99;
4220 /* RTX costs when optimizing for size. */
4221 static bool
4222 arm_size_rtx_costs (rtx x, int code, int outer_code, int *total)
4224 enum machine_mode mode = GET_MODE (x);
4226 if (TARGET_THUMB)
4228 /* XXX TBD. For now, use the standard costs. */
4229 *total = thumb_rtx_costs (x, code, outer_code);
4230 return true;
4233 switch (code)
4235 case MEM:
4236 /* A memory access costs 1 insn if the mode is small or the address is
4237 a single register; otherwise it costs one insn per word. */
4238 if (REG_P (XEXP (x, 0)))
4239 *total = COSTS_N_INSNS (1);
4240 else
4241 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
4242 return true;
4244 case DIV:
4245 case MOD:
4246 case UDIV:
4247 case UMOD:
4248 /* Needs a libcall, so it costs about this. */
4249 *total = COSTS_N_INSNS (2);
4250 return false;
4252 case ROTATE:
4253 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
4255 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code);
4256 return true;
4258 /* Fall through */
4259 case ROTATERT:
4260 case ASHIFT:
4261 case LSHIFTRT:
4262 case ASHIFTRT:
4263 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
4265 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code);
4266 return true;
4268 else if (mode == SImode)
4270 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code);
4271 /* Slightly disparage register shifts, but not by much. */
4272 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
4273 *total += 1 + rtx_cost (XEXP (x, 1), code);
4274 return true;
4277 /* Needs a libcall. */
4278 *total = COSTS_N_INSNS (2);
4279 return false;
4281 case MINUS:
4282 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
4284 *total = COSTS_N_INSNS (1);
4285 return false;
4288 if (mode == SImode)
4290 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
4291 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
4293 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
4294 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
4295 || subcode1 == ROTATE || subcode1 == ROTATERT
4296 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
4297 || subcode1 == ASHIFTRT)
4299 /* It's just the cost of the two operands. */
4300 *total = 0;
4301 return false;
4304 *total = COSTS_N_INSNS (1);
4305 return false;
4308 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
4309 return false;
4311 case PLUS:
4312 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
4314 *total = COSTS_N_INSNS (1);
4315 return false;
4318 /* Fall through */
4319 case AND: case XOR: case IOR:
4320 if (mode == SImode)
4322 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
4324 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
4325 || subcode == LSHIFTRT || subcode == ASHIFTRT
4326 || (code == AND && subcode == NOT))
4328 /* It's just the cost of the two operands. */
4329 *total = 0;
4330 return false;
4334 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
4335 return false;
4337 case MULT:
4338 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
4339 return false;
4341 case NEG:
4342 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
4343 *total = COSTS_N_INSNS (1);
4344 /* Fall through */
4345 case NOT:
4346 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
4348 return false;
4350 case IF_THEN_ELSE:
4351 *total = 0;
4352 return false;
4354 case COMPARE:
4355 if (cc_register (XEXP (x, 0), VOIDmode))
4356 *total = 0;
4357 else
4358 *total = COSTS_N_INSNS (1);
4359 return false;
4361 case ABS:
4362 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
4363 *total = COSTS_N_INSNS (1);
4364 else
4365 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
4366 return false;
4368 case SIGN_EXTEND:
4369 *total = 0;
4370 if (GET_MODE_SIZE (GET_MODE (XEXP (x, 0))) < 4)
4372 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
4373 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
4375 if (mode == DImode)
4376 *total += COSTS_N_INSNS (1);
4377 return false;
4379 case ZERO_EXTEND:
4380 *total = 0;
4381 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
4383 switch (GET_MODE (XEXP (x, 0)))
4385 case QImode:
4386 *total += COSTS_N_INSNS (1);
4387 break;
4389 case HImode:
4390 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
4392 case SImode:
4393 break;
4395 default:
4396 *total += COSTS_N_INSNS (2);
4400 if (mode == DImode)
4401 *total += COSTS_N_INSNS (1);
4403 return false;
4405 case CONST_INT:
4406 if (const_ok_for_arm (INTVAL (x)))
4407 *total = COSTS_N_INSNS (outer_code == SET ? 1 : 0);
4408 else if (const_ok_for_arm (~INTVAL (x)))
4409 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
4410 else if (const_ok_for_arm (-INTVAL (x)))
4412 if (outer_code == COMPARE || outer_code == PLUS
4413 || outer_code == MINUS)
4414 *total = 0;
4415 else
4416 *total = COSTS_N_INSNS (1);
4418 else
4419 *total = COSTS_N_INSNS (2);
4420 return true;
4422 case CONST:
4423 case LABEL_REF:
4424 case SYMBOL_REF:
4425 *total = COSTS_N_INSNS (2);
4426 return true;
4428 case CONST_DOUBLE:
4429 *total = COSTS_N_INSNS (4);
4430 return true;
4432 default:
4433 if (mode != VOIDmode)
4434 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
4435 else
4436 *total = COSTS_N_INSNS (4); /* Who knows? */
4437 return false;
4441 /* RTX costs for cores with a slow MUL implementation. */
4443 static bool
4444 arm_slowmul_rtx_costs (rtx x, int code, int outer_code, int *total)
4446 enum machine_mode mode = GET_MODE (x);
4448 if (TARGET_THUMB)
4450 *total = thumb_rtx_costs (x, code, outer_code);
4451 return true;
4454 switch (code)
4456 case MULT:
4457 if (GET_MODE_CLASS (mode) == MODE_FLOAT
4458 || mode == DImode)
4460 *total = 30;
4461 return true;
4464 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4466 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
4467 & (unsigned HOST_WIDE_INT) 0xffffffff);
4468 int cost, const_ok = const_ok_for_arm (i);
4469 int j, booth_unit_size;
4471 /* Tune as appropriate. */
4472 cost = const_ok ? 4 : 8;
4473 booth_unit_size = 2;
4474 for (j = 0; i && j < 32; j += booth_unit_size)
4476 i >>= booth_unit_size;
4477 cost += 2;
4480 *total = cost;
4481 return true;
4484 *total = 30 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
4485 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
4486 return true;
4488 default:
4489 *total = arm_rtx_costs_1 (x, code, outer_code);
4490 return true;
4495 /* RTX cost for cores with a fast multiply unit (M variants). */
4497 static bool
4498 arm_fastmul_rtx_costs (rtx x, int code, int outer_code, int *total)
4500 enum machine_mode mode = GET_MODE (x);
4502 if (TARGET_THUMB)
4504 *total = thumb_rtx_costs (x, code, outer_code);
4505 return true;
4508 switch (code)
4510 case MULT:
4511 /* There is no point basing this on the tuning, since it is always the
4512 fast variant if it exists at all. */
4513 if (mode == DImode
4514 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
4515 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4516 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
4518 *total = 8;
4519 return true;
4523 if (GET_MODE_CLASS (mode) == MODE_FLOAT
4524 || mode == DImode)
4526 *total = 30;
4527 return true;
4530 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4532 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
4533 & (unsigned HOST_WIDE_INT) 0xffffffff);
4534 int cost, const_ok = const_ok_for_arm (i);
4535 int j, booth_unit_size;
4537 /* Tune as appropriate. */
4538 cost = const_ok ? 4 : 8;
4539 booth_unit_size = 8;
4540 for (j = 0; i && j < 32; j += booth_unit_size)
4542 i >>= booth_unit_size;
4543 cost += 2;
4546 *total = cost;
4547 return true;
4550 *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
4551 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
4552 return true;
4554 default:
4555 *total = arm_rtx_costs_1 (x, code, outer_code);
4556 return true;
4561 /* RTX cost for XScale CPUs. */
4563 static bool
4564 arm_xscale_rtx_costs (rtx x, int code, int outer_code, int *total)
4566 enum machine_mode mode = GET_MODE (x);
4568 if (TARGET_THUMB)
4570 *total = thumb_rtx_costs (x, code, outer_code);
4571 return true;
4574 switch (code)
4576 case MULT:
4577 /* There is no point basing this on the tuning, since it is always the
4578 fast variant if it exists at all. */
4579 if (mode == DImode
4580 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
4581 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4582 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
4584 *total = 8;
4585 return true;
4589 if (GET_MODE_CLASS (mode) == MODE_FLOAT
4590 || mode == DImode)
4592 *total = 30;
4593 return true;
4596 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4598 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
4599 & (unsigned HOST_WIDE_INT) 0xffffffff);
4600 int cost, const_ok = const_ok_for_arm (i);
4601 unsigned HOST_WIDE_INT masked_const;
4603 /* The cost will be related to two insns.
4604 First a load of the constant (MOV or LDR), then a multiply. */
4605 cost = 2;
4606 if (! const_ok)
4607 cost += 1; /* LDR is probably more expensive because
4608 of longer result latency. */
4609 masked_const = i & 0xffff8000;
4610 if (masked_const != 0 && masked_const != 0xffff8000)
4612 masked_const = i & 0xf8000000;
4613 if (masked_const == 0 || masked_const == 0xf8000000)
4614 cost += 1;
4615 else
4616 cost += 2;
4618 *total = cost;
4619 return true;
4622 *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
4623 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
4624 return true;
4626 case COMPARE:
4627 /* A COMPARE of a MULT is slow on XScale; the muls instruction
4628 will stall until the multiplication is complete. */
4629 if (GET_CODE (XEXP (x, 0)) == MULT)
4630 *total = 4 + rtx_cost (XEXP (x, 0), code);
4631 else
4632 *total = arm_rtx_costs_1 (x, code, outer_code);
4633 return true;
4635 default:
4636 *total = arm_rtx_costs_1 (x, code, outer_code);
4637 return true;
4642 /* RTX costs for 9e (and later) cores. */
4644 static bool
4645 arm_9e_rtx_costs (rtx x, int code, int outer_code, int *total)
4647 enum machine_mode mode = GET_MODE (x);
4648 int nonreg_cost;
4649 int cost;
4651 if (TARGET_THUMB)
4653 switch (code)
4655 case MULT:
4656 *total = COSTS_N_INSNS (3);
4657 return true;
4659 default:
4660 *total = thumb_rtx_costs (x, code, outer_code);
4661 return true;
4665 switch (code)
4667 case MULT:
4668 /* There is no point basing this on the tuning, since it is always the
4669 fast variant if it exists at all. */
4670 if (mode == DImode
4671 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
4672 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4673 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
4675 *total = 3;
4676 return true;
4680 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4682 *total = 30;
4683 return true;
4685 if (mode == DImode)
4687 cost = 7;
4688 nonreg_cost = 8;
4690 else
4692 cost = 2;
4693 nonreg_cost = 4;
4697 *total = cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : nonreg_cost)
4698 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : nonreg_cost);
4699 return true;
4701 default:
4702 *total = arm_rtx_costs_1 (x, code, outer_code);
4703 return true;
4706 /* All address computations that can be done are free, but rtx cost returns
4707 the same for practically all of them. So we weight the different types
4708 of address here in the order (most preferred first):
4709 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
4710 static inline int
4711 arm_arm_address_cost (rtx x)
4713 enum rtx_code c = GET_CODE (x);
4715 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
4716 return 0;
4717 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
4718 return 10;
4720 if (c == PLUS || c == MINUS)
4722 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
4723 return 2;
4725 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
4726 return 3;
4728 return 4;
4731 return 6;
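/* Reading the weights above with concrete addresses: a post-increment such
   as (post_inc (reg)) costs 0, a constant-pool SYMBOL_REF or LABEL_REF
   costs 10, a (plus (reg) (const_int 8)) costs 4, a sum with an arithmetic
   sub-expression (e.g. a shifted index register) costs 3, and a bare
   register falls through to 6.  This merely restates the code above.  */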
4734 static inline int
4735 arm_thumb_address_cost (rtx x)
4737 enum rtx_code c = GET_CODE (x);
4739 if (c == REG)
4740 return 1;
4741 if (c == PLUS
4742 && GET_CODE (XEXP (x, 0)) == REG
4743 && GET_CODE (XEXP (x, 1)) == CONST_INT)
4744 return 1;
4746 return 2;
4749 static int
4750 arm_address_cost (rtx x)
4752 return TARGET_ARM ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
4755 static int
4756 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
4758 rtx i_pat, d_pat;
4760 /* Some true dependencies can have a higher cost depending
4761 on precisely how certain input operands are used. */
4762 if (arm_tune_xscale
4763 && REG_NOTE_KIND (link) == 0
4764 && recog_memoized (insn) >= 0
4765 && recog_memoized (dep) >= 0)
4767 int shift_opnum = get_attr_shift (insn);
4768 enum attr_type attr_type = get_attr_type (dep);
4770 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
4771 operand for INSN. If we have a shifted input operand and the
4772 instruction we depend on is another ALU instruction, then we may
4773 have to account for an additional stall. */
4774 if (shift_opnum != 0
4775 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
4777 rtx shifted_operand;
4778 int opno;
4780 /* Get the shifted operand. */
4781 extract_insn (insn);
4782 shifted_operand = recog_data.operand[shift_opnum];
4784 /* Iterate over all the operands in DEP. If we write an operand
4785 that overlaps with SHIFTED_OPERAND, then we have to increase the
4786 cost of this dependency. */
4787 extract_insn (dep);
4788 preprocess_constraints ();
4789 for (opno = 0; opno < recog_data.n_operands; opno++)
4791 /* We can ignore strict inputs. */
4792 if (recog_data.operand_type[opno] == OP_IN)
4793 continue;
4795 if (reg_overlap_mentioned_p (recog_data.operand[opno],
4796 shifted_operand))
4797 return 2;
4802 /* XXX This is not strictly true for the FPA. */
4803 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
4804 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
4805 return 0;
4807 /* Call insns don't incur a stall, even if they follow a load. */
4808 if (REG_NOTE_KIND (link) == 0
4809 && GET_CODE (insn) == CALL_INSN)
4810 return 1;
4812 if ((i_pat = single_set (insn)) != NULL
4813 && GET_CODE (SET_SRC (i_pat)) == MEM
4814 && (d_pat = single_set (dep)) != NULL
4815 && GET_CODE (SET_DEST (d_pat)) == MEM)
4817 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
4818 /* This is a load after a store; there is no conflict if the load reads
4819 from a cached area. Assume that loads from the stack, and from the
4820 constant pool are cached, and that others will miss. This is a
4821 hack. */
4823 if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem))
4824 || reg_mentioned_p (stack_pointer_rtx, src_mem)
4825 || reg_mentioned_p (frame_pointer_rtx, src_mem)
4826 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
4827 return 1;
4830 return cost;
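/* As a concrete reading of the paths above: on XScale, if INSN uses a
   shifted operand and the instruction it depends on is an ALU-shift
   instruction writing a register that overlaps that shifted operand, the
   dependency cost is raised to 2; anti- and output-dependencies are
   reported as free, and a call insn is given a cost of 1 even when it
   follows a load.  */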
4833 static int fp_consts_inited = 0;
4835 /* Only zero is valid for VFP. Other values are also valid for FPA. */
4836 static const char * const strings_fp[8] =
4838 "0", "1", "2", "3",
4839 "4", "5", "0.5", "10"
4842 static REAL_VALUE_TYPE values_fp[8];
4844 static void
4845 init_fp_table (void)
4847 int i;
4848 REAL_VALUE_TYPE r;
4850 if (TARGET_VFP)
4851 fp_consts_inited = 1;
4852 else
4853 fp_consts_inited = 8;
4855 for (i = 0; i < fp_consts_inited; i++)
4857 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
4858 values_fp[i] = r;
4862 /* Return TRUE if rtx X is a valid immediate FP constant. */
4864 arm_const_double_rtx (rtx x)
4866 REAL_VALUE_TYPE r;
4867 int i;
4869 if (!fp_consts_inited)
4870 init_fp_table ();
4872 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4873 if (REAL_VALUE_MINUS_ZERO (r))
4874 return 0;
4876 for (i = 0; i < fp_consts_inited; i++)
4877 if (REAL_VALUES_EQUAL (r, values_fp[i]))
4878 return 1;
4880 return 0;
4883 /* Return TRUE if the negation of rtx X is a valid immediate FPA constant. */
4885 neg_const_double_rtx_ok_for_fpa (rtx x)
4887 REAL_VALUE_TYPE r;
4888 int i;
4890 if (!fp_consts_inited)
4891 init_fp_table ();
4893 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4894 r = REAL_VALUE_NEGATE (r);
4895 if (REAL_VALUE_MINUS_ZERO (r))
4896 return 0;
4898 for (i = 0; i < 8; i++)
4899 if (REAL_VALUES_EQUAL (r, values_fp[i]))
4900 return 1;
4902 return 0;
4905 /* Predicates for `match_operand' and `match_operator'. */
4907 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
4909 cirrus_memory_offset (rtx op)
4911 /* Reject eliminable registers. */
4912 if (! (reload_in_progress || reload_completed)
4913 && ( reg_mentioned_p (frame_pointer_rtx, op)
4914 || reg_mentioned_p (arg_pointer_rtx, op)
4915 || reg_mentioned_p (virtual_incoming_args_rtx, op)
4916 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
4917 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
4918 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
4919 return 0;
4921 if (GET_CODE (op) == MEM)
4923 rtx ind;
4925 ind = XEXP (op, 0);
4927 /* Match: (mem (reg)). */
4928 if (GET_CODE (ind) == REG)
4929 return 1;
4931 /* Match:
4932 (mem (plus (reg)
4933 (const))). */
4934 if (GET_CODE (ind) == PLUS
4935 && GET_CODE (XEXP (ind, 0)) == REG
4936 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
4937 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
4938 return 1;
4941 return 0;
4944 /* Return TRUE if OP is a valid VFP memory address pattern.
4945 WB is true if writeback address modes are allowed. */
4948 arm_coproc_mem_operand (rtx op, bool wb)
4950 rtx ind;
4952 /* Reject eliminable registers. */
4953 if (! (reload_in_progress || reload_completed)
4954 && ( reg_mentioned_p (frame_pointer_rtx, op)
4955 || reg_mentioned_p (arg_pointer_rtx, op)
4956 || reg_mentioned_p (virtual_incoming_args_rtx, op)
4957 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
4958 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
4959 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
4960 return FALSE;
4962 /* Constants are converted into offsets from labels. */
4963 if (GET_CODE (op) != MEM)
4964 return FALSE;
4966 ind = XEXP (op, 0);
4968 if (reload_completed
4969 && (GET_CODE (ind) == LABEL_REF
4970 || (GET_CODE (ind) == CONST
4971 && GET_CODE (XEXP (ind, 0)) == PLUS
4972 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
4973 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
4974 return TRUE;
4976 /* Match: (mem (reg)). */
4977 if (GET_CODE (ind) == REG)
4978 return arm_address_register_rtx_p (ind, 0);
4980 /* Autoincrement addressing modes. */
4981 if (wb
4982 && (GET_CODE (ind) == PRE_INC
4983 || GET_CODE (ind) == POST_INC
4984 || GET_CODE (ind) == PRE_DEC
4985 || GET_CODE (ind) == POST_DEC))
4986 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
4988 if (wb
4989 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
4990 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
4991 && GET_CODE (XEXP (ind, 1)) == PLUS
4992 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
4993 ind = XEXP (ind, 1);
4995 /* Match:
4996 (plus (reg)
4997 (const)). */
4998 if (GET_CODE (ind) == PLUS
4999 && GET_CODE (XEXP (ind, 0)) == REG
5000 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
5001 && GET_CODE (XEXP (ind, 1)) == CONST_INT
5002 && INTVAL (XEXP (ind, 1)) > -1024
5003 && INTVAL (XEXP (ind, 1)) < 1024
5004 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
5005 return TRUE;
5007 return FALSE;
5010 /* Return true if X is a register that will be eliminated later on. */
5012 arm_eliminable_register (rtx x)
5014 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
5015 || REGNO (x) == ARG_POINTER_REGNUM
5016 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
5017 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
5020 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
5021 VFP registers. Otherwise return NO_REGS. */
5023 enum reg_class
5024 vfp_secondary_reload_class (enum machine_mode mode, rtx x)
5026 if (arm_coproc_mem_operand (x, FALSE) || s_register_operand (x, mode))
5027 return NO_REGS;
5029 return GENERAL_REGS;
5032 /* Values which must be returned in the most-significant end of the return
5033 register. */
5035 static bool
5036 arm_return_in_msb (tree valtype)
5038 return (TARGET_AAPCS_BASED
5039 && BYTES_BIG_ENDIAN
5040 && (AGGREGATE_TYPE_P (valtype)
5041 || TREE_CODE (valtype) == COMPLEX_TYPE));
5044 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
5045 Used by the Cirrus Maverick code, which has to work around
5046 a hardware bug triggered by such instructions. */
5047 static bool
5048 arm_memory_load_p (rtx insn)
5050 rtx body, lhs, rhs;
5052 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
5053 return false;
5055 body = PATTERN (insn);
5057 if (GET_CODE (body) != SET)
5058 return false;
5060 lhs = XEXP (body, 0);
5061 rhs = XEXP (body, 1);
5063 lhs = REG_OR_SUBREG_RTX (lhs);
5065 /* If the destination is not a general purpose
5066 register we do not have to worry. */
5067 if (GET_CODE (lhs) != REG
5068 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
5069 return false;
5071 /* As well as loads from memory we also have to react
5072 to loads of invalid constants which will be turned
5073 into loads from the minipool. */
5074 return (GET_CODE (rhs) == MEM
5075 || GET_CODE (rhs) == SYMBOL_REF
5076 || note_invalid_constants (insn, -1, false));
5079 /* Return TRUE if INSN is a Cirrus instruction. */
5080 static bool
5081 arm_cirrus_insn_p (rtx insn)
5083 enum attr_cirrus attr;
5085 /* get_attr cannot accept USE or CLOBBER. */
5086 if (!insn
5087 || GET_CODE (insn) != INSN
5088 || GET_CODE (PATTERN (insn)) == USE
5089 || GET_CODE (PATTERN (insn)) == CLOBBER)
5090 return 0;
5092 attr = get_attr_cirrus (insn);
5094 return attr != CIRRUS_NOT;
5097 /* Cirrus reorg for invalid instruction combinations. */
5098 static void
5099 cirrus_reorg (rtx first)
5101 enum attr_cirrus attr;
5102 rtx body = PATTERN (first);
5103 rtx t;
5104 int nops;
5106 /* Any branch must be followed by 2 non-Cirrus instructions. */
5107 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
5109 nops = 0;
5110 t = next_nonnote_insn (first);
5112 if (arm_cirrus_insn_p (t))
5113 ++ nops;
5115 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
5116 ++ nops;
5118 while (nops --)
5119 emit_insn_after (gen_nop (), first);
5121 return;
5124 /* (float (blah)) is in parallel with a clobber. */
5125 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
5126 body = XVECEXP (body, 0, 0);
5128 if (GET_CODE (body) == SET)
5130 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
5132 /* cfldrd, cfldr64, cfstrd, cfstr64 must
5133 be followed by a non-Cirrus insn. */
5134 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
5136 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
5137 emit_insn_after (gen_nop (), first);
5139 return;
5141 else if (arm_memory_load_p (first))
5143 unsigned int arm_regno;
5145 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
5146 ldr/cfmv64hr combination where the Rd field is the same
5147 in both instructions must be split with a non-Cirrus
5148 insn. Example:
5150 ldr r0, blah
5152 cfmvsr mvf0, r0. */
5154 /* Get Arm register number for ldr insn. */
5155 if (GET_CODE (lhs) == REG)
5156 arm_regno = REGNO (lhs);
5157 else
5159 gcc_assert (GET_CODE (rhs) == REG);
5160 arm_regno = REGNO (rhs);
5163 /* Next insn. */
5164 first = next_nonnote_insn (first);
5166 if (! arm_cirrus_insn_p (first))
5167 return;
5169 body = PATTERN (first);
5171 /* (float (blah)) is in parallel with a clobber. */
5172 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
5173 body = XVECEXP (body, 0, 0);
5175 if (GET_CODE (body) == FLOAT)
5176 body = XEXP (body, 0);
5178 if (get_attr_cirrus (first) == CIRRUS_MOVE
5179 && GET_CODE (XEXP (body, 1)) == REG
5180 && arm_regno == REGNO (XEXP (body, 1)))
5181 emit_insn_after (gen_nop (), first);
5183 return;
5187 /* get_attr cannot accept USE or CLOBBER. */
5188 if (!first
5189 || GET_CODE (first) != INSN
5190 || GET_CODE (PATTERN (first)) == USE
5191 || GET_CODE (PATTERN (first)) == CLOBBER)
5192 return;
5194 attr = get_attr_cirrus (first);
5196 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
5197 must be followed by a non-coprocessor instruction. */
5198 if (attr == CIRRUS_COMPARE)
5200 nops = 0;
5202 t = next_nonnote_insn (first);
5204 if (arm_cirrus_insn_p (t))
5205 ++ nops;
5207 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
5208 ++ nops;
5210 while (nops --)
5211 emit_insn_after (gen_nop (), first);
5213 return;
5217 /* Return TRUE if X references a SYMBOL_REF. */
5219 symbol_mentioned_p (rtx x)
5221 const char * fmt;
5222 int i;
5224 if (GET_CODE (x) == SYMBOL_REF)
5225 return 1;
5227 fmt = GET_RTX_FORMAT (GET_CODE (x));
5229 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5231 if (fmt[i] == 'E')
5233 int j;
5235 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5236 if (symbol_mentioned_p (XVECEXP (x, i, j)))
5237 return 1;
5239 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
5240 return 1;
5243 return 0;
5246 /* Return TRUE if X references a LABEL_REF. */
5248 label_mentioned_p (rtx x)
5250 const char * fmt;
5251 int i;
5253 if (GET_CODE (x) == LABEL_REF)
5254 return 1;
5256 fmt = GET_RTX_FORMAT (GET_CODE (x));
5257 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5259 if (fmt[i] == 'E')
5261 int j;
5263 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5264 if (label_mentioned_p (XVECEXP (x, i, j)))
5265 return 1;
5267 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
5268 return 1;
5271 return 0;
5274 enum rtx_code
5275 minmax_code (rtx x)
5277 enum rtx_code code = GET_CODE (x);
5279 switch (code)
5281 case SMAX:
5282 return GE;
5283 case SMIN:
5284 return LE;
5285 case UMIN:
5286 return LEU;
5287 case UMAX:
5288 return GEU;
5289 default:
5290 gcc_unreachable ();
5294 /* Return 1 if memory locations are adjacent. */
5296 adjacent_mem_locations (rtx a, rtx b)
5298 /* We don't guarantee to preserve the order of these memory refs. */
5299 if (volatile_refs_p (a) || volatile_refs_p (b))
5300 return 0;
5302 if ((GET_CODE (XEXP (a, 0)) == REG
5303 || (GET_CODE (XEXP (a, 0)) == PLUS
5304 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
5305 && (GET_CODE (XEXP (b, 0)) == REG
5306 || (GET_CODE (XEXP (b, 0)) == PLUS
5307 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
5309 HOST_WIDE_INT val0 = 0, val1 = 0;
5310 rtx reg0, reg1;
5311 int val_diff;
5313 if (GET_CODE (XEXP (a, 0)) == PLUS)
5315 reg0 = XEXP (XEXP (a, 0), 0);
5316 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
5318 else
5319 reg0 = XEXP (a, 0);
5321 if (GET_CODE (XEXP (b, 0)) == PLUS)
5323 reg1 = XEXP (XEXP (b, 0), 0);
5324 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
5326 else
5327 reg1 = XEXP (b, 0);
5329 /* Don't accept any offset that will require multiple
5330 instructions to handle, since this would cause the
5331 arith_adjacentmem pattern to output an overlong sequence. */
5332 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
5333 return 0;
5335 /* Don't allow an eliminable register: register elimination can make
5336 the offset too large. */
5337 if (arm_eliminable_register (reg0))
5338 return 0;
5340 val_diff = val1 - val0;
5342 if (arm_ld_sched)
5344 /* If the target has load delay slots, then there's no benefit
5345 to using an ldm instruction unless the offset is zero and
5346 we are optimizing for size. */
5347 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
5348 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
5349 && (val_diff == 4 || val_diff == -4));
5352 return ((REGNO (reg0) == REGNO (reg1))
5353 && (val_diff == 4 || val_diff == -4));
5356 return 0;
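/* As a concrete reading of the checks above: (mem (reg r4)) and
   (mem (plus (reg r4) (const_int 4))) are adjacent (offsets 0 and 4 from
   the same base, difference 4), provided neither reference is volatile and
   the base is not an eliminable register.  On cores with load scheduling
   (arm_ld_sched) the pair is additionally only accepted when optimizing
   for size and one of the offsets is 0 or 4.  */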
5360 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
5361 HOST_WIDE_INT *load_offset)
5363 int unsorted_regs[4];
5364 HOST_WIDE_INT unsorted_offsets[4];
5365 int order[4];
5366 int base_reg = -1;
5367 int i;
5369 /* Can only handle 2, 3, or 4 insns at present,
5370 though could be easily extended if required. */
5371 gcc_assert (nops >= 2 && nops <= 4);
5373 /* Loop over the operands and check that the memory references are
5374 suitable (i.e. immediate offsets from the same base register). At
5375 the same time, extract the target register, and the memory
5376 offsets. */
5377 for (i = 0; i < nops; i++)
5379 rtx reg;
5380 rtx offset;
5382 /* Convert a subreg of a mem into the mem itself. */
5383 if (GET_CODE (operands[nops + i]) == SUBREG)
5384 operands[nops + i] = alter_subreg (operands + (nops + i));
5386 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
5388 /* Don't reorder volatile memory references; it doesn't seem worth
5389 looking for the case where the order is ok anyway. */
5390 if (MEM_VOLATILE_P (operands[nops + i]))
5391 return 0;
5393 offset = const0_rtx;
5395 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
5396 || (GET_CODE (reg) == SUBREG
5397 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
5398 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
5399 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
5400 == REG)
5401 || (GET_CODE (reg) == SUBREG
5402 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
5403 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
5404 == CONST_INT)))
5406 if (i == 0)
5408 base_reg = REGNO (reg);
5409 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
5410 ? REGNO (operands[i])
5411 : REGNO (SUBREG_REG (operands[i])));
5412 order[0] = 0;
5414 else
5416 if (base_reg != (int) REGNO (reg))
5417 /* Not addressed from the same base register. */
5418 return 0;
5420 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
5421 ? REGNO (operands[i])
5422 : REGNO (SUBREG_REG (operands[i])));
5423 if (unsorted_regs[i] < unsorted_regs[order[0]])
5424 order[0] = i;
5427 /* If it isn't an integer register, or if it overwrites the
5428 base register but isn't the last insn in the list, then
5429 we can't do this. */
5430 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14
5431 || (i != nops - 1 && unsorted_regs[i] == base_reg))
5432 return 0;
5434 unsorted_offsets[i] = INTVAL (offset);
5436 else
5437 /* Not a suitable memory address. */
5438 return 0;
5441 /* All the useful information has now been extracted from the
5442 operands into unsorted_regs and unsorted_offsets; additionally,
5443 order[0] has been set to the lowest numbered register in the
5444 list. Sort the registers into order, and check that the memory
5445 offsets are ascending and adjacent. */
5447 for (i = 1; i < nops; i++)
5449 int j;
5451 order[i] = order[i - 1];
5452 for (j = 0; j < nops; j++)
5453 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
5454 && (order[i] == order[i - 1]
5455 || unsorted_regs[j] < unsorted_regs[order[i]]))
5456 order[i] = j;
5458 /* Have we found a suitable register? If not, one must be used more
5459 than once. */
5460 if (order[i] == order[i - 1])
5461 return 0;
5463 /* Is the memory address adjacent and ascending? */
5464 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
5465 return 0;
5468 if (base)
5470 *base = base_reg;
5472 for (i = 0; i < nops; i++)
5473 regs[i] = unsorted_regs[order[i]];
5475 *load_offset = unsorted_offsets[order[0]];
5478 if (unsorted_offsets[order[0]] == 0)
5479 return 1; /* ldmia */
5481 if (unsorted_offsets[order[0]] == 4)
5482 return 2; /* ldmib */
5484 if (unsorted_offsets[order[nops - 1]] == 0)
5485 return 3; /* ldmda */
5487 if (unsorted_offsets[order[nops - 1]] == -4)
5488 return 4; /* ldmdb */
5490 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
5491 if the offset isn't small enough. The reason 2 ldrs are faster
5492 is because these ARMs are able to do more than one cache access
5493 in a single cycle. The ARM9 and StrongARM have Harvard caches,
5494 whilst the ARM8 has a double bandwidth cache. This means that
5495 these cores can do both an instruction fetch and a data fetch in
5496 a single cycle, so the trick of calculating the address into a
5497 scratch register (one of the result regs) and then doing a load
5498 multiple actually becomes slower (and no smaller in code size).
5499 That is the transformation
5501 ldr rd1, [rbase + offset]
5502 ldr rd2, [rbase + offset + 4]
5504 to
5506 add rd1, rbase, offset
5507 ldmia rd1, {rd1, rd2}
5509 produces worse code -- '3 cycles + any stalls on rd2' instead of
5510 '2 cycles + any stalls on rd2'. On ARMs with only one cache
5511 access per cycle, the first sequence could never complete in less
5512 than 6 cycles, whereas the ldm sequence would only take 5 and
5513 would make better use of sequential accesses if not hitting the
5514 cache.
5516 We cheat here and test 'arm_ld_sched' which we currently know to
5517 only be true for the ARM8, ARM9 and StrongARM. If this ever
5518 changes, then the test below needs to be reworked. */
5519 if (nops == 2 && arm_ld_sched)
5520 return 0;
5522 /* Can't do it without setting up the offset; only do this if it takes
5523 no more than one insn. */
5524 return (const_ok_for_arm (unsorted_offsets[order[0]])
5525 || const_ok_for_arm (-unsorted_offsets[order[0]])) ? 5 : 0;
5528 const char *
5529 emit_ldm_seq (rtx *operands, int nops)
5531 int regs[4];
5532 int base_reg;
5533 HOST_WIDE_INT offset;
5534 char buf[100];
5535 int i;
5537 switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset))
5539 case 1:
5540 strcpy (buf, "ldm%?ia\t");
5541 break;
5543 case 2:
5544 strcpy (buf, "ldm%?ib\t");
5545 break;
5547 case 3:
5548 strcpy (buf, "ldm%?da\t");
5549 break;
5551 case 4:
5552 strcpy (buf, "ldm%?db\t");
5553 break;
5555 case 5:
5556 if (offset >= 0)
5557 sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
5558 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
5559 (long) offset);
5560 else
5561 sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
5562 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
5563 (long) -offset);
5564 output_asm_insn (buf, operands);
5565 base_reg = regs[0];
5566 strcpy (buf, "ldm%?ia\t");
5567 break;
5569 default:
5570 gcc_unreachable ();
5573 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
5574 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
5576 for (i = 1; i < nops; i++)
5577 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
5578 reg_names[regs[i]]);
5580 strcat (buf, "}\t%@ phole ldm");
5582 output_asm_insn (buf, operands);
5583 return "";
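/* For instance, loading r4..r7 from [r0], [r0, #4], [r0, #8] and [r0, #12]
   matches case 1 of load_multiple_sequence and is emitted as a single
     ldmia r0, {r4, r5, r6, r7} @ phole ldm
   (register naming here assumes the default REGISTER_PREFIX).  When the
   offsets neither start at 0 or 4 nor end at 0 or -4, case 5 first adds
   (or subtracts) the starting offset into the lowest destination register
   and then issues ldmia on that register.  This is a reading of the cases
   above, not an additional guarantee.  */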
5587 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
5588 HOST_WIDE_INT * load_offset)
5590 int unsorted_regs[4];
5591 HOST_WIDE_INT unsorted_offsets[4];
5592 int order[4];
5593 int base_reg = -1;
5594 int i;
5596 /* Can only handle 2, 3, or 4 insns at present, though could be easily
5597 extended if required. */
5598 gcc_assert (nops >= 2 && nops <= 4);
5600 /* Loop over the operands and check that the memory references are
5601 suitable (i.e. immediate offsets from the same base register). At
5602 the same time, extract the target register, and the memory
5603 offsets. */
5604 for (i = 0; i < nops; i++)
5606 rtx reg;
5607 rtx offset;
5609 /* Convert a subreg of a mem into the mem itself. */
5610 if (GET_CODE (operands[nops + i]) == SUBREG)
5611 operands[nops + i] = alter_subreg (operands + (nops + i));
5613 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
5615 /* Don't reorder volatile memory references; it doesn't seem worth
5616 looking for the case where the order is ok anyway. */
5617 if (MEM_VOLATILE_P (operands[nops + i]))
5618 return 0;
5620 offset = const0_rtx;
5622 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
5623 || (GET_CODE (reg) == SUBREG
5624 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
5625 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
5626 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
5627 == REG)
5628 || (GET_CODE (reg) == SUBREG
5629 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
5630 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
5631 == CONST_INT)))
5633 if (i == 0)
5635 base_reg = REGNO (reg);
5636 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
5637 ? REGNO (operands[i])
5638 : REGNO (SUBREG_REG (operands[i])));
5639 order[0] = 0;
5641 else
5643 if (base_reg != (int) REGNO (reg))
5644 /* Not addressed from the same base register. */
5645 return 0;
5647 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
5648 ? REGNO (operands[i])
5649 : REGNO (SUBREG_REG (operands[i])));
5650 if (unsorted_regs[i] < unsorted_regs[order[0]])
5651 order[0] = i;
5654 /* If it isn't an integer register, then we can't do this. */
5655 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
5656 return 0;
5658 unsorted_offsets[i] = INTVAL (offset);
5660 else
5661 /* Not a suitable memory address. */
5662 return 0;
5665 /* All the useful information has now been extracted from the
5666 operands into unsorted_regs and unsorted_offsets; additionally,
5667 order[0] has been set to the lowest numbered register in the
5668 list. Sort the registers into order, and check that the memory
5669 offsets are ascending and adjacent. */
5671 for (i = 1; i < nops; i++)
5673 int j;
5675 order[i] = order[i - 1];
5676 for (j = 0; j < nops; j++)
5677 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
5678 && (order[i] == order[i - 1]
5679 || unsorted_regs[j] < unsorted_regs[order[i]]))
5680 order[i] = j;
5682 /* Have we found a suitable register? If not, one must be used more
5683 than once. */
5684 if (order[i] == order[i - 1])
5685 return 0;
5687 /* Is the memory address adjacent and ascending? */
5688 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
5689 return 0;
5692 if (base)
5694 *base = base_reg;
5696 for (i = 0; i < nops; i++)
5697 regs[i] = unsorted_regs[order[i]];
5699 *load_offset = unsorted_offsets[order[0]];
5702 if (unsorted_offsets[order[0]] == 0)
5703 return 1; /* stmia */
5705 if (unsorted_offsets[order[0]] == 4)
5706 return 2; /* stmib */
5708 if (unsorted_offsets[order[nops - 1]] == 0)
5709 return 3; /* stmda */
5711 if (unsorted_offsets[order[nops - 1]] == -4)
5712 return 4; /* stmdb */
5714 return 0;
5717 const char *
5718 emit_stm_seq (rtx *operands, int nops)
5720 int regs[4];
5721 int base_reg;
5722 HOST_WIDE_INT offset;
5723 char buf[100];
5724 int i;
5726 switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
5728 case 1:
5729 strcpy (buf, "stm%?ia\t");
5730 break;
5732 case 2:
5733 strcpy (buf, "stm%?ib\t");
5734 break;
5736 case 3:
5737 strcpy (buf, "stm%?da\t");
5738 break;
5740 case 4:
5741 strcpy (buf, "stm%?db\t");
5742 break;
5744 default:
5745 gcc_unreachable ();
5748 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
5749 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
5751 for (i = 1; i < nops; i++)
5752 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
5753 reg_names[regs[i]]);
5755 strcat (buf, "}\t%@ phole stm");
5757 output_asm_insn (buf, operands);
5758 return "";
5762 /* Routines for use in generating RTL. */
5765 arm_gen_load_multiple (int base_regno, int count, rtx from, int up,
5766 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
5768 HOST_WIDE_INT offset = *offsetp;
5769 int i = 0, j;
5770 rtx result;
5771 int sign = up ? 1 : -1;
5772 rtx mem, addr;
5774 /* XScale has load-store double instructions, but they have stricter
5775 alignment requirements than load-store multiple, so we cannot
5776 use them.
5778 For XScale ldm requires 2 + NREGS cycles to complete and blocks
5779 the pipeline until completion.
5781 NREGS CYCLES
5782 1 3
5783 2 4
5784 3 5
5785 4 6
5787 An ldr instruction takes 1-3 cycles, but does not block the
5788 pipeline.
5790 NREGS CYCLES
5791 1 1-3
5792 2 2-6
5793 3 3-9
5794 4 4-12
5796 Best case ldr will always win. However, the more ldr instructions
5797 we issue, the less likely we are to be able to schedule them well.
5798 Using ldr instructions also increases code size.
5800 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
5801 for counts of 3 or 4 regs. */
5802 if (arm_tune_xscale && count <= 2 && ! optimize_size)
5804 rtx seq;
5806 start_sequence ();
5808 for (i = 0; i < count; i++)
5810 addr = plus_constant (from, i * 4 * sign);
5811 mem = adjust_automodify_address (basemem, SImode, addr, offset);
5812 emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
5813 offset += 4 * sign;
5816 if (write_back)
5818 emit_move_insn (from, plus_constant (from, count * 4 * sign));
5819 *offsetp = offset;
5822 seq = get_insns ();
5823 end_sequence ();
5825 return seq;
5828 result = gen_rtx_PARALLEL (VOIDmode,
5829 rtvec_alloc (count + (write_back ? 1 : 0)));
5830 if (write_back)
5832 XVECEXP (result, 0, 0)
5833 = gen_rtx_SET (GET_MODE (from), from,
5834 plus_constant (from, count * 4 * sign));
5835 i = 1;
5836 count++;
5839 for (j = 0; i < count; i++, j++)
5841 addr = plus_constant (from, j * 4 * sign);
5842 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
5843 XVECEXP (result, 0, i)
5844 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem);
5845 offset += 4 * sign;
5848 if (write_back)
5849 *offsetp = offset;
5851 return result;
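/* For example, with COUNT == 2, WRITE_BACK set and UP true, the PARALLEL
   built above has three elements: a SET advancing FROM by 8, and two SETs
   loading consecutive SImode words into BASE_REGNO and BASE_REGNO + 1.  On
   XScale with small counts (and not optimizing for size) the function
   instead returns a plain sequence of single loads, per the comment above.  */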
5855 arm_gen_store_multiple (int base_regno, int count, rtx to, int up,
5856 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
5858 HOST_WIDE_INT offset = *offsetp;
5859 int i = 0, j;
5860 rtx result;
5861 int sign = up ? 1 : -1;
5862 rtx mem, addr;
5864 /* See arm_gen_load_multiple for discussion of
5865 the pros/cons of ldm/stm usage for XScale. */
5866 if (arm_tune_xscale && count <= 2 && ! optimize_size)
5868 rtx seq;
5870 start_sequence ();
5872 for (i = 0; i < count; i++)
5874 addr = plus_constant (to, i * 4 * sign);
5875 mem = adjust_automodify_address (basemem, SImode, addr, offset);
5876 emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
5877 offset += 4 * sign;
5880 if (write_back)
5882 emit_move_insn (to, plus_constant (to, count * 4 * sign));
5883 *offsetp = offset;
5886 seq = get_insns ();
5887 end_sequence ();
5889 return seq;
5892 result = gen_rtx_PARALLEL (VOIDmode,
5893 rtvec_alloc (count + (write_back ? 1 : 0)));
5894 if (write_back)
5896 XVECEXP (result, 0, 0)
5897 = gen_rtx_SET (GET_MODE (to), to,
5898 plus_constant (to, count * 4 * sign));
5899 i = 1;
5900 count++;
5903 for (j = 0; i < count; i++, j++)
5905 addr = plus_constant (to, j * 4 * sign);
5906 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
5907 XVECEXP (result, 0, i)
5908 = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j));
5909 offset += 4 * sign;
5912 if (write_back)
5913 *offsetp = offset;
5915 return result;
5919 arm_gen_movmemqi (rtx *operands)
5921 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
5922 HOST_WIDE_INT srcoffset, dstoffset;
5923 int i;
5924 rtx src, dst, srcbase, dstbase;
5925 rtx part_bytes_reg = NULL;
5926 rtx mem;
5928 if (GET_CODE (operands[2]) != CONST_INT
5929 || GET_CODE (operands[3]) != CONST_INT
5930 || INTVAL (operands[2]) > 64
5931 || INTVAL (operands[3]) & 3)
5932 return 0;
5934 dstbase = operands[0];
5935 srcbase = operands[1];
5937 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
5938 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
5940 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
5941 out_words_to_go = INTVAL (operands[2]) / 4;
5942 last_bytes = INTVAL (operands[2]) & 3;
5943 dstoffset = srcoffset = 0;
5945 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
5946 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
5948 for (i = 0; in_words_to_go >= 2; i+=4)
5950 if (in_words_to_go > 4)
5951 emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE,
5952 srcbase, &srcoffset));
5953 else
5954 emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE,
5955 FALSE, srcbase, &srcoffset));
5957 if (out_words_to_go)
5959 if (out_words_to_go > 4)
5960 emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE,
5961 dstbase, &dstoffset));
5962 else if (out_words_to_go != 1)
5963 emit_insn (arm_gen_store_multiple (0, out_words_to_go,
5964 dst, TRUE,
5965 (last_bytes == 0
5966 ? FALSE : TRUE),
5967 dstbase, &dstoffset));
5968 else
5970 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
5971 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
5972 if (last_bytes != 0)
5974 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
5975 dstoffset += 4;
5980 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
5981 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
5984 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
5985 if (out_words_to_go)
5987 rtx sreg;
5989 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
5990 sreg = copy_to_reg (mem);
5992 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
5993 emit_move_insn (mem, sreg);
5994 in_words_to_go--;
5996 gcc_assert (!in_words_to_go); /* Sanity check */
5999 if (in_words_to_go)
6001 gcc_assert (in_words_to_go > 0);
6003 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
6004 part_bytes_reg = copy_to_mode_reg (SImode, mem);
6007 gcc_assert (!last_bytes || part_bytes_reg);
6009 if (BYTES_BIG_ENDIAN && last_bytes)
6011 rtx tmp = gen_reg_rtx (SImode);
6013 /* The bytes we want are in the top end of the word. */
6014 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
6015 GEN_INT (8 * (4 - last_bytes))));
6016 part_bytes_reg = tmp;
6018 while (last_bytes)
6020 mem = adjust_automodify_address (dstbase, QImode,
6021 plus_constant (dst, last_bytes - 1),
6022 dstoffset + last_bytes - 1);
6023 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
6025 if (--last_bytes)
6027 tmp = gen_reg_rtx (SImode);
6028 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
6029 part_bytes_reg = tmp;
6034 else
6036 if (last_bytes > 1)
6038 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
6039 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
6040 last_bytes -= 2;
6041 if (last_bytes)
6043 rtx tmp = gen_reg_rtx (SImode);
6044 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
6045 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
6046 part_bytes_reg = tmp;
6047 dstoffset += 2;
6051 if (last_bytes)
6053 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
6054 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
6058 return 1;
6061 /* Generate a memory reference for a half word, such that it will be loaded
6062 into the top 16 bits of the word. We can assume that the address is
6063 known to be alignable and of the form reg, or plus (reg, const). */
6066 arm_gen_rotated_half_load (rtx memref)
6068 HOST_WIDE_INT offset = 0;
6069 rtx base = XEXP (memref, 0);
6071 if (GET_CODE (base) == PLUS)
6073 offset = INTVAL (XEXP (base, 1));
6074 base = XEXP (base, 0);
6077 /* If we aren't allowed to generate unaligned addresses, then fail. */
6078 if ((BYTES_BIG_ENDIAN ? 1 : 0) ^ ((offset & 2) == 0))
6079 return NULL;
6081 base = gen_rtx_MEM (SImode, plus_constant (base, offset & ~2));
6083 if ((BYTES_BIG_ENDIAN ? 1 : 0) ^ ((offset & 2) == 2))
6084 return base;
6086 return gen_rtx_ROTATE (SImode, base, GEN_INT (16));
6089 /* Select a dominance comparison mode if possible for a test of the general
6090 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
6091 COND_OR == DOM_CC_X_AND_Y => (X && Y)
6092 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
6093 COND_OR == DOM_CC_X_OR_Y => (X || Y)
6094 In all cases OP will be either EQ or NE, but we don't need to know which
6095 here. If we are unable to support a dominance comparison we return
6096 CC mode. This will then fail to match for the RTL expressions that
6097 generate this call. */
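/* As a concrete example, (LT (x) (y)) paired with (LE (x) (z)) under
   DOM_CC_X_OR_Y yields CC_DLEmode, because LT dominates LE; pairing a
   signed with an unsigned comparison (say LT with GEU) has no dominance
   relation either way, so CCmode is returned and the pattern fails to
   match.  */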
6098 enum machine_mode
6099 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
6101 enum rtx_code cond1, cond2;
6102 int swapped = 0;
6104 /* Currently we will probably get the wrong result if the individual
6105 comparisons are not simple. This also ensures that it is safe to
6106 reverse a comparison if necessary. */
6107 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
6108 != CCmode)
6109 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
6110 != CCmode))
6111 return CCmode;
6113 /* The if_then_else variant of this tests the second condition if the
6114 first passes, but is true if the first fails. Reverse the first
6115 condition to get a true "inclusive-or" expression. */
6116 if (cond_or == DOM_CC_NX_OR_Y)
6117 cond1 = reverse_condition (cond1);
6119 /* If the comparisons are not equal, and one doesn't dominate the other,
6120 then we can't do this. */
6121 if (cond1 != cond2
6122 && !comparison_dominates_p (cond1, cond2)
6123 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
6124 return CCmode;
6126 if (swapped)
6128 enum rtx_code temp = cond1;
6129 cond1 = cond2;
6130 cond2 = temp;
6133 switch (cond1)
6135 case EQ:
6136 if (cond_or == DOM_CC_X_AND_Y)
6137 return CC_DEQmode;
6139 switch (cond2)
6141 case EQ: return CC_DEQmode;
6142 case LE: return CC_DLEmode;
6143 case LEU: return CC_DLEUmode;
6144 case GE: return CC_DGEmode;
6145 case GEU: return CC_DGEUmode;
6146 default: gcc_unreachable ();
6149 case LT:
6150 if (cond_or == DOM_CC_X_AND_Y)
6151 return CC_DLTmode;
6153 switch (cond2)
6155 case LT:
6156 return CC_DLTmode;
6157 case LE:
6158 return CC_DLEmode;
6159 case NE:
6160 return CC_DNEmode;
6161 default:
6162 gcc_unreachable ();
6165 case GT:
6166 if (cond_or == DOM_CC_X_AND_Y)
6167 return CC_DGTmode;
6169 switch (cond2)
6171 case GT:
6172 return CC_DGTmode;
6173 case GE:
6174 return CC_DGEmode;
6175 case NE:
6176 return CC_DNEmode;
6177 default:
6178 gcc_unreachable ();
6181 case LTU:
6182 if (cond_or == DOM_CC_X_AND_Y)
6183 return CC_DLTUmode;
6185 switch (cond2)
6187 case LTU:
6188 return CC_DLTUmode;
6189 case LEU:
6190 return CC_DLEUmode;
6191 case NE:
6192 return CC_DNEmode;
6193 default:
6194 gcc_unreachable ();
6197 case GTU:
6198 if (cond_or == DOM_CC_X_AND_Y)
6199 return CC_DGTUmode;
6201 switch (cond2)
6203 case GTU:
6204 return CC_DGTUmode;
6205 case GEU:
6206 return CC_DGEUmode;
6207 case NE:
6208 return CC_DNEmode;
6209 default:
6210 gcc_unreachable ();
6213 /* The remaining cases only occur when both comparisons are the
6214 same. */
6215 case NE:
6216 gcc_assert (cond1 == cond2);
6217 return CC_DNEmode;
6219 case LE:
6220 gcc_assert (cond1 == cond2);
6221 return CC_DLEmode;
6223 case GE:
6224 gcc_assert (cond1 == cond2);
6225 return CC_DGEmode;
6227 case LEU:
6228 gcc_assert (cond1 == cond2);
6229 return CC_DLEUmode;
6231 case GEU:
6232 gcc_assert (cond1 == cond2);
6233 return CC_DGEUmode;
6235 default:
6236 gcc_unreachable ();
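/* Given a comparison operator OP applied to X and Y, return the CC mode
   in which the comparison should be done.  The specialized modes below
   record properties of the comparison (swapped operands, combined
   dominance conditions, flags set only as an operation's side-effect)
   that the matching insn patterns rely on; CCmode is the general
   fallback.  */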
6240 enum machine_mode
6241 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
6243 /* All floating point compares return CCFP if it is an equality
6244 comparison, and CCFPE otherwise. */
6245 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
6247 switch (op)
6249 case EQ:
6250 case NE:
6251 case UNORDERED:
6252 case ORDERED:
6253 case UNLT:
6254 case UNLE:
6255 case UNGT:
6256 case UNGE:
6257 case UNEQ:
6258 case LTGT:
6259 return CCFPmode;
6261 case LT:
6262 case LE:
6263 case GT:
6264 case GE:
6265 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
6266 return CCFPmode;
6267 return CCFPEmode;
6269 default:
6270 gcc_unreachable ();
6274 /* A compare with a shifted operand. Because of canonicalization, the
6275 comparison will have to be swapped when we emit the assembler. */
6276 if (GET_MODE (y) == SImode && GET_CODE (y) == REG
6277 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
6278 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
6279 || GET_CODE (x) == ROTATERT))
6280 return CC_SWPmode;
6282 /* This operation is performed swapped, but since we only rely on the Z
6283 flag we don't need an additional mode. */
6284 if (GET_MODE (y) == SImode && REG_P (y)
6285 && GET_CODE (x) == NEG
6286 && (op == EQ || op == NE))
6287 return CC_Zmode;
6289 /* This is a special case that is used by combine to allow a
6290 comparison of a shifted byte load to be split into a zero-extend
6291 followed by a comparison of the shifted integer (only valid for
6292 equalities and unsigned inequalities). */
6293 if (GET_MODE (x) == SImode
6294 && GET_CODE (x) == ASHIFT
6295 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
6296 && GET_CODE (XEXP (x, 0)) == SUBREG
6297 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
6298 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
6299 && (op == EQ || op == NE
6300 || op == GEU || op == GTU || op == LTU || op == LEU)
6301 && GET_CODE (y) == CONST_INT)
6302 return CC_Zmode;
6304 /* A construct for a conditional compare: if the false arm contains
6305 0, then both conditions must be true; otherwise either condition
6306 must be true. Not all conditions are possible, so CCmode is
6307 returned if it can't be done. */
6308 if (GET_CODE (x) == IF_THEN_ELSE
6309 && (XEXP (x, 2) == const0_rtx
6310 || XEXP (x, 2) == const1_rtx)
6311 && COMPARISON_P (XEXP (x, 0))
6312 && COMPARISON_P (XEXP (x, 1)))
6313 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
6314 INTVAL (XEXP (x, 2)));
6316 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
6317 if (GET_CODE (x) == AND
6318 && COMPARISON_P (XEXP (x, 0))
6319 && COMPARISON_P (XEXP (x, 1)))
6320 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
6321 DOM_CC_X_AND_Y);
6323 if (GET_CODE (x) == IOR
6324 && COMPARISON_P (XEXP (x, 0))
6325 && COMPARISON_P (XEXP (x, 1)))
6326 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
6327 DOM_CC_X_OR_Y);
6329 /* An operation (on Thumb) where we want to test for a single bit.
6330 This is done by shifting that bit up into the top bit of a
6331 scratch register; we can then branch on the sign bit. */
6332 if (TARGET_THUMB
6333 && GET_MODE (x) == SImode
6334 && (op == EQ || op == NE)
6335 && (GET_CODE (x) == ZERO_EXTRACT))
6336 return CC_Nmode;
6338 /* An operation that sets the condition codes as a side-effect does
6339 not set the V flag correctly, so we can only use comparisons where
6340 this doesn't matter. (For LT and GE we can use "mi" and "pl"
6341 instead.) */
6342 if (GET_MODE (x) == SImode
6343 && y == const0_rtx
6344 && (op == EQ || op == NE || op == LT || op == GE)
6345 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
6346 || GET_CODE (x) == AND || GET_CODE (x) == IOR
6347 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
6348 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
6349 || GET_CODE (x) == LSHIFTRT
6350 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
6351 || GET_CODE (x) == ROTATERT
6352 || (TARGET_ARM && GET_CODE (x) == ZERO_EXTRACT)))
6353 return CC_NOOVmode;
6355 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
6356 return CC_Zmode;
6358 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
6359 && GET_CODE (x) == PLUS
6360 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
6361 return CC_Cmode;
6363 return CCmode;
6366 /* X and Y are two things to compare using CODE. Emit the compare insn and
6367 return the rtx for the condition code register in the proper mode.
6368 SELECT_CC_MODE is used to choose that mode. */
6370 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
6372 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
6373 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
6375 emit_insn (gen_rtx_SET (VOIDmode, cc_reg,
6376 gen_rtx_COMPARE (mode, x, y)));
6378 return cc_reg;
6381 /* Generate a sequence of insns that will generate the correct return
6382 address mask depending on the physical architecture that the program
6383 is running on. */
6385 arm_gen_return_addr_mask (void)
6387 rtx reg = gen_reg_rtx (Pmode);
6389 emit_insn (gen_return_addr_mask (reg));
6390 return reg;
6393 void
6394 arm_reload_in_hi (rtx *operands)
6396 rtx ref = operands[1];
6397 rtx base, scratch;
6398 HOST_WIDE_INT offset = 0;
6400 if (GET_CODE (ref) == SUBREG)
6402 offset = SUBREG_BYTE (ref);
6403 ref = SUBREG_REG (ref);
6406 if (GET_CODE (ref) == REG)
6408 /* We have a pseudo which has been spilt onto the stack; there
6409 are two cases here: the first where there is a simple
6410 stack-slot replacement and a second where the stack-slot is
6411 out of range, or is used as a subreg. */
6412 if (reg_equiv_mem[REGNO (ref)])
6414 ref = reg_equiv_mem[REGNO (ref)];
6415 base = find_replacement (&XEXP (ref, 0));
6417 else
6418 /* The slot is out of range, or was dressed up in a SUBREG. */
6419 base = reg_equiv_address[REGNO (ref)];
6421 else
6422 base = find_replacement (&XEXP (ref, 0));
6424 /* Handle the case where the address is too complex to be offset by 1. */
6425 if (GET_CODE (base) == MINUS
6426 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
6428 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
6430 emit_insn (gen_rtx_SET (VOIDmode, base_plus, base));
6431 base = base_plus;
6433 else if (GET_CODE (base) == PLUS)
6435 /* The addend must be CONST_INT, or we would have dealt with it above. */
6436 HOST_WIDE_INT hi, lo;
6438 offset += INTVAL (XEXP (base, 1));
6439 base = XEXP (base, 0);
6441 /* Rework the address into a legal sequence of insns. */
6442 /* Valid range for lo is -4095 -> 4095 */
6443 lo = (offset >= 0
6444 ? (offset & 0xfff)
6445 : -((-offset) & 0xfff));
6447 /* Corner case: if lo is the max offset then we would be out of range
6448 once we have added the additional 1 below, so bump the msb into the
6449 pre-loading insn(s). */
6450 if (lo == 4095)
6451 lo &= 0x7ff;
6453 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
6454 ^ (HOST_WIDE_INT) 0x80000000)
6455 - (HOST_WIDE_INT) 0x80000000);
6457 gcc_assert (hi + lo == offset);
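/* For example, an offset of 4095 is split as lo = 2047, hi = 2048 by
   the corner-case adjustment above, so that both this access and the
   one at offset + 1 below remain inside the +/-4095 range.  */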
6459 if (hi != 0)
6461 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
6463 /* Get the base address; addsi3 knows how to handle constants
6464 that require more than one insn. */
6465 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
6466 base = base_plus;
6467 offset = lo;
6471 /* Operands[2] may overlap operands[0] (though it won't overlap
6472 operands[1]); that's why we asked for a DImode reg -- so we can
6473 use the bit that does not overlap. */
6474 if (REGNO (operands[2]) == REGNO (operands[0]))
6475 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
6476 else
6477 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
6479 emit_insn (gen_zero_extendqisi2 (scratch,
6480 gen_rtx_MEM (QImode,
6481 plus_constant (base,
6482 offset))));
6483 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
6484 gen_rtx_MEM (QImode,
6485 plus_constant (base,
6486 offset + 1))));
6487 if (!BYTES_BIG_ENDIAN)
6488 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_SUBREG (SImode, operands[0], 0),
6489 gen_rtx_IOR (SImode,
6490 gen_rtx_ASHIFT
6491 (SImode,
6492 gen_rtx_SUBREG (SImode, operands[0], 0),
6493 GEN_INT (8)),
6494 scratch)));
6495 else
6496 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_SUBREG (SImode, operands[0], 0),
6497 gen_rtx_IOR (SImode,
6498 gen_rtx_ASHIFT (SImode, scratch,
6499 GEN_INT (8)),
6500 gen_rtx_SUBREG (SImode, operands[0],
6501 0))));
6504 /* Handle storing a half-word to memory during reload by synthesizing it as two
6505 byte stores. Take care not to clobber the input values until after we
6506 have moved them somewhere safe. This code assumes that if the DImode
6507 scratch in operands[2] overlaps either the input value or output address
6508 in some way, then that value must die in this insn (we absolutely need
6509 two scratch registers for some corner cases). */
6510 void
6511 arm_reload_out_hi (rtx *operands)
6513 rtx ref = operands[0];
6514 rtx outval = operands[1];
6515 rtx base, scratch;
6516 HOST_WIDE_INT offset = 0;
6518 if (GET_CODE (ref) == SUBREG)
6520 offset = SUBREG_BYTE (ref);
6521 ref = SUBREG_REG (ref);
6524 if (GET_CODE (ref) == REG)
6526 /* We have a pseudo which has been spilt onto the stack; there
6527 are two cases here: the first where there is a simple
6528 stack-slot replacement and a second where the stack-slot is
6529 out of range, or is used as a subreg. */
6530 if (reg_equiv_mem[REGNO (ref)])
6532 ref = reg_equiv_mem[REGNO (ref)];
6533 base = find_replacement (&XEXP (ref, 0));
6535 else
6536 /* The slot is out of range, or was dressed up in a SUBREG. */
6537 base = reg_equiv_address[REGNO (ref)];
6539 else
6540 base = find_replacement (&XEXP (ref, 0));
6542 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
6544 /* Handle the case where the address is too complex to be offset by 1. */
6545 if (GET_CODE (base) == MINUS
6546 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
6548 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
6550 /* Be careful not to destroy OUTVAL. */
6551 if (reg_overlap_mentioned_p (base_plus, outval))
6553 /* Updating base_plus might destroy outval; see if we can
6554 swap the scratch and base_plus. */
6555 if (!reg_overlap_mentioned_p (scratch, outval))
6557 rtx tmp = scratch;
6558 scratch = base_plus;
6559 base_plus = tmp;
6561 else
6563 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
6565 /* Be conservative and copy OUTVAL into the scratch now;
6566 this should only be necessary if outval is a subreg
6567 of something larger than a word. */
6568 /* XXX Might this clobber base? I can't see how it can,
6569 since scratch is known to overlap with OUTVAL, and
6570 must be wider than a word. */
6571 emit_insn (gen_movhi (scratch_hi, outval));
6572 outval = scratch_hi;
6576 emit_insn (gen_rtx_SET (VOIDmode, base_plus, base));
6577 base = base_plus;
6579 else if (GET_CODE (base) == PLUS)
6581 /* The addend must be CONST_INT, or we would have dealt with it above. */
6582 HOST_WIDE_INT hi, lo;
6584 offset += INTVAL (XEXP (base, 1));
6585 base = XEXP (base, 0);
6587 /* Rework the address into a legal sequence of insns. */
6588 /* Valid range for lo is -4095 -> 4095 */
6589 lo = (offset >= 0
6590 ? (offset & 0xfff)
6591 : -((-offset) & 0xfff));
6593 /* Corner case: if lo is the max offset then we would be out of range
6594 once we have added the additional 1 below, so bump the msb into the
6595 pre-loading insn(s). */
6596 if (lo == 4095)
6597 lo &= 0x7ff;
6599 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
6600 ^ (HOST_WIDE_INT) 0x80000000)
6601 - (HOST_WIDE_INT) 0x80000000);
6603 gcc_assert (hi + lo == offset);
6605 if (hi != 0)
6607 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
6609 /* Be careful not to destroy OUTVAL. */
6610 if (reg_overlap_mentioned_p (base_plus, outval))
6612 /* Updating base_plus might destroy outval; see if we
6613 can swap the scratch and base_plus. */
6614 if (!reg_overlap_mentioned_p (scratch, outval))
6616 rtx tmp = scratch;
6617 scratch = base_plus;
6618 base_plus = tmp;
6620 else
6622 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
6624 /* Be conservative and copy outval into scratch now;
6625 this should only be necessary if outval is a
6626 subreg of something larger than a word. */
6627 /* XXX Might this clobber base? I can't see how it
6628 can, since scratch is known to overlap with
6629 outval. */
6630 emit_insn (gen_movhi (scratch_hi, outval));
6631 outval = scratch_hi;
6635 /* Get the base address; addsi3 knows how to handle constants
6636 that require more than one insn. */
6637 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
6638 base = base_plus;
6639 offset = lo;
6643 if (BYTES_BIG_ENDIAN)
6645 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
6646 plus_constant (base, offset + 1)),
6647 gen_lowpart (QImode, outval)));
6648 emit_insn (gen_lshrsi3 (scratch,
6649 gen_rtx_SUBREG (SImode, outval, 0),
6650 GEN_INT (8)));
6651 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
6652 gen_lowpart (QImode, scratch)));
6654 else
6656 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
6657 gen_lowpart (QImode, outval)));
6658 emit_insn (gen_lshrsi3 (scratch,
6659 gen_rtx_SUBREG (SImode, outval, 0),
6660 GEN_INT (8)));
6661 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
6662 plus_constant (base, offset + 1)),
6663 gen_lowpart (QImode, scratch)));
6667 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
6668 (padded to the size of a word) should be passed in a register. */
6670 static bool
6671 arm_must_pass_in_stack (enum machine_mode mode, tree type)
6673 if (TARGET_AAPCS_BASED)
6674 return must_pass_in_stack_var_size (mode, type);
6675 else
6676 return must_pass_in_stack_var_size_or_pad (mode, type);
6680 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
6681 Return true if an argument passed on the stack should be padded upwards,
6682 i.e. if the least-significant byte has useful data. */
6684 bool
6685 arm_pad_arg_upward (enum machine_mode mode, tree type)
6687 if (!TARGET_AAPCS_BASED)
6688 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
6690 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
6691 return false;
6693 return true;
6697 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
6698 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
6699 byte of the register has useful data, and return the opposite if the
6700 most significant byte does.
6701 For AAPCS, small aggregates and small complex types are always padded
6702 upwards. */
6704 bool
6705 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
6706 tree type, int first ATTRIBUTE_UNUSED)
6708 if (TARGET_AAPCS_BASED
6709 && BYTES_BIG_ENDIAN
6710 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
6711 && int_size_in_bytes (type) <= 4)
6712 return true;
6714 /* Otherwise, use default padding. */
6715 return !BYTES_BIG_ENDIAN;
6720 /* Print a symbolic form of X to the debug file, F. */
6721 static void
6722 arm_print_value (FILE *f, rtx x)
6724 switch (GET_CODE (x))
6726 case CONST_INT:
6727 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
6728 return;
6730 case CONST_DOUBLE:
6731 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
6732 return;
6734 case CONST_VECTOR:
6736 int i;
6738 fprintf (f, "<");
6739 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
6741 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
6742 if (i < (CONST_VECTOR_NUNITS (x) - 1))
6743 fputc (',', f);
6745 fprintf (f, ">");
6747 return;
6749 case CONST_STRING:
6750 fprintf (f, "\"%s\"", XSTR (x, 0));
6751 return;
6753 case SYMBOL_REF:
6754 fprintf (f, "`%s'", XSTR (x, 0));
6755 return;
6757 case LABEL_REF:
6758 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
6759 return;
6761 case CONST:
6762 arm_print_value (f, XEXP (x, 0));
6763 return;
6765 case PLUS:
6766 arm_print_value (f, XEXP (x, 0));
6767 fprintf (f, "+");
6768 arm_print_value (f, XEXP (x, 1));
6769 return;
6771 case PC:
6772 fprintf (f, "pc");
6773 return;
6775 default:
6776 fprintf (f, "????");
6777 return;
6781 /* Routines for manipulation of the constant pool. */
6783 /* ARM instructions cannot load a large constant directly into a
6784 register; they have to come from a pc relative load. The constant
6785 must therefore be placed in the addressable range of the pc
6786 relative load. Depending on the precise pc relative load
6787 instruction the range is somewhere between 256 bytes and 4k. This
6788 means that we often have to dump a constant inside a function, and
6789 generate code to branch around it.
6791 It is important to minimize this, since the branches will slow
6792 things down and make the code larger.
6794 Normally we can hide the table after an existing unconditional
6795 branch so that there is no interruption of the flow, but in the
6796 worst case the code looks like this:
6798 ldr rn, L1
6800 b L2
6801 align
6802 L1: .long value
6806 ldr rn, L3
6808 b L4
6809 align
6810 L3: .long value
6814 We fix this by performing a scan after scheduling, which notices
6815 which instructions need to have their operands fetched from the
6816 constant table and builds the table.
6818 The algorithm starts by building a table of all the constants that
6819 need fixing up and all the natural barriers in the function (places
6820 where a constant table can be dropped without breaking the flow).
6821 For each fixup we note how far the pc-relative replacement will be
6822 able to reach and the offset of the instruction into the function.
6824 Having built the table we then group the fixes together to form
6825 tables that are as large as possible (subject to addressing
6826 constraints) and emit each table of constants after the last
6827 barrier that is within range of all the instructions in the group.
6828 If a group does not contain a barrier, then we forcibly create one
6829 by inserting a jump instruction into the flow. Once the table has
6830 been inserted, the insns are then modified to reference the
6831 relevant entry in the pool.
6833 Possible enhancements to the algorithm (not implemented) are:
6835 1) For some processors and object formats, there may be benefit in
6836 aligning the pools to the start of cache lines; this alignment
6837 would need to be taken into account when calculating addressability
6838 of a pool. */
6840 /* These typedefs are located at the start of this file, so that
6841 they can be used in the prototypes there. This comment is to
6842 remind readers of that fact so that the following structures
6843 can be understood more easily.
6845 typedef struct minipool_node Mnode;
6846 typedef struct minipool_fixup Mfix; */
6848 struct minipool_node
6850 /* Doubly linked chain of entries. */
6851 Mnode * next;
6852 Mnode * prev;
6853 /* The maximum offset into the code at which this entry can be placed. While
6854 pushing fixes for forward references, all entries are sorted in order
6855 of increasing max_address. */
6856 HOST_WIDE_INT max_address;
6857 /* Similarly for an entry inserted for a backwards ref. */
6858 HOST_WIDE_INT min_address;
6859 /* The number of fixes referencing this entry. This can become zero
6860 if we "unpush" an entry. In this case we ignore the entry when we
6861 come to emit the code. */
6862 int refcount;
6863 /* The offset from the start of the minipool. */
6864 HOST_WIDE_INT offset;
6865 /* The value in the table. */
6866 rtx value;
6867 /* The mode of value. */
6868 enum machine_mode mode;
6869 /* The size of the value. With iWMMXt enabled
6870 sizes > 4 also imply an alignment of 8 bytes. */
6871 int fix_size;
6874 struct minipool_fixup
6876 Mfix * next;
6877 rtx insn;
6878 HOST_WIDE_INT address;
6879 rtx * loc;
6880 enum machine_mode mode;
6881 int fix_size;
6882 rtx value;
6883 Mnode * minipool;
6884 HOST_WIDE_INT forwards;
6885 HOST_WIDE_INT backwards;
6888 /* Fixes less than a word need padding out to a word boundary. */
6889 #define MINIPOOL_FIX_SIZE(mode) \
6890 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
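/* Thus a QImode or HImode constant still occupies a full word in the
   pool, while DImode/DFmode values keep their natural 8-byte size.  */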
6892 static Mnode * minipool_vector_head;
6893 static Mnode * minipool_vector_tail;
6894 static rtx minipool_vector_label;
6896 /* The linked list of all minipool fixes required for this function. */
6897 Mfix * minipool_fix_head;
6898 Mfix * minipool_fix_tail;
6899 /* The fix entry for the current minipool, once it has been placed. */
6900 Mfix * minipool_barrier;
6902 /* Determines if INSN is the start of a jump table. Returns the end
6903 of the TABLE or NULL_RTX. */
6904 static rtx
6905 is_jump_table (rtx insn)
6907 rtx table;
6909 if (GET_CODE (insn) == JUMP_INSN
6910 && JUMP_LABEL (insn) != NULL
6911 && ((table = next_real_insn (JUMP_LABEL (insn)))
6912 == next_real_insn (insn))
6913 && table != NULL
6914 && GET_CODE (table) == JUMP_INSN
6915 && (GET_CODE (PATTERN (table)) == ADDR_VEC
6916 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
6917 return table;
6919 return NULL_RTX;
6922 #ifndef JUMP_TABLES_IN_TEXT_SECTION
6923 #define JUMP_TABLES_IN_TEXT_SECTION 0
6924 #endif
6926 static HOST_WIDE_INT
6927 get_jump_table_size (rtx insn)
6929 /* ADDR_VECs only take room if read-only data goes into the text
6930 section. */
6931 if (JUMP_TABLES_IN_TEXT_SECTION
6932 #if !defined(READONLY_DATA_SECTION) && !defined(READONLY_DATA_SECTION_ASM_OP)
6933 || 1
6934 #endif
6937 rtx body = PATTERN (insn);
6938 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
6940 return GET_MODE_SIZE (GET_MODE (body)) * XVECLEN (body, elt);
6943 return 0;
6946 /* Move a minipool fix MP from its current location to before MAX_MP.
6947 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
6948 constraints may need updating. */
6949 static Mnode *
6950 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
6951 HOST_WIDE_INT max_address)
6953 /* The code below assumes these are different. */
6954 gcc_assert (mp != max_mp);
6956 if (max_mp == NULL)
6958 if (max_address < mp->max_address)
6959 mp->max_address = max_address;
6961 else
6963 if (max_address > max_mp->max_address - mp->fix_size)
6964 mp->max_address = max_mp->max_address - mp->fix_size;
6965 else
6966 mp->max_address = max_address;
6968 /* Unlink MP from its current position. Since max_mp is non-null,
6969 mp->prev must be non-null. */
6970 mp->prev->next = mp->next;
6971 if (mp->next != NULL)
6972 mp->next->prev = mp->prev;
6973 else
6974 minipool_vector_tail = mp->prev;
6976 /* Re-insert it before MAX_MP. */
6977 mp->next = max_mp;
6978 mp->prev = max_mp->prev;
6979 max_mp->prev = mp;
6981 if (mp->prev != NULL)
6982 mp->prev->next = mp;
6983 else
6984 minipool_vector_head = mp;
6987 /* Save the new entry. */
6988 max_mp = mp;
6990 /* Scan over the preceding entries and adjust their addresses as
6991 required. */
6992 while (mp->prev != NULL
6993 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
6995 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
6996 mp = mp->prev;
6999 return max_mp;
7002 /* Add a constant to the minipool for a forward reference. Returns the
7003 node added or NULL if the constant will not fit in this pool. */
7004 static Mnode *
7005 add_minipool_forward_ref (Mfix *fix)
7007 /* If set, max_mp is the first pool_entry that has a lower
7008 constraint than the one we are trying to add. */
7009 Mnode * max_mp = NULL;
7010 HOST_WIDE_INT max_address = fix->address + fix->forwards;
7011 Mnode * mp;
7013 /* If this fix's address is greater than the address of the first
7014 entry, then we can't put the fix in this pool. We subtract the
7015 size of the current fix to ensure that if the table is fully
7016 packed we still have enough room to insert this value by shuffling
7017 the other fixes forwards. */
7018 if (minipool_vector_head &&
7019 fix->address >= minipool_vector_head->max_address - fix->fix_size)
7020 return NULL;
7022 /* Scan the pool to see if a constant with the same value has
7023 already been added. While we are doing this, also note the
7024 location where we must insert the constant if it doesn't already
7025 exist. */
7026 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
7028 if (GET_CODE (fix->value) == GET_CODE (mp->value)
7029 && fix->mode == mp->mode
7030 && (GET_CODE (fix->value) != CODE_LABEL
7031 || (CODE_LABEL_NUMBER (fix->value)
7032 == CODE_LABEL_NUMBER (mp->value)))
7033 && rtx_equal_p (fix->value, mp->value))
7035 /* More than one fix references this entry. */
7036 mp->refcount++;
7037 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
7040 /* Note the insertion point if necessary. */
7041 if (max_mp == NULL
7042 && mp->max_address > max_address)
7043 max_mp = mp;
7045 /* If we are inserting an 8-byte aligned quantity and
7046 we have not already found an insertion point, then
7047 make sure that all such 8-byte aligned quantities are
7048 placed at the start of the pool. */
7049 if (ARM_DOUBLEWORD_ALIGN
7050 && max_mp == NULL
7051 && fix->fix_size == 8
7052 && mp->fix_size != 8)
7054 max_mp = mp;
7055 max_address = mp->max_address;
7059 /* The value is not currently in the minipool, so we need to create
7060 a new entry for it. If MAX_MP is NULL, the entry will be put on
7061 the end of the list since the placement is less constrained than
7062 any existing entry. Otherwise, we insert the new fix before
7063 MAX_MP and, if necessary, adjust the constraints on the other
7064 entries. */
7065 mp = xmalloc (sizeof (* mp));
7066 mp->fix_size = fix->fix_size;
7067 mp->mode = fix->mode;
7068 mp->value = fix->value;
7069 mp->refcount = 1;
7070 /* Not yet required for a backwards ref. */
7071 mp->min_address = -65536;
7073 if (max_mp == NULL)
7075 mp->max_address = max_address;
7076 mp->next = NULL;
7077 mp->prev = minipool_vector_tail;
7079 if (mp->prev == NULL)
7081 minipool_vector_head = mp;
7082 minipool_vector_label = gen_label_rtx ();
7084 else
7085 mp->prev->next = mp;
7087 minipool_vector_tail = mp;
7089 else
7091 if (max_address > max_mp->max_address - mp->fix_size)
7092 mp->max_address = max_mp->max_address - mp->fix_size;
7093 else
7094 mp->max_address = max_address;
7096 mp->next = max_mp;
7097 mp->prev = max_mp->prev;
7098 max_mp->prev = mp;
7099 if (mp->prev != NULL)
7100 mp->prev->next = mp;
7101 else
7102 minipool_vector_head = mp;
7105 /* Save the new entry. */
7106 max_mp = mp;
7108 /* Scan over the preceding entries and adjust their addresses as
7109 required. */
7110 while (mp->prev != NULL
7111 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
7113 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
7114 mp = mp->prev;
7117 return max_mp;
7120 static Mnode *
7121 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
7122 HOST_WIDE_INT min_address)
7124 HOST_WIDE_INT offset;
7126 /* The code below assumes these are different. */
7127 gcc_assert (mp != min_mp);
7129 if (min_mp == NULL)
7131 if (min_address > mp->min_address)
7132 mp->min_address = min_address;
7134 else
7136 /* We will adjust this below if it is too loose. */
7137 mp->min_address = min_address;
7139 /* Unlink MP from its current position. Since min_mp is non-null,
7140 mp->next must be non-null. */
7141 mp->next->prev = mp->prev;
7142 if (mp->prev != NULL)
7143 mp->prev->next = mp->next;
7144 else
7145 minipool_vector_head = mp->next;
7147 /* Reinsert it after MIN_MP. */
7148 mp->prev = min_mp;
7149 mp->next = min_mp->next;
7150 min_mp->next = mp;
7151 if (mp->next != NULL)
7152 mp->next->prev = mp;
7153 else
7154 minipool_vector_tail = mp;
7157 min_mp = mp;
7159 offset = 0;
7160 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
7162 mp->offset = offset;
7163 if (mp->refcount > 0)
7164 offset += mp->fix_size;
7166 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
7167 mp->next->min_address = mp->min_address + mp->fix_size;
7170 return min_mp;
7173 /* Add a constant to the minipool for a backward reference. Returns the
7174 node added or NULL if the constant will not fit in this pool.
7176 Note that the code for inserting a backwards reference can be
7177 somewhat confusing because the calculated offsets for each fix do
7178 not take into account the size of the pool (which is still under
7179 construction). */
7180 static Mnode *
7181 add_minipool_backward_ref (Mfix *fix)
7183 /* If set, min_mp is the last pool_entry that has a lower constraint
7184 than the one we are trying to add. */
7185 Mnode *min_mp = NULL;
7186 /* This can be negative, since it is only a constraint. */
7187 HOST_WIDE_INT min_address = fix->address - fix->backwards;
7188 Mnode *mp;
7190 /* If we can't reach the current pool from this insn, or if we can't
7191 insert this entry at the end of the pool without pushing other
7192 fixes out of range, then we don't try. This ensures that we
7193 can't fail later on. */
7194 if (min_address >= minipool_barrier->address
7195 || (minipool_vector_tail->min_address + fix->fix_size
7196 >= minipool_barrier->address))
7197 return NULL;
7199 /* Scan the pool to see if a constant with the same value has
7200 already been added. While we are doing this, also note the
7201 location where we must insert the constant if it doesn't already
7202 exist. */
7203 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
7205 if (GET_CODE (fix->value) == GET_CODE (mp->value)
7206 && fix->mode == mp->mode
7207 && (GET_CODE (fix->value) != CODE_LABEL
7208 || (CODE_LABEL_NUMBER (fix->value)
7209 == CODE_LABEL_NUMBER (mp->value)))
7210 && rtx_equal_p (fix->value, mp->value)
7211 /* Check that there is enough slack to move this entry to the
7212 end of the table (this is conservative). */
7213 && (mp->max_address
7214 > (minipool_barrier->address
7215 + minipool_vector_tail->offset
7216 + minipool_vector_tail->fix_size)))
7218 mp->refcount++;
7219 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
7222 if (min_mp != NULL)
7223 mp->min_address += fix->fix_size;
7224 else
7226 /* Note the insertion point if necessary. */
7227 if (mp->min_address < min_address)
7229 /* For now, nodes that require 8-byte alignment may only be
7230 inserted at the start of the pool. */
7231 if (ARM_DOUBLEWORD_ALIGN
7232 && fix->fix_size == 8 && mp->fix_size != 8)
7233 return NULL;
7234 else
7235 min_mp = mp;
7237 else if (mp->max_address
7238 < minipool_barrier->address + mp->offset + fix->fix_size)
7240 /* Inserting before this entry would push the fix beyond
7241 its maximum address (which can happen if we have
7242 re-located a forwards fix); force the new fix to come
7243 after it. */
7244 min_mp = mp;
7245 min_address = mp->min_address + fix->fix_size;
7247 /* If we are inserting an 8-byte aligned quantity and
7248 we have not already found an insertion point, then
7249 make sure that all such 8-byte aligned quantities are
7250 placed at the start of the pool. */
7251 else if (ARM_DOUBLEWORD_ALIGN
7252 && min_mp == NULL
7253 && fix->fix_size == 8
7254 && mp->fix_size < 8)
7256 min_mp = mp;
7257 min_address = mp->min_address + fix->fix_size;
7262 /* We need to create a new entry. */
7263 mp = xmalloc (sizeof (* mp));
7264 mp->fix_size = fix->fix_size;
7265 mp->mode = fix->mode;
7266 mp->value = fix->value;
7267 mp->refcount = 1;
7268 mp->max_address = minipool_barrier->address + 65536;
7270 mp->min_address = min_address;
7272 if (min_mp == NULL)
7274 mp->prev = NULL;
7275 mp->next = minipool_vector_head;
7277 if (mp->next == NULL)
7279 minipool_vector_tail = mp;
7280 minipool_vector_label = gen_label_rtx ();
7282 else
7283 mp->next->prev = mp;
7285 minipool_vector_head = mp;
7287 else
7289 mp->next = min_mp->next;
7290 mp->prev = min_mp;
7291 min_mp->next = mp;
7293 if (mp->next != NULL)
7294 mp->next->prev = mp;
7295 else
7296 minipool_vector_tail = mp;
7299 /* Save the new entry. */
7300 min_mp = mp;
7302 if (mp->prev)
7303 mp = mp->prev;
7304 else
7305 mp->offset = 0;
7307 /* Scan over the following entries and adjust their offsets. */
7308 while (mp->next != NULL)
7310 if (mp->next->min_address < mp->min_address + mp->fix_size)
7311 mp->next->min_address = mp->min_address + mp->fix_size;
7313 if (mp->refcount)
7314 mp->next->offset = mp->offset + mp->fix_size;
7315 else
7316 mp->next->offset = mp->offset;
7318 mp = mp->next;
7321 return min_mp;
7324 static void
7325 assign_minipool_offsets (Mfix *barrier)
7327 HOST_WIDE_INT offset = 0;
7328 Mnode *mp;
7330 minipool_barrier = barrier;
7332 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
7334 mp->offset = offset;
7336 if (mp->refcount > 0)
7337 offset += mp->fix_size;
7341 /* Output the literal table */
7342 static void
7343 dump_minipool (rtx scan)
7345 Mnode * mp;
7346 Mnode * nmp;
7347 int align64 = 0;
7349 if (ARM_DOUBLEWORD_ALIGN)
7350 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
7351 if (mp->refcount > 0 && mp->fix_size == 8)
7353 align64 = 1;
7354 break;
7357 if (dump_file)
7358 fprintf (dump_file,
7359 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
7360 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
7362 scan = emit_label_after (gen_label_rtx (), scan);
7363 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
7364 scan = emit_label_after (minipool_vector_label, scan);
7366 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
7368 if (mp->refcount > 0)
7370 if (dump_file)
7372 fprintf (dump_file,
7373 ";; Offset %u, min %ld, max %ld ",
7374 (unsigned) mp->offset, (unsigned long) mp->min_address,
7375 (unsigned long) mp->max_address);
7376 arm_print_value (dump_file, mp->value);
7377 fputc ('\n', dump_file);
7380 switch (mp->fix_size)
7382 #ifdef HAVE_consttable_1
7383 case 1:
7384 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
7385 break;
7387 #endif
7388 #ifdef HAVE_consttable_2
7389 case 2:
7390 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
7391 break;
7393 #endif
7394 #ifdef HAVE_consttable_4
7395 case 4:
7396 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
7397 break;
7399 #endif
7400 #ifdef HAVE_consttable_8
7401 case 8:
7402 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
7403 break;
7405 #endif
7406 default:
7407 gcc_unreachable ();
7411 nmp = mp->next;
7412 free (mp);
7415 minipool_vector_head = minipool_vector_tail = NULL;
7416 scan = emit_insn_after (gen_consttable_end (), scan);
7417 scan = emit_barrier_after (scan);
7420 /* Return the cost of forcibly inserting a barrier after INSN. */
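/* Smaller values mark better places to break the insn stream;
   create_fix_barrier below keeps the cheapest location it finds within
   range.  */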
7421 static int
7422 arm_barrier_cost (rtx insn)
7424 /* Basing the location of the pool on the loop depth is preferable,
7425 but at the moment, the basic block information seems to be
7426 corrupt by this stage of the compilation. */
7427 int base_cost = 50;
7428 rtx next = next_nonnote_insn (insn);
7430 if (next != NULL && GET_CODE (next) == CODE_LABEL)
7431 base_cost -= 20;
7433 switch (GET_CODE (insn))
7435 case CODE_LABEL:
7436 /* It will always be better to place the table before the label, rather
7437 than after it. */
7438 return 50;
7440 case INSN:
7441 case CALL_INSN:
7442 return base_cost;
7444 case JUMP_INSN:
7445 return base_cost - 10;
7447 default:
7448 return base_cost + 10;
7452 /* Find the best place in the insn stream in the range
7453 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
7454 Create the barrier by inserting a jump and add a new fix entry for
7455 it. */
7456 static Mfix *
7457 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
7459 HOST_WIDE_INT count = 0;
7460 rtx barrier;
7461 rtx from = fix->insn;
7462 rtx selected = from;
7463 int selected_cost;
7464 HOST_WIDE_INT selected_address;
7465 Mfix * new_fix;
7466 HOST_WIDE_INT max_count = max_address - fix->address;
7467 rtx label = gen_label_rtx ();
7469 selected_cost = arm_barrier_cost (from);
7470 selected_address = fix->address;
7472 while (from && count < max_count)
7474 rtx tmp;
7475 int new_cost;
7477 /* This code shouldn't have been called if there was a natural barrier
7478 within range. */
7479 gcc_assert (GET_CODE (from) != BARRIER);
7481 /* Count the length of this insn. */
7482 count += get_attr_length (from);
7484 /* If there is a jump table, add its length. */
7485 tmp = is_jump_table (from);
7486 if (tmp != NULL)
7488 count += get_jump_table_size (tmp);
7490 /* Jump tables aren't in a basic block, so base the cost on
7491 the dispatch insn. If we select this location, we will
7492 still put the pool after the table. */
7493 new_cost = arm_barrier_cost (from);
7495 if (count < max_count && new_cost <= selected_cost)
7497 selected = tmp;
7498 selected_cost = new_cost;
7499 selected_address = fix->address + count;
7502 /* Continue after the dispatch table. */
7503 from = NEXT_INSN (tmp);
7504 continue;
7507 new_cost = arm_barrier_cost (from);
7509 if (count < max_count && new_cost <= selected_cost)
7511 selected = from;
7512 selected_cost = new_cost;
7513 selected_address = fix->address + count;
7516 from = NEXT_INSN (from);
7519 /* Create a new JUMP_INSN that branches around a barrier. */
7520 from = emit_jump_insn_after (gen_jump (label), selected);
7521 JUMP_LABEL (from) = label;
7522 barrier = emit_barrier_after (from);
7523 emit_label_after (label, barrier);
7525 /* Create a minipool barrier entry for the new barrier. */
7526 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
7527 new_fix->insn = barrier;
7528 new_fix->address = selected_address;
7529 new_fix->next = fix->next;
7530 fix->next = new_fix;
7532 return new_fix;
7535 /* Record that there is a natural barrier in the insn stream at
7536 ADDRESS. */
7537 static void
7538 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
7540 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
7542 fix->insn = insn;
7543 fix->address = address;
7545 fix->next = NULL;
7546 if (minipool_fix_head != NULL)
7547 minipool_fix_tail->next = fix;
7548 else
7549 minipool_fix_head = fix;
7551 minipool_fix_tail = fix;
7554 /* Record INSN, which will need fixing up to load a value from the
7555 minipool. ADDRESS is the offset of the insn since the start of the
7556 function; LOC is a pointer to the part of the insn which requires
7557 fixing; VALUE is the constant that must be loaded, which is of type
7558 MODE. */
7559 static void
7560 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
7561 enum machine_mode mode, rtx value)
7563 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
7565 #ifdef AOF_ASSEMBLER
7566 /* PIC symbol references need to be converted into offsets into the
7567 based area. */
7568 /* XXX This shouldn't be done here. */
7569 if (flag_pic && GET_CODE (value) == SYMBOL_REF)
7570 value = aof_pic_entry (value);
7571 #endif /* AOF_ASSEMBLER */
7573 fix->insn = insn;
7574 fix->address = address;
7575 fix->loc = loc;
7576 fix->mode = mode;
7577 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
7578 fix->value = value;
7579 fix->forwards = get_attr_pool_range (insn);
7580 fix->backwards = get_attr_neg_pool_range (insn);
7581 fix->minipool = NULL;
7583 /* If an insn doesn't have a range defined for it, then it isn't
7584 expecting to be reworked by this code. Better to stop now than
7585 to generate duff assembly code. */
7586 gcc_assert (fix->forwards || fix->backwards);
7588 /* With AAPCS/iWMMXt enabled, the pool is aligned to an 8-byte boundary.
7589 So there might be an empty word before the start of the pool.
7590 Hence we reduce the forward range by 4 to allow for this
7591 possibility. */
7592 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size == 8)
7593 fix->forwards -= 4;
7595 if (dump_file)
7597 fprintf (dump_file,
7598 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
7599 GET_MODE_NAME (mode),
7600 INSN_UID (insn), (unsigned long) address,
7601 -1 * (long)fix->backwards, (long)fix->forwards);
7602 arm_print_value (dump_file, fix->value);
7603 fprintf (dump_file, "\n");
7606 /* Add it to the chain of fixes. */
7607 fix->next = NULL;
7609 if (minipool_fix_head != NULL)
7610 minipool_fix_tail->next = fix;
7611 else
7612 minipool_fix_head = fix;
7614 minipool_fix_tail = fix;
7617 /* Return the cost of synthesizing a 64-bit constant VAL inline.
7618 Returns the number of insns needed, or 99 if we don't know how to
7619 do it. */
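/* For instance, a 64-bit value whose two halves are each valid ARM
   immediates costs 2 (one data-processing insn per 32-bit half);
   halves that need several instructions to synthesize push the total
   correspondingly higher.  */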
7621 arm_const_double_inline_cost (rtx val)
7623 rtx lowpart, highpart;
7624 enum machine_mode mode;
7626 mode = GET_MODE (val);
7628 if (mode == VOIDmode)
7629 mode = DImode;
7631 gcc_assert (GET_MODE_SIZE (mode) == 8);
7633 lowpart = gen_lowpart (SImode, val);
7634 highpart = gen_highpart_mode (SImode, mode, val);
7636 gcc_assert (GET_CODE (lowpart) == CONST_INT);
7637 gcc_assert (GET_CODE (highpart) == CONST_INT);
7639 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
7640 NULL_RTX, NULL_RTX, 0, 0)
7641 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
7642 NULL_RTX, NULL_RTX, 0, 0));
7645 /* Return true if it is worthwhile to split a 64-bit constant into two
7646 32-bit operations. This is the case if optimizing for size, or
7647 if we have load delay slots, or if one 32-bit part can be done with
7648 a single data operation. */
7649 bool
7650 arm_const_double_by_parts (rtx val)
7652 enum machine_mode mode = GET_MODE (val);
7653 rtx part;
7655 if (optimize_size || arm_ld_sched)
7656 return true;
7658 if (mode == VOIDmode)
7659 mode = DImode;
7661 part = gen_highpart_mode (SImode, mode, val);
7663 gcc_assert (GET_CODE (part) == CONST_INT);
7665 if (const_ok_for_arm (INTVAL (part))
7666 || const_ok_for_arm (~INTVAL (part)))
7667 return true;
7669 part = gen_lowpart (SImode, val);
7671 gcc_assert (GET_CODE (part) == CONST_INT);
7673 if (const_ok_for_arm (INTVAL (part))
7674 || const_ok_for_arm (~INTVAL (part)))
7675 return true;
7677 return false;
7680 /* Scan INSN and note any of its operands that need fixing.
7681 If DO_PUSHES is false we do not actually push any of the fixups
7682 needed. The function returns TRUE if any fixups were needed/pushed.
7683 This is used by arm_memory_load_p() which needs to know about loads
7684 of constants that will be converted into minipool loads. */
7685 static bool
7686 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
7688 bool result = false;
7689 int opno;
7691 extract_insn (insn);
7693 if (!constrain_operands (1))
7694 fatal_insn_not_found (insn);
7696 if (recog_data.n_alternatives == 0)
7697 return false;
7699 /* Fill in recog_op_alt with information about the constraints of
7700 this insn. */
7701 preprocess_constraints ();
7703 for (opno = 0; opno < recog_data.n_operands; opno++)
7705 /* Things we need to fix can only occur in inputs. */
7706 if (recog_data.operand_type[opno] != OP_IN)
7707 continue;
7709 /* If this alternative is a memory reference, then any mention
7710 of constants in this alternative is really to fool reload
7711 into allowing us to accept one there. We need to fix them up
7712 now so that we output the right code. */
7713 if (recog_op_alt[opno][which_alternative].memory_ok)
7715 rtx op = recog_data.operand[opno];
7717 if (CONSTANT_P (op))
7719 if (do_pushes)
7720 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
7721 recog_data.operand_mode[opno], op);
7722 result = true;
7724 else if (GET_CODE (op) == MEM
7725 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
7726 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
7728 if (do_pushes)
7730 rtx cop = avoid_constant_pool_reference (op);
7732 /* Casting the address of something to a mode narrower
7733 than a word can cause avoid_constant_pool_reference()
7734 to return the pool reference itself. That's no good to
7735 us here. Let's just hope that we can use the
7736 constant pool value directly. */
7737 if (op == cop)
7738 cop = get_pool_constant (XEXP (op, 0));
7740 push_minipool_fix (insn, address,
7741 recog_data.operand_loc[opno],
7742 recog_data.operand_mode[opno], cop);
7745 result = true;
7750 return result;
7753 /* GCC puts the pool in the wrong place for ARM, since we can only
7754 load addresses a limited distance around the pc. We do some
7755 special munging to move the constant pool values to the correct
7756 point in the code. */
7757 static void
7758 arm_reorg (void)
7760 rtx insn;
7761 HOST_WIDE_INT address = 0;
7762 Mfix * fix;
7764 minipool_fix_head = minipool_fix_tail = NULL;
7766 /* The first insn must always be a note, or the code below won't
7767 scan it properly. */
7768 insn = get_insns ();
7769 gcc_assert (GET_CODE (insn) == NOTE);
7771 /* Scan all the insns and record the operands that will need fixing. */
7772 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
7774 if (TARGET_CIRRUS_FIX_INVALID_INSNS
7775 && (arm_cirrus_insn_p (insn)
7776 || GET_CODE (insn) == JUMP_INSN
7777 || arm_memory_load_p (insn)))
7778 cirrus_reorg (insn);
7780 if (GET_CODE (insn) == BARRIER)
7781 push_minipool_barrier (insn, address);
7782 else if (INSN_P (insn))
7784 rtx table;
7786 note_invalid_constants (insn, address, true);
7787 address += get_attr_length (insn);
7789 /* If the insn is a vector jump, add the size of the table
7790 and skip the table. */
7791 if ((table = is_jump_table (insn)) != NULL)
7793 address += get_jump_table_size (table);
7794 insn = table;
7799 fix = minipool_fix_head;
7801 /* Now scan the fixups and perform the required changes. */
7802 while (fix)
7804 Mfix * ftmp;
7805 Mfix * fdel;
7806 Mfix * last_added_fix;
7807 Mfix * last_barrier = NULL;
7808 Mfix * this_fix;
7810 /* Skip any further barriers before the next fix. */
7811 while (fix && GET_CODE (fix->insn) == BARRIER)
7812 fix = fix->next;
7814 /* No more fixes. */
7815 if (fix == NULL)
7816 break;
7818 last_added_fix = NULL;
7820 for (ftmp = fix; ftmp; ftmp = ftmp->next)
7822 if (GET_CODE (ftmp->insn) == BARRIER)
7824 if (ftmp->address >= minipool_vector_head->max_address)
7825 break;
7827 last_barrier = ftmp;
7829 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
7830 break;
7832 last_added_fix = ftmp; /* Keep track of the last fix added. */
7835 /* If we found a barrier, drop back to that; any fixes that we
7836 could have reached but come after the barrier will now go in
7837 the next mini-pool. */
7838 if (last_barrier != NULL)
7840 /* Reduce the refcount for those fixes that won't go into this
7841 pool after all. */
7842 for (fdel = last_barrier->next;
7843 fdel && fdel != ftmp;
7844 fdel = fdel->next)
7846 fdel->minipool->refcount--;
7847 fdel->minipool = NULL;
7850 ftmp = last_barrier;
7852 else
7854 /* ftmp is the first fix that we can't fit into this pool and
7855 there are no natural barriers that we could use. Insert a
7856 new barrier in the code somewhere between the previous
7857 fix and this one, and arrange to jump around it. */
7858 HOST_WIDE_INT max_address;
7860 /* The last item on the list of fixes must be a barrier, so
7861 we can never run off the end of the list of fixes without
7862 last_barrier being set. */
7863 gcc_assert (ftmp);
7865 max_address = minipool_vector_head->max_address;
7866 /* Check that there isn't another fix that is in range that
7867 we couldn't fit into this pool because the pool was
7868 already too large: we need to put the pool before such an
7869 instruction. */
7870 if (ftmp->address < max_address)
7871 max_address = ftmp->address;
7873 last_barrier = create_fix_barrier (last_added_fix, max_address);
7876 assign_minipool_offsets (last_barrier);
7878 while (ftmp)
7880 if (GET_CODE (ftmp->insn) != BARRIER
7881 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
7882 == NULL))
7883 break;
7885 ftmp = ftmp->next;
7888 /* Scan over the fixes we have identified for this pool, fixing them
7889 up and adding the constants to the pool itself. */
7890 for (this_fix = fix; this_fix && ftmp != this_fix;
7891 this_fix = this_fix->next)
7892 if (GET_CODE (this_fix->insn) != BARRIER)
7894 rtx addr
7895 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
7896 minipool_vector_label),
7897 this_fix->minipool->offset);
7898 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
7901 dump_minipool (last_barrier->insn);
7902 fix = ftmp;
7905 /* From now on we must synthesize any constants that we can't handle
7906 directly. This can happen if the RTL gets split during final
7907 instruction generation. */
7908 after_arm_reorg = 1;
7910 /* Free the minipool memory. */
7911 obstack_free (&minipool_obstack, minipool_startobj);
7914 /* Routines to output assembly language. */
7916 /* If the rtx is the correct value then return the string of the number.
7917 In this way we can ensure that valid double constants are generated even
7918 when cross compiling. */
7919 const char *
7920 fp_immediate_constant (rtx x)
7922 REAL_VALUE_TYPE r;
7923 int i;
7925 if (!fp_consts_inited)
7926 init_fp_table ();
7928 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7929 for (i = 0; i < 8; i++)
7930 if (REAL_VALUES_EQUAL (r, values_fp[i]))
7931 return strings_fp[i];
7933 gcc_unreachable ();
7936 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
7937 static const char *
7938 fp_const_from_val (REAL_VALUE_TYPE *r)
7940 int i;
7942 if (!fp_consts_inited)
7943 init_fp_table ();
7945 for (i = 0; i < 8; i++)
7946 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
7947 return strings_fp[i];
7949 gcc_unreachable ();
7952 /* Output the operands of a LDM/STM instruction to STREAM.
7953 MASK is the ARM register set mask of which only bits 0-15 are important.
7954 REG is the base register, either the frame pointer or the stack pointer;
7955 INSTR is the possibly suffixed load or store instruction. */
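/* For example, a MASK of 0x4070 with the stack pointer as REG and an
   INSTR string such as "stmfd\t%r!" would produce
        stmfd   sp!, {r4, r5, r6, lr}
   on STREAM.  */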
7957 static void
7958 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
7959 unsigned long mask)
7961 unsigned i;
7962 bool not_first = FALSE;
7964 fputc ('\t', stream);
7965 asm_fprintf (stream, instr, reg);
7966 fputs (", {", stream);
7968 for (i = 0; i <= LAST_ARM_REGNUM; i++)
7969 if (mask & (1 << i))
7971 if (not_first)
7972 fprintf (stream, ", ");
7974 asm_fprintf (stream, "%r", i);
7975 not_first = TRUE;
7978 fprintf (stream, "}\n");
7982 /* Output a FLDMX instruction to STREAM.
7983 BASE is the register containing the address.
7984 REG and COUNT specify the register range.
7985 Extra registers may be added to avoid hardware bugs. */
7987 static void
7988 arm_output_fldmx (FILE * stream, unsigned int base, int reg, int count)
7990 int i;
7992 /* Work around the ARM10 VFPr1 bug. */
7993 if (count == 2 && !arm_arch6)
7995 if (reg == 15)
7996 reg--;
7997 count++;
8000 fputc ('\t', stream);
8001 asm_fprintf (stream, "fldmfdx\t%r!, {", base);
8003 for (i = reg; i < reg + count; i++)
8005 if (i > reg)
8006 fputs (", ", stream);
8007 asm_fprintf (stream, "d%d", i);
8009 fputs ("}\n", stream);
8014 /* Output the assembly for a VFP store multiple (FSTMX). */
8016 const char *
8017 vfp_output_fstmx (rtx * operands)
8019 char pattern[100];
8020 int p;
8021 int base;
8022 int i;
8024 strcpy (pattern, "fstmfdx\t%m0!, {%P1");
8025 p = strlen (pattern);
8027 gcc_assert (GET_CODE (operands[1]) == REG);
8029 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
8030 for (i = 1; i < XVECLEN (operands[2], 0); i++)
8032 p += sprintf (&pattern[p], ", d%d", base + i);
8034 strcpy (&pattern[p], "}");
8036 output_asm_insn (pattern, operands);
8037 return "";
8041 /* Emit RTL to save a block of VFP register pairs to the stack. Returns the
8042 number of bytes pushed. */
8044 static int
8045 vfp_emit_fstmx (int base_reg, int count)
8047 rtx par;
8048 rtx dwarf;
8049 rtx tmp, reg;
8050 int i;
8052 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
8053 register pairs are stored by a store multiple insn. We avoid this
8054 by pushing an extra pair. */
8055 if (count == 2 && !arm_arch6)
8057 if (base_reg == LAST_VFP_REGNUM - 3)
8058 base_reg -= 2;
8059 count++;
8062 /* ??? The frame layout is implementation defined. We describe
8063 standard format 1 (equivalent to a FSTMD insn and unused pad word).
8064 We really need some way of representing the whole block so that the
8065 unwinder can figure it out at runtime. */
8066 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
8067 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
8069 reg = gen_rtx_REG (DFmode, base_reg);
8070 base_reg += 2;
8072 XVECEXP (par, 0, 0)
8073 = gen_rtx_SET (VOIDmode,
8074 gen_rtx_MEM (BLKmode,
8075 gen_rtx_PRE_DEC (BLKmode, stack_pointer_rtx)),
8076 gen_rtx_UNSPEC (BLKmode,
8077 gen_rtvec (1, reg),
8078 UNSPEC_PUSH_MULT));
8080 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8081 gen_rtx_PLUS (SImode, stack_pointer_rtx,
8082 GEN_INT (-(count * 8 + 4))));
8083 RTX_FRAME_RELATED_P (tmp) = 1;
8084 XVECEXP (dwarf, 0, 0) = tmp;
8086 tmp = gen_rtx_SET (VOIDmode,
8087 gen_rtx_MEM (DFmode, stack_pointer_rtx),
8088 reg);
8089 RTX_FRAME_RELATED_P (tmp) = 1;
8090 XVECEXP (dwarf, 0, 1) = tmp;
8092 for (i = 1; i < count; i++)
8094 reg = gen_rtx_REG (DFmode, base_reg);
8095 base_reg += 2;
8096 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
8098 tmp = gen_rtx_SET (VOIDmode,
8099 gen_rtx_MEM (DFmode,
8100 gen_rtx_PLUS (SImode,
8101 stack_pointer_rtx,
8102 GEN_INT (i * 8))),
8103 reg);
8104 RTX_FRAME_RELATED_P (tmp) = 1;
8105 XVECEXP (dwarf, 0, i + 1) = tmp;
8108 par = emit_insn (par);
8109 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
8110 REG_NOTES (par));
8111 RTX_FRAME_RELATED_P (par) = 1;
8113 return count * 8 + 4;
8117 /* Output a 'call' insn. */
8118 const char *
8119 output_call (rtx *operands)
8121 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
8123 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
8124 if (REGNO (operands[0]) == LR_REGNUM)
8126 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
8127 output_asm_insn ("mov%?\t%0, %|lr", operands);
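/* Reading the PC in ARM state yields the address of the current
   instruction plus 8, so the "mov lr, pc" below sets the return address
   to the instruction immediately following the branch that it precedes. */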
8130 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
8132 if (TARGET_INTERWORK || arm_arch4t)
8133 output_asm_insn ("bx%?\t%0", operands);
8134 else
8135 output_asm_insn ("mov%?\t%|pc, %0", operands);
8137 return "";
8140 /* Output a 'call' insn that is a reference in memory. */
8141 const char *
8142 output_call_mem (rtx *operands)
8144 if (TARGET_INTERWORK && !arm_arch5)
8146 output_asm_insn ("ldr%?\t%|ip, %0", operands);
8147 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
8148 output_asm_insn ("bx%?\t%|ip", operands);
8150 else if (regno_use_in (LR_REGNUM, operands[0]))
8152 /* LR is used in the memory address. We load the address in the
8153 first instruction. It's safe to use IP as the target of the
8154 load since the call will kill it anyway. */
8155 output_asm_insn ("ldr%?\t%|ip, %0", operands);
8156 if (arm_arch5)
8157 output_asm_insn ("blx%?\t%|ip", operands);
8158 else
8160 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
8161 if (arm_arch4t)
8162 output_asm_insn ("bx%?\t%|ip", operands);
8163 else
8164 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
8167 else
8169 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
8170 output_asm_insn ("ldr%?\t%|pc, %0", operands);
8173 return "";
8177 /* Output a move from ARM registers to an FPA register.
8178 OPERANDS[0] is an FPA register.
8179 OPERANDS[1] is the first of three consecutive ARM registers. */
8180 const char *
8181 output_mov_long_double_fpa_from_arm (rtx *operands)
8183 int arm_reg0 = REGNO (operands[1]);
8184 rtx ops[3];
8186 gcc_assert (arm_reg0 != IP_REGNUM);
8188 ops[0] = gen_rtx_REG (SImode, arm_reg0);
8189 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
8190 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
8192 output_asm_insn ("stm%?fd\t%|sp!, {%0, %1, %2}", ops);
8193 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
8195 return "";
8198 /* Output a move from an FPA register to ARM registers.
8199 OPERANDS[0] is the first of three consecutive ARM registers.
8200 OPERANDS[1] is an FPA register. */
8201 const char *
8202 output_mov_long_double_arm_from_fpa (rtx *operands)
8204 int arm_reg0 = REGNO (operands[0]);
8205 rtx ops[3];
8207 gcc_assert (arm_reg0 != IP_REGNUM);
8209 ops[0] = gen_rtx_REG (SImode, arm_reg0);
8210 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
8211 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
8213 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
8214 output_asm_insn ("ldm%?fd\t%|sp!, {%0, %1, %2}", ops);
8215 return "";
8218 /* Output a move of a long double from ARM registers to ARM registers.
8219 OPERANDS[0] is the destination.
8220 OPERANDS[1] is the source. */
8221 const char *
8222 output_mov_long_double_arm_from_arm (rtx *operands)
8224 /* We have to be careful here because the two might overlap. */
8225 int dest_start = REGNO (operands[0]);
8226 int src_start = REGNO (operands[1]);
8227 rtx ops[2];
8228 int i;
8230 if (dest_start < src_start)
8232 for (i = 0; i < 3; i++)
8234 ops[0] = gen_rtx_REG (SImode, dest_start + i);
8235 ops[1] = gen_rtx_REG (SImode, src_start + i);
8236 output_asm_insn ("mov%?\t%0, %1", ops);
8239 else
8241 for (i = 2; i >= 0; i--)
8243 ops[0] = gen_rtx_REG (SImode, dest_start + i);
8244 ops[1] = gen_rtx_REG (SImode, src_start + i);
8245 output_asm_insn ("mov%?\t%0, %1", ops);
8249 return "";
8253 /* Output a move from ARM registers to an FPA register.
8254 OPERANDS[0] is an FPA register.
8255 OPERANDS[1] is the first register of an ARM register pair. */
8256 const char *
8257 output_mov_double_fpa_from_arm (rtx *operands)
8259 int arm_reg0 = REGNO (operands[1]);
8260 rtx ops[2];
8262 gcc_assert (arm_reg0 != IP_REGNUM);
8264 ops[0] = gen_rtx_REG (SImode, arm_reg0);
8265 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
8266 output_asm_insn ("stm%?fd\t%|sp!, {%0, %1}", ops);
8267 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
8268 return "";
8271 /* Output a move from an FPA register to ARM registers.
8272 OPERANDS[0] is the first register of an ARM register pair.
8273 OPERANDS[1] is an FPA register. */
8274 const char *
8275 output_mov_double_arm_from_fpa (rtx *operands)
8277 int arm_reg0 = REGNO (operands[0]);
8278 rtx ops[2];
8280 gcc_assert (arm_reg0 != IP_REGNUM);
8282 ops[0] = gen_rtx_REG (SImode, arm_reg0);
8283 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
8284 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
8285 output_asm_insn ("ldm%?fd\t%|sp!, {%0, %1}", ops);
8286 return "";
8289 /* Output a move between double words.
8290 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
8291 or MEM<-REG and all MEMs must be offsettable addresses. */
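/* (In the templates below, the %m and %M operand codes are expanded by
   arm_print_operand elsewhere in this file: %m prints the base register
   of a memory operand and %M the register range covered by a
   multi-register operand.) */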
8292 const char *
8293 output_move_double (rtx *operands)
8295 enum rtx_code code0 = GET_CODE (operands[0]);
8296 enum rtx_code code1 = GET_CODE (operands[1]);
8297 rtx otherops[3];
8299 if (code0 == REG)
8301 int reg0 = REGNO (operands[0]);
8303 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
8305 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
8307 switch (GET_CODE (XEXP (operands[1], 0)))
8309 case REG:
8310 output_asm_insn ("ldm%?ia\t%m1, %M0", operands);
8311 break;
8313 case PRE_INC:
8314 gcc_assert (TARGET_LDRD);
8315 output_asm_insn ("ldr%?d\t%0, [%m1, #8]!", operands);
8316 break;
8318 case PRE_DEC:
8319 output_asm_insn ("ldm%?db\t%m1!, %M0", operands);
8320 break;
8322 case POST_INC:
8323 output_asm_insn ("ldm%?ia\t%m1!, %M0", operands);
8324 break;
8326 case POST_DEC:
8327 gcc_assert (TARGET_LDRD);
8328 output_asm_insn ("ldr%?d\t%0, [%m1], #-8", operands);
8329 break;
8331 case PRE_MODIFY:
8332 case POST_MODIFY:
8333 otherops[0] = operands[0];
8334 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
8335 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
8337 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
8339 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
8341 /* Registers overlap so split out the increment. */
8342 output_asm_insn ("add%?\t%1, %1, %2", otherops);
8343 output_asm_insn ("ldr%?d\t%0, [%1] @split", otherops);
8345 else
8346 output_asm_insn ("ldr%?d\t%0, [%1, %2]!", otherops);
8348 else
8350 /* We only allow constant increments, so this is safe. */
8351 output_asm_insn ("ldr%?d\t%0, [%1], %2", otherops);
8353 break;
8355 case LABEL_REF:
8356 case CONST:
8357 output_asm_insn ("adr%?\t%0, %1", operands);
8358 output_asm_insn ("ldm%?ia\t%0, %M0", operands);
8359 break;
8361 default:
8362 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
8363 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
8365 otherops[0] = operands[0];
8366 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
8367 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
8369 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
8371 if (GET_CODE (otherops[2]) == CONST_INT)
8373 switch ((int) INTVAL (otherops[2]))
8375 case -8:
8376 output_asm_insn ("ldm%?db\t%1, %M0", otherops);
8377 return "";
8378 case -4:
8379 output_asm_insn ("ldm%?da\t%1, %M0", otherops);
8380 return "";
8381 case 4:
8382 output_asm_insn ("ldm%?ib\t%1, %M0", otherops);
8383 return "";
8386 if (TARGET_LDRD
8387 && (GET_CODE (otherops[2]) == REG
8388 || (GET_CODE (otherops[2]) == CONST_INT
8389 && INTVAL (otherops[2]) > -256
8390 && INTVAL (otherops[2]) < 256)))
8392 if (reg_overlap_mentioned_p (otherops[0],
8393 otherops[2]))
8395 /* Swap base and index registers over to
8396 avoid a conflict. */
8397 otherops[1] = XEXP (XEXP (operands[1], 0), 1);
8398 otherops[2] = XEXP (XEXP (operands[1], 0), 0);
8401 /* If both registers conflict, it will usually
8402 have been fixed by a splitter. */
8403 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
8405 output_asm_insn ("add%?\t%1, %1, %2", otherops);
8406 output_asm_insn ("ldr%?d\t%0, [%1]",
8407 otherops);
8409 else
8410 output_asm_insn ("ldr%?d\t%0, [%1, %2]", otherops);
8411 return "";
8414 if (GET_CODE (otherops[2]) == CONST_INT)
8416 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
8417 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
8418 else
8419 output_asm_insn ("add%?\t%0, %1, %2", otherops);
8421 else
8422 output_asm_insn ("add%?\t%0, %1, %2", otherops);
8424 else
8425 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
8427 return "ldm%?ia\t%0, %M0";
8429 else
8431 otherops[1] = adjust_address (operands[1], SImode, 4);
8432 /* Take care of overlapping base/data reg. */
8433 if (reg_mentioned_p (operands[0], operands[1]))
8435 output_asm_insn ("ldr%?\t%0, %1", otherops);
8436 output_asm_insn ("ldr%?\t%0, %1", operands);
8438 else
8440 output_asm_insn ("ldr%?\t%0, %1", operands);
8441 output_asm_insn ("ldr%?\t%0, %1", otherops);
8446 else
8448 /* Constraints should ensure this. */
8449 gcc_assert (code0 == MEM && code1 == REG);
8450 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
8452 switch (GET_CODE (XEXP (operands[0], 0)))
8454 case REG:
8455 output_asm_insn ("stm%?ia\t%m0, %M1", operands);
8456 break;
8458 case PRE_INC:
8459 gcc_assert (TARGET_LDRD);
8460 output_asm_insn ("str%?d\t%1, [%m0, #8]!", operands);
8461 break;
8463 case PRE_DEC:
8464 output_asm_insn ("stm%?db\t%m0!, %M1", operands);
8465 break;
8467 case POST_INC:
8468 output_asm_insn ("stm%?ia\t%m0!, %M1", operands);
8469 break;
8471 case POST_DEC:
8472 gcc_assert (TARGET_LDRD);
8473 output_asm_insn ("str%?d\t%1, [%m0], #-8", operands);
8474 break;
8476 case PRE_MODIFY:
8477 case POST_MODIFY:
8478 otherops[0] = operands[1];
8479 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
8480 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
8482 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
8483 output_asm_insn ("str%?d\t%0, [%1, %2]!", otherops);
8484 else
8485 output_asm_insn ("str%?d\t%0, [%1], %2", otherops);
8486 break;
8488 case PLUS:
8489 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
8490 if (GET_CODE (otherops[2]) == CONST_INT)
8492 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
8494 case -8:
8495 output_asm_insn ("stm%?db\t%m0, %M1", operands);
8496 return "";
8498 case -4:
8499 output_asm_insn ("stm%?da\t%m0, %M1", operands);
8500 return "";
8502 case 4:
8503 output_asm_insn ("stm%?ib\t%m0, %M1", operands);
8504 return "";
8507 if (TARGET_LDRD
8508 && (GET_CODE (otherops[2]) == REG
8509 || (GET_CODE (otherops[2]) == CONST_INT
8510 && INTVAL (otherops[2]) > -256
8511 && INTVAL (otherops[2]) < 256)))
8513 otherops[0] = operands[1];
8514 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
8515 output_asm_insn ("str%?d\t%0, [%1, %2]", otherops);
8516 return "";
8518 /* Fall through */
8520 default:
8521 otherops[0] = adjust_address (operands[0], SImode, 4);
8522 otherops[1] = gen_rtx_REG (SImode, 1 + REGNO (operands[1]));
8523 output_asm_insn ("str%?\t%1, %0", operands);
8524 output_asm_insn ("str%?\t%1, %0", otherops);
8528 return "";
8531 /* Output an ADD r, s, #n where n may be too big for one instruction.
8532 If n is zero and the source and destination registers are the same, output nothing. */
8533 const char *
8534 output_add_immediate (rtx *operands)
8536 HOST_WIDE_INT n = INTVAL (operands[2]);
8538 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
8540 if (n < 0)
8541 output_multi_immediate (operands,
8542 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
8543 -n);
8544 else
8545 output_multi_immediate (operands,
8546 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
8550 return "";
8553 /* Output a multiple immediate operation.
8554 OPERANDS is the vector of operands referred to in the output patterns.
8555 INSTR1 is the output pattern to use for the first constant.
8556 INSTR2 is the output pattern to use for subsequent constants.
8557 IMMED_OP is the index of the constant slot in OPERANDS.
8558 N is the constant value. */
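/* The loop below scans N two bits at a time and, whenever it finds a set
   bit pair, peels off the 8-bit field starting at that position, so each
   emitted instruction uses an immediate the ARM can encode.  For example,
   N = 0x00ff00ff is output as two instructions using the immediates
   0xff and 0xff0000. */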
8559 static const char *
8560 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
8561 int immed_op, HOST_WIDE_INT n)
8563 #if HOST_BITS_PER_WIDE_INT > 32
8564 n &= 0xffffffff;
8565 #endif
8567 if (n == 0)
8569 /* Quick and easy output. */
8570 operands[immed_op] = const0_rtx;
8571 output_asm_insn (instr1, operands);
8573 else
8575 int i;
8576 const char * instr = instr1;
8578 /* Note that n is never zero here (which would give no output). */
8579 for (i = 0; i < 32; i += 2)
8581 if (n & (3 << i))
8583 operands[immed_op] = GEN_INT (n & (255 << i));
8584 output_asm_insn (instr, operands);
8585 instr = instr2;
8586 i += 6;
8591 return "";
8594 /* Return the appropriate ARM instruction for the operation code.
8595 The returned result should not be overwritten. OP is the rtx of the
8596 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
8597 was shifted. */
8598 const char *
8599 arithmetic_instr (rtx op, int shift_first_arg)
8601 switch (GET_CODE (op))
8603 case PLUS:
8604 return "add";
8606 case MINUS:
8607 return shift_first_arg ? "rsb" : "sub";
8609 case IOR:
8610 return "orr";
8612 case XOR:
8613 return "eor";
8615 case AND:
8616 return "and";
8618 default:
8619 gcc_unreachable ();
8623 /* Ensure valid constant shifts and return the appropriate shift mnemonic
8624 for the operation code. The returned result should not be overwritten.
8625 OP is the rtx code of the shift.
8626 On exit, *AMOUNTP will be -1 if the shift is by a register, otherwise it
8627 will be the constant shift amount. */
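/* For example, (ashiftrt X (const_int 3)) yields "asr" with *AMOUNTP == 3,
   while (mult X (const_int 8)) yields "asl" with *AMOUNTP == 3. */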
8628 static const char *
8629 shift_op (rtx op, HOST_WIDE_INT *amountp)
8631 const char * mnem;
8632 enum rtx_code code = GET_CODE (op);
8634 switch (GET_CODE (XEXP (op, 1)))
8636 case REG:
8637 case SUBREG:
8638 *amountp = -1;
8639 break;
8641 case CONST_INT:
8642 *amountp = INTVAL (XEXP (op, 1));
8643 break;
8645 default:
8646 gcc_unreachable ();
8649 switch (code)
8651 case ASHIFT:
8652 mnem = "asl";
8653 break;
8655 case ASHIFTRT:
8656 mnem = "asr";
8657 break;
8659 case LSHIFTRT:
8660 mnem = "lsr";
8661 break;
8663 case ROTATE:
8664 gcc_assert (*amountp != -1);
8665 *amountp = 32 - *amountp;
8667 /* Fall through. */
8669 case ROTATERT:
8670 mnem = "ror";
8671 break;
8673 case MULT:
8674 /* We never have to worry about the amount being other than a
8675 power of 2, since this case can never be reloaded from a reg. */
8676 gcc_assert (*amountp != -1);
8677 *amountp = int_log2 (*amountp);
8678 return "asl";
8680 default:
8681 gcc_unreachable ();
8684 if (*amountp != -1)
8686 /* This is not 100% correct, but follows from the desire to merge
8687 multiplication by a power of 2 with the recognizer for a
8688 shift. >=32 is not a valid shift for "asl", so we must try and
8689 output a shift that produces the correct arithmetical result.
8690 Using lsr #32 is identical except for the fact that the carry bit
8691 is not set correctly if we set the flags; but we never use the
8692 carry bit from such an operation, so we can ignore that. */
8693 if (code == ROTATERT)
8694 /* Rotate is just modulo 32. */
8695 *amountp &= 31;
8696 else if (*amountp != (*amountp & 31))
8698 if (code == ASHIFT)
8699 mnem = "lsr";
8700 *amountp = 32;
8703 /* Shifts of 0 are no-ops. */
8704 if (*amountp == 0)
8705 return NULL;
8708 return mnem;
8711 /* Obtain the shift count (log2) of the power of two POWER. */
8713 static HOST_WIDE_INT
8714 int_log2 (HOST_WIDE_INT power)
8716 HOST_WIDE_INT shift = 0;
8718 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
8720 gcc_assert (shift <= 31);
8721 shift++;
8724 return shift;
8727 /* Output a .ascii pseudo-op, keeping track of lengths. This is
8728 because /bin/as is horribly restrictive. The judgement about
8729 whether or not each character is 'printable' (and can be output as
8730 is) or not (and must be printed with an octal escape) must be made
8731 with reference to the *host* character set -- the situation is
8732 similar to that discussed in the comments above pp_c_char in
8733 c-pretty-print.c. */
8735 #define MAX_ASCII_LEN 51
8737 void
8738 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
8740 int i;
8741 int len_so_far = 0;
8743 fputs ("\t.ascii\t\"", stream);
8745 for (i = 0; i < len; i++)
8747 int c = p[i];
8749 if (len_so_far >= MAX_ASCII_LEN)
8751 fputs ("\"\n\t.ascii\t\"", stream);
8752 len_so_far = 0;
8755 if (ISPRINT (c))
8757 if (c == '\\' || c == '\"')
8759 putc ('\\', stream);
8760 len_so_far++;
8762 putc (c, stream);
8763 len_so_far++;
8765 else
8767 fprintf (stream, "\\%03o", c);
8768 len_so_far += 4;
8772 fputs ("\"\n", stream);
8775 /* Compute the register save mask for registers 0 through 12
8776 inclusive. This code is used by arm_compute_save_reg_mask. */
8778 static unsigned long
8779 arm_compute_save_reg0_reg12_mask (void)
8781 unsigned long func_type = arm_current_func_type ();
8782 unsigned long save_reg_mask = 0;
8783 unsigned int reg;
8785 if (IS_INTERRUPT (func_type))
8787 unsigned int max_reg;
8788 /* Interrupt functions must not corrupt any registers,
8789 even call clobbered ones. If this is a leaf function
8790 we can just examine the registers used by the RTL, but
8791 otherwise we have to assume that whatever function is
8792 called might clobber anything, and so we have to save
8793 all the call-clobbered registers as well. */
8794 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
8795 /* FIQ handlers have registers r8 - r12 banked, so
8796 we only need to check r0 - r7.  Normal ISRs only
8797 bank r14 and r15, so we must check up to r12.
8798 r13 is the stack pointer which is always preserved,
8799 so we do not need to consider it here. */
8800 max_reg = 7;
8801 else
8802 max_reg = 12;
8804 for (reg = 0; reg <= max_reg; reg++)
8805 if (regs_ever_live[reg]
8806 || (! current_function_is_leaf && call_used_regs [reg]))
8807 save_reg_mask |= (1 << reg);
8809 /* Also save the pic base register if necessary. */
8810 if (flag_pic
8811 && !TARGET_SINGLE_PIC_BASE
8812 && current_function_uses_pic_offset_table)
8813 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
8815 else
8817 /* In the normal case we only need to save those registers
8818 which are call saved and which are used by this function. */
8819 for (reg = 0; reg <= 10; reg++)
8820 if (regs_ever_live[reg] && ! call_used_regs [reg])
8821 save_reg_mask |= (1 << reg);
8823 /* Handle the frame pointer as a special case. */
8824 if (! TARGET_APCS_FRAME
8825 && ! frame_pointer_needed
8826 && regs_ever_live[HARD_FRAME_POINTER_REGNUM]
8827 && ! call_used_regs[HARD_FRAME_POINTER_REGNUM])
8828 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
8830 /* If we aren't loading the PIC register,
8831 don't stack it even though it may be live. */
8832 if (flag_pic
8833 && !TARGET_SINGLE_PIC_BASE
8834 && (regs_ever_live[PIC_OFFSET_TABLE_REGNUM]
8835 || current_function_uses_pic_offset_table))
8836 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
8839 /* Save registers so the exception handler can modify them. */
8840 if (current_function_calls_eh_return)
8842 unsigned int i;
8844 for (i = 0; ; i++)
8846 reg = EH_RETURN_DATA_REGNO (i);
8847 if (reg == INVALID_REGNUM)
8848 break;
8849 save_reg_mask |= 1 << reg;
8853 return save_reg_mask;
8856 /* Compute a bit mask of which registers need to be
8857 saved on the stack for the current function. */
8859 static unsigned long
8860 arm_compute_save_reg_mask (void)
8862 unsigned int save_reg_mask = 0;
8863 unsigned long func_type = arm_current_func_type ();
8865 if (IS_NAKED (func_type))
8866 /* This should never really happen. */
8867 return 0;
8869 /* If we are creating a stack frame, then we must save the frame pointer,
8870 IP (which will hold the old stack pointer), LR and the PC. */
8871 if (frame_pointer_needed)
8872 save_reg_mask |=
8873 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
8874 | (1 << IP_REGNUM)
8875 | (1 << LR_REGNUM)
8876 | (1 << PC_REGNUM);
8878 /* Volatile functions do not return, so there
8879 is no need to save any other registers. */
8880 if (IS_VOLATILE (func_type))
8881 return save_reg_mask;
8883 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
8885 /* Decide if we need to save the link register.
8886 Interrupt routines have their own banked link register,
8887 so they never need to save it.
8888 Otherwise if we do not use the link register we do not need to save
8889 it. If we are pushing other registers onto the stack however, we
8890 can save an instruction in the epilogue by pushing the link register
8891 now and then popping it back into the PC. This incurs extra memory
8892 accesses though, so we only do it when optimizing for size, and only
8893 if we know that we will not need a fancy return sequence. */
8894 if (regs_ever_live [LR_REGNUM]
8895 || (save_reg_mask
8896 && optimize_size
8897 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
8898 && !current_function_calls_eh_return))
8899 save_reg_mask |= 1 << LR_REGNUM;
8901 if (cfun->machine->lr_save_eliminated)
8902 save_reg_mask &= ~ (1 << LR_REGNUM);
8904 if (TARGET_REALLY_IWMMXT
8905 && ((bit_count (save_reg_mask)
8906 + ARM_NUM_INTS (current_function_pretend_args_size)) % 2) != 0)
8908 unsigned int reg;
8910 /* The total number of registers that are going to be pushed
8911 onto the stack is odd. We need to ensure that the stack
8912 is 64-bit aligned before we start to save iWMMXt registers,
8913 and also before we start to create locals. (A local variable
8914 might be a double or long long which we will load/store using
8915 an iWMMXt instruction). Therefore we need to push another
8916 ARM register, so that the stack will be 64-bit aligned. We
8917 try to avoid using the arg registers (r0 - r3) as they might be
8918 used to pass values in a tail call. */
8919 for (reg = 4; reg <= 12; reg++)
8920 if ((save_reg_mask & (1 << reg)) == 0)
8921 break;
8923 if (reg <= 12)
8924 save_reg_mask |= (1 << reg);
8925 else
8927 cfun->machine->sibcall_blocked = 1;
8928 save_reg_mask |= (1 << 3);
8932 return save_reg_mask;
8936 /* Compute a bit mask of which registers need to be
8937 saved on the stack for the current Thumb function. */
8938 static unsigned long
8939 thumb_compute_save_reg_mask (void)
8941 unsigned long mask;
8942 unsigned reg;
8944 mask = 0;
8945 for (reg = 0; reg < 12; reg ++)
8946 if (regs_ever_live[reg] && !call_used_regs[reg])
8947 mask |= 1 << reg;
8949 if (flag_pic && !TARGET_SINGLE_PIC_BASE)
8950 mask |= (1 << PIC_OFFSET_TABLE_REGNUM);
8952 if (TARGET_SINGLE_PIC_BASE)
8953 mask &= ~(1 << arm_pic_register);
8955 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
8956 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
8957 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
8959 /* LR will also be pushed if any lo regs are pushed. */
8960 if (mask & 0xff || thumb_force_lr_save ())
8961 mask |= (1 << LR_REGNUM);
8963 /* Make sure we have a low work register if we need one.
8964 We will need one if we are going to push a high register,
8965 but we are not currently intending to push a low register. */
8966 if ((mask & 0xff) == 0
8967 && ((mask & 0x0f00) || TARGET_BACKTRACE))
8969 /* Use thumb_find_work_register to choose which register
8970 we will use. If the register is live then we will
8971 have to push it. Use LAST_LO_REGNUM as our fallback
8972 choice for the register to select. */
8973 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
8975 if (! call_used_regs[reg])
8976 mask |= 1 << reg;
8979 return mask;
8983 /* Return the number of bytes required to save VFP registers. */
8984 static int
8985 arm_get_vfp_saved_size (void)
8987 unsigned int regno;
8988 int count;
8989 int saved;
8991 saved = 0;
8992 /* Space for saved VFP registers. */
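/* Each contiguous block of live D registers is saved with a single
   FSTMX, costing 8 bytes per register plus a 4-byte format word
   (see vfp_emit_fstmx above), hence the "count * 8 + 4" below. */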
8993 if (TARGET_HARD_FLOAT && TARGET_VFP)
8995 count = 0;
8996 for (regno = FIRST_VFP_REGNUM;
8997 regno < LAST_VFP_REGNUM;
8998 regno += 2)
9000 if ((!regs_ever_live[regno] || call_used_regs[regno])
9001 && (!regs_ever_live[regno + 1] || call_used_regs[regno + 1]))
9003 if (count > 0)
9005 /* Work around the ARM10 VFPr1 bug. */
9006 if (count == 2 && !arm_arch6)
9007 count++;
9008 saved += count * 8 + 4;
9010 count = 0;
9012 else
9013 count++;
9015 if (count > 0)
9017 if (count == 2 && !arm_arch6)
9018 count++;
9019 saved += count * 8 + 4;
9022 return saved;
9026 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
9027 everything bar the final return instruction. */
9028 const char *
9029 output_return_instruction (rtx operand, int really_return, int reverse)
9031 char conditional[10];
9032 char instr[100];
9033 unsigned reg;
9034 unsigned long live_regs_mask;
9035 unsigned long func_type;
9036 arm_stack_offsets *offsets;
9038 func_type = arm_current_func_type ();
9040 if (IS_NAKED (func_type))
9041 return "";
9043 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
9045 /* If this function was declared non-returning, and we have
9046 found a tail call, then we have to trust that the called
9047 function won't return. */
9048 if (really_return)
9050 rtx ops[2];
9052 /* Otherwise, trap an attempted return by aborting. */
9053 ops[0] = operand;
9054 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
9055 : "abort");
9056 assemble_external_libcall (ops[1]);
9057 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
9060 return "";
9063 gcc_assert (!current_function_calls_alloca || really_return);
9065 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
9067 return_used_this_function = 1;
9069 live_regs_mask = arm_compute_save_reg_mask ();
9071 if (live_regs_mask)
9073 const char * return_reg;
9075 /* If we do not have any special requirements for function exit
9076 (e.g. interworking, or ISR) then we can load the return address
9077 directly into the PC. Otherwise we must load it into LR. */
9078 if (really_return
9079 && ! TARGET_INTERWORK)
9080 return_reg = reg_names[PC_REGNUM];
9081 else
9082 return_reg = reg_names[LR_REGNUM];
9084 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
9086 /* There are three possible reasons for the IP register
9087 being saved. 1) a stack frame was created, in which case
9088 IP contains the old stack pointer, or 2) an ISR routine
9089 corrupted it, or 3) it was saved to align the stack on
9090 iWMMXt. In case 1, restore IP into SP, otherwise just
9091 restore IP. */
9092 if (frame_pointer_needed)
9094 live_regs_mask &= ~ (1 << IP_REGNUM);
9095 live_regs_mask |= (1 << SP_REGNUM);
9097 else
9098 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
9101 /* On some ARM architectures it is faster to use LDR rather than
9102 LDM to load a single register. On other architectures, the
9103 cost is the same. In 26 bit mode, or for exception handlers,
9104 we have to use LDM to load the PC so that the CPSR is also
9105 restored. */
9106 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
9107 if (live_regs_mask == (1U << reg))
9108 break;
9110 if (reg <= LAST_ARM_REGNUM
9111 && (reg != LR_REGNUM
9112 || ! really_return
9113 || ! IS_INTERRUPT (func_type)))
9115 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
9116 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
9118 else
9120 char *p;
9121 int first = 1;
9123 /* Generate the load multiple instruction to restore the
9124 registers. Note we can get here, even if
9125 frame_pointer_needed is true, but only if sp already
9126 points to the base of the saved core registers. */
9127 if (live_regs_mask & (1 << SP_REGNUM))
9129 unsigned HOST_WIDE_INT stack_adjust;
9131 offsets = arm_get_frame_offsets ();
9132 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
9133 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
9135 if (stack_adjust && arm_arch5)
9136 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
9137 else
9139 /* If we can't use ldmib (SA110 bug),
9140 then try to pop r3 instead. */
9141 if (stack_adjust)
9142 live_regs_mask |= 1 << 3;
9143 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
9146 else
9147 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
9149 p = instr + strlen (instr);
9151 for (reg = 0; reg <= SP_REGNUM; reg++)
9152 if (live_regs_mask & (1 << reg))
9154 int l = strlen (reg_names[reg]);
9156 if (first)
9157 first = 0;
9158 else
9160 memcpy (p, ", ", 2);
9161 p += 2;
9164 memcpy (p, "%|", 2);
9165 memcpy (p + 2, reg_names[reg], l);
9166 p += l + 2;
9169 if (live_regs_mask & (1 << LR_REGNUM))
9171 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
9172 /* If returning from an interrupt, restore the CPSR. */
9173 if (IS_INTERRUPT (func_type))
9174 strcat (p, "^");
9176 else
9177 strcpy (p, "}");
9180 output_asm_insn (instr, & operand);
9182 /* See if we need to generate an extra instruction to
9183 perform the actual function return. */
9184 if (really_return
9185 && func_type != ARM_FT_INTERWORKED
9186 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
9188 /* The return has already been handled
9189 by loading the LR into the PC. */
9190 really_return = 0;
9194 if (really_return)
9196 switch ((int) ARM_FUNC_TYPE (func_type))
9198 case ARM_FT_ISR:
9199 case ARM_FT_FIQ:
9200 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
9201 break;
9203 case ARM_FT_INTERWORKED:
9204 sprintf (instr, "bx%s\t%%|lr", conditional);
9205 break;
9207 case ARM_FT_EXCEPTION:
9208 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
9209 break;
9211 default:
9212 /* Use bx if it's available. */
9213 if (arm_arch5 || arm_arch4t)
9214 sprintf (instr, "bx%s\t%%|lr", conditional);
9215 else
9216 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
9217 break;
9220 output_asm_insn (instr, & operand);
9223 return "";
9226 /* Write the function name into the code section, directly preceding
9227 the function prologue.
9229 Code will be output similar to this:
9230 t0
9231 .ascii "arm_poke_function_name", 0
9232 .align
9233 t1
9234 .word 0xff000000 + (t1 - t0)
9235 arm_poke_function_name
9236 mov ip, sp
9237 stmfd sp!, {fp, ip, lr, pc}
9238 sub fp, ip, #4
9240 When performing a stack backtrace, code can inspect the value
9241 of 'pc' stored at 'fp' + 0. If the trace function then looks
9242 at location pc - 12 and the top 8 bits are set, then we know
9243 that there is a function name embedded immediately preceding this
9244 location, whose length is ((pc[-3]) & ~0xff000000).
9246 We assume that pc is declared as a pointer to an unsigned long.
9248 It is of no benefit to output the function name if we are assembling
9249 a leaf function. These function types will not contain a stack
9250 backtrace structure, therefore it is not possible to determine the
9251 function name. */
9252 void
9253 arm_poke_function_name (FILE *stream, const char *name)
9255 unsigned long alignlength;
9256 unsigned long length;
9257 rtx x;
9259 length = strlen (name) + 1;
9260 alignlength = ROUND_UP_WORD (length);
9262 ASM_OUTPUT_ASCII (stream, name, length);
9263 ASM_OUTPUT_ALIGN (stream, 2);
9264 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
9265 assemble_aligned_integer (UNITS_PER_WORD, x);
9268 /* Place some comments into the assembler stream
9269 describing the current function. */
9270 static void
9271 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
9273 unsigned long func_type;
9275 if (!TARGET_ARM)
9277 thumb_output_function_prologue (f, frame_size);
9278 return;
9281 /* Sanity check. */
9282 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
9284 func_type = arm_current_func_type ();
9286 switch ((int) ARM_FUNC_TYPE (func_type))
9288 default:
9289 case ARM_FT_NORMAL:
9290 break;
9291 case ARM_FT_INTERWORKED:
9292 asm_fprintf (f, "\t%@ Function supports interworking.\n");
9293 break;
9294 case ARM_FT_ISR:
9295 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
9296 break;
9297 case ARM_FT_FIQ:
9298 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
9299 break;
9300 case ARM_FT_EXCEPTION:
9301 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
9302 break;
9305 if (IS_NAKED (func_type))
9306 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
9308 if (IS_VOLATILE (func_type))
9309 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
9311 if (IS_NESTED (func_type))
9312 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
9314 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
9315 current_function_args_size,
9316 current_function_pretend_args_size, frame_size);
9318 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
9319 frame_pointer_needed,
9320 cfun->machine->uses_anonymous_args);
9322 if (cfun->machine->lr_save_eliminated)
9323 asm_fprintf (f, "\t%@ link register save eliminated.\n");
9325 if (current_function_calls_eh_return)
9326 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
9328 #ifdef AOF_ASSEMBLER
9329 if (flag_pic)
9330 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, PIC_OFFSET_TABLE_REGNUM);
9331 #endif
9333 return_used_this_function = 0;
9336 const char *
9337 arm_output_epilogue (rtx sibling)
9339 int reg;
9340 unsigned long saved_regs_mask;
9341 unsigned long func_type;
9342 /* Floats_offset is the offset from the "virtual" frame. In an APCS
9343 frame that is $fp + 4 for a non-variadic function. */
9344 int floats_offset = 0;
9345 rtx operands[3];
9346 FILE * f = asm_out_file;
9347 unsigned int lrm_count = 0;
9348 int really_return = (sibling == NULL);
9349 int start_reg;
9350 arm_stack_offsets *offsets;
9352 /* If we have already generated the return instruction
9353 then it is futile to generate anything else. */
9354 if (use_return_insn (FALSE, sibling) && return_used_this_function)
9355 return "";
9357 func_type = arm_current_func_type ();
9359 if (IS_NAKED (func_type))
9360 /* Naked functions don't have epilogues. */
9361 return "";
9363 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
9365 rtx op;
9367 /* A volatile function should never return. Call abort. */
9368 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
9369 assemble_external_libcall (op);
9370 output_asm_insn ("bl\t%a0", &op);
9372 return "";
9375 /* If we are throwing an exception, then we really must be doing a
9376 return, so we can't tail-call. */
9377 gcc_assert (!current_function_calls_eh_return || really_return);
9379 offsets = arm_get_frame_offsets ();
9380 saved_regs_mask = arm_compute_save_reg_mask ();
9382 if (TARGET_IWMMXT)
9383 lrm_count = bit_count (saved_regs_mask);
9385 floats_offset = offsets->saved_args;
9386 /* Compute how far away the floats will be. */
9387 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
9388 if (saved_regs_mask & (1 << reg))
9389 floats_offset += 4;
9391 if (frame_pointer_needed)
9393 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
9394 int vfp_offset = offsets->frame;
9396 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
9398 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
9399 if (regs_ever_live[reg] && !call_used_regs[reg])
9401 floats_offset += 12;
9402 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
9403 reg, FP_REGNUM, floats_offset - vfp_offset);
9406 else
9408 start_reg = LAST_FPA_REGNUM;
9410 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
9412 if (regs_ever_live[reg] && !call_used_regs[reg])
9414 floats_offset += 12;
9416 /* We can't unstack more than four registers at once. */
9417 if (start_reg - reg == 3)
9419 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
9420 reg, FP_REGNUM, floats_offset - vfp_offset);
9421 start_reg = reg - 1;
9424 else
9426 if (reg != start_reg)
9427 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
9428 reg + 1, start_reg - reg,
9429 FP_REGNUM, floats_offset - vfp_offset);
9430 start_reg = reg - 1;
9434 /* Just in case the last register checked also needs unstacking. */
9435 if (reg != start_reg)
9436 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
9437 reg + 1, start_reg - reg,
9438 FP_REGNUM, floats_offset - vfp_offset);
9441 if (TARGET_HARD_FLOAT && TARGET_VFP)
9443 int saved_size;
9445 /* The fldmx insn does not have base+offset addressing modes,
9446 so we use IP to hold the address. */
9447 saved_size = arm_get_vfp_saved_size ();
9449 if (saved_size > 0)
9451 floats_offset += saved_size;
9452 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
9453 FP_REGNUM, floats_offset - vfp_offset);
9455 start_reg = FIRST_VFP_REGNUM;
9456 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
9458 if ((!regs_ever_live[reg] || call_used_regs[reg])
9459 && (!regs_ever_live[reg + 1] || call_used_regs[reg + 1]))
9461 if (start_reg != reg)
9462 arm_output_fldmx (f, IP_REGNUM,
9463 (start_reg - FIRST_VFP_REGNUM) / 2,
9464 (reg - start_reg) / 2);
9465 start_reg = reg + 2;
9468 if (start_reg != reg)
9469 arm_output_fldmx (f, IP_REGNUM,
9470 (start_reg - FIRST_VFP_REGNUM) / 2,
9471 (reg - start_reg) / 2);
9474 if (TARGET_IWMMXT)
9476 /* The frame pointer is guaranteed to be non-double-word aligned.
9477 This is because it is set to (old_stack_pointer - 4) and the
9478 old_stack_pointer was double word aligned. Thus the offset to
9479 the iWMMXt registers to be loaded must itself not be a multiple
9480 of eight, so that the resultant address *is* double-word aligned.
9481 We can ignore floats_offset since that was already included in
9482 the live_regs_mask. */
9483 lrm_count += (lrm_count % 2 ? 2 : 1);
9485 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
9486 if (regs_ever_live[reg] && !call_used_regs[reg])
9488 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
9489 reg, FP_REGNUM, lrm_count * 4);
9490 lrm_count += 2;
9494 /* saved_regs_mask should contain the IP, which at the time of stack
9495 frame generation actually contains the old stack pointer. So a
9496 quick way to unwind the stack is just to pop the IP register directly
9497 into the stack pointer. */
9498 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
9499 saved_regs_mask &= ~ (1 << IP_REGNUM);
9500 saved_regs_mask |= (1 << SP_REGNUM);
9502 /* There are two registers left in saved_regs_mask - LR and PC. We
9503 only need to restore the LR register (the return address), but to
9504 save time we can load it directly into the PC, unless we need a
9505 special function exit sequence, or we are not really returning. */
9506 if (really_return
9507 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
9508 && !current_function_calls_eh_return)
9509 /* Delete the LR from the register mask, so that the LR on
9510 the stack is loaded into the PC in the register mask. */
9511 saved_regs_mask &= ~ (1 << LR_REGNUM);
9512 else
9513 saved_regs_mask &= ~ (1 << PC_REGNUM);
9515 /* We must use SP as the base register, because SP is one of the
9516 registers being restored. If an interrupt or page fault
9517 happens in the ldm instruction, the SP might or might not
9518 have been restored. That would be bad, as then SP will no
9519 longer indicate the safe area of stack, and we can get stack
9520 corruption. Using SP as the base register means that it will
9521 be reset correctly to the original value, should an interrupt
9522 occur. If the stack pointer already points at the right
9523 place, then omit the subtraction. */
9524 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
9525 || current_function_calls_alloca)
9526 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
9527 4 * bit_count (saved_regs_mask));
9528 print_multi_reg (f, "ldmfd\t%r", SP_REGNUM, saved_regs_mask);
9530 if (IS_INTERRUPT (func_type))
9531 /* Interrupt handlers will have pushed the
9532 IP onto the stack, so restore it now. */
9533 print_multi_reg (f, "ldmfd\t%r!", SP_REGNUM, 1 << IP_REGNUM);
9535 else
9537 /* Restore stack pointer if necessary. */
9538 if (offsets->outgoing_args != offsets->saved_regs)
9540 operands[0] = operands[1] = stack_pointer_rtx;
9541 operands[2] = GEN_INT (offsets->outgoing_args - offsets->saved_regs);
9542 output_add_immediate (operands);
9545 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
9547 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
9548 if (regs_ever_live[reg] && !call_used_regs[reg])
9549 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
9550 reg, SP_REGNUM);
9552 else
9554 start_reg = FIRST_FPA_REGNUM;
9556 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
9558 if (regs_ever_live[reg] && !call_used_regs[reg])
9560 if (reg - start_reg == 3)
9562 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
9563 start_reg, SP_REGNUM);
9564 start_reg = reg + 1;
9567 else
9569 if (reg != start_reg)
9570 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
9571 start_reg, reg - start_reg,
9572 SP_REGNUM);
9574 start_reg = reg + 1;
9578 /* Just in case the last register checked also needs unstacking. */
9579 if (reg != start_reg)
9580 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
9581 start_reg, reg - start_reg, SP_REGNUM);
9584 if (TARGET_HARD_FLOAT && TARGET_VFP)
9586 start_reg = FIRST_VFP_REGNUM;
9587 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
9589 if ((!regs_ever_live[reg] || call_used_regs[reg])
9590 && (!regs_ever_live[reg + 1] || call_used_regs[reg + 1]))
9592 if (start_reg != reg)
9593 arm_output_fldmx (f, SP_REGNUM,
9594 (start_reg - FIRST_VFP_REGNUM) / 2,
9595 (reg - start_reg) / 2);
9596 start_reg = reg + 2;
9599 if (start_reg != reg)
9600 arm_output_fldmx (f, SP_REGNUM,
9601 (start_reg - FIRST_VFP_REGNUM) / 2,
9602 (reg - start_reg) / 2);
9604 if (TARGET_IWMMXT)
9605 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
9606 if (regs_ever_live[reg] && !call_used_regs[reg])
9607 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
9609 /* If we can, restore the LR into the PC. */
9610 if (ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
9611 && really_return
9612 && current_function_pretend_args_size == 0
9613 && saved_regs_mask & (1 << LR_REGNUM)
9614 && !current_function_calls_eh_return)
9616 saved_regs_mask &= ~ (1 << LR_REGNUM);
9617 saved_regs_mask |= (1 << PC_REGNUM);
9620 /* Load the registers off the stack. If we only have one register
9621 to load use the LDR instruction - it is faster. */
9622 if (saved_regs_mask == (1 << LR_REGNUM))
9624 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
9626 else if (saved_regs_mask)
9628 if (saved_regs_mask & (1 << SP_REGNUM))
9629 /* Note - write back to the stack register is not enabled
9630 (i.e. "ldmfd sp!..."). We know that the stack pointer is
9631 in the list of registers and if we add writeback the
9632 instruction becomes UNPREDICTABLE. */
9633 print_multi_reg (f, "ldmfd\t%r", SP_REGNUM, saved_regs_mask);
9634 else
9635 print_multi_reg (f, "ldmfd\t%r!", SP_REGNUM, saved_regs_mask);
9638 if (current_function_pretend_args_size)
9640 /* Unwind the pre-pushed regs. */
9641 operands[0] = operands[1] = stack_pointer_rtx;
9642 operands[2] = GEN_INT (current_function_pretend_args_size);
9643 output_add_immediate (operands);
9647 /* We may have already restored PC directly from the stack. */
9648 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
9649 return "";
9651 /* Stack adjustment for exception handler. */
9652 if (current_function_calls_eh_return)
9653 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
9654 ARM_EH_STACKADJ_REGNUM);
9656 /* Generate the return instruction. */
9657 switch ((int) ARM_FUNC_TYPE (func_type))
9659 case ARM_FT_ISR:
9660 case ARM_FT_FIQ:
9661 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
9662 break;
9664 case ARM_FT_EXCEPTION:
9665 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
9666 break;
9668 case ARM_FT_INTERWORKED:
9669 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
9670 break;
9672 default:
9673 if (arm_arch5 || arm_arch4t)
9674 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
9675 else
9676 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
9677 break;
9680 return "";
9683 static void
9684 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
9685 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
9687 arm_stack_offsets *offsets;
9689 if (TARGET_THUMB)
9691 int regno;
9693 /* Emit any call-via-reg trampolines that are needed for v4t support
9694 of call_reg and call_value_reg type insns. */
9695 for (regno = 0; regno < LR_REGNUM; regno++)
9697 rtx label = cfun->machine->call_via[regno];
9699 if (label != NULL)
9701 function_section (current_function_decl);
9702 targetm.asm_out.internal_label (asm_out_file, "L",
9703 CODE_LABEL_NUMBER (label));
9704 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
9708 /* ??? Probably not safe to set this here, since it assumes that a
9709 function will be emitted as assembly immediately after we generate
9710 RTL for it. This does not happen for inline functions. */
9711 return_used_this_function = 0;
9713 else
9715 /* We need to take into account any stack-frame rounding. */
9716 offsets = arm_get_frame_offsets ();
9718 gcc_assert (!use_return_insn (FALSE, NULL)
9719 || !return_used_this_function
9720 || offsets->saved_regs == offsets->outgoing_args
9721 || frame_pointer_needed);
9723 /* Reset the ARM-specific per-function variables. */
9724 after_arm_reorg = 0;
9728 /* Generate and emit an insn that we will recognize as a push_multi.
9729 Unfortunately, since this insn does not reflect very well the actual
9730 semantics of the operation, we need to annotate the insn for the benefit
9731 of DWARF2 frame unwind information. */
9732 static rtx
9733 emit_multi_reg_push (unsigned long mask)
9735 int num_regs = 0;
9736 int num_dwarf_regs;
9737 int i, j;
9738 rtx par;
9739 rtx dwarf;
9740 int dwarf_par_index;
9741 rtx tmp, reg;
9743 for (i = 0; i <= LAST_ARM_REGNUM; i++)
9744 if (mask & (1 << i))
9745 num_regs++;
9747 gcc_assert (num_regs && num_regs <= 16);
9749 /* We don't record the PC in the dwarf frame information. */
9750 num_dwarf_regs = num_regs;
9751 if (mask & (1 << PC_REGNUM))
9752 num_dwarf_regs--;
9754 /* For the body of the insn we are going to generate an UNSPEC in
9755 parallel with several USEs. This allows the insn to be recognized
9756 by the push_multi pattern in the arm.md file. The insn looks
9757 something like this:
9759 (parallel [
9760 (set (mem:BLK (pre_dec:BLK (reg:SI sp)))
9761 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
9762 (use (reg:SI 11 fp))
9763 (use (reg:SI 12 ip))
9764 (use (reg:SI 14 lr))
9765 (use (reg:SI 15 pc))
9768 For the frame note however, we try to be more explicit and actually
9769 show each register being stored into the stack frame, plus a (single)
9770 decrement of the stack pointer. We do it this way in order to be
9771 friendly to the stack unwinding code, which only wants to see a single
9772 stack decrement per instruction. The RTL we generate for the note looks
9773 something like this:
9775 (sequence [
9776 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
9777 (set (mem:SI (reg:SI sp)) (reg:SI r4))
9778 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI fp))
9779 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI ip))
9780 (set (mem:SI (plus:SI (reg:SI sp) (const_int 12))) (reg:SI lr))
9783 This sequence is used both by the code to support stack unwinding for
9784 exception handlers and the code to generate dwarf2 frame debugging. */
9786 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
9787 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
9788 dwarf_par_index = 1;
9790 for (i = 0; i <= LAST_ARM_REGNUM; i++)
9792 if (mask & (1 << i))
9794 reg = gen_rtx_REG (SImode, i);
9796 XVECEXP (par, 0, 0)
9797 = gen_rtx_SET (VOIDmode,
9798 gen_rtx_MEM (BLKmode,
9799 gen_rtx_PRE_DEC (BLKmode,
9800 stack_pointer_rtx)),
9801 gen_rtx_UNSPEC (BLKmode,
9802 gen_rtvec (1, reg),
9803 UNSPEC_PUSH_MULT));
9805 if (i != PC_REGNUM)
9807 tmp = gen_rtx_SET (VOIDmode,
9808 gen_rtx_MEM (SImode, stack_pointer_rtx),
9809 reg);
9810 RTX_FRAME_RELATED_P (tmp) = 1;
9811 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
9812 dwarf_par_index++;
9815 break;
9819 for (j = 1, i++; j < num_regs; i++)
9821 if (mask & (1 << i))
9823 reg = gen_rtx_REG (SImode, i);
9825 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
9827 if (i != PC_REGNUM)
9829 tmp = gen_rtx_SET (VOIDmode,
9830 gen_rtx_MEM (SImode,
9831 plus_constant (stack_pointer_rtx,
9832 4 * j)),
9833 reg);
9834 RTX_FRAME_RELATED_P (tmp) = 1;
9835 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
9838 j++;
9842 par = emit_insn (par);
9844 tmp = gen_rtx_SET (SImode,
9845 stack_pointer_rtx,
9846 gen_rtx_PLUS (SImode,
9847 stack_pointer_rtx,
9848 GEN_INT (-4 * num_regs)));
9849 RTX_FRAME_RELATED_P (tmp) = 1;
9850 XVECEXP (dwarf, 0, 0) = tmp;
9852 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
9853 REG_NOTES (par));
9854 return par;
9857 /* Calculate the size of the return value that is passed in registers. */
9858 static int
9859 arm_size_return_regs (void)
9861 enum machine_mode mode;
9863 if (current_function_return_rtx != 0)
9864 mode = GET_MODE (current_function_return_rtx);
9865 else
9866 mode = DECL_MODE (DECL_RESULT (current_function_decl));
9868 return GET_MODE_SIZE (mode);
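/* Emit RTL to push COUNT FPA registers (XFmode, 12 bytes each) starting at
   BASE_REG onto the stack, and attach a DWARF frame note describing the
   individual stores and the single stack decrement, in the same way as
   emit_multi_reg_push above.  Returns the emitted insn. */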
9871 static rtx
9872 emit_sfm (int base_reg, int count)
9874 rtx par;
9875 rtx dwarf;
9876 rtx tmp, reg;
9877 int i;
9879 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
9880 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
9882 reg = gen_rtx_REG (XFmode, base_reg++);
9884 XVECEXP (par, 0, 0)
9885 = gen_rtx_SET (VOIDmode,
9886 gen_rtx_MEM (BLKmode,
9887 gen_rtx_PRE_DEC (BLKmode, stack_pointer_rtx)),
9888 gen_rtx_UNSPEC (BLKmode,
9889 gen_rtvec (1, reg),
9890 UNSPEC_PUSH_MULT));
9891 tmp = gen_rtx_SET (VOIDmode,
9892 gen_rtx_MEM (XFmode, stack_pointer_rtx), reg);
9893 RTX_FRAME_RELATED_P (tmp) = 1;
9894 XVECEXP (dwarf, 0, 1) = tmp;
9896 for (i = 1; i < count; i++)
9898 reg = gen_rtx_REG (XFmode, base_reg++);
9899 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
9901 tmp = gen_rtx_SET (VOIDmode,
9902 gen_rtx_MEM (XFmode,
9903 plus_constant (stack_pointer_rtx,
9904 i * 12)),
9905 reg);
9906 RTX_FRAME_RELATED_P (tmp) = 1;
9907 XVECEXP (dwarf, 0, i + 1) = tmp;
9910 tmp = gen_rtx_SET (VOIDmode,
9911 stack_pointer_rtx,
9912 gen_rtx_PLUS (SImode,
9913 stack_pointer_rtx,
9914 GEN_INT (-12 * count)));
9915 RTX_FRAME_RELATED_P (tmp) = 1;
9916 XVECEXP (dwarf, 0, 0) = tmp;
9918 par = emit_insn (par);
9919 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
9920 REG_NOTES (par));
9921 return par;
9925 /* Return true if the current function needs to save/restore LR. */
9927 static bool
9928 thumb_force_lr_save (void)
9930 return !cfun->machine->lr_save_eliminated
9931 && (!leaf_function_p ()
9932 || thumb_far_jump_used_p ()
9933 || regs_ever_live [LR_REGNUM]);
9937 /* Compute the distance from register FROM to register TO.
9938 These can be the arg pointer (26), the soft frame pointer (25),
9939 the stack pointer (13) or the hard frame pointer (11).
9940 In Thumb mode r7 is used as the hard frame pointer, if needed.
9941 Typical stack layout looks like this:
9943 old stack pointer -> | |
9944 ----
9945 | | \
9946 | | saved arguments for
9947 | | vararg functions
9948 | | /
9950 hard FP & arg pointer -> | | \
9951 | | stack
9952 | | frame
9953 | | /
9955 | | \
9956 | | call saved
9957 | | registers
9958 soft frame pointer -> | | /
9960 | | \
9961 | | local
9962 | | variables
9963 | | /
9965 | | \
9966 | | outgoing
9967 | | arguments
9968 current stack pointer -> | | /
9971 For a given function some or all of these stack components
9972 may not be needed, giving rise to the possibility of
9973 eliminating some of the registers.
9975 The values returned by this function must reflect the behavior
9976 of arm_expand_prologue() and arm_compute_save_reg_mask().
9978 The sign of the number returned reflects the direction of stack
9979 growth, so the values are positive for all eliminations except
9980 from the soft frame pointer to the hard frame pointer.
9982 SFP may point just inside the local variables block to ensure correct
9983 alignment. */
9986 /* Calculate stack offsets. These are used to calculate register elimination
9987 offsets and in prologue/epilogue code. */
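/* The fields of arm_stack_offsets accumulate from the incoming arguments
   downwards: saved_args covers any pretend (vararg) arguments, saved_regs
   adds the core and coprocessor register saves, soft_frame adds the
   interworking slot plus alignment padding, and outgoing_args finally adds
   the local variables and outgoing argument space. */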
9989 static arm_stack_offsets *
9990 arm_get_frame_offsets (void)
9992 struct arm_stack_offsets *offsets;
9993 unsigned long func_type;
9994 int leaf;
9995 int saved;
9996 HOST_WIDE_INT frame_size;
9998 offsets = &cfun->machine->stack_offsets;
10000 /* We need to know if we are a leaf function. Unfortunately, it
10001 is possible to be called after start_sequence has been called,
10002 which causes get_insns to return the insns for the sequence,
10003 not the function, which will cause leaf_function_p to return
10004 the incorrect result.  However, we only need
10006 to know about leaf functions once reload has completed, and the
10007 frame size cannot be changed after that time, so we can safely
10008 use the cached value. */
10010 if (reload_completed)
10011 return offsets;
10013 /* Initially this is the size of the local variables. It will be translated
10014 into an offset once we have determined the size of preceding data. */
10015 frame_size = ROUND_UP_WORD (get_frame_size ());
10017 leaf = leaf_function_p ();
10019 /* Space for variadic functions. */
10020 offsets->saved_args = current_function_pretend_args_size;
10022 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0);
10024 if (TARGET_ARM)
10026 unsigned int regno;
10028 saved = bit_count (arm_compute_save_reg_mask ()) * 4;
10030 /* We know that SP will be doubleword aligned on entry, and we must
10031 preserve that condition at any subroutine call. We also require the
10032 soft frame pointer to be doubleword aligned. */
10034 if (TARGET_REALLY_IWMMXT)
10036 /* Check for the call-saved iWMMXt registers. */
10037 for (regno = FIRST_IWMMXT_REGNUM;
10038 regno <= LAST_IWMMXT_REGNUM;
10039 regno++)
10040 if (regs_ever_live [regno] && ! call_used_regs [regno])
10041 saved += 8;
10044 func_type = arm_current_func_type ();
10045 if (! IS_VOLATILE (func_type))
10047 /* Space for saved FPA registers. */
10048 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
10049 if (regs_ever_live[regno] && ! call_used_regs[regno])
10050 saved += 12;
10052 /* Space for saved VFP registers. */
10053 if (TARGET_HARD_FLOAT && TARGET_VFP)
10054 saved += arm_get_vfp_saved_size ();
10057 else /* TARGET_THUMB */
10059 saved = bit_count (thumb_compute_save_reg_mask ()) * 4;
10060 if (TARGET_BACKTRACE)
10061 saved += 16;
10064 /* Saved registers include the stack frame. */
10065 offsets->saved_regs = offsets->saved_args + saved;
10066 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
10067 /* A leaf function does not need any stack alignment if it has nothing
10068 on the stack. */
10069 if (leaf && frame_size == 0)
10071 offsets->outgoing_args = offsets->soft_frame;
10072 return offsets;
10075 /* Ensure SFP has the correct alignment. */
10076 if (ARM_DOUBLEWORD_ALIGN
10077 && (offsets->soft_frame & 7))
10078 offsets->soft_frame += 4;
10080 offsets->outgoing_args = offsets->soft_frame + frame_size
10081 + current_function_outgoing_args_size;
10083 if (ARM_DOUBLEWORD_ALIGN)
10085 /* Ensure SP remains doubleword aligned. */
10086 if (offsets->outgoing_args & 7)
10087 offsets->outgoing_args += 4;
10088 gcc_assert (!(offsets->outgoing_args & 7));
10091 return offsets;
10095 /* Calculate the relative offsets for the different stack pointers. Positive
10096 offsets are in the direction of stack growth. */
10098 HOST_WIDE_INT
10099 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
10101 arm_stack_offsets *offsets;
10103 offsets = arm_get_frame_offsets ();
10105 /* OK, now we have enough information to compute the distances.
10106 There must be an entry in these switch tables for each pair
10107 of registers in ELIMINABLE_REGS, even if some of the entries
10108 seem to be redundant or useless. */
10109 switch (from)
10111 case ARG_POINTER_REGNUM:
10112 switch (to)
10114 case THUMB_HARD_FRAME_POINTER_REGNUM:
10115 return 0;
10117 case FRAME_POINTER_REGNUM:
10118 /* This is the reverse of the soft frame pointer
10119 to hard frame pointer elimination below. */
10120 return offsets->soft_frame - offsets->saved_args;
10122 case ARM_HARD_FRAME_POINTER_REGNUM:
10123 /* If there is no stack frame then the hard
10124 frame pointer and the arg pointer coincide. */
10125 if (offsets->frame == offsets->saved_regs)
10126 return 0;
10127 /* FIXME: Not sure about this. Maybe we should always return 0 ? */
10128 return (frame_pointer_needed
10129 && cfun->static_chain_decl != NULL
10130 && ! cfun->machine->uses_anonymous_args) ? 4 : 0;
10132 case STACK_POINTER_REGNUM:
10133 /* If nothing has been pushed on the stack at all
10134 then this will return -4. This *is* correct! */
10135 return offsets->outgoing_args - (offsets->saved_args + 4);
10137 default:
10138 gcc_unreachable ();
10140 gcc_unreachable ();
10142 case FRAME_POINTER_REGNUM:
10143 switch (to)
10145 case THUMB_HARD_FRAME_POINTER_REGNUM:
10146 return 0;
10148 case ARM_HARD_FRAME_POINTER_REGNUM:
10149 /* The hard frame pointer points to the top entry in the
10150 stack frame. The soft frame pointer points to the bottom entry
10151 in the stack frame. If there is no stack frame at all,
10152 then they are identical. */
10154 return offsets->frame - offsets->soft_frame;
10156 case STACK_POINTER_REGNUM:
10157 return offsets->outgoing_args - offsets->soft_frame;
10159 default:
10160 gcc_unreachable ();
10162 gcc_unreachable ();
10164 default:
10165 /* You cannot eliminate from the stack pointer.
10166 In theory you could eliminate from the hard frame
10167 pointer to the stack pointer, but this will never
10168 happen, since if a stack frame is not needed the
10169 hard frame pointer will never be used. */
10170 gcc_unreachable ();
10175 /* Generate the prologue instructions for entry into an ARM function. */
10176 void
10177 arm_expand_prologue (void)
10179 int reg;
10180 rtx amount;
10181 rtx insn;
10182 rtx ip_rtx;
10183 unsigned long live_regs_mask;
10184 unsigned long func_type;
10185 int fp_offset = 0;
10186 int saved_pretend_args = 0;
10187 int saved_regs = 0;
10188 unsigned HOST_WIDE_INT args_to_push;
10189 arm_stack_offsets *offsets;
10191 func_type = arm_current_func_type ();
10193 /* Naked functions don't have prologues. */
10194 if (IS_NAKED (func_type))
10195 return;
10197 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
10198 args_to_push = current_function_pretend_args_size;
10200 /* Compute which registers we will have to save onto the stack. */
10201 live_regs_mask = arm_compute_save_reg_mask ();
10203 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
10205 if (frame_pointer_needed)
10207 if (IS_INTERRUPT (func_type))
10209 /* Interrupt functions must not corrupt any registers.
10210 Creating a frame pointer, however, corrupts the IP
10211 register, so we must push it first. */
10212 insn = emit_multi_reg_push (1 << IP_REGNUM);
10214 /* Do not set RTX_FRAME_RELATED_P on this insn.
10215 The dwarf stack unwinding code only wants to see one
10216 stack decrement per function, and this is not it. If
10217 this instruction is labeled as being part of the frame
10218 creation sequence then dwarf2out_frame_debug_expr will
10219 die when it encounters the assignment of IP to FP
10220 later on, since the use of SP here establishes SP as
10221 the CFA register and not IP.
10223 Anyway this instruction is not really part of the stack
10224 frame creation although it is part of the prologue. */
10226 else if (IS_NESTED (func_type))
10228 /* The static chain register is the same as the IP register,
10229 which is used as a scratch register during stack frame creation.
10230 To get around this we need to find somewhere to store IP
10231 whilst the frame is being created. We try the following
10232 places in order:
10234 1. The last argument register.
10235 2. A slot on the stack above the frame. (This only
10236 works if the function is not a varargs function).
10237 3. Register r3, after pushing the argument registers
10238 onto the stack.
10240 Note - we only need to tell the dwarf2 backend about the SP
10241 adjustment in the second variant; the static chain register
10242 doesn't need to be unwound, as it doesn't contain a value
10243 inherited from the caller. */
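/* A rough sketch of the second variant (illustrative only; the register
   list pushed for the frame varies with the function):

        str     ip, [sp, #-4]!   @ park IP (the static chain) above the frame
        add     ip, sp, #4       @ ip = value of SP on entry
        ...                      @ push saved registers, set up FP
        ldr     ip, [fp, #4]     @ recover the static chain afterwards

   which is why the recovery code further down reloads IP from FP + 4.  */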
10245 if (regs_ever_live[3] == 0)
10247 insn = gen_rtx_REG (SImode, 3);
10248 insn = gen_rtx_SET (SImode, insn, ip_rtx);
10249 insn = emit_insn (insn);
10251 else if (args_to_push == 0)
10253 rtx dwarf;
10254 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
10255 insn = gen_rtx_MEM (SImode, insn);
10256 insn = gen_rtx_SET (VOIDmode, insn, ip_rtx);
10257 insn = emit_insn (insn);
10259 fp_offset = 4;
10261 /* Just tell the dwarf backend that we adjusted SP. */
10262 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10263 gen_rtx_PLUS (SImode, stack_pointer_rtx,
10264 GEN_INT (-fp_offset)));
10265 RTX_FRAME_RELATED_P (insn) = 1;
10266 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
10267 dwarf, REG_NOTES (insn));
10269 else
10271 /* Store the args on the stack. */
10272 if (cfun->machine->uses_anonymous_args)
10273 insn = emit_multi_reg_push
10274 ((0xf0 >> (args_to_push / 4)) & 0xf);
10275 else
10276 insn = emit_insn
10277 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10278 GEN_INT (- args_to_push)));
10280 RTX_FRAME_RELATED_P (insn) = 1;
10282 saved_pretend_args = 1;
10283 fp_offset = args_to_push;
10284 args_to_push = 0;
10286 /* Now reuse r3 to preserve IP. */
10287 insn = gen_rtx_REG (SImode, 3);
10288 insn = gen_rtx_SET (SImode, insn, ip_rtx);
10289 (void) emit_insn (insn);
10293 if (fp_offset)
10295 insn = gen_rtx_PLUS (SImode, stack_pointer_rtx, GEN_INT (fp_offset));
10296 insn = gen_rtx_SET (SImode, ip_rtx, insn);
10298 else
10299 insn = gen_movsi (ip_rtx, stack_pointer_rtx);
10301 insn = emit_insn (insn);
10302 RTX_FRAME_RELATED_P (insn) = 1;
10305 if (args_to_push)
10307 /* Push the argument registers, or reserve space for them. */
10308 if (cfun->machine->uses_anonymous_args)
10309 insn = emit_multi_reg_push
10310 ((0xf0 >> (args_to_push / 4)) & 0xf);
10311 else
10312 insn = emit_insn
10313 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10314 GEN_INT (- args_to_push)));
10315 RTX_FRAME_RELATED_P (insn) = 1;
10318 /* If this is an interrupt service routine, and the link register
10319 is going to be pushed, and we are not creating a stack frame
10320 (which would involve an extra push of IP and a pop in the epilogue),
10321 then subtracting four from LR now means that the function return
10322 can be done with a single instruction. */
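/* That is, with LR already adjusted the epilogue can return with a single
        ldmfd   sp!, {..., pc}^
   rather than popping into LR and then executing
        subs    pc, lr, #4
   (illustrative; the exact register list depends on the function).  */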
10323 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
10324 && (live_regs_mask & (1 << LR_REGNUM)) != 0
10325 && ! frame_pointer_needed)
10326 emit_insn (gen_rtx_SET (SImode,
10327 gen_rtx_REG (SImode, LR_REGNUM),
10328 gen_rtx_PLUS (SImode,
10329 gen_rtx_REG (SImode, LR_REGNUM),
10330 GEN_INT (-4))));
10332 if (live_regs_mask)
10334 insn = emit_multi_reg_push (live_regs_mask);
10335 saved_regs += bit_count (live_regs_mask) * 4;
10336 RTX_FRAME_RELATED_P (insn) = 1;
10339 if (TARGET_IWMMXT)
10340 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
10341 if (regs_ever_live[reg] && ! call_used_regs [reg])
10343 insn = gen_rtx_PRE_DEC (V2SImode, stack_pointer_rtx);
10344 insn = gen_rtx_MEM (V2SImode, insn);
10345 insn = emit_insn (gen_rtx_SET (VOIDmode, insn,
10346 gen_rtx_REG (V2SImode, reg)));
10347 RTX_FRAME_RELATED_P (insn) = 1;
10348 saved_regs += 8;
10351 if (! IS_VOLATILE (func_type))
10353 int start_reg;
10355 /* Save any floating point call-saved registers used by this
10356 function. */
10357 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
10359 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
10360 if (regs_ever_live[reg] && !call_used_regs[reg])
10362 insn = gen_rtx_PRE_DEC (XFmode, stack_pointer_rtx);
10363 insn = gen_rtx_MEM (XFmode, insn);
10364 insn = emit_insn (gen_rtx_SET (VOIDmode, insn,
10365 gen_rtx_REG (XFmode, reg)));
10366 RTX_FRAME_RELATED_P (insn) = 1;
10367 saved_regs += 12;
10370 else
10372 start_reg = LAST_FPA_REGNUM;
10374 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
10376 if (regs_ever_live[reg] && !call_used_regs[reg])
10378 if (start_reg - reg == 3)
10380 insn = emit_sfm (reg, 4);
10381 RTX_FRAME_RELATED_P (insn) = 1;
10382 saved_regs += 48;
10383 start_reg = reg - 1;
10386 else
10388 if (start_reg != reg)
10390 insn = emit_sfm (reg + 1, start_reg - reg);
10391 RTX_FRAME_RELATED_P (insn) = 1;
10392 saved_regs += (start_reg - reg) * 12;
10394 start_reg = reg - 1;
10398 if (start_reg != reg)
10400 insn = emit_sfm (reg + 1, start_reg - reg);
10401 saved_regs += (start_reg - reg) * 12;
10402 RTX_FRAME_RELATED_P (insn) = 1;
10405 if (TARGET_HARD_FLOAT && TARGET_VFP)
10407 start_reg = FIRST_VFP_REGNUM;
10409 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
10411 if ((!regs_ever_live[reg] || call_used_regs[reg])
10412 && (!regs_ever_live[reg + 1] || call_used_regs[reg + 1]))
10414 if (start_reg != reg)
10415 saved_regs += vfp_emit_fstmx (start_reg,
10416 (reg - start_reg) / 2);
10417 start_reg = reg + 2;
10420 if (start_reg != reg)
10421 saved_regs += vfp_emit_fstmx (start_reg,
10422 (reg - start_reg) / 2);
10426 if (frame_pointer_needed)
10428 /* Create the new frame pointer. */
10429 insn = GEN_INT (-(4 + args_to_push + fp_offset));
10430 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
10431 RTX_FRAME_RELATED_P (insn) = 1;
10433 if (IS_NESTED (func_type))
10435 /* Recover the static chain register. */
10436 if (regs_ever_live [3] == 0
10437 || saved_pretend_args)
10438 insn = gen_rtx_REG (SImode, 3);
10439 else /* if (current_function_pretend_args_size == 0) */
10441 insn = gen_rtx_PLUS (SImode, hard_frame_pointer_rtx,
10442 GEN_INT (4));
10443 insn = gen_rtx_MEM (SImode, insn);
10446 emit_insn (gen_rtx_SET (SImode, ip_rtx, insn));
10447 /* Add a USE to stop propagate_one_insn() from barfing. */
10448 emit_insn (gen_prologue_use (ip_rtx));
10452 offsets = arm_get_frame_offsets ();
10453 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
10455 /* This add can produce multiple insns for a large constant, so we
10456 need to get tricky. */
10457 rtx last = get_last_insn ();
10459 amount = GEN_INT (offsets->saved_args + saved_regs
10460 - offsets->outgoing_args);
10462 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10463 amount));
10466 last = last ? NEXT_INSN (last) : get_insns ();
10467 RTX_FRAME_RELATED_P (last) = 1;
10469 while (last != insn);
10471 /* If the frame pointer is needed, emit a special barrier that
10472 will prevent the scheduler from moving stores to the frame
10473 before the stack adjustment. */
10474 if (frame_pointer_needed)
10475 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
10476 hard_frame_pointer_rtx));
10480 if (flag_pic)
10481 arm_load_pic_register (INVALID_REGNUM);
10483 /* If we are profiling, make sure no instructions are scheduled before
10484 the call to mcount. Similarly if the user has requested no
10485 scheduling in the prolog. */
10486 if (current_function_profile || !TARGET_SCHED_PROLOG)
10487 emit_insn (gen_blockage ());
10489 /* If the link register is being kept alive, with the return address in it,
10490 then make sure that it does not get reused by the ce2 pass. */
10491 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
10493 emit_insn (gen_prologue_use (gen_rtx_REG (SImode, LR_REGNUM)));
10494 cfun->machine->lr_save_eliminated = 1;
10498 /* If CODE is 'd', then the X is a condition operand and the instruction
10499 should only be executed if the condition is true.
10500 If CODE is 'D', then the X is a condition operand and the instruction
10501 should only be executed if the condition is false: however, if the mode
10502 of the comparison is CCFPEmode, then always execute the instruction -- we
10503 do this because in these circumstances !GE does not necessarily imply LT;
10504 in these cases the instruction pattern will take care to make sure that
10505 an instruction containing %d will follow, thereby undoing the effects of
10506 doing this instruction unconditionally.
10507 If CODE is 'N' then X is a floating point operand that must be negated
10508 before output.
10509 If CODE is 'B' then output a bitwise inverted value of X (a const int).
10510 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
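/* For example (illustrative, based on the cases below): with operand 0 a
   DImode value held in r4, "%M0" in an output template prints "{r4-r5}",
   and with operand 1 the constant 5, "%B1" prints -6.  */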
10511 void
10512 arm_print_operand (FILE *stream, rtx x, int code)
10514 switch (code)
10516 case '@':
10517 fputs (ASM_COMMENT_START, stream);
10518 return;
10520 case '_':
10521 fputs (user_label_prefix, stream);
10522 return;
10524 case '|':
10525 fputs (REGISTER_PREFIX, stream);
10526 return;
10528 case '?':
10529 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
10531 if (TARGET_THUMB)
10533 output_operand_lossage ("predicated Thumb instruction");
10534 break;
10536 if (current_insn_predicate != NULL)
10538 output_operand_lossage
10539 ("predicated instruction in conditional sequence");
10540 break;
10543 fputs (arm_condition_codes[arm_current_cc], stream);
10545 else if (current_insn_predicate)
10547 enum arm_cond_code code;
10549 if (TARGET_THUMB)
10551 output_operand_lossage ("predicated Thumb instruction");
10552 break;
10555 code = get_arm_condition_code (current_insn_predicate);
10556 fputs (arm_condition_codes[code], stream);
10558 return;
10560 case 'N':
10562 REAL_VALUE_TYPE r;
10563 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
10564 r = REAL_VALUE_NEGATE (r);
10565 fprintf (stream, "%s", fp_const_from_val (&r));
10567 return;
10569 case 'B':
10570 if (GET_CODE (x) == CONST_INT)
10572 HOST_WIDE_INT val;
10573 val = ARM_SIGN_EXTEND (~INTVAL (x));
10574 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
10576 else
10578 putc ('~', stream);
10579 output_addr_const (stream, x);
10581 return;
10583 case 'i':
10584 fprintf (stream, "%s", arithmetic_instr (x, 1));
10585 return;
10587 /* Truncate Cirrus shift counts. */
10588 case 's':
10589 if (GET_CODE (x) == CONST_INT)
10591 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
10592 return;
10594 arm_print_operand (stream, x, 0);
10595 return;
10597 case 'I':
10598 fprintf (stream, "%s", arithmetic_instr (x, 0));
10599 return;
10601 case 'S':
10603 HOST_WIDE_INT val;
10604 const char * shift = shift_op (x, &val);
10606 if (shift)
10608 fprintf (stream, ", %s ", shift_op (x, &val));
10609 if (val == -1)
10610 arm_print_operand (stream, XEXP (x, 1), 0);
10611 else
10612 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
10615 return;
10617 /* An explanation of the 'Q', 'R' and 'H' register operands:
10619 In a pair of registers containing a DI or DF value the 'Q'
10620 operand returns the register number of the register containing
10621 the least significant part of the value. The 'R' operand returns
10622 the register number of the register containing the most
10623 significant part of the value.
10625 The 'H' operand returns the higher of the two register numbers.
10626 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
10627 same as the 'Q' operand, since the most significant part of the
10628 value is held in the lower number register. The reverse is true
10629 on systems where WORDS_BIG_ENDIAN is false.
10631 The purpose of these operands is to distinguish between cases
10632 where the endian-ness of the values is important (for example
10633 when they are added together), and cases where the endian-ness
10634 is irrelevant, but the order of register operations is important.
10635 For example when loading a value from memory into a register
10636 pair, the endian-ness does not matter. Provided that the value
10637 from the lower memory address is put into the lower numbered
10638 register, and the value from the higher address is put into the
10639 higher numbered register, the load will work regardless of whether
10640 the value being loaded is big-wordian or little-wordian. The
10641 order of the two register loads can matter however, if the address
10642 of the memory location is actually held in one of the registers
10643 being overwritten by the load. */
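/* For example (illustrative): for a DImode value held in {r2, r3} with
   WORDS_BIG_ENDIAN false, %Q prints r2, %R prints r3 and %H prints r3;
   with WORDS_BIG_ENDIAN true, %Q prints r3, %R prints r2 and %H is
   still r3.  */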
10644 case 'Q':
10645 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
10647 output_operand_lossage ("invalid operand for code '%c'", code);
10648 return;
10651 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
10652 return;
10654 case 'R':
10655 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
10657 output_operand_lossage ("invalid operand for code '%c'", code);
10658 return;
10661 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
10662 return;
10664 case 'H':
10665 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
10667 output_operand_lossage ("invalid operand for code '%c'", code);
10668 return;
10671 asm_fprintf (stream, "%r", REGNO (x) + 1);
10672 return;
10674 case 'm':
10675 asm_fprintf (stream, "%r",
10676 GET_CODE (XEXP (x, 0)) == REG
10677 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
10678 return;
10680 case 'M':
10681 asm_fprintf (stream, "{%r-%r}",
10682 REGNO (x),
10683 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
10684 return;
10686 case 'd':
10687 /* CONST_TRUE_RTX means always -- that's the default. */
10688 if (x == const_true_rtx)
10689 return;
10691 if (!COMPARISON_P (x))
10693 output_operand_lossage ("invalid operand for code '%c'", code);
10694 return;
10697 fputs (arm_condition_codes[get_arm_condition_code (x)],
10698 stream);
10699 return;
10701 case 'D':
10702 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
10703 want to do that. */
10704 if (x == const_true_rtx)
10706 output_operand_lossage ("instruction never executed");
10707 return;
10709 if (!COMPARISON_P (x))
10711 output_operand_lossage ("invalid operand for code '%c'", code);
10712 return;
10715 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
10716 (get_arm_condition_code (x))],
10717 stream);
10718 return;
10720 /* Cirrus registers can be accessed in a variety of ways:
10721 single floating point (f)
10722 double floating point (d)
10723 32bit integer (fx)
10724 64bit integer (dx). */
10725 case 'W': /* Cirrus register in F mode. */
10726 case 'X': /* Cirrus register in D mode. */
10727 case 'Y': /* Cirrus register in FX mode. */
10728 case 'Z': /* Cirrus register in DX mode. */
10729 gcc_assert (GET_CODE (x) == REG
10730 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
10732 fprintf (stream, "mv%s%s",
10733 code == 'W' ? "f"
10734 : code == 'X' ? "d"
10735 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
10737 return;
10739 /* Print a Cirrus register in the mode specified by the register's mode. */
10740 case 'V':
10742 int mode = GET_MODE (x);
10744 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
10746 output_operand_lossage ("invalid operand for code '%c'", code);
10747 return;
10750 fprintf (stream, "mv%s%s",
10751 mode == DFmode ? "d"
10752 : mode == SImode ? "fx"
10753 : mode == DImode ? "dx"
10754 : "f", reg_names[REGNO (x)] + 2);
10756 return;
10759 case 'U':
10760 if (GET_CODE (x) != REG
10761 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
10762 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
10763 /* Bad value for wCG register number. */
10765 output_operand_lossage ("invalid operand for code '%c'", code);
10766 return;
10769 else
10770 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
10771 return;
10773 /* Print an iWMMXt control register name. */
10774 case 'w':
10775 if (GET_CODE (x) != CONST_INT
10776 || INTVAL (x) < 0
10777 || INTVAL (x) >= 16)
10778 /* Bad value for wC register number. */
10780 output_operand_lossage ("invalid operand for code '%c'", code);
10781 return;
10784 else
10786 static const char * wc_reg_names [16] =
10788 "wCID", "wCon", "wCSSF", "wCASF",
10789 "wC4", "wC5", "wC6", "wC7",
10790 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
10791 "wC12", "wC13", "wC14", "wC15"
10794 fprintf (stream, wc_reg_names [INTVAL (x)]);
10796 return;
10798 /* Print a VFP double precision register name. */
10799 case 'P':
10801 int mode = GET_MODE (x);
10802 int num;
10804 if (mode != DImode && mode != DFmode)
10806 output_operand_lossage ("invalid operand for code '%c'", code);
10807 return;
10810 if (GET_CODE (x) != REG
10811 || !IS_VFP_REGNUM (REGNO (x)))
10813 output_operand_lossage ("invalid operand for code '%c'", code);
10814 return;
10817 num = REGNO(x) - FIRST_VFP_REGNUM;
10818 if (num & 1)
10820 output_operand_lossage ("invalid operand for code '%c'", code);
10821 return;
10824 fprintf (stream, "d%d", num >> 1);
10826 return;
10828 default:
10829 if (x == 0)
10831 output_operand_lossage ("missing operand");
10832 return;
10835 switch (GET_CODE (x))
10837 case REG:
10838 asm_fprintf (stream, "%r", REGNO (x));
10839 break;
10841 case MEM:
10842 output_memory_reference_mode = GET_MODE (x);
10843 output_address (XEXP (x, 0));
10844 break;
10846 case CONST_DOUBLE:
10847 fprintf (stream, "#%s", fp_immediate_constant (x));
10848 break;
10850 default:
10851 gcc_assert (GET_CODE (x) != NEG);
10852 fputc ('#', stream);
10853 output_addr_const (stream, x);
10854 break;
10859 #ifndef AOF_ASSEMBLER
10860 /* Target hook for assembling integer objects. The ARM version needs to
10861 handle word-sized values specially. */
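/* For instance (illustrative): in a PIC constant pool a word-sized
   reference to a locally-bound symbol "foo" is emitted as

       .word   foo(GOTOFF)

   while a reference to a preemptible symbol "bar" is emitted as
   ".word bar(GOT)", matching the NEED_GOT_RELOC handling below.  */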
10862 static bool
10863 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
10865 if (size == UNITS_PER_WORD && aligned_p)
10867 fputs ("\t.word\t", asm_out_file);
10868 output_addr_const (asm_out_file, x);
10870 /* Mark symbols as position independent. We only do this in the
10871 .text segment, not in the .data segment. */
10872 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
10873 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
10875 if (GET_CODE (x) == SYMBOL_REF
10876 && (CONSTANT_POOL_ADDRESS_P (x)
10877 || SYMBOL_REF_LOCAL_P (x)))
10878 fputs ("(GOTOFF)", asm_out_file);
10879 else if (GET_CODE (x) == LABEL_REF)
10880 fputs ("(GOTOFF)", asm_out_file);
10881 else
10882 fputs ("(GOT)", asm_out_file);
10884 fputc ('\n', asm_out_file);
10885 return true;
10888 if (arm_vector_mode_supported_p (GET_MODE (x)))
10890 int i, units;
10892 gcc_assert (GET_CODE (x) == CONST_VECTOR);
10894 units = CONST_VECTOR_NUNITS (x);
10896 switch (GET_MODE (x))
10898 case V2SImode: size = 4; break;
10899 case V4HImode: size = 2; break;
10900 case V8QImode: size = 1; break;
10901 default:
10902 gcc_unreachable ();
10905 for (i = 0; i < units; i++)
10907 rtx elt;
10909 elt = CONST_VECTOR_ELT (x, i);
10910 assemble_integer
10911 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
10914 return true;
10917 return default_assemble_integer (x, size, aligned_p);
10921 /* Add a function to the list of static constructors. */
10923 static void
10924 arm_elf_asm_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
10926 if (!TARGET_AAPCS_BASED)
10928 default_named_section_asm_out_constructor (symbol, priority);
10929 return;
10932 /* Put these in the .init_array section, using a special relocation. */
10933 ctors_section ();
10934 assemble_align (POINTER_SIZE);
10935 fputs ("\t.word\t", asm_out_file);
10936 output_addr_const (asm_out_file, symbol);
10937 fputs ("(target1)\n", asm_out_file);
10939 #endif
10941 /* A finite state machine takes care of noticing whether or not instructions
10942 can be conditionally executed, and thus decreases execution time and code
10943 size by deleting branch instructions. The fsm is controlled by
10944 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
10946 /* The states of the fsm controlling condition codes are:
10947 0: normal, do nothing special
10948 1: make ASM_OUTPUT_OPCODE not output this instruction
10949 2: make ASM_OUTPUT_OPCODE not output this instruction
10950 3: make instructions conditional
10951 4: make instructions conditional
10953 State transitions (state->state by whom under condition):
10954 0 -> 1 final_prescan_insn if the `target' is a label
10955 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
10956 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
10957 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
10958 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
10959 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
10960 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
10961 (the target insn is arm_target_insn).
10963 If the jump clobbers the conditions then we use states 2 and 4.
10965 A similar thing can be done with conditional return insns.
10967 XXX In case the `target' is an unconditional branch, this conditionalising
10968 of the instructions always reduces code size, but not always execution
10969 time. But then, I want to reduce the code size to somewhere near what
10970 /bin/cc produces. */
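/* As an illustration (registers and labels are only an example), the
   state machine turns

        cmp     r0, #0
        beq     .L1
        add     r1, r1, #1
        str     r1, [r2]
     .L1:

   into

        cmp     r0, #0
        addne   r1, r1, #1
        strne   r1, [r2]

   final_prescan_insn notices the branch over a short run of insns and
   ASM_OUTPUT_OPCODE then omits the branch and predicates that run.  */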
10972 /* Returns the index of the ARM condition code string in
10973 `arm_condition_codes'. COMPARISON should be an rtx like
10974 `(eq (...) (...))'. */
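/* For instance (illustrative): (gtu (reg:CC CC_REGNUM) (const_int 0))
   yields ARM_HI, while the same comparison in CC_SWPmode (chosen when
   the original operands were swapped) yields ARM_CC.  */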
10975 static enum arm_cond_code
10976 get_arm_condition_code (rtx comparison)
10978 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
10979 int code;
10980 enum rtx_code comp_code = GET_CODE (comparison);
10982 if (GET_MODE_CLASS (mode) != MODE_CC)
10983 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
10984 XEXP (comparison, 1));
10986 switch (mode)
10988 case CC_DNEmode: code = ARM_NE; goto dominance;
10989 case CC_DEQmode: code = ARM_EQ; goto dominance;
10990 case CC_DGEmode: code = ARM_GE; goto dominance;
10991 case CC_DGTmode: code = ARM_GT; goto dominance;
10992 case CC_DLEmode: code = ARM_LE; goto dominance;
10993 case CC_DLTmode: code = ARM_LT; goto dominance;
10994 case CC_DGEUmode: code = ARM_CS; goto dominance;
10995 case CC_DGTUmode: code = ARM_HI; goto dominance;
10996 case CC_DLEUmode: code = ARM_LS; goto dominance;
10997 case CC_DLTUmode: code = ARM_CC;
10999 dominance:
11000 gcc_assert (comp_code == EQ || comp_code == NE);
11002 if (comp_code == EQ)
11003 return ARM_INVERSE_CONDITION_CODE (code);
11004 return code;
11006 case CC_NOOVmode:
11007 switch (comp_code)
11009 case NE: return ARM_NE;
11010 case EQ: return ARM_EQ;
11011 case GE: return ARM_PL;
11012 case LT: return ARM_MI;
11013 default: gcc_unreachable ();
11016 case CC_Zmode:
11017 switch (comp_code)
11019 case NE: return ARM_NE;
11020 case EQ: return ARM_EQ;
11021 default: gcc_unreachable ();
11024 case CC_Nmode:
11025 switch (comp_code)
11027 case NE: return ARM_MI;
11028 case EQ: return ARM_PL;
11029 default: gcc_unreachable ();
11032 case CCFPEmode:
11033 case CCFPmode:
11034 /* These encodings assume that AC=1 in the FPA system control
11035 byte. This allows us to handle all cases except UNEQ and
11036 LTGT. */
11037 switch (comp_code)
11039 case GE: return ARM_GE;
11040 case GT: return ARM_GT;
11041 case LE: return ARM_LS;
11042 case LT: return ARM_MI;
11043 case NE: return ARM_NE;
11044 case EQ: return ARM_EQ;
11045 case ORDERED: return ARM_VC;
11046 case UNORDERED: return ARM_VS;
11047 case UNLT: return ARM_LT;
11048 case UNLE: return ARM_LE;
11049 case UNGT: return ARM_HI;
11050 case UNGE: return ARM_PL;
11051 /* UNEQ and LTGT do not have a representation. */
11052 case UNEQ: /* Fall through. */
11053 case LTGT: /* Fall through. */
11054 default: gcc_unreachable ();
11057 case CC_SWPmode:
11058 switch (comp_code)
11060 case NE: return ARM_NE;
11061 case EQ: return ARM_EQ;
11062 case GE: return ARM_LE;
11063 case GT: return ARM_LT;
11064 case LE: return ARM_GE;
11065 case LT: return ARM_GT;
11066 case GEU: return ARM_LS;
11067 case GTU: return ARM_CC;
11068 case LEU: return ARM_CS;
11069 case LTU: return ARM_HI;
11070 default: gcc_unreachable ();
11073 case CC_Cmode:
11074 switch (comp_code)
11076 case LTU: return ARM_CS;
11077 case GEU: return ARM_CC;
11078 default: gcc_unreachable ();
11081 case CCmode:
11082 switch (comp_code)
11084 case NE: return ARM_NE;
11085 case EQ: return ARM_EQ;
11086 case GE: return ARM_GE;
11087 case GT: return ARM_GT;
11088 case LE: return ARM_LE;
11089 case LT: return ARM_LT;
11090 case GEU: return ARM_CS;
11091 case GTU: return ARM_HI;
11092 case LEU: return ARM_LS;
11093 case LTU: return ARM_CC;
11094 default: gcc_unreachable ();
11097 default: gcc_unreachable ();
11101 void
11102 arm_final_prescan_insn (rtx insn)
11104 /* BODY will hold the body of INSN. */
11105 rtx body = PATTERN (insn);
11107 /* This will be 1 if trying to repeat the trick, and things need to be
11108 reversed if it appears to fail. */
11109 int reverse = 0;
11111 /* JUMP_CLOBBERS being one implies that the condition codes are clobbered
11112 if a branch is taken, even if the rtl suggests otherwise. It also
11113 means that we have to grub around within the jump expression to find
11114 out what the conditions are when the jump isn't taken. */
11115 int jump_clobbers = 0;
11117 /* If we start with a return insn, we only succeed if we find another one. */
11118 int seeking_return = 0;
11120 /* START_INSN will hold the insn from where we start looking. This is the
11121 first insn after the following code_label if REVERSE is true. */
11122 rtx start_insn = insn;
11124 /* If in state 4, check if the target branch is reached, in order to
11125 change back to state 0. */
11126 if (arm_ccfsm_state == 4)
11128 if (insn == arm_target_insn)
11130 arm_target_insn = NULL;
11131 arm_ccfsm_state = 0;
11133 return;
11136 /* If in state 3, it is possible to repeat the trick, if this insn is an
11137 unconditional branch to a label, and immediately following this branch
11138 is the previous target label which is only used once, and the label this
11139 branch jumps to is not too far off. */
11140 if (arm_ccfsm_state == 3)
11142 if (simplejump_p (insn))
11144 start_insn = next_nonnote_insn (start_insn);
11145 if (GET_CODE (start_insn) == BARRIER)
11147 /* XXX Isn't this always a barrier? */
11148 start_insn = next_nonnote_insn (start_insn);
11150 if (GET_CODE (start_insn) == CODE_LABEL
11151 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
11152 && LABEL_NUSES (start_insn) == 1)
11153 reverse = TRUE;
11154 else
11155 return;
11157 else if (GET_CODE (body) == RETURN)
11159 start_insn = next_nonnote_insn (start_insn);
11160 if (GET_CODE (start_insn) == BARRIER)
11161 start_insn = next_nonnote_insn (start_insn);
11162 if (GET_CODE (start_insn) == CODE_LABEL
11163 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
11164 && LABEL_NUSES (start_insn) == 1)
11166 reverse = TRUE;
11167 seeking_return = 1;
11169 else
11170 return;
11172 else
11173 return;
11176 gcc_assert (!arm_ccfsm_state || reverse);
11177 if (GET_CODE (insn) != JUMP_INSN)
11178 return;
11180 /* This jump might be paralleled with a clobber of the condition codes;
11181 the jump should always come first. */
11182 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
11183 body = XVECEXP (body, 0, 0);
11185 if (reverse
11186 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
11187 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
11189 int insns_skipped;
11190 int fail = FALSE, succeed = FALSE;
11191 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
11192 int then_not_else = TRUE;
11193 rtx this_insn = start_insn, label = 0;
11195 /* If the jump cannot be done with one instruction, we cannot
11196 conditionally execute the instruction in the inverse case. */
11197 if (get_attr_conds (insn) == CONDS_JUMP_CLOB)
11199 jump_clobbers = 1;
11200 return;
11203 /* Register the insn jumped to. */
11204 if (reverse)
11206 if (!seeking_return)
11207 label = XEXP (SET_SRC (body), 0);
11209 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
11210 label = XEXP (XEXP (SET_SRC (body), 1), 0);
11211 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
11213 label = XEXP (XEXP (SET_SRC (body), 2), 0);
11214 then_not_else = FALSE;
11216 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
11217 seeking_return = 1;
11218 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
11220 seeking_return = 1;
11221 then_not_else = FALSE;
11223 else
11224 gcc_unreachable ();
11226 /* See how many insns this branch skips, and what kind of insns. If all
11227 insns are okay, and the label or unconditional branch to the same
11228 label is not too far away, succeed. */
11229 for (insns_skipped = 0;
11230 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
11232 rtx scanbody;
11234 this_insn = next_nonnote_insn (this_insn);
11235 if (!this_insn)
11236 break;
11238 switch (GET_CODE (this_insn))
11240 case CODE_LABEL:
11241 /* Succeed if it is the target label, otherwise fail since
11242 control falls in from somewhere else. */
11243 if (this_insn == label)
11245 if (jump_clobbers)
11247 arm_ccfsm_state = 2;
11248 this_insn = next_nonnote_insn (this_insn);
11250 else
11251 arm_ccfsm_state = 1;
11252 succeed = TRUE;
11254 else
11255 fail = TRUE;
11256 break;
11258 case BARRIER:
11259 /* Succeed if the following insn is the target label.
11260 Otherwise fail.
11261 If return insns are used then the last insn in a function
11262 will be a barrier. */
11263 this_insn = next_nonnote_insn (this_insn);
11264 if (this_insn && this_insn == label)
11266 if (jump_clobbers)
11268 arm_ccfsm_state = 2;
11269 this_insn = next_nonnote_insn (this_insn);
11271 else
11272 arm_ccfsm_state = 1;
11273 succeed = TRUE;
11275 else
11276 fail = TRUE;
11277 break;
11279 case CALL_INSN:
11280 /* The AAPCS says that conditional calls should not be
11281 used since they make interworking inefficient (the
11282 linker can't transform BL<cond> into BLX). That's
11283 only a problem if the machine has BLX. */
11284 if (arm_arch5)
11286 fail = TRUE;
11287 break;
11290 /* Succeed if the following insn is the target label, or
11291 if the following two insns are a barrier and the
11292 target label. */
11293 this_insn = next_nonnote_insn (this_insn);
11294 if (this_insn && GET_CODE (this_insn) == BARRIER)
11295 this_insn = next_nonnote_insn (this_insn);
11297 if (this_insn && this_insn == label
11298 && insns_skipped < max_insns_skipped)
11300 if (jump_clobbers)
11302 arm_ccfsm_state = 2;
11303 this_insn = next_nonnote_insn (this_insn);
11305 else
11306 arm_ccfsm_state = 1;
11307 succeed = TRUE;
11309 else
11310 fail = TRUE;
11311 break;
11313 case JUMP_INSN:
11314 /* If this is an unconditional branch to the same label, succeed.
11315 If it is to another label, do nothing. If it is conditional,
11316 fail. */
11317 /* XXX Probably, the tests for SET and the PC are
11318 unnecessary. */
11320 scanbody = PATTERN (this_insn);
11321 if (GET_CODE (scanbody) == SET
11322 && GET_CODE (SET_DEST (scanbody)) == PC)
11324 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
11325 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
11327 arm_ccfsm_state = 2;
11328 succeed = TRUE;
11330 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
11331 fail = TRUE;
11333 /* Fail if a conditional return is undesirable (e.g. on a
11334 StrongARM), but still allow this if optimizing for size. */
11335 else if (GET_CODE (scanbody) == RETURN
11336 && !use_return_insn (TRUE, NULL)
11337 && !optimize_size)
11338 fail = TRUE;
11339 else if (GET_CODE (scanbody) == RETURN
11340 && seeking_return)
11342 arm_ccfsm_state = 2;
11343 succeed = TRUE;
11345 else if (GET_CODE (scanbody) == PARALLEL)
11347 switch (get_attr_conds (this_insn))
11349 case CONDS_NOCOND:
11350 break;
11351 default:
11352 fail = TRUE;
11353 break;
11356 else
11357 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
11359 break;
11361 case INSN:
11362 /* Instructions using or affecting the condition codes make it
11363 fail. */
11364 scanbody = PATTERN (this_insn);
11365 if (!(GET_CODE (scanbody) == SET
11366 || GET_CODE (scanbody) == PARALLEL)
11367 || get_attr_conds (this_insn) != CONDS_NOCOND)
11368 fail = TRUE;
11370 /* A conditional Cirrus instruction must be followed by
11371 a non-Cirrus instruction. However, since we
11372 conditionalize instructions in this function, and since by
11373 the time we get here we cannot add instructions
11374 (nops) because shorten_branches() has already been
11375 called, we simply disable conditionalizing Cirrus
11376 instructions to be safe. */
11377 if (GET_CODE (scanbody) != USE
11378 && GET_CODE (scanbody) != CLOBBER
11379 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
11380 fail = TRUE;
11381 break;
11383 default:
11384 break;
11387 if (succeed)
11389 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
11390 arm_target_label = CODE_LABEL_NUMBER (label);
11391 else
11393 gcc_assert (seeking_return || arm_ccfsm_state == 2);
11395 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
11397 this_insn = next_nonnote_insn (this_insn);
11398 gcc_assert (!this_insn
11399 || (GET_CODE (this_insn) != BARRIER
11400 && GET_CODE (this_insn) != CODE_LABEL));
11402 if (!this_insn)
11404 /* Oh, dear! We ran off the end; give up. */
11405 recog (PATTERN (insn), insn, NULL);
11406 arm_ccfsm_state = 0;
11407 arm_target_insn = NULL;
11408 return;
11410 arm_target_insn = this_insn;
11412 if (jump_clobbers)
11414 gcc_assert (!reverse);
11415 arm_current_cc =
11416 get_arm_condition_code (XEXP (XEXP (XEXP (SET_SRC (body),
11417 0), 0), 1));
11418 if (GET_CODE (XEXP (XEXP (SET_SRC (body), 0), 0)) == AND)
11419 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
11420 if (GET_CODE (XEXP (SET_SRC (body), 0)) == NE)
11421 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
11423 else
11425 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
11426 what it was. */
11427 if (!reverse)
11428 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body),
11429 0));
11432 if (reverse || then_not_else)
11433 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
11436 /* Restore recog_data (getting the attributes of other insns can
11437 destroy this array, but final.c assumes that it remains intact
11438 across this call; since the insn has been recognized already we
11439 call recog directly). */
11440 recog (PATTERN (insn), insn, NULL);
11444 /* Returns true if REGNO is a valid register
11445 for holding a quantity of type MODE. */
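/* For example (illustrative): when TARGET_LDRD is set, a DImode value is
   rejected in r1 (an odd core register) but accepted in r2, so that
   ldrd/strd stay usable; in the VFP bank a DFmode value must start on an
   even single-precision register (s0, s2, ...), i.e. on a D register
   boundary.  */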
11447 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
11449 if (GET_MODE_CLASS (mode) == MODE_CC)
11450 return regno == CC_REGNUM || regno == VFPCC_REGNUM;
11452 if (TARGET_THUMB)
11453 /* For the Thumb we only allow values bigger than SImode in
11454 registers 0 - 6, so that there is always a second low
11455 register available to hold the upper part of the value.
11456 We probably ought to ensure that the register is the
11457 start of an even-numbered register pair. */
11458 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
11460 if (IS_CIRRUS_REGNUM (regno))
11461 /* We have outlawed SI values in Cirrus registers because they
11462 reside in the lower 32 bits, but SF values reside in the
11463 upper 32 bits. This causes gcc all sorts of grief. We can't
11464 even split the registers into pairs because Cirrus SI values
11465 get sign extended to 64 bits -- aldyh. */
11466 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
11468 if (IS_VFP_REGNUM (regno))
11470 if (mode == SFmode || mode == SImode)
11471 return TRUE;
11473 /* DFmode values are only valid in even register pairs. */
11474 if (mode == DFmode)
11475 return ((regno - FIRST_VFP_REGNUM) & 1) == 0;
11476 return FALSE;
11479 if (IS_IWMMXT_GR_REGNUM (regno))
11480 return mode == SImode;
11482 if (IS_IWMMXT_REGNUM (regno))
11483 return VALID_IWMMXT_REG_MODE (mode);
11485 /* We allow any value to be stored in the general registers.
11486 Restrict doubleword quantities to even register pairs so that we can
11487 use ldrd. */
11488 if (regno <= LAST_ARM_REGNUM)
11489 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
11491 if ( regno == FRAME_POINTER_REGNUM
11492 || regno == ARG_POINTER_REGNUM)
11493 /* We only allow integers in the fake hard registers. */
11494 return GET_MODE_CLASS (mode) == MODE_INT;
11496 /* The only registers left are the FPA registers
11497 which we only allow to hold FP values. */
11498 return GET_MODE_CLASS (mode) == MODE_FLOAT
11499 && regno >= FIRST_FPA_REGNUM
11500 && regno <= LAST_FPA_REGNUM;
11504 arm_regno_class (int regno)
11506 if (TARGET_THUMB)
11508 if (regno == STACK_POINTER_REGNUM)
11509 return STACK_REG;
11510 if (regno == CC_REGNUM)
11511 return CC_REG;
11512 if (regno < 8)
11513 return LO_REGS;
11514 return HI_REGS;
11517 if ( regno <= LAST_ARM_REGNUM
11518 || regno == FRAME_POINTER_REGNUM
11519 || regno == ARG_POINTER_REGNUM)
11520 return GENERAL_REGS;
11522 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
11523 return NO_REGS;
11525 if (IS_CIRRUS_REGNUM (regno))
11526 return CIRRUS_REGS;
11528 if (IS_VFP_REGNUM (regno))
11529 return VFP_REGS;
11531 if (IS_IWMMXT_REGNUM (regno))
11532 return IWMMXT_REGS;
11534 if (IS_IWMMXT_GR_REGNUM (regno))
11535 return IWMMXT_GR_REGS;
11537 return FPA_REGS;
11540 /* Handle a special case when computing the offset
11541 of an argument from the frame pointer. */
11543 arm_debugger_arg_offset (int value, rtx addr)
11545 rtx insn;
11547 /* We are only interested if dbxout_parms() failed to compute the offset. */
11548 if (value != 0)
11549 return 0;
11551 /* We can only cope with the case where the address is held in a register. */
11552 if (GET_CODE (addr) != REG)
11553 return 0;
11555 /* If we are using the frame pointer to point at the argument, then
11556 an offset of 0 is correct. */
11557 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
11558 return 0;
11560 /* If we are using the stack pointer to point at the
11561 argument, then an offset of 0 is correct. */
11562 if ((TARGET_THUMB || !frame_pointer_needed)
11563 && REGNO (addr) == SP_REGNUM)
11564 return 0;
11566 /* Oh dear. The argument is pointed to by a register rather
11567 than being held in a register, or being stored at a known
11568 offset from the frame pointer. Since GDB only understands
11569 those two kinds of argument we must translate the address
11570 held in the register into an offset from the frame pointer.
11571 We do this by searching through the insns for the function
11572 looking to see where this register gets its value. If the
11573 register is initialized from the frame pointer plus an offset
11574 then we are in luck and we can continue, otherwise we give up.
11576 This code is exercised by producing debugging information
11577 for a function with arguments like this:
11579 double func (double a, double b, int c, double d) {return d;}
11581 Without this code the stab for parameter 'd' will be set to
11582 an offset of 0 from the frame pointer, rather than 8. */
11584 /* The if() statement says:
11586 If the insn is a normal instruction
11587 and if the insn is setting the value in a register
11588 and if the register being set is the register holding the address of the argument
11589 and if the address is computed by an addition
11590 that involves adding to a register
11591 which is the frame pointer
11592 a constant integer
11594 then... */
11596 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
11598 if ( GET_CODE (insn) == INSN
11599 && GET_CODE (PATTERN (insn)) == SET
11600 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
11601 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
11602 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
11603 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
11604 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
11607 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
11609 break;
11613 if (value == 0)
11615 debug_rtx (addr);
11616 warning (0, "unable to compute real location of stacked parameter");
11617 value = 8; /* XXX magic hack */
11620 return value;
11623 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
11624 do \
11626 if ((MASK) & insn_flags) \
11627 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), \
11628 BUILT_IN_MD, NULL, NULL_TREE); \
11630 while (0)
11632 struct builtin_description
11634 const unsigned int mask;
11635 const enum insn_code icode;
11636 const char * const name;
11637 const enum arm_builtins code;
11638 const enum rtx_code comparison;
11639 const unsigned int flag;
11642 static const struct builtin_description bdesc_2arg[] =
11644 #define IWMMXT_BUILTIN(code, string, builtin) \
11645 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
11646 ARM_BUILTIN_##builtin, 0, 0 },
11648 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
11649 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
11650 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
11651 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
11652 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
11653 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
11654 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
11655 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
11656 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
11657 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
11658 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
11659 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
11660 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
11661 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
11662 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
11663 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
11664 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
11665 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
11666 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
11667 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
11668 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
11669 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
11670 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
11671 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
11672 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
11673 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
11674 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
11675 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
11676 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
11677 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
11678 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
11679 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
11680 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
11681 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
11682 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
11683 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
11684 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
11685 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
11686 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
11687 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
11688 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
11689 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
11690 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
11691 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
11692 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
11693 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
11694 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
11695 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
11696 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
11697 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
11698 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
11699 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
11700 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
11701 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
11702 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
11703 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
11704 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
11705 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
11707 #define IWMMXT_BUILTIN2(code, builtin) \
11708 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, 0, 0 },
11710 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
11711 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
11712 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
11713 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
11714 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
11715 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
11716 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
11717 IWMMXT_BUILTIN2 (ashlv4hi3, WSLLHI)
11718 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
11719 IWMMXT_BUILTIN2 (ashlv2si3, WSLLWI)
11720 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
11721 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
11722 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
11723 IWMMXT_BUILTIN2 (lshrv4hi3, WSRLHI)
11724 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
11725 IWMMXT_BUILTIN2 (lshrv2si3, WSRLWI)
11726 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
11727 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
11728 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
11729 IWMMXT_BUILTIN2 (ashrv4hi3, WSRAHI)
11730 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
11731 IWMMXT_BUILTIN2 (ashrv2si3, WSRAWI)
11732 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
11733 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
11734 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
11735 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
11736 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
11737 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
11738 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
11739 IWMMXT_BUILTIN2 (rordi3, WRORDI)
11740 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
11741 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
11744 static const struct builtin_description bdesc_1arg[] =
11746 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
11747 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
11748 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
11749 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
11750 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
11751 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
11752 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
11753 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
11754 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
11755 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
11756 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
11757 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
11758 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
11759 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
11760 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
11761 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
11762 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
11763 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
11766 /* Set up all the iWMMXt builtins. This is
11767 not called if TARGET_IWMMXT is zero. */
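/* Usage sketch (illustrative; assumes an iWMMXt-enabled target): the
   tables above make calls such as

       typedef int v2si __attribute__ ((vector_size (8)));
       v2si f (v2si a, v2si b) { return __builtin_arm_waddw (a, b); }

   available; the function below registers the function types and the
   builtin expander later maps the call onto CODE_FOR_addv2si3.  */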
11769 static void
11770 arm_init_iwmmxt_builtins (void)
11772 const struct builtin_description * d;
11773 size_t i;
11774 tree endlink = void_list_node;
11776 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
11777 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
11778 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
11780 tree int_ftype_int
11781 = build_function_type (integer_type_node,
11782 tree_cons (NULL_TREE, integer_type_node, endlink));
11783 tree v8qi_ftype_v8qi_v8qi_int
11784 = build_function_type (V8QI_type_node,
11785 tree_cons (NULL_TREE, V8QI_type_node,
11786 tree_cons (NULL_TREE, V8QI_type_node,
11787 tree_cons (NULL_TREE,
11788 integer_type_node,
11789 endlink))));
11790 tree v4hi_ftype_v4hi_int
11791 = build_function_type (V4HI_type_node,
11792 tree_cons (NULL_TREE, V4HI_type_node,
11793 tree_cons (NULL_TREE, integer_type_node,
11794 endlink)));
11795 tree v2si_ftype_v2si_int
11796 = build_function_type (V2SI_type_node,
11797 tree_cons (NULL_TREE, V2SI_type_node,
11798 tree_cons (NULL_TREE, integer_type_node,
11799 endlink)));
11800 tree v2si_ftype_di_di
11801 = build_function_type (V2SI_type_node,
11802 tree_cons (NULL_TREE, long_long_integer_type_node,
11803 tree_cons (NULL_TREE, long_long_integer_type_node,
11804 endlink)));
11805 tree di_ftype_di_int
11806 = build_function_type (long_long_integer_type_node,
11807 tree_cons (NULL_TREE, long_long_integer_type_node,
11808 tree_cons (NULL_TREE, integer_type_node,
11809 endlink)));
11810 tree di_ftype_di_int_int
11811 = build_function_type (long_long_integer_type_node,
11812 tree_cons (NULL_TREE, long_long_integer_type_node,
11813 tree_cons (NULL_TREE, integer_type_node,
11814 tree_cons (NULL_TREE,
11815 integer_type_node,
11816 endlink))));
11817 tree int_ftype_v8qi
11818 = build_function_type (integer_type_node,
11819 tree_cons (NULL_TREE, V8QI_type_node,
11820 endlink));
11821 tree int_ftype_v4hi
11822 = build_function_type (integer_type_node,
11823 tree_cons (NULL_TREE, V4HI_type_node,
11824 endlink));
11825 tree int_ftype_v2si
11826 = build_function_type (integer_type_node,
11827 tree_cons (NULL_TREE, V2SI_type_node,
11828 endlink));
11829 tree int_ftype_v8qi_int
11830 = build_function_type (integer_type_node,
11831 tree_cons (NULL_TREE, V8QI_type_node,
11832 tree_cons (NULL_TREE, integer_type_node,
11833 endlink)));
11834 tree int_ftype_v4hi_int
11835 = build_function_type (integer_type_node,
11836 tree_cons (NULL_TREE, V4HI_type_node,
11837 tree_cons (NULL_TREE, integer_type_node,
11838 endlink)));
11839 tree int_ftype_v2si_int
11840 = build_function_type (integer_type_node,
11841 tree_cons (NULL_TREE, V2SI_type_node,
11842 tree_cons (NULL_TREE, integer_type_node,
11843 endlink)));
11844 tree v8qi_ftype_v8qi_int_int
11845 = build_function_type (V8QI_type_node,
11846 tree_cons (NULL_TREE, V8QI_type_node,
11847 tree_cons (NULL_TREE, integer_type_node,
11848 tree_cons (NULL_TREE,
11849 integer_type_node,
11850 endlink))));
11851 tree v4hi_ftype_v4hi_int_int
11852 = build_function_type (V4HI_type_node,
11853 tree_cons (NULL_TREE, V4HI_type_node,
11854 tree_cons (NULL_TREE, integer_type_node,
11855 tree_cons (NULL_TREE,
11856 integer_type_node,
11857 endlink))));
11858 tree v2si_ftype_v2si_int_int
11859 = build_function_type (V2SI_type_node,
11860 tree_cons (NULL_TREE, V2SI_type_node,
11861 tree_cons (NULL_TREE, integer_type_node,
11862 tree_cons (NULL_TREE,
11863 integer_type_node,
11864 endlink))));
11865 /* Miscellaneous. */
11866 tree v8qi_ftype_v4hi_v4hi
11867 = build_function_type (V8QI_type_node,
11868 tree_cons (NULL_TREE, V4HI_type_node,
11869 tree_cons (NULL_TREE, V4HI_type_node,
11870 endlink)));
11871 tree v4hi_ftype_v2si_v2si
11872 = build_function_type (V4HI_type_node,
11873 tree_cons (NULL_TREE, V2SI_type_node,
11874 tree_cons (NULL_TREE, V2SI_type_node,
11875 endlink)));
11876 tree v2si_ftype_v4hi_v4hi
11877 = build_function_type (V2SI_type_node,
11878 tree_cons (NULL_TREE, V4HI_type_node,
11879 tree_cons (NULL_TREE, V4HI_type_node,
11880 endlink)));
11881 tree v2si_ftype_v8qi_v8qi
11882 = build_function_type (V2SI_type_node,
11883 tree_cons (NULL_TREE, V8QI_type_node,
11884 tree_cons (NULL_TREE, V8QI_type_node,
11885 endlink)));
11886 tree v4hi_ftype_v4hi_di
11887 = build_function_type (V4HI_type_node,
11888 tree_cons (NULL_TREE, V4HI_type_node,
11889 tree_cons (NULL_TREE,
11890 long_long_integer_type_node,
11891 endlink)));
11892 tree v2si_ftype_v2si_di
11893 = build_function_type (V2SI_type_node,
11894 tree_cons (NULL_TREE, V2SI_type_node,
11895 tree_cons (NULL_TREE,
11896 long_long_integer_type_node,
11897 endlink)));
11898 tree void_ftype_int_int
11899 = build_function_type (void_type_node,
11900 tree_cons (NULL_TREE, integer_type_node,
11901 tree_cons (NULL_TREE, integer_type_node,
11902 endlink)));
11903 tree di_ftype_void
11904 = build_function_type (long_long_unsigned_type_node, endlink);
11905 tree di_ftype_v8qi
11906 = build_function_type (long_long_integer_type_node,
11907 tree_cons (NULL_TREE, V8QI_type_node,
11908 endlink));
11909 tree di_ftype_v4hi
11910 = build_function_type (long_long_integer_type_node,
11911 tree_cons (NULL_TREE, V4HI_type_node,
11912 endlink));
11913 tree di_ftype_v2si
11914 = build_function_type (long_long_integer_type_node,
11915 tree_cons (NULL_TREE, V2SI_type_node,
11916 endlink));
11917 tree v2si_ftype_v4hi
11918 = build_function_type (V2SI_type_node,
11919 tree_cons (NULL_TREE, V4HI_type_node,
11920 endlink));
11921 tree v4hi_ftype_v8qi
11922 = build_function_type (V4HI_type_node,
11923 tree_cons (NULL_TREE, V8QI_type_node,
11924 endlink));
11926 tree di_ftype_di_v4hi_v4hi
11927 = build_function_type (long_long_unsigned_type_node,
11928 tree_cons (NULL_TREE,
11929 long_long_unsigned_type_node,
11930 tree_cons (NULL_TREE, V4HI_type_node,
11931 tree_cons (NULL_TREE,
11932 V4HI_type_node,
11933 endlink))));
11935 tree di_ftype_v4hi_v4hi
11936 = build_function_type (long_long_unsigned_type_node,
11937 tree_cons (NULL_TREE, V4HI_type_node,
11938 tree_cons (NULL_TREE, V4HI_type_node,
11939 endlink)));
11941 /* Normal vector binops. */
11942 tree v8qi_ftype_v8qi_v8qi
11943 = build_function_type (V8QI_type_node,
11944 tree_cons (NULL_TREE, V8QI_type_node,
11945 tree_cons (NULL_TREE, V8QI_type_node,
11946 endlink)));
11947 tree v4hi_ftype_v4hi_v4hi
11948 = build_function_type (V4HI_type_node,
11949 tree_cons (NULL_TREE, V4HI_type_node,
11950 tree_cons (NULL_TREE, V4HI_type_node,
11951 endlink)));
11952 tree v2si_ftype_v2si_v2si
11953 = build_function_type (V2SI_type_node,
11954 tree_cons (NULL_TREE, V2SI_type_node,
11955 tree_cons (NULL_TREE, V2SI_type_node,
11956 endlink)));
11957 tree di_ftype_di_di
11958 = build_function_type (long_long_unsigned_type_node,
11959 tree_cons (NULL_TREE, long_long_unsigned_type_node,
11960 tree_cons (NULL_TREE,
11961 long_long_unsigned_type_node,
11962 endlink)));
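/* Note added for exposition (not in the original source): the local
   names above follow the pattern <result>_ftype_<arg1>_<arg2>..., so
   v8qi_ftype_v4hi_v4hi, for example, is the tree for a function type
   roughly equivalent to the C prototype

     V8QI f (V4HI, V4HI);

   Each build_function_type call chains the argument types with
   tree_cons and terminates the list with endlink.  */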
11964 /* Add all builtins that are more or less simple operations on two
11965 operands. */
11966 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
11968 /* Use one of the operands; the target can have a different mode for
11969 mask-generating compares. */
11970 enum machine_mode mode;
11971 tree type;
11973 if (d->name == 0)
11974 continue;
11976 mode = insn_data[d->icode].operand[1].mode;
11978 switch (mode)
11980 case V8QImode:
11981 type = v8qi_ftype_v8qi_v8qi;
11982 break;
11983 case V4HImode:
11984 type = v4hi_ftype_v4hi_v4hi;
11985 break;
11986 case V2SImode:
11987 type = v2si_ftype_v2si_v2si;
11988 break;
11989 case DImode:
11990 type = di_ftype_di_di;
11991 break;
11993 default:
11994 gcc_unreachable ();
11997 def_mbuiltin (d->mask, d->name, type, d->code);
12000 /* Add the remaining iWMMXt insns with somewhat more complicated types. */
12001 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
12002 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
12003 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
12005 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
12006 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
12007 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
12008 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
12009 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
12010 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
12012 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
12013 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
12014 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
12015 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
12016 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
12017 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
12019 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
12020 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
12021 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
12022 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
12023 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
12024 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
12026 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
12027 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
12028 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
12029 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
12030 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
12031 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
12033 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
12035 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
12036 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
12037 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
12038 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
12040 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
12041 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
12042 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
12043 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
12044 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
12045 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
12046 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
12047 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
12048 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
12050 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
12051 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
12052 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
12054 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
12055 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
12056 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
12058 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
12059 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
12060 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
12061 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
12062 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
12063 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
12065 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
12066 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
12067 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
12068 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
12069 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
12070 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
12071 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
12072 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
12073 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
12074 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
12075 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
12076 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
12078 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
12079 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
12080 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
12081 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
12083 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
12084 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
12085 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
12086 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
12087 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
12088 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
12089 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
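/* Usage sketch (added for illustration; not part of the original
   source): once registered, these builtins are callable directly from C
   on an iWMMXt target, with types matching the function types built
   above, e.g.

     unsigned long long acc = __builtin_arm_wzero ();
     int top = __builtin_arm_textrmsh (v, 3);

   The first matches di_ftype_void above; the second matches
   int_ftype_v4hi_int, and its selector argument must be a compile-time
   constant (see arm_expand_builtin).  Here v stands for a value of the
   4 x 16-bit vector type, normally obtained through the vendor
   intrinsic wrappers rather than written by hand.  */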
12092 static void
12093 arm_init_builtins (void)
12095 if (TARGET_REALLY_IWMMXT)
12096 arm_init_iwmmxt_builtins ();
12099 /* Errors in the source file can cause expand_expr to return const0_rtx
12100 where we expect a vector. To avoid crashing, use one of the vector
12101 clear instructions. */
12103 static rtx
12104 safe_vector_operand (rtx x, enum machine_mode mode)
12106 if (x != const0_rtx)
12107 return x;
12108 x = gen_reg_rtx (mode);
12110 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
12111 : gen_rtx_SUBREG (DImode, x, 0)));
12112 return x;
12115 /* Subroutine of arm_expand_builtin to take care of binop insns. */
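/* Illustrative sketch (added for exposition): for a two-operand builtin
   such as

     c = __builtin_arm_wsadb (a, b);

   arm_expand_builtin passes the matching insn code (here
   CODE_FOR_iwmmxt_wsadb) to this routine, which expands both argument
   trees to RTL, forces them into operands accepted by the insn's
   predicates, and emits the single binary pattern.  */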
12117 static rtx
12118 arm_expand_binop_builtin (enum insn_code icode,
12119 tree arglist, rtx target)
12121 rtx pat;
12122 tree arg0 = TREE_VALUE (arglist);
12123 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12124 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12125 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12126 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12127 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12128 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12130 if (VECTOR_MODE_P (mode0))
12131 op0 = safe_vector_operand (op0, mode0);
12132 if (VECTOR_MODE_P (mode1))
12133 op1 = safe_vector_operand (op1, mode1);
12135 if (! target
12136 || GET_MODE (target) != tmode
12137 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12138 target = gen_reg_rtx (tmode);
12140 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
12142 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12143 op0 = copy_to_mode_reg (mode0, op0);
12144 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12145 op1 = copy_to_mode_reg (mode1, op1);
12147 pat = GEN_FCN (icode) (target, op0, op1);
12148 if (! pat)
12149 return 0;
12150 emit_insn (pat);
12151 return target;
12154 /* Subroutine of arm_expand_builtin to take care of unop insns. */
12156 static rtx
12157 arm_expand_unop_builtin (enum insn_code icode,
12158 tree arglist, rtx target, int do_load)
12160 rtx pat;
12161 tree arg0 = TREE_VALUE (arglist);
12162 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12163 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12164 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12166 if (! target
12167 || GET_MODE (target) != tmode
12168 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12169 target = gen_reg_rtx (tmode);
12170 if (do_load)
12171 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12172 else
12174 if (VECTOR_MODE_P (mode0))
12175 op0 = safe_vector_operand (op0, mode0);
12177 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12178 op0 = copy_to_mode_reg (mode0, op0);
12181 pat = GEN_FCN (icode) (target, op0);
12182 if (! pat)
12183 return 0;
12184 emit_insn (pat);
12185 return target;
12188 /* Expand an expression EXP that calls a built-in function,
12189 with result going to TARGET if that's convenient
12190 (and in mode MODE if that's convenient).
12191 SUBTARGET may be used as the target for computing one of EXP's operands.
12192 IGNORE is nonzero if the value is to be ignored. */
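/* Note added for illustration (hypothetical example, not from the
   original comment): the extract/insert builtins handled below require
   their selector operand to fold to a compile-time constant, so

     int hi = __builtin_arm_textrmsh (v, 3);

   is accepted, while passing a non-constant selector triggers the
   "selector must be an immediate" error.  Builtins without an explicit
   case fall through to the bdesc_2arg and bdesc_1arg table lookups at
   the end of the function.  */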
12194 static rtx
12195 arm_expand_builtin (tree exp,
12196 rtx target,
12197 rtx subtarget ATTRIBUTE_UNUSED,
12198 enum machine_mode mode ATTRIBUTE_UNUSED,
12199 int ignore ATTRIBUTE_UNUSED)
12201 const struct builtin_description * d;
12202 enum insn_code icode;
12203 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
12204 tree arglist = TREE_OPERAND (exp, 1);
12205 tree arg0;
12206 tree arg1;
12207 tree arg2;
12208 rtx op0;
12209 rtx op1;
12210 rtx op2;
12211 rtx pat;
12212 int fcode = DECL_FUNCTION_CODE (fndecl);
12213 size_t i;
12214 enum machine_mode tmode;
12215 enum machine_mode mode0;
12216 enum machine_mode mode1;
12217 enum machine_mode mode2;
12219 switch (fcode)
12221 case ARM_BUILTIN_TEXTRMSB:
12222 case ARM_BUILTIN_TEXTRMUB:
12223 case ARM_BUILTIN_TEXTRMSH:
12224 case ARM_BUILTIN_TEXTRMUH:
12225 case ARM_BUILTIN_TEXTRMSW:
12226 case ARM_BUILTIN_TEXTRMUW:
12227 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
12228 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
12229 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
12230 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
12231 : CODE_FOR_iwmmxt_textrmw);
12233 arg0 = TREE_VALUE (arglist);
12234 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12235 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12236 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12237 tmode = insn_data[icode].operand[0].mode;
12238 mode0 = insn_data[icode].operand[1].mode;
12239 mode1 = insn_data[icode].operand[2].mode;
12241 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12242 op0 = copy_to_mode_reg (mode0, op0);
12243 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12245 /* @@@ better error message */
12246 error ("selector must be an immediate");
12247 return gen_reg_rtx (tmode);
12249 if (target == 0
12250 || GET_MODE (target) != tmode
12251 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12252 target = gen_reg_rtx (tmode);
12253 pat = GEN_FCN (icode) (target, op0, op1);
12254 if (! pat)
12255 return 0;
12256 emit_insn (pat);
12257 return target;
12259 case ARM_BUILTIN_TINSRB:
12260 case ARM_BUILTIN_TINSRH:
12261 case ARM_BUILTIN_TINSRW:
12262 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
12263 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
12264 : CODE_FOR_iwmmxt_tinsrw);
12265 arg0 = TREE_VALUE (arglist);
12266 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12267 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12268 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12269 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12270 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12271 tmode = insn_data[icode].operand[0].mode;
12272 mode0 = insn_data[icode].operand[1].mode;
12273 mode1 = insn_data[icode].operand[2].mode;
12274 mode2 = insn_data[icode].operand[3].mode;
12276 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12277 op0 = copy_to_mode_reg (mode0, op0);
12278 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12279 op1 = copy_to_mode_reg (mode1, op1);
12280 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12282 /* @@@ better error message */
12283 error ("selector must be an immediate");
12284 return const0_rtx;
12286 if (target == 0
12287 || GET_MODE (target) != tmode
12288 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12289 target = gen_reg_rtx (tmode);
12290 pat = GEN_FCN (icode) (target, op0, op1, op2);
12291 if (! pat)
12292 return 0;
12293 emit_insn (pat);
12294 return target;
12296 case ARM_BUILTIN_SETWCX:
12297 arg0 = TREE_VALUE (arglist);
12298 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12299 op0 = force_reg (SImode, expand_expr (arg0, NULL_RTX, VOIDmode, 0));
12300 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12301 emit_insn (gen_iwmmxt_tmcr (op1, op0));
12302 return 0;
12304 case ARM_BUILTIN_GETWCX:
12305 arg0 = TREE_VALUE (arglist);
12306 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12307 target = gen_reg_rtx (SImode);
12308 emit_insn (gen_iwmmxt_tmrc (target, op0));
12309 return target;
12311 case ARM_BUILTIN_WSHUFH:
12312 icode = CODE_FOR_iwmmxt_wshufh;
12313 arg0 = TREE_VALUE (arglist);
12314 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12315 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12316 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12317 tmode = insn_data[icode].operand[0].mode;
12318 mode1 = insn_data[icode].operand[1].mode;
12319 mode2 = insn_data[icode].operand[2].mode;
12321 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
12322 op0 = copy_to_mode_reg (mode1, op0);
12323 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
12325 /* @@@ better error message */
12326 error ("mask must be an immediate");
12327 return const0_rtx;
12329 if (target == 0
12330 || GET_MODE (target) != tmode
12331 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12332 target = gen_reg_rtx (tmode);
12333 pat = GEN_FCN (icode) (target, op0, op1);
12334 if (! pat)
12335 return 0;
12336 emit_insn (pat);
12337 return target;
12339 case ARM_BUILTIN_WSADB:
12340 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, arglist, target);
12341 case ARM_BUILTIN_WSADH:
12342 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, arglist, target);
12343 case ARM_BUILTIN_WSADBZ:
12344 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, arglist, target);
12345 case ARM_BUILTIN_WSADHZ:
12346 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, arglist, target);
12348 /* Several three-argument builtins. */
12349 case ARM_BUILTIN_WMACS:
12350 case ARM_BUILTIN_WMACU:
12351 case ARM_BUILTIN_WALIGN:
12352 case ARM_BUILTIN_TMIA:
12353 case ARM_BUILTIN_TMIAPH:
12354 case ARM_BUILTIN_TMIATT:
12355 case ARM_BUILTIN_TMIATB:
12356 case ARM_BUILTIN_TMIABT:
12357 case ARM_BUILTIN_TMIABB:
12358 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
12359 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
12360 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
12361 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
12362 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
12363 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
12364 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
12365 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
12366 : CODE_FOR_iwmmxt_walign);
12367 arg0 = TREE_VALUE (arglist);
12368 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12369 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12370 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12371 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12372 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12373 tmode = insn_data[icode].operand[0].mode;
12374 mode0 = insn_data[icode].operand[1].mode;
12375 mode1 = insn_data[icode].operand[2].mode;
12376 mode2 = insn_data[icode].operand[3].mode;
12378 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12379 op0 = copy_to_mode_reg (mode0, op0);
12380 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12381 op1 = copy_to_mode_reg (mode1, op1);
12382 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12383 op2 = copy_to_mode_reg (mode2, op2);
12384 if (target == 0
12385 || GET_MODE (target) != tmode
12386 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12387 target = gen_reg_rtx (tmode);
12388 pat = GEN_FCN (icode) (target, op0, op1, op2);
12389 if (! pat)
12390 return 0;
12391 emit_insn (pat);
12392 return target;
12394 case ARM_BUILTIN_WZERO:
12395 target = gen_reg_rtx (DImode);
12396 emit_insn (gen_iwmmxt_clrdi (target));
12397 return target;
12399 default:
12400 break;
12403 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12404 if (d->code == (const enum arm_builtins) fcode)
12405 return arm_expand_binop_builtin (d->icode, arglist, target);
12407 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
12408 if (d->code == (const enum arm_builtins) fcode)
12409 return arm_expand_unop_builtin (d->icode, arglist, target, 0);
12411 /* @@@ Should really do something sensible here. */
12412 return NULL_RTX;
12415 /* Return the number (counting from 0) of
12416 the least significant set bit in MASK. */
12418 inline static int
12419 number_of_first_bit_set (unsigned mask)
12421 int bit;
12423 for (bit = 0;
12424 (mask & (1 << bit)) == 0;
12425 ++bit)
12426 continue;
12428 return bit;
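/* Worked example (added for exposition): number_of_first_bit_set (0x0c)
   returns 2, because bit 2 is the lowest set bit of binary 1100.  The
   function assumes MASK is nonzero; callers such as thumb_pushpop and
   thumb_exit only invoke it on nonempty register masks.  */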
12431 /* Emit code to push or pop registers to or from the stack. F is the
12432 assembly file. MASK is the registers to push or pop. PUSH is
12433 nonzero if we should push, and zero if we should pop. For debugging
12434 output, if pushing, adjust CFA_OFFSET by the amount of space added
12435 to the stack. REAL_REGS should have the same number of bits set as
12436 MASK, and will be used instead (in the same order) to describe which
12437 registers were saved - this is used to mark the save slots when we
12438 push high registers after moving them to low registers. */
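/* Example of the output (illustrative only): with MASK covering r4, r5
   and LR and PUSH nonzero, this prints

     push {r4, r5, lr}

   and, when dwarf2 frame info is requested, records a 12-byte CFA
   adjustment plus one register-save note for each bit in REAL_REGS.  */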
12439 static void
12440 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
12441 unsigned long real_regs)
12443 int regno;
12444 int lo_mask = mask & 0xFF;
12445 int pushed_words = 0;
12447 gcc_assert (mask);
12449 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
12451 /* Special case.  Do not generate a POP PC statement here; do it in
12452 thumb_exit ().  */
12453 thumb_exit (f, -1);
12454 return;
12457 fprintf (f, "\t%s\t{", push ? "push" : "pop");
12459 /* Look at the low registers first. */
12460 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
12462 if (lo_mask & 1)
12464 asm_fprintf (f, "%r", regno);
12466 if ((lo_mask & ~1) != 0)
12467 fprintf (f, ", ");
12469 pushed_words++;
12473 if (push && (mask & (1 << LR_REGNUM)))
12475 /* Catch pushing the LR. */
12476 if (mask & 0xFF)
12477 fprintf (f, ", ");
12479 asm_fprintf (f, "%r", LR_REGNUM);
12481 pushed_words++;
12483 else if (!push && (mask & (1 << PC_REGNUM)))
12485 /* Catch popping the PC. */
12486 if (TARGET_INTERWORK || TARGET_BACKTRACE
12487 || current_function_calls_eh_return)
12489 /* The PC is never popped directly; instead
12490 it is popped into r3 and then BX is used. */
12491 fprintf (f, "}\n");
12493 thumb_exit (f, -1);
12495 return;
12497 else
12499 if (mask & 0xFF)
12500 fprintf (f, ", ");
12502 asm_fprintf (f, "%r", PC_REGNUM);
12506 fprintf (f, "}\n");
12508 if (push && pushed_words && dwarf2out_do_frame ())
12510 char *l = dwarf2out_cfi_label ();
12511 int pushed_mask = real_regs;
12513 *cfa_offset += pushed_words * 4;
12514 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
12516 pushed_words = 0;
12517 pushed_mask = real_regs;
12518 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
12520 if (pushed_mask & 1)
12521 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
12526 /* Generate code to return from a thumb function.
12527 If 'reg_containing_return_addr' is -1, then the return address is
12528 actually on the stack, at the stack pointer. */
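/* Illustrative cases (added for exposition): with the return address
   already in a register and nothing to pop, this reduces to

     bx lr

   and, when the return address is on the stack and interworking,
   backtrace structures, ARM-mode entry and eh_return are all ruled
   out, to

     pop {pc}

   The longer paths below shuffle FP and SP through whichever argument
   registers may be corrupted.  */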
12529 static void
12530 thumb_exit (FILE *f, int reg_containing_return_addr)
12532 unsigned regs_available_for_popping;
12533 unsigned regs_to_pop;
12534 int pops_needed;
12535 unsigned available;
12536 unsigned required;
12537 int mode;
12538 int size;
12539 int restore_a4 = FALSE;
12541 /* Compute the registers we need to pop. */
12542 regs_to_pop = 0;
12543 pops_needed = 0;
12545 if (reg_containing_return_addr == -1)
12547 regs_to_pop |= 1 << LR_REGNUM;
12548 ++pops_needed;
12551 if (TARGET_BACKTRACE)
12553 /* Restore the (ARM) frame pointer and stack pointer. */
12554 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
12555 pops_needed += 2;
12558 /* If there is nothing to pop then just emit the BX instruction and
12559 return. */
12560 if (pops_needed == 0)
12562 if (current_function_calls_eh_return)
12563 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
12565 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
12566 return;
12568 /* Otherwise if we are not supporting interworking and we have not created
12569 a backtrace structure and the function was not entered in ARM mode then
12570 just pop the return address straight into the PC. */
12571 else if (!TARGET_INTERWORK
12572 && !TARGET_BACKTRACE
12573 && !is_called_in_ARM_mode (current_function_decl)
12574 && !current_function_calls_eh_return)
12576 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
12577 return;
12580 /* Find out how many of the (return) argument registers we can corrupt. */
12581 regs_available_for_popping = 0;
12583 /* If returning via __builtin_eh_return, the bottom three registers
12584 all contain information needed for the return. */
12585 if (current_function_calls_eh_return)
12586 size = 12;
12587 else
12589 /* We can deduce the registers used from the function's
12590 return value.  This is more reliable than examining
12591 regs_ever_live[] because that will be set if the register is
12592 ever used in the function, not just if the register is used
12593 to hold a return value. */
12595 if (current_function_return_rtx != 0)
12596 mode = GET_MODE (current_function_return_rtx);
12597 else
12598 mode = DECL_MODE (DECL_RESULT (current_function_decl));
12600 size = GET_MODE_SIZE (mode);
12602 if (size == 0)
12604 /* In a void function we can use any argument register.
12605 In a function that returns a structure on the stack
12606 we can use the second and third argument registers. */
12607 if (mode == VOIDmode)
12608 regs_available_for_popping =
12609 (1 << ARG_REGISTER (1))
12610 | (1 << ARG_REGISTER (2))
12611 | (1 << ARG_REGISTER (3));
12612 else
12613 regs_available_for_popping =
12614 (1 << ARG_REGISTER (2))
12615 | (1 << ARG_REGISTER (3));
12617 else if (size <= 4)
12618 regs_available_for_popping =
12619 (1 << ARG_REGISTER (2))
12620 | (1 << ARG_REGISTER (3));
12621 else if (size <= 8)
12622 regs_available_for_popping =
12623 (1 << ARG_REGISTER (3));
12626 /* Match registers to be popped with registers into which we pop them. */
12627 for (available = regs_available_for_popping,
12628 required = regs_to_pop;
12629 required != 0 && available != 0;
12630 available &= ~(available & - available),
12631 required &= ~(required & - required))
12632 -- pops_needed;
12634 /* If we have any popping registers left over, remove them. */
12635 if (available > 0)
12636 regs_available_for_popping &= ~available;
12638 /* Otherwise if we need another popping register we can use
12639 the fourth argument register. */
12640 else if (pops_needed)
12642 /* If we have not found any free argument registers and
12643 reg a4 contains the return address, we must move it. */
12644 if (regs_available_for_popping == 0
12645 && reg_containing_return_addr == LAST_ARG_REGNUM)
12647 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
12648 reg_containing_return_addr = LR_REGNUM;
12650 else if (size > 12)
12652 /* Register a4 is being used to hold part of the return value,
12653 but we have dire need of a free, low register. */
12654 restore_a4 = TRUE;
12656 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
12659 if (reg_containing_return_addr != LAST_ARG_REGNUM)
12661 /* The fourth argument register is available. */
12662 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
12664 --pops_needed;
12668 /* Pop as many registers as we can. */
12669 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
12670 regs_available_for_popping);
12672 /* Process the registers we popped. */
12673 if (reg_containing_return_addr == -1)
12675 /* The return address was popped into the lowest numbered register. */
12676 regs_to_pop &= ~(1 << LR_REGNUM);
12678 reg_containing_return_addr =
12679 number_of_first_bit_set (regs_available_for_popping);
12681 /* Remove this register from the mask of available registers, so that
12682 the return address will not be corrupted by further pops. */
12683 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
12686 /* If we popped other registers then handle them here. */
12687 if (regs_available_for_popping)
12689 int frame_pointer;
12691 /* Work out which register currently contains the frame pointer. */
12692 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
12694 /* Move it into the correct place. */
12695 asm_fprintf (f, "\tmov\t%r, %r\n",
12696 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
12698 /* (Temporarily) remove it from the mask of popped registers. */
12699 regs_available_for_popping &= ~(1 << frame_pointer);
12700 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
12702 if (regs_available_for_popping)
12704 int stack_pointer;
12706 /* We popped the stack pointer as well;
12707 find the register that contains it. */
12708 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
12710 /* Move it into the stack register. */
12711 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
12713 /* At this point we have popped all necessary registers, so
12714 do not worry about restoring regs_available_for_popping
12715 to its correct value:
12717 assert (pops_needed == 0)
12718 assert (regs_available_for_popping == (1 << frame_pointer))
12719 assert (regs_to_pop == (1 << STACK_POINTER)) */
12721 else
12723 /* Since we have just moved the popped value into the frame
12724 pointer, the popping register is available for reuse, and
12725 we know that we still have the stack pointer left to pop. */
12726 regs_available_for_popping |= (1 << frame_pointer);
12730 /* If we still have registers left on the stack, but we no longer have
12731 any registers into which we can pop them, then we must move the return
12732 address into the link register and make available the register that
12733 contained it. */
12734 if (regs_available_for_popping == 0 && pops_needed > 0)
12736 regs_available_for_popping |= 1 << reg_containing_return_addr;
12738 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
12739 reg_containing_return_addr);
12741 reg_containing_return_addr = LR_REGNUM;
12744 /* If we have registers left on the stack then pop some more.
12745 We know that at most we will want to pop FP and SP. */
12746 if (pops_needed > 0)
12748 int popped_into;
12749 int move_to;
12751 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
12752 regs_available_for_popping);
12754 /* We have popped either FP or SP.
12755 Move whichever one it is into the correct register. */
12756 popped_into = number_of_first_bit_set (regs_available_for_popping);
12757 move_to = number_of_first_bit_set (regs_to_pop);
12759 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
12761 regs_to_pop &= ~(1 << move_to);
12763 --pops_needed;
12766 /* If we still have not popped everything then we must have only
12767 had one register available to us and we are now popping the SP. */
12768 if (pops_needed > 0)
12770 int popped_into;
12772 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
12773 regs_available_for_popping);
12775 popped_into = number_of_first_bit_set (regs_available_for_popping);
12777 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
12779 /* assert (regs_to_pop == (1 << STACK_POINTER))
12780 assert (pops_needed == 1) */
12784 /* If necessary restore the a4 register. */
12785 if (restore_a4)
12787 if (reg_containing_return_addr != LR_REGNUM)
12789 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
12790 reg_containing_return_addr = LR_REGNUM;
12793 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
12796 if (current_function_calls_eh_return)
12797 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
12799 /* Return to caller. */
12800 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
12804 void
12805 thumb_final_prescan_insn (rtx insn)
12807 if (flag_print_asm_name)
12808 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
12809 INSN_ADDRESSES (INSN_UID (insn)));
12812 int
12813 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
12815 unsigned HOST_WIDE_INT mask = 0xff;
12816 int i;
12818 if (val == 0) /* XXX */
12819 return 0;
12821 for (i = 0; i < 25; i++)
12822 if ((val & (mask << i)) == val)
12823 return 1;
12825 return 0;
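/* Worked examples (added for exposition): 0x00ff0000 is 0xff shifted
   left by 16, so the function returns 1; 0x101 cannot be expressed as
   an 8-bit constant shifted left, so it returns 0.  Values accepted
   here can be materialised in Thumb with a move-immediate followed by
   a left shift.  */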
12828 /* Returns nonzero if the current function contains,
12829 or might contain a far jump. */
12830 static int
12831 thumb_far_jump_used_p (void)
12833 rtx insn;
12835 /* This test is only important for leaf functions. */
12836 /* assert (!leaf_function_p ()); */
12838 /* If we have already decided that far jumps may be used,
12839 do not bother checking again, and always return true even if
12840 it turns out that they are not being used. Once we have made
12841 the decision that far jumps are present (and that hence the link
12842 register will be pushed onto the stack) we cannot go back on it. */
12843 if (cfun->machine->far_jump_used)
12844 return 1;
12846 /* If this function is not being called from the prologue/epilogue
12847 generation code then it must be being called from the
12848 INITIAL_ELIMINATION_OFFSET macro. */
12849 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
12851 /* In this case we know that we are being asked about the elimination
12852 of the arg pointer register. If that register is not being used,
12853 then there are no arguments on the stack, and we do not have to
12854 worry that a far jump might force the prologue to push the link
12855 register, changing the stack offsets. In this case we can just
12856 return false, since the presence of far jumps in the function will
12857 not affect stack offsets.
12859 If the arg pointer is live (or if it was live, but has now been
12860 eliminated and so set to dead) then we do have to test to see if
12861 the function might contain a far jump. This test can lead to some
12862 false positives, since before reload is completed, the length of
12863 branch instructions is not known, so gcc defaults to returning their
12864 longest length, which in turn sets the far jump attribute to true.
12866 A false positive will not result in bad code being generated, but it
12867 will result in a needless push and pop of the link register. We
12868 hope that this does not occur too often.
12870 If we need doubleword stack alignment this could affect the other
12871 elimination offsets so we can't risk getting it wrong. */
12872 if (regs_ever_live [ARG_POINTER_REGNUM])
12873 cfun->machine->arg_pointer_live = 1;
12874 else if (!cfun->machine->arg_pointer_live)
12875 return 0;
12878 /* Check to see if the function contains a branch
12879 insn with the far jump attribute set. */
12880 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
12882 if (GET_CODE (insn) == JUMP_INSN
12883 /* Ignore tablejump patterns. */
12884 && GET_CODE (PATTERN (insn)) != ADDR_VEC
12885 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
12886 && get_attr_far_jump (insn) == FAR_JUMP_YES
12889 /* Record the fact that we have decided that
12890 the function does use far jumps. */
12891 cfun->machine->far_jump_used = 1;
12892 return 1;
12896 return 0;
12899 /* Return nonzero if FUNC must be entered in ARM mode. */
12900 int
12901 is_called_in_ARM_mode (tree func)
12903 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
12905 /* Ignore the problem of functions whose address is taken. */
12906 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
12907 return TRUE;
12909 #ifdef ARM_PE
12910 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
12911 #else
12912 return FALSE;
12913 #endif
12916 /* The bits which aren't usefully expanded as rtl. */
12917 const char *
12918 thumb_unexpanded_epilogue (void)
12920 int regno;
12921 unsigned long live_regs_mask = 0;
12922 int high_regs_pushed = 0;
12923 int had_to_push_lr;
12924 int size;
12925 int mode;
12927 if (return_used_this_function)
12928 return "";
12930 if (IS_NAKED (arm_current_func_type ()))
12931 return "";
12933 live_regs_mask = thumb_compute_save_reg_mask ();
12934 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
12936 /* We can deduce the registers used from the function's return value.
12937 This is more reliable than examining regs_ever_live[] because that
12938 will be set if the register is ever used in the function, not just if
12939 the register is used to hold a return value. */
12940 size = arm_size_return_regs ();
12942 /* The prologue may have pushed some high registers to use as
12943 work registers; e.g. the testsuite file:
12944 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
12945 compiles to produce:
12946 push {r4, r5, r6, r7, lr}
12947 mov r7, r9
12948 mov r6, r8
12949 push {r6, r7}
12950 as part of the prologue. We have to undo that pushing here. */
12952 if (high_regs_pushed)
12954 unsigned long mask = live_regs_mask & 0xff;
12955 int next_hi_reg;
12957 /* The available low registers depend on the size of the value we are
12958 returning. */
12959 if (size <= 12)
12960 mask |= 1 << 3;
12961 if (size <= 8)
12962 mask |= 1 << 2;
12964 if (mask == 0)
12965 /* Oh dear! We have no low registers into which we can pop
12966 high registers! */
12967 internal_error
12968 ("no low registers available for popping high registers");
12970 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
12971 if (live_regs_mask & (1 << next_hi_reg))
12972 break;
12974 while (high_regs_pushed)
12976 /* Find lo register(s) into which the high register(s) can
12977 be popped. */
12978 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
12980 if (mask & (1 << regno))
12981 high_regs_pushed--;
12982 if (high_regs_pushed == 0)
12983 break;
12986 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
12988 /* Pop the values into the low register(s). */
12989 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
12991 /* Move the value(s) into the high registers. */
12992 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
12994 if (mask & (1 << regno))
12996 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
12997 regno);
12999 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
13000 if (live_regs_mask & (1 << next_hi_reg))
13001 break;
13005 live_regs_mask &= ~0x0f00;
13008 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
13009 live_regs_mask &= 0xff;
13011 if (current_function_pretend_args_size == 0 || TARGET_BACKTRACE)
13013 /* Pop the return address into the PC. */
13014 if (had_to_push_lr)
13015 live_regs_mask |= 1 << PC_REGNUM;
13017 /* Either no argument registers were pushed or a backtrace
13018 structure was created which includes an adjusted stack
13019 pointer, so just pop everything. */
13020 if (live_regs_mask)
13021 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
13022 live_regs_mask);
13024 /* We have either just popped the return address into the
13025 PC or it was kept in LR for the entire function. */
13026 if (!had_to_push_lr)
13027 thumb_exit (asm_out_file, LR_REGNUM);
13029 else
13031 /* Pop everything but the return address. */
13032 if (live_regs_mask)
13033 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
13034 live_regs_mask);
13036 if (had_to_push_lr)
13038 if (size > 12)
13040 /* We have no free low regs, so save one. */
13041 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
13042 LAST_ARG_REGNUM);
13045 /* Get the return address into a temporary register. */
13046 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
13047 1 << LAST_ARG_REGNUM);
13049 if (size > 12)
13051 /* Move the return address to lr. */
13052 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
13053 LAST_ARG_REGNUM);
13054 /* Restore the low register. */
13055 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
13056 IP_REGNUM);
13057 regno = LR_REGNUM;
13059 else
13060 regno = LAST_ARG_REGNUM;
13062 else
13063 regno = LR_REGNUM;
13065 /* Remove the argument registers that were pushed onto the stack. */
13066 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
13067 SP_REGNUM, SP_REGNUM,
13068 current_function_pretend_args_size);
13070 thumb_exit (asm_out_file, regno);
13073 return "";
13076 /* Functions to save and restore machine-specific function data. */
13077 static struct machine_function *
13078 arm_init_machine_status (void)
13080 struct machine_function *machine;
13081 machine = (machine_function *) ggc_alloc_cleared (sizeof (machine_function));
13083 #if ARM_FT_UNKNOWN != 0
13084 machine->func_type = ARM_FT_UNKNOWN;
13085 #endif
13086 return machine;
13089 /* Return an RTX indicating where the return address to the
13090 calling function can be found. */
13091 rtx
13092 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
13094 if (count != 0)
13095 return NULL_RTX;
13097 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
13100 /* Do anything needed before RTL is emitted for each function. */
13101 void
13102 arm_init_expanders (void)
13104 /* Arrange to initialize and mark the machine per-function status. */
13105 init_machine_status = arm_init_machine_status;
13107 /* This is to stop the combine pass optimizing away the alignment
13108 adjustment of va_arg. */
13109 /* ??? It is claimed that this should not be necessary. */
13110 if (cfun)
13111 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
13115 /* Like arm_compute_initial_elimination_offset.  Simpler because
13116 THUMB_HARD_FRAME_POINTER isn't actually the ABI-specified frame pointer. */
13118 HOST_WIDE_INT
13119 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
13121 arm_stack_offsets *offsets;
13123 offsets = arm_get_frame_offsets ();
13125 switch (from)
13127 case ARG_POINTER_REGNUM:
13128 switch (to)
13130 case STACK_POINTER_REGNUM:
13131 return offsets->outgoing_args - offsets->saved_args;
13133 case FRAME_POINTER_REGNUM:
13134 return offsets->soft_frame - offsets->saved_args;
13136 case THUMB_HARD_FRAME_POINTER_REGNUM:
13137 case ARM_HARD_FRAME_POINTER_REGNUM:
13138 return offsets->saved_regs - offsets->saved_args;
13140 default:
13141 gcc_unreachable ();
13143 break;
13145 case FRAME_POINTER_REGNUM:
13146 switch (to)
13148 case STACK_POINTER_REGNUM:
13149 return offsets->outgoing_args - offsets->soft_frame;
13151 case THUMB_HARD_FRAME_POINTER_REGNUM:
13152 case ARM_HARD_FRAME_POINTER_REGNUM:
13153 return offsets->saved_regs - offsets->soft_frame;
13155 default:
13156 gcc_unreachable ();
13158 break;
13160 default:
13161 gcc_unreachable ();
13166 /* Generate the rest of a function's prologue. */
13167 void
13168 thumb_expand_prologue (void)
13170 rtx insn, dwarf;
13172 HOST_WIDE_INT amount;
13173 arm_stack_offsets *offsets;
13174 unsigned long func_type;
13175 int regno;
13176 unsigned long live_regs_mask;
13178 func_type = arm_current_func_type ();
13180 /* Naked functions don't have prologues. */
13181 if (IS_NAKED (func_type))
13182 return;
13184 if (IS_INTERRUPT (func_type))
13186 error ("interrupt Service Routines cannot be coded in Thumb mode");
13187 return;
13190 live_regs_mask = thumb_compute_save_reg_mask ();
13191 /* Load the pic register before setting the frame pointer,
13192 so we can use r7 as a temporary work register. */
13193 if (flag_pic)
13194 arm_load_pic_register (thumb_find_work_register (live_regs_mask));
13196 offsets = arm_get_frame_offsets ();
13198 if (frame_pointer_needed)
13200 insn = emit_insn (gen_movsi (hard_frame_pointer_rtx,
13201 stack_pointer_rtx));
13202 RTX_FRAME_RELATED_P (insn) = 1;
13204 else if (CALLER_INTERWORKING_SLOT_SIZE > 0)
13205 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
13206 stack_pointer_rtx);
13208 amount = offsets->outgoing_args - offsets->saved_regs;
13209 if (amount)
13211 if (amount < 512)
13213 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
13214 GEN_INT (- amount)));
13215 RTX_FRAME_RELATED_P (insn) = 1;
13217 else
13219 rtx reg;
13221 /* The stack decrement is too big for an immediate value in a single
13222 insn. In theory we could issue multiple subtracts, but after
13223 three of them it becomes more space efficient to place the full
13224 value in the constant pool and load into a register. (Also the
13225 ARM debugger really likes to see only one stack decrement per
13226 function). So instead we look for a scratch register into which
13227 we can load the decrement, and then we subtract this from the
13228 stack pointer. Unfortunately on the thumb the only available
13229 scratch registers are the argument registers, and we cannot use
13230 these as they may hold arguments to the function. Instead we
13231 attempt to locate a call preserved register which is used by this
13232 function. If we can find one, then we know that it will have
13233 been pushed at the start of the prologue and so we can corrupt
13234 it now. */
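/* Illustrative example (hypothetical values): for a 4 KB frame with r4
   live in this function, the code below emits roughly

     ldr  r4, .Lpool        (a literal-pool entry holding -4096)
     add  sp, r4

   reusing a call-saved register that the prologue has already pushed
   instead of corrupting an argument register.  */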
13235 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
13236 if (live_regs_mask & (1 << regno)
13237 && !(frame_pointer_needed
13238 && (regno == THUMB_HARD_FRAME_POINTER_REGNUM)))
13239 break;
13241 if (regno > LAST_LO_REGNUM) /* Very unlikely. */
13243 rtx spare = gen_rtx_REG (SImode, IP_REGNUM);
13245 /* Choose an arbitrary, non-argument low register. */
13246 reg = gen_rtx_REG (SImode, LAST_LO_REGNUM);
13248 /* Save it by copying it into a high, scratch register. */
13249 emit_insn (gen_movsi (spare, reg));
13250 /* Add a USE to stop propagate_one_insn() from barfing. */
13251 emit_insn (gen_prologue_use (spare));
13253 /* Decrement the stack. */
13254 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
13255 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
13256 stack_pointer_rtx, reg));
13257 RTX_FRAME_RELATED_P (insn) = 1;
13258 dwarf = gen_rtx_SET (SImode, stack_pointer_rtx,
13259 plus_constant (stack_pointer_rtx,
13260 -amount));
13261 RTX_FRAME_RELATED_P (dwarf) = 1;
13262 REG_NOTES (insn)
13263 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
13264 REG_NOTES (insn));
13266 /* Restore the low register's original value. */
13267 emit_insn (gen_movsi (reg, spare));
13269 /* Emit a USE of the restored scratch register, so that flow
13270 analysis will not consider the restore redundant. The
13271 register won't be used again in this function and isn't
13272 restored by the epilogue. */
13273 emit_insn (gen_prologue_use (reg));
13275 else
13277 reg = gen_rtx_REG (SImode, regno);
13279 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
13281 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
13282 stack_pointer_rtx, reg));
13283 RTX_FRAME_RELATED_P (insn) = 1;
13284 dwarf = gen_rtx_SET (SImode, stack_pointer_rtx,
13285 plus_constant (stack_pointer_rtx,
13286 -amount));
13287 RTX_FRAME_RELATED_P (dwarf) = 1;
13288 REG_NOTES (insn)
13289 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
13290 REG_NOTES (insn));
13293 /* If the frame pointer is needed, emit a special barrier that
13294 will prevent the scheduler from moving stores to the frame
13295 before the stack adjustment. */
13296 if (frame_pointer_needed)
13297 emit_insn (gen_stack_tie (stack_pointer_rtx,
13298 hard_frame_pointer_rtx));
13301 if (current_function_profile || !TARGET_SCHED_PROLOG)
13302 emit_insn (gen_blockage ());
13304 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
13305 if (live_regs_mask & 0xff)
13306 cfun->machine->lr_save_eliminated = 0;
13308 /* If the link register is being kept alive, with the return address in it,
13309 then make sure that it does not get reused by the ce2 pass. */
13310 if (cfun->machine->lr_save_eliminated)
13311 emit_insn (gen_prologue_use (gen_rtx_REG (SImode, LR_REGNUM)));
13315 void
13316 thumb_expand_epilogue (void)
13318 HOST_WIDE_INT amount;
13319 arm_stack_offsets *offsets;
13320 int regno;
13322 /* Naked functions don't have epilogues. */
13323 if (IS_NAKED (arm_current_func_type ()))
13324 return;
13326 offsets = arm_get_frame_offsets ();
13327 amount = offsets->outgoing_args - offsets->saved_regs;
13329 if (frame_pointer_needed)
13330 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
13331 else if (amount)
13333 if (amount < 512)
13334 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
13335 GEN_INT (amount)));
13336 else
13338 /* r3 is always free in the epilogue. */
13339 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
13341 emit_insn (gen_movsi (reg, GEN_INT (amount)));
13342 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
13346 /* Emit a USE (stack_pointer_rtx), so that
13347 the stack adjustment will not be deleted. */
13348 emit_insn (gen_prologue_use (stack_pointer_rtx));
13350 if (current_function_profile || !TARGET_SCHED_PROLOG)
13351 emit_insn (gen_blockage ());
13353 /* Emit a clobber for each register that will be restored in the epilogue,
13354 so that flow2 will get register lifetimes correct. */
13355 for (regno = 0; regno < 13; regno++)
13356 if (regs_ever_live[regno] && !call_used_regs[regno])
13357 emit_insn (gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, regno)));
13359 if (! regs_ever_live[LR_REGNUM])
13360 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, LR_REGNUM)));
13363 static void
13364 thumb_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
13366 unsigned long live_regs_mask = 0;
13367 unsigned long l_mask;
13368 unsigned high_regs_pushed = 0;
13369 int cfa_offset = 0;
13370 int regno;
13372 if (IS_NAKED (arm_current_func_type ()))
13373 return;
13375 if (is_called_in_ARM_mode (current_function_decl))
13377 const char * name;
13379 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
13380 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
13381 == SYMBOL_REF);
13382 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
13384 /* Generate code sequence to switch us into Thumb mode. */
13385 /* The .code 32 directive has already been emitted by
13386 ASM_DECLARE_FUNCTION_NAME. */
13387 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
13388 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
13390 /* Generate a label, so that the debugger will notice the
13391 change in instruction sets. This label is also used by
13392 the assembler to bypass the ARM code when this function
13393 is called from a Thumb encoded function elsewhere in the
13394 same file. Hence the definition of STUB_NAME here must
13395 agree with the definition in gas/config/tc-arm.c. */
13397 #define STUB_NAME ".real_start_of"
13399 fprintf (f, "\t.code\t16\n");
13400 #ifdef ARM_PE
13401 if (arm_dllexport_name_p (name))
13402 name = arm_strip_name_encoding (name);
13403 #endif
13404 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
13405 fprintf (f, "\t.thumb_func\n");
13406 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
13409 if (current_function_pretend_args_size)
13411 if (cfun->machine->uses_anonymous_args)
13413 int num_pushes;
13415 fprintf (f, "\tpush\t{");
13417 num_pushes = ARM_NUM_INTS (current_function_pretend_args_size);
13419 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
13420 regno <= LAST_ARG_REGNUM;
13421 regno++)
13422 asm_fprintf (f, "%r%s", regno,
13423 regno == LAST_ARG_REGNUM ? "" : ", ");
13425 fprintf (f, "}\n");
13427 else
13428 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
13429 SP_REGNUM, SP_REGNUM,
13430 current_function_pretend_args_size);
13432 /* We don't need to record the stores for unwinding (would it
13433 help the debugger any if we did?), but record the change in
13434 the stack pointer. */
13435 if (dwarf2out_do_frame ())
13437 char *l = dwarf2out_cfi_label ();
13439 cfa_offset = cfa_offset + current_function_pretend_args_size;
13440 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
13444 /* Get the registers we are going to push. */
13445 live_regs_mask = thumb_compute_save_reg_mask ();
13446 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
13447 l_mask = live_regs_mask & 0x40ff;
13448 /* Then count how many other high registers will need to be pushed. */
13449 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
13451 if (TARGET_BACKTRACE)
13453 unsigned offset;
13454 unsigned work_register;
13456 /* We have been asked to create a stack backtrace structure.
13457 The code looks like this:
13459 0 .align 2
13460 0 func:
13461 0 sub SP, #16 Reserve space for 4 registers.
13462 2 push {R7} Push low registers.
13463 4 add R7, SP, #20 Get the stack pointer before the push.
13464 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
13465 8 mov R7, PC Get hold of the start of this code plus 12.
13466 10 str R7, [SP, #16] Store it.
13467 12 mov R7, FP Get hold of the current frame pointer.
13468 14 str R7, [SP, #4] Store it.
13469 16 mov R7, LR Get hold of the current return address.
13470 18 str R7, [SP, #12] Store it.
13471 20 add R7, SP, #16 Point at the start of the backtrace structure.
13472 22 mov FP, R7 Put this value into the frame pointer. */
13474 work_register = thumb_find_work_register (live_regs_mask);
13476 asm_fprintf
13477 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
13478 SP_REGNUM, SP_REGNUM);
13480 if (dwarf2out_do_frame ())
13482 char *l = dwarf2out_cfi_label ();
13484 cfa_offset = cfa_offset + 16;
13485 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
13488 if (l_mask)
13490 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
13491 offset = bit_count (l_mask);
13493 else
13494 offset = 0;
13496 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
13497 offset + 16 + current_function_pretend_args_size);
13499 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
13500 offset + 4);
13502 /* Make sure that the instruction fetching the PC is in the right place
13503 to calculate "start of backtrace creation code + 12". */
13504 if (l_mask)
13506 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
13507 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
13508 offset + 12);
13509 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
13510 ARM_HARD_FRAME_POINTER_REGNUM);
13511 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
13512 offset);
13514 else
13516 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
13517 ARM_HARD_FRAME_POINTER_REGNUM);
13518 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
13519 offset);
13520 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
13521 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
13522 offset + 12);
13525 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
13526 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
13527 offset + 8);
13528 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
13529 offset + 12);
13530 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
13531 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
13533 /* Optimisation: If we are not pushing any low registers but we are going
13534 to push some high registers then delay our first push. This will just
13535 be a push of LR and we can combine it with the push of the first high
13536 register. */
13537 else if ((l_mask & 0xff) != 0
13538 || (high_regs_pushed == 0 && l_mask))
13539 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
13541 if (high_regs_pushed)
13543 unsigned pushable_regs;
13544 unsigned next_hi_reg;
13546 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
13547 if (live_regs_mask & (1 << next_hi_reg))
13548 break;
13550 pushable_regs = l_mask & 0xff;
13552 if (pushable_regs == 0)
13553 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
13555 while (high_regs_pushed > 0)
13557 unsigned long real_regs_mask = 0;
13559 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
13561 if (pushable_regs & (1 << regno))
13563 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
13565 high_regs_pushed --;
13566 real_regs_mask |= (1 << next_hi_reg);
13568 if (high_regs_pushed)
13570 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
13571 next_hi_reg --)
13572 if (live_regs_mask & (1 << next_hi_reg))
13573 break;
13575 else
13577 pushable_regs &= ~((1 << regno) - 1);
13578 break;
13583 /* If we had to find a work register and we have not yet
13584 saved the LR then add it to the list of regs to push. */
13585 if (l_mask == (1 << LR_REGNUM))
13587 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
13588 1, &cfa_offset,
13589 real_regs_mask | (1 << LR_REGNUM));
13590 l_mask = 0;
13592 else
13593 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
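/* Illustrative sketch of the high-register spill loop above (hypothetical
   registers, not from any particular compilation): if the live high
   registers are r8 and r9 and the only pushable low registers are r4 and
   r5, the emitted sequence is roughly

	mov	r5, r9
	mov	r4, r8
	push	{r4, r5}

   with r8 and r9 recorded in real_regs_mask so the unwind information
   describes the registers actually being saved.  */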
13598 /* Handle the case of a double word load into a low register from
13599 a computed memory address. The computed address may involve a
13600 register which is overwritten by the load. */
13601 const char *
13602 thumb_load_double_from_address (rtx *operands)
13604 rtx addr;
13605 rtx base;
13606 rtx offset;
13607 rtx arg1;
13608 rtx arg2;
13610 gcc_assert (GET_CODE (operands[0]) == REG);
13611 gcc_assert (GET_CODE (operands[1]) == MEM);
13613 /* Get the memory address. */
13614 addr = XEXP (operands[1], 0);
13616 /* Work out how the memory address is computed. */
13617 switch (GET_CODE (addr))
13619 case REG:
13620 operands[2] = gen_rtx_MEM (SImode,
13621 plus_constant (XEXP (operands[1], 0), 4));
13623 if (REGNO (operands[0]) == REGNO (addr))
13625 output_asm_insn ("ldr\t%H0, %2", operands);
13626 output_asm_insn ("ldr\t%0, %1", operands);
13628 else
13630 output_asm_insn ("ldr\t%0, %1", operands);
13631 output_asm_insn ("ldr\t%H0, %2", operands);
13633 break;
13635 case CONST:
13636 /* Compute <address> + 4 for the high order load. */
13637 operands[2] = gen_rtx_MEM (SImode,
13638 plus_constant (XEXP (operands[1], 0), 4));
13640 output_asm_insn ("ldr\t%0, %1", operands);
13641 output_asm_insn ("ldr\t%H0, %2", operands);
13642 break;
13644 case PLUS:
13645 arg1 = XEXP (addr, 0);
13646 arg2 = XEXP (addr, 1);
13648 if (CONSTANT_P (arg1))
13649 base = arg2, offset = arg1;
13650 else
13651 base = arg1, offset = arg2;
13653 gcc_assert (GET_CODE (base) == REG);
13655 /* Catch the case of <address> = <reg> + <reg> */
13656 if (GET_CODE (offset) == REG)
13658 int reg_offset = REGNO (offset);
13659 int reg_base = REGNO (base);
13660 int reg_dest = REGNO (operands[0]);
13662 /* Add the base and offset registers together into the
13663 higher destination register. */
13664 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
13665 reg_dest + 1, reg_base, reg_offset);
13667 /* Load the lower destination register from the address in
13668 the higher destination register. */
13669 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
13670 reg_dest, reg_dest + 1);
13672 /* Load the higher destination register from its own address
13673 plus 4. */
13674 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
13675 reg_dest + 1, reg_dest + 1);
13677 else
13679 /* Compute <address> + 4 for the high order load. */
13680 operands[2] = gen_rtx_MEM (SImode,
13681 plus_constant (XEXP (operands[1], 0), 4));
13683 /* If the computed address is held in the low order register
13684 then load the high order register first, otherwise always
13685 load the low order register first. */
13686 if (REGNO (operands[0]) == REGNO (base))
13688 output_asm_insn ("ldr\t%H0, %2", operands);
13689 output_asm_insn ("ldr\t%0, %1", operands);
13691 else
13693 output_asm_insn ("ldr\t%0, %1", operands);
13694 output_asm_insn ("ldr\t%H0, %2", operands);
13697 break;
13699 case LABEL_REF:
13700 /* With no registers to worry about we can just load the value
13701 directly. */
13702 operands[2] = gen_rtx_MEM (SImode,
13703 plus_constant (XEXP (operands[1], 0), 4));
13705 output_asm_insn ("ldr\t%H0, %2", operands);
13706 output_asm_insn ("ldr\t%0, %1", operands);
13707 break;
13709 default:
13710 gcc_unreachable ();
13713 return "";
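/* Illustrative example for the register-plus-register case above
   (hypothetical registers): loading the pair r2/r3 from the address
   r4 + r5 emits roughly

	add	r3, r4, r5
	ldr	r2, [r3, #0]
	ldr	r3, [r3, #4]

   so the computed address survives in the high half of the destination
   until the final load overwrites it.  */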
13716 const char *
13717 thumb_output_move_mem_multiple (int n, rtx *operands)
13719 rtx tmp;
13721 switch (n)
13723 case 2:
13724 if (REGNO (operands[4]) > REGNO (operands[5]))
13726 tmp = operands[4];
13727 operands[4] = operands[5];
13728 operands[5] = tmp;
13730 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
13731 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
13732 break;
13734 case 3:
13735 if (REGNO (operands[4]) > REGNO (operands[5]))
13737 tmp = operands[4];
13738 operands[4] = operands[5];
13739 operands[5] = tmp;
13741 if (REGNO (operands[5]) > REGNO (operands[6]))
13743 tmp = operands[5];
13744 operands[5] = operands[6];
13745 operands[6] = tmp;
13747 if (REGNO (operands[4]) > REGNO (operands[5]))
13749 tmp = operands[4];
13750 operands[4] = operands[5];
13751 operands[5] = tmp;
13754 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
13755 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
13756 break;
13758 default:
13759 gcc_unreachable ();
13762 return "";
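/* Illustrative example (hypothetical registers): for n == 3 with the
   destination pointer in r0, the source pointer in r1, and scratch
   registers r5, r3 and r4 in operands[4..6], the swaps above sort the
   scratch registers into ascending order and the output is roughly

	ldmia	r1!, {r3, r4, r5}
	stmia	r0!, {r3, r4, r5}

   both pointers being post-incremented by the multiple transfer.  */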
13765 /* Output a call-via instruction for thumb state. */
13766 const char *
13767 thumb_call_via_reg (rtx reg)
13769 int regno = REGNO (reg);
13770 rtx *labelp;
13772 gcc_assert (regno < LR_REGNUM);
13774 /* If we are in the normal text section we can use a single instance
13775 per compilation unit. If we are doing function sections, then we need
13776 an entry per section, since we can't rely on reachability. */
13777 if (in_text_section ())
13779 thumb_call_reg_needed = 1;
13781 if (thumb_call_via_label[regno] == NULL)
13782 thumb_call_via_label[regno] = gen_label_rtx ();
13783 labelp = thumb_call_via_label + regno;
13785 else
13787 if (cfun->machine->call_via[regno] == NULL)
13788 cfun->machine->call_via[regno] = gen_label_rtx ();
13789 labelp = cfun->machine->call_via + regno;
13792 output_asm_insn ("bl\t%a0", labelp);
13793 return "";
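/* Illustrative sketch: a call through, say, r4 becomes "bl <label>" here,
   and arm_file_end (or the per-section copy for -ffunction-sections)
   later emits the shared trampoline

   <label>:
	bx	r4

   so each register needs at most one such trampoline per text section.  */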
13796 /* Routines for generating rtl. */
13797 void
13798 thumb_expand_movmemqi (rtx *operands)
13800 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
13801 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
13802 HOST_WIDE_INT len = INTVAL (operands[2]);
13803 HOST_WIDE_INT offset = 0;
13805 while (len >= 12)
13807 emit_insn (gen_movmem12b (out, in, out, in));
13808 len -= 12;
13811 if (len >= 8)
13813 emit_insn (gen_movmem8b (out, in, out, in));
13814 len -= 8;
13817 if (len >= 4)
13819 rtx reg = gen_reg_rtx (SImode);
13820 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
13821 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
13822 len -= 4;
13823 offset += 4;
13826 if (len >= 2)
13828 rtx reg = gen_reg_rtx (HImode);
13829 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
13830 plus_constant (in, offset))));
13831 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
13832 reg));
13833 len -= 2;
13834 offset += 2;
13837 if (len)
13839 rtx reg = gen_reg_rtx (QImode);
13840 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
13841 plus_constant (in, offset))));
13842 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
13843 reg));
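/* Worked example (for illustration only): a 23-byte copy expands as one
   12-byte block move and one 8-byte block move (both post-incrementing
   IN and OUT), followed by a halfword at offset 0 and a byte at offset 2
   from the updated pointers: 23 = 12 + 8 + 2 + 1.  */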
13847 void
13848 thumb_reload_out_hi (rtx *operands)
13850 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
13853 /* Handle reading a half-word from memory during reload. */
13854 void
13855 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
13857 gcc_unreachable ();
13860 /* Return the length of a function name prefix
13861 that starts with the character 'c'. */
13862 static int
13863 arm_get_strip_length (int c)
13865 switch (c)
13867 ARM_NAME_ENCODING_LENGTHS
13868 default: return 0;
13872 /* Return a pointer to a function's name with any
13873 and all prefix encodings stripped from it. */
13874 const char *
13875 arm_strip_name_encoding (const char *name)
13877 int skip;
13879 while ((skip = arm_get_strip_length (* name)))
13880 name += skip;
13882 return name;
13885 /* If there is a '*' anywhere in the name's prefix, then
13886 emit the stripped name verbatim, otherwise prepend an
13887 underscore if leading underscores are being used. */
13888 void
13889 arm_asm_output_labelref (FILE *stream, const char *name)
13891 int skip;
13892 int verbatim = 0;
13894 while ((skip = arm_get_strip_length (* name)))
13896 verbatim |= (*name == '*');
13897 name += skip;
13900 if (verbatim)
13901 fputs (name, stream);
13902 else
13903 asm_fprintf (stream, "%U%s", name);
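/* Illustrative example: assuming '*' is one of the prefixes handled by
   ARM_NAME_ENCODING_LENGTHS, a name such as "*foo" is emitted verbatim
   as "foo", whereas a plain "foo" goes through %U and so picks up a
   leading underscore on targets that use one.  */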
13906 static void
13907 arm_file_end (void)
13909 int regno;
13911 if (! thumb_call_reg_needed)
13912 return;
13914 text_section ();
13915 asm_fprintf (asm_out_file, "\t.code 16\n");
13916 ASM_OUTPUT_ALIGN (asm_out_file, 1);
13918 for (regno = 0; regno < LR_REGNUM; regno++)
13920 rtx label = thumb_call_via_label[regno];
13922 if (label != 0)
13924 targetm.asm_out.internal_label (asm_out_file, "L",
13925 CODE_LABEL_NUMBER (label));
13926 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
13931 rtx aof_pic_label;
13933 #ifdef AOF_ASSEMBLER
13934 /* Special functions only needed when producing AOF syntax assembler. */
13936 struct pic_chain
13938 struct pic_chain * next;
13939 const char * symname;
13942 static struct pic_chain * aof_pic_chain = NULL;
13945 aof_pic_entry (rtx x)
13947 struct pic_chain ** chainp;
13948 int offset;
13950 if (aof_pic_label == NULL_RTX)
13952 aof_pic_label = gen_rtx_SYMBOL_REF (Pmode, "x$adcons");
13955 for (offset = 0, chainp = &aof_pic_chain; *chainp;
13956 offset += 4, chainp = &(*chainp)->next)
13957 if ((*chainp)->symname == XSTR (x, 0))
13958 return plus_constant (aof_pic_label, offset);
13960 *chainp = (struct pic_chain *) xmalloc (sizeof (struct pic_chain));
13961 (*chainp)->next = NULL;
13962 (*chainp)->symname = XSTR (x, 0);
13963 return plus_constant (aof_pic_label, offset);
13966 void
13967 aof_dump_pic_table (FILE *f)
13969 struct pic_chain * chain;
13971 if (aof_pic_chain == NULL)
13972 return;
13974 asm_fprintf (f, "\tAREA |%r$$adcons|, BASED %r\n",
13975 PIC_OFFSET_TABLE_REGNUM,
13976 PIC_OFFSET_TABLE_REGNUM);
13977 fputs ("|x$adcons|\n", f);
13979 for (chain = aof_pic_chain; chain; chain = chain->next)
13981 fputs ("\tDCD\t", f);
13982 assemble_name (f, chain->symname);
13983 fputs ("\n", f);
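/* Illustrative sketch (hypothetical symbols): after aof_pic_entry has
   been called for "a" and then "b", the table dumped above looks roughly
   like

	AREA |<pic-reg>$$adcons|, BASED <pic-reg>
   |x$adcons|
	DCD a
	DCD b

   where <pic-reg> stands for the PIC offset table register and each DCD
   sits at the offset that aof_pic_entry returned for its symbol.  */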
13987 int arm_text_section_count = 1;
13989 char *
13990 aof_text_section (void)
13992 static char buf[100];
13993 sprintf (buf, "\tAREA |C$$code%d|, CODE, READONLY",
13994 arm_text_section_count++);
13995 if (flag_pic)
13996 strcat (buf, ", PIC, REENTRANT");
13997 return buf;
14000 static int arm_data_section_count = 1;
14002 char *
14003 aof_data_section (void)
14005 static char buf[100];
14006 sprintf (buf, "\tAREA |C$$data%d|, DATA", arm_data_section_count++);
14007 return buf;
14010 /* The AOF assembler is religiously strict about declarations of
14011 imported and exported symbols, so that it is impossible to declare
14012 a function as imported near the beginning of the file, and then to
14013 export it later on. It is, however, possible to delay the decision
14014 until all the functions in the file have been compiled. To get
14015 around this, we maintain a list of the imports and exports, and
14016 delete from it any that are subsequently defined. At the end of
14017 compilation we spit the remainder of the list out before the END
14018 directive. */
14020 struct import
14022 struct import * next;
14023 const char * name;
14026 static struct import * imports_list = NULL;
14028 void
14029 aof_add_import (const char *name)
14031 struct import * new;
14033 for (new = imports_list; new; new = new->next)
14034 if (new->name == name)
14035 return;
14037 new = (struct import *) xmalloc (sizeof (struct import));
14038 new->next = imports_list;
14039 imports_list = new;
14040 new->name = name;
14043 void
14044 aof_delete_import (const char *name)
14046 struct import ** old;
14048 for (old = &imports_list; *old; old = & (*old)->next)
14050 if ((*old)->name == name)
14052 *old = (*old)->next;
14053 return;
14058 int arm_main_function = 0;
14060 static void
14061 aof_dump_imports (FILE *f)
14063 /* The AOF assembler needs this to cause the startup code to be extracted
14064 from the library. Bringing in __main causes the whole thing to work
14065 automagically. */
14066 if (arm_main_function)
14068 text_section ();
14069 fputs ("\tIMPORT __main\n", f);
14070 fputs ("\tDCD __main\n", f);
14073 /* Now dump the remaining imports. */
14074 while (imports_list)
14076 fprintf (f, "\tIMPORT\t");
14077 assemble_name (f, imports_list->name);
14078 fputc ('\n', f);
14079 imports_list = imports_list->next;
14083 static void
14084 aof_globalize_label (FILE *stream, const char *name)
14086 default_globalize_label (stream, name);
14087 if (! strcmp (name, "main"))
14088 arm_main_function = 1;
14091 static void
14092 aof_file_start (void)
14094 fputs ("__r0\tRN\t0\n", asm_out_file);
14095 fputs ("__a1\tRN\t0\n", asm_out_file);
14096 fputs ("__a2\tRN\t1\n", asm_out_file);
14097 fputs ("__a3\tRN\t2\n", asm_out_file);
14098 fputs ("__a4\tRN\t3\n", asm_out_file);
14099 fputs ("__v1\tRN\t4\n", asm_out_file);
14100 fputs ("__v2\tRN\t5\n", asm_out_file);
14101 fputs ("__v3\tRN\t6\n", asm_out_file);
14102 fputs ("__v4\tRN\t7\n", asm_out_file);
14103 fputs ("__v5\tRN\t8\n", asm_out_file);
14104 fputs ("__v6\tRN\t9\n", asm_out_file);
14105 fputs ("__sl\tRN\t10\n", asm_out_file);
14106 fputs ("__fp\tRN\t11\n", asm_out_file);
14107 fputs ("__ip\tRN\t12\n", asm_out_file);
14108 fputs ("__sp\tRN\t13\n", asm_out_file);
14109 fputs ("__lr\tRN\t14\n", asm_out_file);
14110 fputs ("__pc\tRN\t15\n", asm_out_file);
14111 fputs ("__f0\tFN\t0\n", asm_out_file);
14112 fputs ("__f1\tFN\t1\n", asm_out_file);
14113 fputs ("__f2\tFN\t2\n", asm_out_file);
14114 fputs ("__f3\tFN\t3\n", asm_out_file);
14115 fputs ("__f4\tFN\t4\n", asm_out_file);
14116 fputs ("__f5\tFN\t5\n", asm_out_file);
14117 fputs ("__f6\tFN\t6\n", asm_out_file);
14118 fputs ("__f7\tFN\t7\n", asm_out_file);
14119 text_section ();
14122 static void
14123 aof_file_end (void)
14125 if (flag_pic)
14126 aof_dump_pic_table (asm_out_file);
14127 arm_file_end ();
14128 aof_dump_imports (asm_out_file);
14129 fputs ("\tEND\n", asm_out_file);
14131 #endif /* AOF_ASSEMBLER */
14133 #ifndef ARM_PE
14134 /* Symbols in the text segment can be accessed without indirecting via the
14135 constant pool; it may take an extra binary operation, but this is still
14136 faster than indirecting via memory. Don't do this when not optimizing,
14137 since we won't be calculating all of the offsets necessary to do this
14138 simplification. */
14140 static void
14141 arm_encode_section_info (tree decl, rtx rtl, int first)
14143 /* This doesn't work with AOF syntax, since the string table may be in
14144 a different AREA. */
14145 #ifndef AOF_ASSEMBLER
14146 if (optimize > 0 && TREE_CONSTANT (decl))
14147 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
14148 #endif
14150 /* If we are referencing a function that is weak then encode a long call
14151 flag in the function name, otherwise if the function is static or
14152 known to be defined in this file then encode a short call flag. */
14153 if (first && DECL_P (decl))
14155 if (TREE_CODE (decl) == FUNCTION_DECL && DECL_WEAK (decl))
14156 arm_encode_call_attribute (decl, LONG_CALL_FLAG_CHAR);
14157 else if (! TREE_PUBLIC (decl))
14158 arm_encode_call_attribute (decl, SHORT_CALL_FLAG_CHAR);
14161 #endif /* !ARM_PE */
14163 static void
14164 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
14166 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
14167 && !strcmp (prefix, "L"))
14169 arm_ccfsm_state = 0;
14170 arm_target_insn = NULL;
14172 default_internal_label (stream, prefix, labelno);
14175 /* Output code to add DELTA to the first argument, and then jump
14176 to FUNCTION. Used for C++ multiple inheritance. */
14177 static void
14178 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
14179 HOST_WIDE_INT delta,
14180 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
14181 tree function)
14183 static int thunk_label = 0;
14184 char label[256];
14185 int mi_delta = delta;
14186 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
14187 int shift = 0;
14188 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
14189 ? 1 : 0);
14190 if (mi_delta < 0)
14191 mi_delta = - mi_delta;
14192 if (TARGET_THUMB)
14194 int labelno = thunk_label++;
14195 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
14196 fputs ("\tldr\tr12, ", file);
14197 assemble_name (file, label);
14198 fputc ('\n', file);
14200 while (mi_delta != 0)
14202 if ((mi_delta & (3 << shift)) == 0)
14203 shift += 2;
14204 else
14206 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
14207 mi_op, this_regno, this_regno,
14208 mi_delta & (0xff << shift));
14209 mi_delta &= ~(0xff << shift);
14210 shift += 8;
14213 if (TARGET_THUMB)
14215 fprintf (file, "\tbx\tr12\n");
14216 ASM_OUTPUT_ALIGN (file, 2);
14217 assemble_name (file, label);
14218 fputs (":\n", file);
14219 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
14221 else
14223 fputs ("\tb\t", file);
14224 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
14225 if (NEED_PLT_RELOC)
14226 fputs ("(PLT)", file);
14227 fputc ('\n', file);
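/* Worked example of the delta loop above (for illustration): with
   delta == 0x3004 and the `this' pointer in r0, the addition is split
   into 8-bit fields aligned to even bit positions, giving

	add	r0, r0, #4
	add	r0, r0, #12288	@ 0x3000

   before the tail branch (or, for Thumb, the bx through r12) to
   FUNCTION.  */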
14232 arm_emit_vector_const (FILE *file, rtx x)
14234 int i;
14235 const char * pattern;
14237 gcc_assert (GET_CODE (x) == CONST_VECTOR);
14239 switch (GET_MODE (x))
14241 case V2SImode: pattern = "%08x"; break;
14242 case V4HImode: pattern = "%04x"; break;
14243 case V8QImode: pattern = "%02x"; break;
14244 default: gcc_unreachable ();
14247 fprintf (file, "0x");
14248 for (i = CONST_VECTOR_NUNITS (x); i--;)
14250 rtx element;
14252 element = CONST_VECTOR_ELT (x, i);
14253 fprintf (file, pattern, INTVAL (element));
14256 return 1;
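/* Illustrative example: a V4HImode constant vector with elements
   {1, 2, 3, 4} (element 0 first) is printed highest-numbered element
   first, four hex digits per element, producing "0x0004000300020001".  */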
14259 const char *
14260 arm_output_load_gr (rtx *operands)
14262 rtx reg;
14263 rtx offset;
14264 rtx wcgr;
14265 rtx sum;
14267 if (GET_CODE (operands [1]) != MEM
14268 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
14269 || GET_CODE (reg = XEXP (sum, 0)) != REG
14270 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
14271 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
14272 return "wldrw%?\t%0, %1";
14274 /* Fix up an out-of-range load of a GR register. */
14275 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
14276 wcgr = operands[0];
14277 operands[0] = reg;
14278 output_asm_insn ("ldr%?\t%0, %1", operands);
14280 operands[0] = wcgr;
14281 operands[1] = reg;
14282 output_asm_insn ("tmcr%?\t%0, %1", operands);
14283 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
14285 return "";
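/* Illustrative sketch of the expansion above (hypothetical registers):
   for a load of wcgr0 from [r1, #2048] the offset is out of range, so we
   emit roughly

	str	r1, [sp, #-4]!	@ Start of GR load expansion
	ldr	r1, [r1, #2048]
	tmcr	wcgr0, r1
	ldr	r1, [sp], #4	@ End of GR load expansion

   temporarily reusing the base register to hold the loaded value.  */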
14288 static rtx
14289 arm_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
14290 int incoming ATTRIBUTE_UNUSED)
14292 #if 0
14293 /* FIXME: The ARM backend has special code to handle structure
14294 returns, and will reserve its own hidden first argument. So
14295 if this macro is enabled a *second* hidden argument will be
14296 reserved, which will break binary compatibility with old
14297 toolchains and also thunk handling. One day this should be
14298 fixed. */
14299 return 0;
14300 #else
14301 /* Register in which address to store a structure value
14302 is passed to a function. */
14303 return gen_rtx_REG (Pmode, ARG_REGISTER (1));
14304 #endif
14307 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
14309 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
14310 named arg and all anonymous args onto the stack.
14311 XXX I know the prologue shouldn't be pushing registers, but it is faster
14312 that way. */
14314 static void
14315 arm_setup_incoming_varargs (CUMULATIVE_ARGS *cum,
14316 enum machine_mode mode ATTRIBUTE_UNUSED,
14317 tree type ATTRIBUTE_UNUSED,
14318 int *pretend_size,
14319 int second_time ATTRIBUTE_UNUSED)
14321 cfun->machine->uses_anonymous_args = 1;
14322 if (cum->nregs < NUM_ARG_REGS)
14323 *pretend_size = (NUM_ARG_REGS - cum->nregs) * UNITS_PER_WORD;
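/* Illustrative example: for a varargs function whose named arguments
   consume two argument registers (cum->nregs == 2), the remaining
   NUM_ARG_REGS - 2 registers are spilled by the prologue, so
   *pretend_size becomes 2 * UNITS_PER_WORD == 8 bytes here.  */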
14326 /* Return nonzero if the CONSUMER instruction (a store) does not need
14327 PRODUCER's value to calculate the address. */
14330 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
14332 rtx value = PATTERN (producer);
14333 rtx addr = PATTERN (consumer);
14335 if (GET_CODE (value) == COND_EXEC)
14336 value = COND_EXEC_CODE (value);
14337 if (GET_CODE (value) == PARALLEL)
14338 value = XVECEXP (value, 0, 0);
14339 value = XEXP (value, 0);
14340 if (GET_CODE (addr) == COND_EXEC)
14341 addr = COND_EXEC_CODE (addr);
14342 if (GET_CODE (addr) == PARALLEL)
14343 addr = XVECEXP (addr, 0, 0);
14344 addr = XEXP (addr, 0);
14346 return !reg_overlap_mentioned_p (value, addr);
14349 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
14350 have an early register shift value or amount dependency on the
14351 result of PRODUCER. */
14354 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
14356 rtx value = PATTERN (producer);
14357 rtx op = PATTERN (consumer);
14358 rtx early_op;
14360 if (GET_CODE (value) == COND_EXEC)
14361 value = COND_EXEC_CODE (value);
14362 if (GET_CODE (value) == PARALLEL)
14363 value = XVECEXP (value, 0, 0);
14364 value = XEXP (value, 0);
14365 if (GET_CODE (op) == COND_EXEC)
14366 op = COND_EXEC_CODE (op);
14367 if (GET_CODE (op) == PARALLEL)
14368 op = XVECEXP (op, 0, 0);
14369 op = XEXP (op, 1);
14371 early_op = XEXP (op, 0);
14372 /* This is either an actual independent shift, or a shift applied to
14373 the first operand of another operation. We want the whole shift
14374 operation. */
14375 if (GET_CODE (early_op) == REG)
14376 early_op = op;
14378 return !reg_overlap_mentioned_p (value, early_op);
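/* Illustrative example: if PRODUCER sets r1 and CONSUMER is
   "add r0, r2, r1, lsl #3", the shift operand comes straight from the
   producer, so the function above returns zero (there is an early shift
   dependency); for "add r0, r1, r2, lsl #3" the producer's result feeds
   only the plain addend, so it returns nonzero.  */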
14381 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
14382 have an early register shift value dependency on the result of
14383 PRODUCER. */
14386 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
14388 rtx value = PATTERN (producer);
14389 rtx op = PATTERN (consumer);
14390 rtx early_op;
14392 if (GET_CODE (value) == COND_EXEC)
14393 value = COND_EXEC_CODE (value);
14394 if (GET_CODE (value) == PARALLEL)
14395 value = XVECEXP (value, 0, 0);
14396 value = XEXP (value, 0);
14397 if (GET_CODE (op) == COND_EXEC)
14398 op = COND_EXEC_CODE (op);
14399 if (GET_CODE (op) == PARALLEL)
14400 op = XVECEXP (op, 0, 0);
14401 op = XEXP (op, 1);
14403 early_op = XEXP (op, 0);
14405 /* This is either an actual independent shift, or a shift applied to
14406 the first operand of another operation. We want the value being
14407 shifted, in either case. */
14408 if (GET_CODE (early_op) != REG)
14409 early_op = XEXP (early_op, 0);
14411 return !reg_overlap_mentioned_p (value, early_op);
14414 /* Return nonzero if the CONSUMER (a mul or mac op) does not
14415 have an early register mult dependency on the result of
14416 PRODUCER. */
14419 arm_no_early_mul_dep (rtx producer, rtx consumer)
14421 rtx value = PATTERN (producer);
14422 rtx op = PATTERN (consumer);
14424 if (GET_CODE (value) == COND_EXEC)
14425 value = COND_EXEC_CODE (value);
14426 if (GET_CODE (value) == PARALLEL)
14427 value = XVECEXP (value, 0, 0);
14428 value = XEXP (value, 0);
14429 if (GET_CODE (op) == COND_EXEC)
14430 op = COND_EXEC_CODE (op);
14431 if (GET_CODE (op) == PARALLEL)
14432 op = XVECEXP (op, 0, 0);
14433 op = XEXP (op, 1);
14435 return (GET_CODE (op) == PLUS
14436 && !reg_overlap_mentioned_p (value, XEXP (op, 0)));
14440 /* We can't rely on the caller doing the proper promotion when
14441 using APCS or ATPCS. */
14443 static bool
14444 arm_promote_prototypes (tree t ATTRIBUTE_UNUSED)
14446 return !TARGET_AAPCS_BASED;
14450 /* AAPCS based ABIs use short enums by default. */
14452 static bool
14453 arm_default_short_enums (void)
14455 return TARGET_AAPCS_BASED;
14459 /* AAPCS requires that anonymous bitfields affect structure alignment. */
14461 static bool
14462 arm_align_anon_bitfield (void)
14464 return TARGET_AAPCS_BASED;
14468 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
14470 static tree
14471 arm_cxx_guard_type (void)
14473 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
14477 /* The EABI says test the least significant bit of a guard variable. */
14479 static bool
14480 arm_cxx_guard_mask_bit (void)
14482 return TARGET_AAPCS_BASED;
14486 /* The EABI specifies that all array cookies are 8 bytes long. */
14488 static tree
14489 arm_get_cookie_size (tree type)
14491 tree size;
14493 if (!TARGET_AAPCS_BASED)
14494 return default_cxx_get_cookie_size (type);
14496 size = build_int_cst (sizetype, 8);
14497 return size;
14501 /* The EABI says that array cookies should also contain the element size. */
14503 static bool
14504 arm_cookie_has_size (void)
14506 return TARGET_AAPCS_BASED;
14510 /* The EABI says constructors and destructors should return a pointer to
14511 the object constructed/destroyed. */
14513 static bool
14514 arm_cxx_cdtor_returns_this (void)
14516 return TARGET_AAPCS_BASED;
14519 /* The EABI says that an inline function may never be the key
14520 method. */
14522 static bool
14523 arm_cxx_key_method_may_be_inline (void)
14525 return !TARGET_AAPCS_BASED;
14528 static void
14529 arm_cxx_determine_class_data_visibility (tree decl)
14531 if (!TARGET_AAPCS_BASED)
14532 return;
14534 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
14535 is exported. However, on systems without dynamic vague linkage,
14536 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
14537 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
14538 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
14539 else
14540 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
14541 DECL_VISIBILITY_SPECIFIED (decl) = 1;
14544 static bool
14545 arm_cxx_class_data_always_comdat (void)
14547 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
14548 vague linkage if the class has no key function. */
14549 return !TARGET_AAPCS_BASED;
14553 /* The EABI says __aeabi_atexit should be used to register static
14554 destructors. */
14556 static bool
14557 arm_cxx_use_aeabi_atexit (void)
14559 return TARGET_AAPCS_BASED;
14563 void
14564 arm_set_return_address (rtx source, rtx scratch)
14566 arm_stack_offsets *offsets;
14567 HOST_WIDE_INT delta;
14568 rtx addr;
14569 unsigned long saved_regs;
14571 saved_regs = arm_compute_save_reg_mask ();
14573 if ((saved_regs & (1 << LR_REGNUM)) == 0)
14574 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
14575 else
14577 if (frame_pointer_needed)
14578 addr = plus_constant(hard_frame_pointer_rtx, -4);
14579 else
14581 /* LR will be the first saved register. */
14582 offsets = arm_get_frame_offsets ();
14583 delta = offsets->outgoing_args - (offsets->frame + 4);
14586 if (delta >= 4096)
14588 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
14589 GEN_INT (delta & ~4095)));
14590 addr = scratch;
14591 delta &= 4095;
14593 else
14594 addr = stack_pointer_rtx;
14596 addr = plus_constant (addr, delta);
14598 emit_move_insn (gen_rtx_MEM (Pmode, addr), source);
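/* Worked example of the large-offset path above (for illustration): if
   the computed delta is 5000 bytes, the store is split as
   5000 = 4096 + 904, i.e.

	add	scratch, sp, #4096
	str	source, [scratch, #904]

   keeping the final offset within the immediate range of the store.  */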
14603 void
14604 thumb_set_return_address (rtx source, rtx scratch)
14606 arm_stack_offsets *offsets;
14607 HOST_WIDE_INT delta;
14608 int reg;
14609 rtx addr;
14610 unsigned long mask;
14612 emit_insn (gen_rtx_USE (VOIDmode, source));
14614 mask = thumb_compute_save_reg_mask ();
14615 if (mask & (1 << LR_REGNUM))
14617 offsets = arm_get_frame_offsets ();
14619 /* Find the saved regs. */
14620 if (frame_pointer_needed)
14622 delta = offsets->soft_frame - offsets->saved_args;
14623 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
14625 else
14627 delta = offsets->outgoing_args - offsets->saved_args;
14628 reg = SP_REGNUM;
14630 /* Allow for the stack frame. */
14631 if (TARGET_BACKTRACE)
14632 delta -= 16;
14633 /* The link register is always the first saved register. */
14634 delta -= 4;
14636 /* Construct the address. */
14637 addr = gen_rtx_REG (SImode, reg);
14638 if ((reg != SP_REGNUM && delta >= 128)
14639 || delta >= 1024)
14641 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
14642 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
14643 addr = scratch;
14645 else
14646 addr = plus_constant (addr, delta);
14648 emit_move_insn (gen_rtx_MEM (Pmode, addr), source);
14650 else
14651 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
14654 /* Implements target hook vector_mode_supported_p. */
14655 bool
14656 arm_vector_mode_supported_p (enum machine_mode mode)
14658 if ((mode == V2SImode)
14659 || (mode == V4HImode)
14660 || (mode == V8QImode))
14661 return true;
14663 return false;
14666 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
14667 ARM insns and therefore guarantee that the shift count is modulo 256.
14668 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
14669 guarantee no particular behavior for out-of-range counts. */
14671 static unsigned HOST_WIDE_INT
14672 arm_shift_truncation_mask (enum machine_mode mode)
14674 return mode == SImode ? 255 : 0;
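/* Illustrative example: for SImode the mask is 255, so a variable shift
   count of 257 behaves like a shift by 1; for DImode the mask is 0,
   meaning no particular truncation is promised.  */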
14678 /* Map internal gcc register numbers to DWARF2 register numbers. */
14680 unsigned int
14681 arm_dbx_register_number (unsigned int regno)
14683 if (regno < 16)
14684 return regno;
14686 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
14687 compatibility. The EABI defines them as registers 96-103. */
14688 if (IS_FPA_REGNUM (regno))
14689 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
14691 if (IS_VFP_REGNUM (regno))
14692 return 64 + regno - FIRST_VFP_REGNUM;
14694 if (IS_IWMMXT_GR_REGNUM (regno))
14695 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
14697 if (IS_IWMMXT_REGNUM (regno))
14698 return 112 + regno - FIRST_IWMMXT_REGNUM;
14700 gcc_unreachable ();