* arm.c (arm_return_in_memory): Add handling for vector return types.
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
4 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
5 and Martin Simmons (@harleqn.co.uk).
6 More major hacks by Richard Earnshaw (rearnsha@arm.com).
8 This file is part of GCC.
10 GCC is free software; you can redistribute it and/or modify it
11 under the terms of the GNU General Public License as published
12 by the Free Software Foundation; either version 2, or (at your
13 option) any later version.
15 GCC is distributed in the hope that it will be useful, but WITHOUT
16 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
17 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
18 License for more details.
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING. If not, write to
22 the Free Software Foundation, 59 Temple Place - Suite 330,
23 Boston, MA 02111-1307, USA. */
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "obstack.h"
32 #include "regs.h"
33 #include "hard-reg-set.h"
34 #include "real.h"
35 #include "insn-config.h"
36 #include "conditions.h"
37 #include "output.h"
38 #include "insn-attr.h"
39 #include "flags.h"
40 #include "reload.h"
41 #include "function.h"
42 #include "expr.h"
43 #include "optabs.h"
44 #include "toplev.h"
45 #include "recog.h"
46 #include "ggc.h"
47 #include "except.h"
48 #include "c-pragma.h"
49 #include "integrate.h"
50 #include "tm_p.h"
51 #include "target.h"
52 #include "target-def.h"
53 #include "debug.h"
54 #include "langhooks.h"
56 /* Forward definitions of types. */
57 typedef struct minipool_node Mnode;
58 typedef struct minipool_fixup Mfix;
60 const struct attribute_spec arm_attribute_table[];
62 /* Forward function declarations. */
63 static arm_stack_offsets *arm_get_frame_offsets (void);
64 static void arm_add_gc_roots (void);
65 static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
66 HOST_WIDE_INT, rtx, rtx, int, int);
67 static unsigned bit_count (unsigned long);
68 static int arm_address_register_rtx_p (rtx, int);
69 static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
70 static int thumb_base_register_rtx_p (rtx, enum machine_mode, int);
71 inline static int thumb_index_register_rtx_p (rtx, int);
72 static int thumb_far_jump_used_p (void);
73 static bool thumb_force_lr_save (void);
74 static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
75 static rtx emit_sfm (int, int);
76 #ifndef AOF_ASSEMBLER
77 static bool arm_assemble_integer (rtx, unsigned int, int);
78 #endif
79 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
80 static arm_cc get_arm_condition_code (rtx);
81 static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
82 static rtx is_jump_table (rtx);
83 static const char *output_multi_immediate (rtx *, const char *, const char *,
84 int, HOST_WIDE_INT);
85 static const char *shift_op (rtx, HOST_WIDE_INT *);
86 static struct machine_function *arm_init_machine_status (void);
87 static void thumb_exit (FILE *, int);
88 static rtx is_jump_table (rtx);
89 static HOST_WIDE_INT get_jump_table_size (rtx);
90 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
91 static Mnode *add_minipool_forward_ref (Mfix *);
92 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
93 static Mnode *add_minipool_backward_ref (Mfix *);
94 static void assign_minipool_offsets (Mfix *);
95 static void arm_print_value (FILE *, rtx);
96 static void dump_minipool (rtx);
97 static int arm_barrier_cost (rtx);
98 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
99 static void push_minipool_barrier (rtx, HOST_WIDE_INT);
100 static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
101 rtx);
102 static void arm_reorg (void);
103 static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
104 static int current_file_function_operand (rtx);
105 static unsigned long arm_compute_save_reg0_reg12_mask (void);
106 static unsigned long arm_compute_save_reg_mask (void);
107 static unsigned long arm_isr_value (tree);
108 static unsigned long arm_compute_func_type (void);
109 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
110 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
111 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
112 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
113 #endif
114 static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
115 static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
116 static void thumb_output_function_prologue (FILE *, HOST_WIDE_INT);
117 static int arm_comp_type_attributes (tree, tree);
118 static void arm_set_default_type_attributes (tree);
119 static int arm_adjust_cost (rtx, rtx, rtx, int);
120 static int count_insns_for_constant (HOST_WIDE_INT, int);
121 static int arm_get_strip_length (int);
122 static bool arm_function_ok_for_sibcall (tree, tree);
123 static void arm_internal_label (FILE *, const char *, unsigned long);
124 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
125 tree);
126 static int arm_rtx_costs_1 (rtx, enum rtx_code, enum rtx_code);
127 static bool arm_size_rtx_costs (rtx, int, int, int *);
128 static bool arm_slowmul_rtx_costs (rtx, int, int, int *);
129 static bool arm_fastmul_rtx_costs (rtx, int, int, int *);
130 static bool arm_xscale_rtx_costs (rtx, int, int, int *);
131 static bool arm_9e_rtx_costs (rtx, int, int, int *);
132 static int arm_address_cost (rtx);
133 static bool arm_memory_load_p (rtx);
134 static bool arm_cirrus_insn_p (rtx);
135 static void cirrus_reorg (rtx);
136 static void arm_init_builtins (void);
137 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
138 static void arm_init_iwmmxt_builtins (void);
139 static rtx safe_vector_operand (rtx, enum machine_mode);
140 static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
141 static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
142 static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
143 static void emit_constant_insn (rtx cond, rtx pattern);
144 static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
145 tree, bool);
147 #ifdef OBJECT_FORMAT_ELF
148 static void arm_elf_asm_constructor (rtx, int);
149 #endif
150 #ifndef ARM_PE
151 static void arm_encode_section_info (tree, rtx, int);
152 #endif
154 static void arm_file_end (void);
156 #ifdef AOF_ASSEMBLER
157 static void aof_globalize_label (FILE *, const char *);
158 static void aof_dump_imports (FILE *);
159 static void aof_dump_pic_table (FILE *);
160 static void aof_file_start (void);
161 static void aof_file_end (void);
162 #endif
163 static rtx arm_struct_value_rtx (tree, int);
164 static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
165 tree, int *, int);
166 static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
167 enum machine_mode, tree, bool);
168 static bool arm_promote_prototypes (tree);
169 static bool arm_default_short_enums (void);
170 static bool arm_align_anon_bitfield (void);
171 static bool arm_return_in_msb (tree);
172 static bool arm_must_pass_in_stack (enum machine_mode, tree);
174 static tree arm_cxx_guard_type (void);
175 static bool arm_cxx_guard_mask_bit (void);
176 static tree arm_get_cookie_size (tree);
177 static bool arm_cookie_has_size (void);
178 static bool arm_cxx_cdtor_returns_this (void);
179 static bool arm_cxx_key_method_may_be_inline (void);
180 static void arm_cxx_determine_class_data_visibility (tree);
181 static bool arm_cxx_class_data_always_comdat (void);
182 static bool arm_cxx_use_aeabi_atexit (void);
183 static void arm_init_libfuncs (void);
184 static bool arm_handle_option (size_t, const char *, int);
185 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);
187 /* Initialize the GCC target structure. */
188 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
189 #undef TARGET_MERGE_DECL_ATTRIBUTES
190 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
191 #endif
193 #undef TARGET_ATTRIBUTE_TABLE
194 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
196 #undef TARGET_ASM_FILE_END
197 #define TARGET_ASM_FILE_END arm_file_end
199 #ifdef AOF_ASSEMBLER
200 #undef TARGET_ASM_BYTE_OP
201 #define TARGET_ASM_BYTE_OP "\tDCB\t"
202 #undef TARGET_ASM_ALIGNED_HI_OP
203 #define TARGET_ASM_ALIGNED_HI_OP "\tDCW\t"
204 #undef TARGET_ASM_ALIGNED_SI_OP
205 #define TARGET_ASM_ALIGNED_SI_OP "\tDCD\t"
206 #undef TARGET_ASM_GLOBALIZE_LABEL
207 #define TARGET_ASM_GLOBALIZE_LABEL aof_globalize_label
208 #undef TARGET_ASM_FILE_START
209 #define TARGET_ASM_FILE_START aof_file_start
210 #undef TARGET_ASM_FILE_END
211 #define TARGET_ASM_FILE_END aof_file_end
212 #else
213 #undef TARGET_ASM_ALIGNED_SI_OP
214 #define TARGET_ASM_ALIGNED_SI_OP NULL
215 #undef TARGET_ASM_INTEGER
216 #define TARGET_ASM_INTEGER arm_assemble_integer
217 #endif
219 #undef TARGET_ASM_FUNCTION_PROLOGUE
220 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
222 #undef TARGET_ASM_FUNCTION_EPILOGUE
223 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
225 #undef TARGET_DEFAULT_TARGET_FLAGS
226 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
227 #undef TARGET_HANDLE_OPTION
228 #define TARGET_HANDLE_OPTION arm_handle_option
230 #undef TARGET_COMP_TYPE_ATTRIBUTES
231 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
233 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
234 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
236 #undef TARGET_SCHED_ADJUST_COST
237 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
239 #undef TARGET_ENCODE_SECTION_INFO
240 #ifdef ARM_PE
241 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
242 #else
243 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
244 #endif
246 #undef TARGET_STRIP_NAME_ENCODING
247 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
249 #undef TARGET_ASM_INTERNAL_LABEL
250 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
252 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
253 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
255 #undef TARGET_ASM_OUTPUT_MI_THUNK
256 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
257 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
258 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall
260 /* This will be overridden in arm_override_options. */
261 #undef TARGET_RTX_COSTS
262 #define TARGET_RTX_COSTS arm_slowmul_rtx_costs
263 #undef TARGET_ADDRESS_COST
264 #define TARGET_ADDRESS_COST arm_address_cost
266 #undef TARGET_SHIFT_TRUNCATION_MASK
267 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
268 #undef TARGET_VECTOR_MODE_SUPPORTED_P
269 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
271 #undef TARGET_MACHINE_DEPENDENT_REORG
272 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
274 #undef TARGET_INIT_BUILTINS
275 #define TARGET_INIT_BUILTINS arm_init_builtins
276 #undef TARGET_EXPAND_BUILTIN
277 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
279 #undef TARGET_INIT_LIBFUNCS
280 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
282 #undef TARGET_PROMOTE_FUNCTION_ARGS
283 #define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true
284 #undef TARGET_PROMOTE_FUNCTION_RETURN
285 #define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
286 #undef TARGET_PROMOTE_PROTOTYPES
287 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
288 #undef TARGET_PASS_BY_REFERENCE
289 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
290 #undef TARGET_ARG_PARTIAL_BYTES
291 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
293 #undef TARGET_STRUCT_VALUE_RTX
294 #define TARGET_STRUCT_VALUE_RTX arm_struct_value_rtx
296 #undef TARGET_SETUP_INCOMING_VARARGS
297 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
299 #undef TARGET_DEFAULT_SHORT_ENUMS
300 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
302 #undef TARGET_ALIGN_ANON_BITFIELD
303 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
305 #undef TARGET_CXX_GUARD_TYPE
306 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
308 #undef TARGET_CXX_GUARD_MASK_BIT
309 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
311 #undef TARGET_CXX_GET_COOKIE_SIZE
312 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
314 #undef TARGET_CXX_COOKIE_HAS_SIZE
315 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
317 #undef TARGET_CXX_CDTOR_RETURNS_THIS
318 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
320 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
321 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
323 #undef TARGET_CXX_USE_AEABI_ATEXIT
324 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
326 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
327 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
328 arm_cxx_determine_class_data_visibility
330 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
331 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
333 #undef TARGET_RETURN_IN_MSB
334 #define TARGET_RETURN_IN_MSB arm_return_in_msb
336 #undef TARGET_MUST_PASS_IN_STACK
337 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
339 struct gcc_target targetm = TARGET_INITIALIZER;
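/* A note not in the original file: each TARGET_* macro redefined above
   overrides one field of the target hook vector, and TARGET_INITIALIZER
   (from target-def.h) expands to an aggregate initializer combining those
   overrides with the defaults, so this single definition of targetm is how
   the ARM back end plugs into the target-independent parts of the compiler.  */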
341 /* Obstack for minipool constant handling. */
342 static struct obstack minipool_obstack;
343 static char * minipool_startobj;
345 /* The maximum number of insns skipped which
346 will be conditionalised if possible. */
347 static int max_insns_skipped = 5;
349 extern FILE * asm_out_file;
351 /* True if we are currently building a constant table. */
352 int making_const_table;
354 /* Define the information needed to generate branch insns. This is
355 stored from the compare operation. */
356 rtx arm_compare_op0, arm_compare_op1;
358 /* The processor for which instructions should be scheduled. */
359 enum processor_type arm_tune = arm_none;
361 /* Which floating point model to use. */
362 enum arm_fp_model arm_fp_model;
364 /* Which floating point hardware is available. */
365 enum fputype arm_fpu_arch;
367 /* Which floating point hardware to schedule for. */
368 enum fputype arm_fpu_tune;
370 /* Whether to use floating point hardware. */
371 enum float_abi_type arm_float_abi;
373 /* Which ABI to use. */
374 enum arm_abi_type arm_abi;
376 /* Set by the -mfpu=... option. */
377 static const char * target_fpu_name = NULL;
379 /* Set by the -mfpe=... option. */
380 static const char * target_fpe_name = NULL;
382 /* Set by the -mfloat-abi=... option. */
383 static const char * target_float_abi_name = NULL;
385 /* Set by the -mabi=... option. */
386 static const char * target_abi_name = NULL;
388 /* Used to parse -mstructure_size_boundary command line option. */
389 static const char * structure_size_string = NULL;
390 int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;
392 /* Used for Thumb call_via trampolines. */
393 rtx thumb_call_via_label[14];
394 static int thumb_call_reg_needed;
396 /* Bit values used to identify processor capabilities. */
397 #define FL_CO_PROC (1 << 0) /* Has external co-processor bus */
398 #define FL_ARCH3M (1 << 1) /* Extended multiply */
399 #define FL_MODE26 (1 << 2) /* 26-bit mode support */
400 #define FL_MODE32 (1 << 3) /* 32-bit mode support */
401 #define FL_ARCH4 (1 << 4) /* Architecture rel 4 */
402 #define FL_ARCH5 (1 << 5) /* Architecture rel 5 */
403 #define FL_THUMB (1 << 6) /* Thumb aware */
404 #define FL_LDSCHED (1 << 7) /* Load scheduling necessary */
405 #define FL_STRONG (1 << 8) /* StrongARM */
406 #define FL_ARCH5E (1 << 9) /* DSP extensions to v5 */
407 #define FL_XSCALE (1 << 10) /* XScale */
408 #define FL_CIRRUS (1 << 11) /* Cirrus/DSP. */
409 #define FL_ARCH6 (1 << 12) /* Architecture rel 6. Adds
410 media instructions. */
411 #define FL_VFPV2 (1 << 13) /* Vector Floating Point V2. */
412 #define FL_WBUF (1 << 14) /* Schedule for write buffer ops.
413 Note: ARM6 & 7 derivatives only. */
415 #define FL_IWMMXT (1 << 29) /* XScale v2 or "Intel Wireless MMX technology". */
417 #define FL_FOR_ARCH2 0
418 #define FL_FOR_ARCH3 FL_MODE32
419 #define FL_FOR_ARCH3M (FL_FOR_ARCH3 | FL_ARCH3M)
420 #define FL_FOR_ARCH4 (FL_FOR_ARCH3M | FL_ARCH4)
421 #define FL_FOR_ARCH4T (FL_FOR_ARCH4 | FL_THUMB)
422 #define FL_FOR_ARCH5 (FL_FOR_ARCH4 | FL_ARCH5)
423 #define FL_FOR_ARCH5T (FL_FOR_ARCH5 | FL_THUMB)
424 #define FL_FOR_ARCH5E (FL_FOR_ARCH5 | FL_ARCH5E)
425 #define FL_FOR_ARCH5TE (FL_FOR_ARCH5E | FL_THUMB)
426 #define FL_FOR_ARCH5TEJ FL_FOR_ARCH5TE
427 #define FL_FOR_ARCH6 (FL_FOR_ARCH5TE | FL_ARCH6)
428 #define FL_FOR_ARCH6J FL_FOR_ARCH6
429 #define FL_FOR_ARCH6K FL_FOR_ARCH6
430 #define FL_FOR_ARCH6Z FL_FOR_ARCH6
431 #define FL_FOR_ARCH6ZK FL_FOR_ARCH6
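/* Illustrative check, not part of the original file: the FL_FOR_ARCHxx
   macros accumulate capability bits, so each architecture inherits all of
   its predecessors' flags.  ARMv5TE, for instance, ends up with exactly
   the bits named below; this typedef (its name is arbitrary and used
   nowhere else) fails to compile if that expansion ever changes.  */
typedef char fl_for_arch5te_expansion_check
  [(FL_FOR_ARCH5TE == (FL_MODE32 | FL_ARCH3M | FL_ARCH4
                       | FL_ARCH5 | FL_ARCH5E | FL_THUMB)) ? 1 : -1];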
433 /* The bits in this mask specify which
434 instructions we are allowed to generate. */
435 static unsigned long insn_flags = 0;
437 /* The bits in this mask specify which instruction scheduling options should
438 be used. */
439 static unsigned long tune_flags = 0;
441 /* The following are used in the arm.md file as equivalents to bits
442 in the above two flag variables. */
444 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
445 int arm_arch3m = 0;
447 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
448 int arm_arch4 = 0;
450 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
451 int arm_arch4t = 0;
453 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
454 int arm_arch5 = 0;
456 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
457 int arm_arch5e = 0;
459 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
460 int arm_arch6 = 0;
462 /* Nonzero if this chip can benefit from load scheduling. */
463 int arm_ld_sched = 0;
465 /* Nonzero if this chip is a StrongARM. */
466 int arm_tune_strongarm = 0;
468 /* Nonzero if this chip is a Cirrus variant. */
469 int arm_arch_cirrus = 0;
471 /* Nonzero if this chip supports Intel Wireless MMX technology. */
472 int arm_arch_iwmmxt = 0;
474 /* Nonzero if this chip is an XScale. */
475 int arm_arch_xscale = 0;
477 /* Nonzero if tuning for XScale */
478 int arm_tune_xscale = 0;
480 /* Nonzero if we want to tune for stores that access the write-buffer.
481 This typically means an ARM6 or ARM7 with MMU or MPU. */
482 int arm_tune_wbuf = 0;
484 /* Nonzero if generating Thumb instructions. */
485 int thumb_code = 0;
487 /* Nonzero if we should define __THUMB_INTERWORK__ in the
488 preprocessor.
489 XXX This is a bit of a hack, it's intended to help work around
490 problems in GLD which doesn't understand that armv5t code is
491 interworking clean. */
492 int arm_cpp_interwork = 0;
494 /* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we
495 must report the mode of the memory reference from PRINT_OPERAND to
496 PRINT_OPERAND_ADDRESS. */
497 enum machine_mode output_memory_reference_mode;
499 /* The register number to be used for the PIC offset register. */
500 static const char * arm_pic_register_string = NULL;
501 int arm_pic_register = INVALID_REGNUM;
503 /* Set to 1 when a return insn is output, this means that the epilogue
504 is not needed. */
505 int return_used_this_function;
507 /* Set to 1 after arm_reorg has started.  Reset at the start of
508 the next function. */
509 static int after_arm_reorg = 0;
511 /* The maximum number of insns to be used when loading a constant. */
512 static int arm_constant_limit = 3;
514 /* For an explanation of these variables, see final_prescan_insn below. */
515 int arm_ccfsm_state;
516 enum arm_cond_code arm_current_cc;
517 rtx arm_target_insn;
518 int arm_target_label;
520 /* The condition codes of the ARM, and the inverse function. */
521 static const char * const arm_condition_codes[] =
523 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
524 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
527 #define streq(string1, string2) (strcmp (string1, string2) == 0)
529 /* Initialization code. */
531 struct processors
533 const char *const name;
534 enum processor_type core;
535 const char *arch;
536 const unsigned long flags;
537 bool (* rtx_costs) (rtx, int, int, int *);
540 /* Not all of these give usefully different compilation alternatives,
541 but there is no simple way of generalizing them. */
542 static const struct processors all_cores[] =
544 /* ARM Cores */
545 #define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
546 {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, arm_##COSTS##_rtx_costs},
547 #include "arm-cores.def"
548 #undef ARM_CORE
549 {NULL, arm_none, NULL, 0, NULL}
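/* Illustrative note, not part of the original file: every ARM_CORE line in
   arm-cores.def becomes one initializer in the table above.  A hypothetical
   entry
       ARM_CORE("arm7tdmi", arm7tdmi, 4T, FL_LDSCHED, slowmul)
   would expand, via the #ARCH stringizing and ##COSTS token pasting in the
   macro, to
       {"arm7tdmi", arm_none, "4T", FL_LDSCHED | FL_FOR_ARCH4T,
        arm_slowmul_rtx_costs},  */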
552 static const struct processors all_architectures[] =
554 /* ARM Architectures */
555 /* We don't specify rtx_costs here as it will be figured out
556 from the core. */
558 {"armv2", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
559 {"armv2a", arm2, "2", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
560 {"armv3", arm6, "3", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
561 {"armv3m", arm7m, "3M", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
562 {"armv4", arm7tdmi, "4", FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
563 /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
564 implementations that support it, so we will leave it out for now. */
565 {"armv4t", arm7tdmi, "4T", FL_CO_PROC | FL_FOR_ARCH4T, NULL},
566 {"armv5", arm10tdmi, "5", FL_CO_PROC | FL_FOR_ARCH5, NULL},
567 {"armv5t", arm10tdmi, "5T", FL_CO_PROC | FL_FOR_ARCH5T, NULL},
568 {"armv5e", arm1026ejs, "5E", FL_CO_PROC | FL_FOR_ARCH5E, NULL},
569 {"armv5te", arm1026ejs, "5TE", FL_CO_PROC | FL_FOR_ARCH5TE, NULL},
570 {"armv6", arm1136js, "6", FL_CO_PROC | FL_FOR_ARCH6, NULL},
571 {"armv6j", arm1136js, "6J", FL_CO_PROC | FL_FOR_ARCH6J, NULL},
572 {"armv6k", mpcore, "6K", FL_CO_PROC | FL_FOR_ARCH6K, NULL},
573 {"armv6z", arm1176jzs, "6Z", FL_CO_PROC | FL_FOR_ARCH6Z, NULL},
574 {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC | FL_FOR_ARCH6ZK, NULL},
575 {"ep9312", ep9312, "4T", FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
576 {"iwmmxt", iwmmxt, "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
577 {NULL, arm_none, NULL, 0 , NULL}
580 struct arm_cpu_select
582 const char * string;
583 const char * name;
584 const struct processors * processors;
587 /* This is a magic structure. The 'string' field is magically filled in
588 with a pointer to the value specified by the user on the command line
589 assuming that the user has specified such a value. */
591 static struct arm_cpu_select arm_select[] =
593 /* string name processors */
594 { NULL, "-mcpu=", all_cores },
595 { NULL, "-march=", all_architectures },
596 { NULL, "-mtune=", all_cores }
599 /* Defines representing the indexes into the above table. */
600 #define ARM_OPT_SET_CPU 0
601 #define ARM_OPT_SET_ARCH 1
602 #define ARM_OPT_SET_TUNE 2
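/* A note not in the original file: these indexes must stay in step with the
   literal subscripts used by arm_handle_option below, which stores the
   -mcpu= argument in arm_select[0], -march= in arm_select[1] and -mtune=
   in arm_select[2].  */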
604 /* The name of the preprocessor macro to define for this architecture. */
606 char arm_arch_name[] = "__ARM_ARCH_0UNK__";
608 struct fpu_desc
610 const char * name;
611 enum fputype fpu;
615 /* Available values for -mfpu=. */
617 static const struct fpu_desc all_fpus[] =
619 {"fpa", FPUTYPE_FPA},
620 {"fpe2", FPUTYPE_FPA_EMU2},
621 {"fpe3", FPUTYPE_FPA_EMU2},
622 {"maverick", FPUTYPE_MAVERICK},
623 {"vfp", FPUTYPE_VFP}
627 /* Floating point models used by the different hardware.
628 See fputype in arm.h. */
630 static const enum fputype fp_model_for_fpu[] =
632 /* No FP hardware. */
633 ARM_FP_MODEL_UNKNOWN, /* FPUTYPE_NONE */
634 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA */
635 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU2 */
636 ARM_FP_MODEL_FPA, /* FPUTYPE_FPA_EMU3 */
637 ARM_FP_MODEL_MAVERICK, /* FPUTYPE_MAVERICK */
638 ARM_FP_MODEL_VFP /* FPUTYPE_VFP */
642 struct float_abi
644 const char * name;
645 enum float_abi_type abi_type;
649 /* Available values for -mfloat-abi=. */
651 static const struct float_abi all_float_abis[] =
653 {"soft", ARM_FLOAT_ABI_SOFT},
654 {"softfp", ARM_FLOAT_ABI_SOFTFP},
655 {"hard", ARM_FLOAT_ABI_HARD}
659 struct abi_name
661 const char *name;
662 enum arm_abi_type abi_type;
666 /* Available values for -mabi=. */
668 static const struct abi_name arm_all_abis[] =
670 {"apcs-gnu", ARM_ABI_APCS},
671 {"atpcs", ARM_ABI_ATPCS},
672 {"aapcs", ARM_ABI_AAPCS},
673 {"iwmmxt", ARM_ABI_IWMMXT}
676 /* Return the number of bits set in VALUE. */
677 static unsigned
678 bit_count (unsigned long value)
680 unsigned long count = 0;
682 while (value)
684 count++;
685 value &= value - 1; /* Clear the least-significant set bit. */
688 return count;
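/* Worked example, not part of the original file: bit_count (0x29) iterates
   three times, clearing one set bit per pass (0x29 -> 0x28 -> 0x20 -> 0),
   and so returns 3.  */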
691 /* Set up library functions unique to ARM. */
693 static void
694 arm_init_libfuncs (void)
696 /* There are no special library functions unless we are using the
697 ARM BPABI. */
698 if (!TARGET_BPABI)
699 return;
701 /* The functions below are described in Section 4 of the "Run-Time
702 ABI for the ARM architecture", Version 1.0. */
704 /* Double-precision floating-point arithmetic. Table 2. */
705 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
706 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
707 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
708 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
709 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
711 /* Double-precision comparisons. Table 3. */
712 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
713 set_optab_libfunc (ne_optab, DFmode, NULL);
714 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
715 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
716 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
717 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
718 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
720 /* Single-precision floating-point arithmetic. Table 4. */
721 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
722 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
723 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
724 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
725 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
727 /* Single-precision comparisons. Table 5. */
728 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
729 set_optab_libfunc (ne_optab, SFmode, NULL);
730 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
731 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
732 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
733 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
734 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
736 /* Floating-point to integer conversions. Table 6. */
737 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
738 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
739 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
740 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
741 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
742 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
743 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
744 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
746 /* Conversions between floating types. Table 7. */
747 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
748 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
750 /* Integer to floating-point conversions. Table 8. */
751 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
752 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
753 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
754 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
755 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
756 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
757 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
758 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
760 /* Long long. Table 9. */
761 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
762 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
763 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
764 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
765 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
766 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
767 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
768 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
770 /* Integer (32/32->32) division. \S 4.3.1. */
771 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
772 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
774 /* The divmod functions are designed so that they can be used for
775 plain division, even though they return both the quotient and the
776 remainder. The quotient is returned in the usual location (i.e.,
777 r0 for SImode, {r0, r1} for DImode), just as would be expected
778 for an ordinary division routine. Because the AAPCS calling
779 conventions specify that all of { r0, r1, r2, r3 } are
780 call-clobbered registers, there is no need to tell the compiler
781 explicitly that those registers are clobbered by these
782 routines. */
783 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
784 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
785 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idivmod");
786 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidivmod");
788 /* We don't have mod libcalls. Fortunately gcc knows how to use the
789 divmod libcalls instead. */
790 set_optab_libfunc (smod_optab, DImode, NULL);
791 set_optab_libfunc (umod_optab, DImode, NULL);
792 set_optab_libfunc (smod_optab, SImode, NULL);
793 set_optab_libfunc (umod_optab, SImode, NULL);
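/* Illustrative sketch, not part of the original file: with the optab
   registrations above, a plain C division on a BPABI target expands into a
   call to the corresponding __aeabi_*divmod routine, and the quotient is
   simply taken from the usual return register.  The hypothetical function
   below, for example, would compile to little more than a call to
   __aeabi_idivmod.  */
static int
example_signed_quotient (int numerator, int denominator)
{
  return numerator / denominator;  /* Quotient comes back in r0.  */
}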
796 /* Implement TARGET_HANDLE_OPTION. */
798 static bool
799 arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
801 switch (code)
803 case OPT_mabi_:
804 target_abi_name = arg;
805 return true;
807 case OPT_march_:
808 arm_select[1].string = arg;
809 return true;
811 case OPT_mcpu_:
812 arm_select[0].string = arg;
813 return true;
815 case OPT_mfloat_abi_:
816 target_float_abi_name = arg;
817 return true;
819 case OPT_mfp_:
820 case OPT_mfpe_:
821 target_fpe_name = arg;
822 return true;
824 case OPT_mfpu_:
825 target_fpu_name = arg;
826 return true;
828 case OPT_mhard_float:
829 target_float_abi_name = "hard";
830 return true;
832 case OPT_mpic_register_:
833 arm_pic_register_string = arg;
834 return true;
836 case OPT_msoft_float:
837 target_float_abi_name = "soft";
838 return true;
840 case OPT_mstructure_size_boundary_:
841 structure_size_string = arg;
842 return true;
844 case OPT_mtune_:
845 arm_select[2].string = arg;
846 return true;
848 default:
849 return true;
853 /* Fix up any incompatible options that the user has specified.
854 This has now turned into a maze. */
855 void
856 arm_override_options (void)
858 unsigned i;
859 enum processor_type target_arch_cpu = arm_none;
861 /* Set up the flags based on the cpu/architecture selected by the user. */
862 for (i = ARRAY_SIZE (arm_select); i--;)
864 struct arm_cpu_select * ptr = arm_select + i;
866 if (ptr->string != NULL && ptr->string[0] != '\0')
868 const struct processors * sel;
870 for (sel = ptr->processors; sel->name != NULL; sel++)
871 if (streq (ptr->string, sel->name))
873 /* Set the architecture define. */
874 if (i != ARM_OPT_SET_TUNE)
875 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
877 /* Determine the processor core for which we should
878 tune code-generation. */
879 if (/* -mcpu= is a sensible default. */
880 i == ARM_OPT_SET_CPU
881 /* -mtune= overrides -mcpu= and -march=. */
882 || i == ARM_OPT_SET_TUNE)
883 arm_tune = (enum processor_type) (sel - ptr->processors);
885 /* Remember the CPU associated with this architecture.
886 If no other option is used to set the CPU type,
887 we'll use this to guess the most suitable tuning
888 options. */
889 if (i == ARM_OPT_SET_ARCH)
890 target_arch_cpu = sel->core;
892 if (i != ARM_OPT_SET_TUNE)
894 /* If we have been given an architecture and a processor
895 make sure that they are compatible. We only generate
896 a warning though, and we prefer the CPU over the
897 architecture. */
898 if (insn_flags != 0 && (insn_flags ^ sel->flags))
899 warning (0, "switch -mcpu=%s conflicts with -march= switch",
900 ptr->string);
902 insn_flags = sel->flags;
905 break;
908 if (sel->name == NULL)
909 error ("bad value (%s) for %s switch", ptr->string, ptr->name);
913 /* Guess the tuning options from the architecture if necessary. */
914 if (arm_tune == arm_none)
915 arm_tune = target_arch_cpu;
917 /* If the user did not specify a processor, choose one for them. */
918 if (insn_flags == 0)
920 const struct processors * sel;
921 unsigned int sought;
922 enum processor_type cpu;
924 cpu = TARGET_CPU_DEFAULT;
925 if (cpu == arm_none)
927 #ifdef SUBTARGET_CPU_DEFAULT
928 /* Use the subtarget default CPU if none was specified by
929 configure. */
930 cpu = SUBTARGET_CPU_DEFAULT;
931 #endif
932 /* Default to ARM6. */
933 if (cpu == arm_none)
934 cpu = arm6;
936 sel = &all_cores[cpu];
938 insn_flags = sel->flags;
940 /* Now check to see if the user has specified some command line
941 switches that require certain abilities from the cpu. */
942 sought = 0;
944 if (TARGET_INTERWORK || TARGET_THUMB)
946 sought |= (FL_THUMB | FL_MODE32);
948 /* There are no ARM processors that support both APCS-26 and
949 interworking. Therefore we force FL_MODE26 to be removed
950 from insn_flags here (if it was set), so that the search
951 below will always be able to find a compatible processor. */
952 insn_flags &= ~FL_MODE26;
955 if (sought != 0 && ((sought & insn_flags) != sought))
957 /* Try to locate a CPU type that supports all of the abilities
958 of the default CPU, plus the extra abilities requested by
959 the user. */
960 for (sel = all_cores; sel->name != NULL; sel++)
961 if ((sel->flags & sought) == (sought | insn_flags))
962 break;
964 if (sel->name == NULL)
966 unsigned current_bit_count = 0;
967 const struct processors * best_fit = NULL;
969 /* Ideally we would like to issue an error message here
970 saying that it was not possible to find a CPU compatible
971 with the default CPU, but which also supports the command
972 line options specified by the programmer, and so they
973 ought to use the -mcpu=<name> command line option to
974 override the default CPU type.
976 If we cannot find a cpu that has both the
977 characteristics of the default cpu and the given
978 command line options we scan the array again looking
979 for a best match. */
980 for (sel = all_cores; sel->name != NULL; sel++)
981 if ((sel->flags & sought) == sought)
983 unsigned count;
985 count = bit_count (sel->flags & insn_flags);
987 if (count >= current_bit_count)
989 best_fit = sel;
990 current_bit_count = count;
994 gcc_assert (best_fit);
995 sel = best_fit;
998 insn_flags = sel->flags;
1000 sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
1001 if (arm_tune == arm_none)
1002 arm_tune = (enum processor_type) (sel - all_cores);
1005 /* The processor for which we should tune should now have been
1006 chosen. */
1007 gcc_assert (arm_tune != arm_none);
1009 tune_flags = all_cores[(int)arm_tune].flags;
1010 if (optimize_size)
1011 targetm.rtx_costs = arm_size_rtx_costs;
1012 else
1013 targetm.rtx_costs = all_cores[(int)arm_tune].rtx_costs;
1015 /* Make sure that the processor choice does not conflict with any of the
1016 other command line choices. */
1017 if (TARGET_INTERWORK && !(insn_flags & FL_THUMB))
1019 warning (0, "target CPU does not support interworking" );
1020 target_flags &= ~MASK_INTERWORK;
1023 if (TARGET_THUMB && !(insn_flags & FL_THUMB))
1025 warning (0, "target CPU does not support THUMB instructions");
1026 target_flags &= ~MASK_THUMB;
1029 if (TARGET_APCS_FRAME && TARGET_THUMB)
1031 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
1032 target_flags &= ~MASK_APCS_FRAME;
1035 /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
1036 from here where no function is being compiled currently. */
1037 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
1038 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
1040 if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
1041 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
1043 if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
1044 warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");
1046 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
1048 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
1049 target_flags |= MASK_APCS_FRAME;
1052 if (TARGET_POKE_FUNCTION_NAME)
1053 target_flags |= MASK_APCS_FRAME;
1055 if (TARGET_APCS_REENT && flag_pic)
1056 error ("-fpic and -mapcs-reent are incompatible");
1058 if (TARGET_APCS_REENT)
1059 warning (0, "APCS reentrant code not supported. Ignored");
1061 /* If this target is normally configured to use APCS frames, warn if they
1062 are turned off and debugging is turned on. */
1063 if (TARGET_ARM
1064 && write_symbols != NO_DEBUG
1065 && !TARGET_APCS_FRAME
1066 && (TARGET_DEFAULT & MASK_APCS_FRAME))
1067 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
1069 /* If stack checking is disabled, we can use r10 as the PIC register,
1070 which keeps r9 available. */
1071 if (flag_pic)
1072 arm_pic_register = TARGET_APCS_STACK ? 9 : 10;
1074 if (TARGET_APCS_FLOAT)
1075 warning (0, "passing floating point arguments in fp regs not yet supported");
1077 /* Initialize boolean versions of the flags, for use in the arm.md file. */
1078 arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
1079 arm_arch4 = (insn_flags & FL_ARCH4) != 0;
1080 arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
1081 arm_arch5 = (insn_flags & FL_ARCH5) != 0;
1082 arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
1083 arm_arch6 = (insn_flags & FL_ARCH6) != 0;
1084 arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
1085 arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;
1087 arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
1088 arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
1089 thumb_code = (TARGET_ARM == 0);
1090 arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
1091 arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
1092 arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;
1094 /* V5 code we generate is completely interworking capable, so we turn off
1095 TARGET_INTERWORK here to avoid many tests later on. */
1097 /* XXX However, we must pass the right pre-processor defines to CPP
1098 or GLD can get confused. This is a hack. */
1099 if (TARGET_INTERWORK)
1100 arm_cpp_interwork = 1;
1102 if (arm_arch5)
1103 target_flags &= ~MASK_INTERWORK;
1105 if (target_abi_name)
1107 for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
1109 if (streq (arm_all_abis[i].name, target_abi_name))
1111 arm_abi = arm_all_abis[i].abi_type;
1112 break;
1115 if (i == ARRAY_SIZE (arm_all_abis))
1116 error ("invalid ABI option: -mabi=%s", target_abi_name);
1118 else
1119 arm_abi = ARM_DEFAULT_ABI;
1121 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
1122 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
1124 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
1125 error ("iwmmxt abi requires an iwmmxt capable cpu");
1127 arm_fp_model = ARM_FP_MODEL_UNKNOWN;
1128 if (target_fpu_name == NULL && target_fpe_name != NULL)
1130 if (streq (target_fpe_name, "2"))
1131 target_fpu_name = "fpe2";
1132 else if (streq (target_fpe_name, "3"))
1133 target_fpu_name = "fpe3";
1134 else
1135 error ("invalid floating point emulation option: -mfpe=%s",
1136 target_fpe_name);
1138 if (target_fpu_name != NULL)
1140 /* The user specified a FPU. */
1141 for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
1143 if (streq (all_fpus[i].name, target_fpu_name))
1145 arm_fpu_arch = all_fpus[i].fpu;
1146 arm_fpu_tune = arm_fpu_arch;
1147 arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
1148 break;
1151 if (arm_fp_model == ARM_FP_MODEL_UNKNOWN)
1152 error ("invalid floating point option: -mfpu=%s", target_fpu_name);
1154 else
1156 #ifdef FPUTYPE_DEFAULT
1157 /* Use the default if it is specified for this platform. */
1158 arm_fpu_arch = FPUTYPE_DEFAULT;
1159 arm_fpu_tune = FPUTYPE_DEFAULT;
1160 #else
1161 /* Pick one based on CPU type. */
1162 /* ??? Some targets assume FPA is the default.
1163 if ((insn_flags & FL_VFP) != 0)
1164 arm_fpu_arch = FPUTYPE_VFP;
1165 else
1166 */
1167 if (arm_arch_cirrus)
1168 arm_fpu_arch = FPUTYPE_MAVERICK;
1169 else
1170 arm_fpu_arch = FPUTYPE_FPA_EMU2;
1171 #endif
1172 if (tune_flags & FL_CO_PROC && arm_fpu_arch == FPUTYPE_FPA_EMU2)
1173 arm_fpu_tune = FPUTYPE_FPA;
1174 else
1175 arm_fpu_tune = arm_fpu_arch;
1176 arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
1177 gcc_assert (arm_fp_model != ARM_FP_MODEL_UNKNOWN);
1180 if (target_float_abi_name != NULL)
1182 /* The user specified a FP ABI. */
1183 for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
1185 if (streq (all_float_abis[i].name, target_float_abi_name))
1187 arm_float_abi = all_float_abis[i].abi_type;
1188 break;
1191 if (i == ARRAY_SIZE (all_float_abis))
1192 error ("invalid floating point abi: -mfloat-abi=%s",
1193 target_float_abi_name);
1195 else
1196 arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;
1198 if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
1199 sorry ("-mfloat-abi=hard and VFP");
1201 /* If soft-float is specified then don't use FPU. */
1202 if (TARGET_SOFT_FLOAT)
1203 arm_fpu_arch = FPUTYPE_NONE;
1205 /* For arm2/3 there is no need to do any scheduling if there is only
1206 a floating point emulator, or we are doing software floating-point. */
1207 if ((TARGET_SOFT_FLOAT
1208 || arm_fpu_tune == FPUTYPE_FPA_EMU2
1209 || arm_fpu_tune == FPUTYPE_FPA_EMU3)
1210 && (tune_flags & FL_MODE32) == 0)
1211 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
1213 /* Override the default structure alignment for AAPCS ABI. */
1214 if (arm_abi == ARM_ABI_AAPCS)
1215 arm_structure_size_boundary = 8;
1217 if (structure_size_string != NULL)
1219 int size = strtol (structure_size_string, NULL, 0);
1221 if (size == 8 || size == 32
1222 || (ARM_DOUBLEWORD_ALIGN && size == 64))
1223 arm_structure_size_boundary = size;
1224 else
1225 warning (0, "structure size boundary can only be set to %s",
1226 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
1229 if (arm_pic_register_string != NULL)
1231 int pic_register = decode_reg_name (arm_pic_register_string);
1233 if (!flag_pic)
1234 warning (0, "-mpic-register= is useless without -fpic");
1236 /* Prevent the user from choosing an obviously stupid PIC register. */
1237 else if (pic_register < 0 || call_used_regs[pic_register]
1238 || pic_register == HARD_FRAME_POINTER_REGNUM
1239 || pic_register == STACK_POINTER_REGNUM
1240 || pic_register >= PC_REGNUM)
1241 error ("unable to use '%s' for PIC register", arm_pic_register_string);
1242 else
1243 arm_pic_register = pic_register;
1246 if (TARGET_THUMB && flag_schedule_insns)
1248 /* Don't warn since it's on by default in -O2. */
1249 flag_schedule_insns = 0;
1252 if (optimize_size)
1254 /* There's some dispute as to whether this should be 1 or 2. However,
1255 experiments seem to show that in pathological cases a setting of
1256 1 degrades less severely than a setting of 2. This could change if
1257 other parts of the compiler change their behavior. */
1258 arm_constant_limit = 1;
1260 /* If optimizing for size, bump the number of instructions that we
1261 are prepared to conditionally execute (even on a StrongARM). */
1262 max_insns_skipped = 6;
1264 else
1266 /* For processors with load scheduling, it never costs more than
1267 2 cycles to load a constant, and the load scheduler may well
1268 reduce that to 1. */
1269 if (arm_ld_sched)
1270 arm_constant_limit = 1;
1272 /* On XScale the longer latency of a load makes it more difficult
1273 to achieve a good schedule, so it's faster to synthesize
1274 constants that can be done in two insns. */
1275 if (arm_tune_xscale)
1276 arm_constant_limit = 2;
1278 /* StrongARM has early execution of branches, so a sequence
1279 that is worth skipping is shorter. */
1280 if (arm_tune_strongarm)
1281 max_insns_skipped = 3;
1284 /* Register global variables with the garbage collector. */
1285 arm_add_gc_roots ();
1288 static void
1289 arm_add_gc_roots (void)
1291 gcc_obstack_init(&minipool_obstack);
1292 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
1295 /* A table of known ARM exception types.
1296 For use with the interrupt function attribute. */
1298 typedef struct
1300 const char *const arg;
1301 const unsigned long return_value;
1303 isr_attribute_arg;
1305 static const isr_attribute_arg isr_attribute_args [] =
1307 { "IRQ", ARM_FT_ISR },
1308 { "irq", ARM_FT_ISR },
1309 { "FIQ", ARM_FT_FIQ },
1310 { "fiq", ARM_FT_FIQ },
1311 { "ABORT", ARM_FT_ISR },
1312 { "abort", ARM_FT_ISR },
1313 { "ABORT", ARM_FT_ISR },
1314 { "abort", ARM_FT_ISR },
1315 { "UNDEF", ARM_FT_EXCEPTION },
1316 { "undef", ARM_FT_EXCEPTION },
1317 { "SWI", ARM_FT_EXCEPTION },
1318 { "swi", ARM_FT_EXCEPTION },
1319 { NULL, ARM_FT_NORMAL }
1322 /* Returns the (interrupt) function type of the current
1323 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
1325 static unsigned long
1326 arm_isr_value (tree argument)
1328 const isr_attribute_arg * ptr;
1329 const char * arg;
1331 /* No argument - default to IRQ. */
1332 if (argument == NULL_TREE)
1333 return ARM_FT_ISR;
1335 /* Get the value of the argument. */
1336 if (TREE_VALUE (argument) == NULL_TREE
1337 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
1338 return ARM_FT_UNKNOWN;
1340 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
1342 /* Check it against the list of known arguments. */
1343 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
1344 if (streq (arg, ptr->arg))
1345 return ptr->return_value;
1347 /* An unrecognized interrupt type. */
1348 return ARM_FT_UNKNOWN;
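/* Illustrative note, not part of the original file: the table above is
   consulted when a function carries the "isr" or "interrupt" attribute.
   A hypothetical declaration such as
       void fiq_handler (void) __attribute__ ((interrupt ("FIQ")));
   maps to ARM_FT_FIQ, while using the attribute with no argument at all
   defaults to ARM_FT_ISR (a plain IRQ handler).  */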
1351 /* Computes the type of the current function. */
1353 static unsigned long
1354 arm_compute_func_type (void)
1356 unsigned long type = ARM_FT_UNKNOWN;
1357 tree a;
1358 tree attr;
1360 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
1362 /* Decide if the current function is volatile. Such functions
1363 never return, and many memory cycles can be saved by not storing
1364 register values that will never be needed again. This optimization
1365 was added to speed up context switching in a kernel application. */
1366 if (optimize > 0
1367 && TREE_NOTHROW (current_function_decl)
1368 && TREE_THIS_VOLATILE (current_function_decl))
1369 type |= ARM_FT_VOLATILE;
1371 if (cfun->static_chain_decl != NULL)
1372 type |= ARM_FT_NESTED;
1374 attr = DECL_ATTRIBUTES (current_function_decl);
1376 a = lookup_attribute ("naked", attr);
1377 if (a != NULL_TREE)
1378 type |= ARM_FT_NAKED;
1380 a = lookup_attribute ("isr", attr);
1381 if (a == NULL_TREE)
1382 a = lookup_attribute ("interrupt", attr);
1384 if (a == NULL_TREE)
1385 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
1386 else
1387 type |= arm_isr_value (TREE_VALUE (a));
1389 return type;
1392 /* Returns the type of the current function. */
1394 unsigned long
1395 arm_current_func_type (void)
1397 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
1398 cfun->machine->func_type = arm_compute_func_type ();
1400 return cfun->machine->func_type;
1403 /* Return 1 if it is possible to return using a single instruction.
1404 If SIBLING is non-null, this is a test for a return before a sibling
1405 call. SIBLING is the call insn, so we can examine its register usage. */
1408 use_return_insn (int iscond, rtx sibling)
1410 int regno;
1411 unsigned int func_type;
1412 unsigned long saved_int_regs;
1413 unsigned HOST_WIDE_INT stack_adjust;
1414 arm_stack_offsets *offsets;
1416 /* Never use a return instruction before reload has run. */
1417 if (!reload_completed)
1418 return 0;
1420 func_type = arm_current_func_type ();
1422 /* Naked functions and volatile functions need special
1423 consideration. */
1424 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED))
1425 return 0;
1427 /* So do interrupt functions that use the frame pointer. */
1428 if (IS_INTERRUPT (func_type) && frame_pointer_needed)
1429 return 0;
1431 offsets = arm_get_frame_offsets ();
1432 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
1434 /* As do variadic functions. */
1435 if (current_function_pretend_args_size
1436 || cfun->machine->uses_anonymous_args
1437 /* Or if the function calls __builtin_eh_return () */
1438 || current_function_calls_eh_return
1439 /* Or if the function calls alloca */
1440 || current_function_calls_alloca
1441 /* Or if there is a stack adjustment. However, if the stack pointer
1442 is saved on the stack, we can use a pre-incrementing stack load. */
1443 || !(stack_adjust == 0 || (frame_pointer_needed && stack_adjust == 4)))
1444 return 0;
1446 saved_int_regs = arm_compute_save_reg_mask ();
1448 /* Unfortunately, the insn
1450 ldmib sp, {..., sp, ...}
1452 triggers a bug on most SA-110 based devices, such that the stack
1453 pointer won't be correctly restored if the instruction takes a
1454 page fault. We work around this problem by popping r3 along with
1455 the other registers, since that is never slower than executing
1456 another instruction.
1458 We test for !arm_arch5 here, because code for any architecture
1459 less than this could potentially be run on one of the buggy
1460 chips. */
1461 if (stack_adjust == 4 && !arm_arch5)
1463 /* Validate that r3 is a call-clobbered register (always true in
1464 the default abi) ... */
1465 if (!call_used_regs[3])
1466 return 0;
1468 /* ... that it isn't being used for a return value (always true
1469 until we implement return-in-regs), or for a tail-call
1470 argument ... */
1471 if (sibling)
1473 gcc_assert (GET_CODE (sibling) == CALL_INSN);
1475 if (find_regno_fusage (sibling, USE, 3))
1476 return 0;
1479 /* ... and that there are no call-saved registers in r0-r2
1480 (always true in the default ABI). */
1481 if (saved_int_regs & 0x7)
1482 return 0;
1485 /* Can't be done if interworking with Thumb, and any registers have been
1486 stacked. */
1487 if (TARGET_INTERWORK && saved_int_regs != 0)
1488 return 0;
1490 /* On StrongARM, conditional returns are expensive if they aren't
1491 taken and multiple registers have been stacked. */
1492 if (iscond && arm_tune_strongarm)
1494 /* Conditional return when just the LR is stored is a simple
1495 conditional-load instruction, that's not expensive. */
1496 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
1497 return 0;
1499 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
1500 return 0;
1503 /* If there are saved registers but the LR isn't saved, then we need
1504 two instructions for the return. */
1505 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
1506 return 0;
1508 /* Can't be done if any of the FPA regs are pushed,
1509 since this also requires an insn. */
1510 if (TARGET_HARD_FLOAT && TARGET_FPA)
1511 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
1512 if (regs_ever_live[regno] && !call_used_regs[regno])
1513 return 0;
1515 /* Likewise VFP regs. */
1516 if (TARGET_HARD_FLOAT && TARGET_VFP)
1517 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
1518 if (regs_ever_live[regno] && !call_used_regs[regno])
1519 return 0;
1521 if (TARGET_REALLY_IWMMXT)
1522 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
1523 if (regs_ever_live[regno] && ! call_used_regs [regno])
1524 return 0;
1526 return 1;
1529 /* Return TRUE if int I is a valid immediate ARM constant. */
1532 const_ok_for_arm (HOST_WIDE_INT i)
1534 int lowbit;
1536 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
1537 be all zero, or all one. */
1538 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
1539 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
1540 != ((~(unsigned HOST_WIDE_INT) 0)
1541 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
1542 return FALSE;
1544 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
1546 /* Fast return for 0 and small values. We must do this for zero, since
1547 the code below can't handle that one case. */
1548 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
1549 return TRUE;
1551 /* Get the number of trailing zeros, rounded down to the nearest even
1552 number. */
1553 lowbit = (ffs ((int) i) - 1) & ~1;
1555 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
1556 return TRUE;
1557 else if (lowbit <= 4
1558 && ((i & ~0xc000003f) == 0
1559 || (i & ~0xf000000f) == 0
1560 || (i & ~0xfc000003) == 0))
1561 return TRUE;
1563 return FALSE;
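/* Illustrative sketch, not part of the original file: the test above accepts
   exactly the ARM data-processing immediates, i.e. any value obtained by
   rotating an 8-bit constant right by an even amount.  A brute-force
   reference check (name hypothetical, assumes a 32-bit unsigned int) is:  */
static int
arm_immediate_brute_force_check (unsigned int x)
{
  int rot;

  for (rot = 0; rot < 32; rot += 2)
    {
      /* Undo a rotate-right of ROT by rotating left, then see whether
         what remains fits in the low eight bits.  */
      unsigned int undone = rot ? ((x << rot) | (x >> (32 - rot))) : x;

      if ((undone & ~(unsigned int) 0xff) == 0)
        return 1;
    }

  return 0;
}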
1566 /* Return true if I is a valid constant for the operation CODE. */
1567 static int
1568 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
1570 if (const_ok_for_arm (i))
1571 return 1;
1573 switch (code)
1575 case PLUS:
1576 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
1578 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
1579 case XOR:
1580 case IOR:
1581 return 0;
1583 case AND:
1584 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
1586 default:
1587 gcc_unreachable ();
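/* Illustrative note, not part of the original file: the PLUS and AND cases
   above rely on the ARM's SUB and BIC instruction forms.  For instance -1
   is not a valid immediate, but (plus reg -1) can still be done in one
   insn as "sub rd, rn, #1"; likewise (and reg 0xffffff00) can be emitted
   as "bic rd, rn, #0xff".  */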
1591 /* Emit a sequence of insns to handle a large constant.
1592 CODE is the code of the operation required, it can be any of SET, PLUS,
1593 IOR, AND, XOR, MINUS;
1594 MODE is the mode in which the operation is being performed;
1595 VAL is the integer to operate on;
1596 SOURCE is the other operand (a register, or a null-pointer for SET);
1597 SUBTARGETS means it is safe to create scratch registers if that will
1598 either produce a simpler sequence, or we will want to cse the values.
1599 Return value is the number of insns emitted. */
1602 arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
1603 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
1605 rtx cond;
1607 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
1608 cond = COND_EXEC_TEST (PATTERN (insn));
1609 else
1610 cond = NULL_RTX;
1612 if (subtargets || code == SET
1613 || (GET_CODE (target) == REG && GET_CODE (source) == REG
1614 && REGNO (target) != REGNO (source)))
1616 /* After arm_reorg has been called, we can't fix up expensive
1617 constants by pushing them into memory so we must synthesize
1618 them in-line, regardless of the cost. This is only likely to
1619 be more costly on chips that have load delay slots and we are
1620 compiling without running the scheduler (so no splitting
1621 occurred before the final instruction emission).
1623 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
1625 if (!after_arm_reorg
1626 && !cond
1627 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
1628 1, 0)
1629 > arm_constant_limit + (code != SET)))
1631 if (code == SET)
1633 /* Currently SET is the only monadic value for CODE, all
1634 the rest are dyadic. */
1635 emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (val)));
1636 return 1;
1638 else
1640 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
1642 emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (val)));
1643 /* For MINUS, the value is subtracted from, since we never
1644 have subtraction of a constant. */
1645 if (code == MINUS)
1646 emit_insn (gen_rtx_SET (VOIDmode, target,
1647 gen_rtx_MINUS (mode, temp, source)));
1648 else
1649 emit_insn (gen_rtx_SET (VOIDmode, target,
1650 gen_rtx_fmt_ee (code, mode, source, temp)));
1651 return 2;
1656 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
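/* Illustrative sketch, not part of the original file: one very simple way a
   32-bit constant can be synthesized is byte by byte, since every 8-bit
   field aligned on a byte boundary is itself a legal rotated immediate, e.g.

       mov  rd, #0x12000000
       orr  rd, rd, #0x00340000
       orr  rd, rd, #0x00005600
       orr  rd, rd, #0x00000078

   builds 0x12345678 in four insns.  The hypothetical helper below counts
   the insns needed by that naive strategy; arm_gen_constant below is far
   more careful about choosing chunk boundaries, shifts and inverted forms.  */
static int
naive_byte_at_a_time_insn_count (unsigned int val)
{
  int insns = 0;
  int i;

  for (i = 0; i < 32; i += 8)
    if (val & ((unsigned int) 0xff << i))
      insns++;

  /* Zero still needs one "mov rd, #0".  */
  return insns ? insns : 1;
}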
1660 static int
1661 count_insns_for_constant (HOST_WIDE_INT remainder, int i)
1663 HOST_WIDE_INT temp1;
1664 int num_insns = 0;
1667 int end;
1669 if (i <= 0)
1670 i += 32;
1671 if (remainder & (3 << (i - 2)))
1673 end = i - 8;
1674 if (end < 0)
1675 end += 32;
1676 temp1 = remainder & ((0x0ff << end)
1677 | ((i < end) ? (0xff >> (32 - end)) : 0));
1678 remainder &= ~temp1;
1679 num_insns++;
1680 i -= 6;
1682 i -= 2;
1683 } while (remainder);
1684 return num_insns;
1687 /* Emit an instruction with the indicated PATTERN. If COND is
1688 non-NULL, conditionalize the execution of the instruction on COND
1689 being true. */
1691 static void
1692 emit_constant_insn (rtx cond, rtx pattern)
1694 if (cond)
1695 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
1696 emit_insn (pattern);
1699 /* As above, but extra parameter GENERATE which, if clear, suppresses
1700 RTL generation. */
1702 static int
1703 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
1704 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
1705 int generate)
1707 int can_invert = 0;
1708 int can_negate = 0;
1709 int can_negate_initial = 0;
1710 int can_shift = 0;
1711 int i;
1712 int num_bits_set = 0;
1713 int set_sign_bit_copies = 0;
1714 int clear_sign_bit_copies = 0;
1715 int clear_zero_bit_copies = 0;
1716 int set_zero_bit_copies = 0;
1717 int insns = 0;
1718 unsigned HOST_WIDE_INT temp1, temp2;
1719 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
1721 /* Find out which operations are safe for a given CODE. Also do a quick
1722 check for degenerate cases; these can occur when DImode operations
1723 are split. */
1724 switch (code)
1726 case SET:
1727 can_invert = 1;
1728 can_shift = 1;
1729 can_negate = 1;
1730 break;
1732 case PLUS:
1733 can_negate = 1;
1734 can_negate_initial = 1;
1735 break;
1737 case IOR:
1738 if (remainder == 0xffffffff)
1740 if (generate)
1741 emit_constant_insn (cond,
1742 gen_rtx_SET (VOIDmode, target,
1743 GEN_INT (ARM_SIGN_EXTEND (val))));
1744 return 1;
1746 if (remainder == 0)
1748 if (reload_completed && rtx_equal_p (target, source))
1749 return 0;
1750 if (generate)
1751 emit_constant_insn (cond,
1752 gen_rtx_SET (VOIDmode, target, source));
1753 return 1;
1755 break;
1757 case AND:
1758 if (remainder == 0)
1760 if (generate)
1761 emit_constant_insn (cond,
1762 gen_rtx_SET (VOIDmode, target, const0_rtx));
1763 return 1;
1765 if (remainder == 0xffffffff)
1767 if (reload_completed && rtx_equal_p (target, source))
1768 return 0;
1769 if (generate)
1770 emit_constant_insn (cond,
1771 gen_rtx_SET (VOIDmode, target, source));
1772 return 1;
1774 can_invert = 1;
1775 break;
1777 case XOR:
1778 if (remainder == 0)
1780 if (reload_completed && rtx_equal_p (target, source))
1781 return 0;
1782 if (generate)
1783 emit_constant_insn (cond,
1784 gen_rtx_SET (VOIDmode, target, source));
1785 return 1;
1788 /* We don't know how to handle other cases yet. */
1789 gcc_assert (remainder == 0xffffffff);
1791 if (generate)
1792 emit_constant_insn (cond,
1793 gen_rtx_SET (VOIDmode, target,
1794 gen_rtx_NOT (mode, source)));
1795 return 1;
1797 case MINUS:
1798 /* We treat MINUS as (val - source), since (source - val) is always
1799 passed as (source + (-val)). */
1800 if (remainder == 0)
1802 if (generate)
1803 emit_constant_insn (cond,
1804 gen_rtx_SET (VOIDmode, target,
1805 gen_rtx_NEG (mode, source)));
1806 return 1;
1808 if (const_ok_for_arm (val))
1810 if (generate)
1811 emit_constant_insn (cond,
1812 gen_rtx_SET (VOIDmode, target,
1813 gen_rtx_MINUS (mode, GEN_INT (val),
1814 source)));
1815 return 1;
1817 can_negate = 1;
1819 break;
1821 default:
1822 gcc_unreachable ();
1825 /* If we can do it in one insn get out quickly. */
1826 if (const_ok_for_arm (val)
1827 || (can_negate_initial && const_ok_for_arm (-val))
1828 || (can_invert && const_ok_for_arm (~val)))
1830 if (generate)
1831 emit_constant_insn (cond,
1832 gen_rtx_SET (VOIDmode, target,
1833 (source
1834 ? gen_rtx_fmt_ee (code, mode, source,
1835 GEN_INT (val))
1836 : GEN_INT (val))));
1837 return 1;
1840 /* Calculate a few attributes that may be useful for specific
1841 optimizations. */
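/* As a worked illustration: for remainder == 0x000ffc00 these loops
   would give clear_sign_bit_copies == 12 (bits 31..20 clear),
   set_sign_bit_copies == 0, clear_zero_bit_copies == 10 (bits 9..0
   clear) and set_zero_bit_copies == 0.  */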
1842 for (i = 31; i >= 0; i--)
1844 if ((remainder & (1 << i)) == 0)
1845 clear_sign_bit_copies++;
1846 else
1847 break;
1850 for (i = 31; i >= 0; i--)
1852 if ((remainder & (1 << i)) != 0)
1853 set_sign_bit_copies++;
1854 else
1855 break;
1858 for (i = 0; i <= 31; i++)
1860 if ((remainder & (1 << i)) == 0)
1861 clear_zero_bit_copies++;
1862 else
1863 break;
1866 for (i = 0; i <= 31; i++)
1868 if ((remainder & (1 << i)) != 0)
1869 set_zero_bit_copies++;
1870 else
1871 break;
1874 switch (code)
1876 case SET:
1877 /* See if we can do this by sign-extending a constant that is known
1878 to be negative. This is a good way of doing it, since the shift
1879 may well merge into a subsequent insn. */
1880 if (set_sign_bit_copies > 1)
1882 if (const_ok_for_arm
1883 (temp1 = ARM_SIGN_EXTEND (remainder
1884 << (set_sign_bit_copies - 1))))
1886 if (generate)
1888 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
1889 emit_constant_insn (cond,
1890 gen_rtx_SET (VOIDmode, new_src,
1891 GEN_INT (temp1)));
1892 emit_constant_insn (cond,
1893 gen_ashrsi3 (target, new_src,
1894 GEN_INT (set_sign_bit_copies - 1)));
1896 return 2;
1898 /* For an inverted constant, we will need to set the low bits;
1899 these will be shifted out of harm's way. */
1900 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
1901 if (const_ok_for_arm (~temp1))
1903 if (generate)
1905 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
1906 emit_constant_insn (cond,
1907 gen_rtx_SET (VOIDmode, new_src,
1908 GEN_INT (temp1)));
1909 emit_constant_insn (cond,
1910 gen_ashrsi3 (target, new_src,
1911 GEN_INT (set_sign_bit_copies - 1)));
1913 return 2;
1917 /* See if we can calculate the value as the difference between two
1918 valid immediates. */
1919 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
1921 int topshift = clear_sign_bit_copies & ~1;
1923 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
1924 & (0xff000000 >> topshift));
1926 /* If temp1 is zero, then that means the 9 most significant
1927 bits of remainder were 1 and we've caused it to overflow.
1928 When topshift is 0 we don't need to do anything since we
1929 can borrow from 'bit 32'. */
1930 if (temp1 == 0 && topshift != 0)
1931 temp1 = 0x80000000 >> (topshift - 1);
1933 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
1935 if (const_ok_for_arm (temp2))
1937 if (generate)
1939 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
1940 emit_constant_insn (cond,
1941 gen_rtx_SET (VOIDmode, new_src,
1942 GEN_INT (temp1)));
1943 emit_constant_insn (cond,
1944 gen_addsi3 (target, new_src,
1945 GEN_INT (-temp2)));
1948 return 2;
1952 /* See if we can generate this by setting the bottom (or the top)
1953 16 bits, and then shifting these into the other half of the
1954 word. We only look for the simplest cases; to do more would cost
1955 too much. Be careful, however, not to generate this when the
1956 alternative would take fewer insns. */
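/* For example, 0x01010101 might be handled by first synthesizing
   0x0101 (mov #0x01 then orr #0x0100, since 0x0101 is not itself a
   valid immediate) and then ORing that value with a copy of itself
   shifted left by 16 -- three insns in total.  */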
1957 if (val & 0xffff0000)
1959 temp1 = remainder & 0xffff0000;
1960 temp2 = remainder & 0x0000ffff;
1962 /* Overlaps outside this range are best done using other methods. */
1963 for (i = 9; i < 24; i++)
1965 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
1966 && !const_ok_for_arm (temp2))
1968 rtx new_src = (subtargets
1969 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
1970 : target);
1971 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
1972 source, subtargets, generate);
1973 source = new_src;
1974 if (generate)
1975 emit_constant_insn
1976 (cond,
1977 gen_rtx_SET
1978 (VOIDmode, target,
1979 gen_rtx_IOR (mode,
1980 gen_rtx_ASHIFT (mode, source,
1981 GEN_INT (i)),
1982 source)));
1983 return insns + 1;
1987 /* Don't duplicate cases already considered. */
1988 for (i = 17; i < 24; i++)
1990 if (((temp1 | (temp1 >> i)) == remainder)
1991 && !const_ok_for_arm (temp1))
1993 rtx new_src = (subtargets
1994 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
1995 : target);
1996 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
1997 source, subtargets, generate);
1998 source = new_src;
1999 if (generate)
2000 emit_constant_insn
2001 (cond,
2002 gen_rtx_SET (VOIDmode, target,
2003 gen_rtx_IOR
2004 (mode,
2005 gen_rtx_LSHIFTRT (mode, source,
2006 GEN_INT (i)),
2007 source)));
2008 return insns + 1;
2012 break;
2014 case IOR:
2015 case XOR:
2016 /* If we have IOR or XOR, and the constant can be loaded in a
2017 single instruction, and we can find a temporary to put it in,
2018 then this can be done in two instructions instead of 3-4. */
2019 if (subtargets
2020 /* TARGET can't be NULL if SUBTARGETS is 0 */
2021 || (reload_completed && !reg_mentioned_p (target, source)))
2023 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2025 if (generate)
2027 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2029 emit_constant_insn (cond,
2030 gen_rtx_SET (VOIDmode, sub,
2031 GEN_INT (val)));
2032 emit_constant_insn (cond,
2033 gen_rtx_SET (VOIDmode, target,
2034 gen_rtx_fmt_ee (code, mode,
2035 source, sub)));
2037 return 2;
2041 if (code == XOR)
2042 break;
2044 if (set_sign_bit_copies > 8
2045 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2047 if (generate)
2049 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2050 rtx shift = GEN_INT (set_sign_bit_copies);
2052 emit_constant_insn
2053 (cond,
2054 gen_rtx_SET (VOIDmode, sub,
2055 gen_rtx_NOT (mode,
2056 gen_rtx_ASHIFT (mode,
2057 source,
2058 shift))));
2059 emit_constant_insn
2060 (cond,
2061 gen_rtx_SET (VOIDmode, target,
2062 gen_rtx_NOT (mode,
2063 gen_rtx_LSHIFTRT (mode, sub,
2064 shift))));
2066 return 2;
2069 if (set_zero_bit_copies > 8
2070 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2072 if (generate)
2074 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2075 rtx shift = GEN_INT (set_zero_bit_copies);
2077 emit_constant_insn
2078 (cond,
2079 gen_rtx_SET (VOIDmode, sub,
2080 gen_rtx_NOT (mode,
2081 gen_rtx_LSHIFTRT (mode,
2082 source,
2083 shift))));
2084 emit_constant_insn
2085 (cond,
2086 gen_rtx_SET (VOIDmode, target,
2087 gen_rtx_NOT (mode,
2088 gen_rtx_ASHIFT (mode, sub,
2089 shift))));
2091 return 2;
2094 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2096 if (generate)
2098 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2099 emit_constant_insn (cond,
2100 gen_rtx_SET (VOIDmode, sub,
2101 gen_rtx_NOT (mode, source)));
2102 source = sub;
2103 if (subtargets)
2104 sub = gen_reg_rtx (mode);
2105 emit_constant_insn (cond,
2106 gen_rtx_SET (VOIDmode, sub,
2107 gen_rtx_AND (mode, source,
2108 GEN_INT (temp1))));
2109 emit_constant_insn (cond,
2110 gen_rtx_SET (VOIDmode, target,
2111 gen_rtx_NOT (mode, sub)));
2113 return 3;
2115 break;
2117 case AND:
2118 /* See if two shifts will do 2 or more insns' worth of work. */
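/* For instance, an AND with 0x0000ffff (sixteen clear sign bits) can
   be done with the two-shift sequence emitted below, roughly:

   mov rT, rS, asl #16
   mov rD, rT, lsr #16

   (register names illustrative only).  */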
2119 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
2121 HOST_WIDE_INT shift_mask = ((0xffffffff
2122 << (32 - clear_sign_bit_copies))
2123 & 0xffffffff);
2125 if ((remainder | shift_mask) != 0xffffffff)
2127 if (generate)
2129 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2130 insns = arm_gen_constant (AND, mode, cond,
2131 remainder | shift_mask,
2132 new_src, source, subtargets, 1);
2133 source = new_src;
2135 else
2137 rtx targ = subtargets ? NULL_RTX : target;
2138 insns = arm_gen_constant (AND, mode, cond,
2139 remainder | shift_mask,
2140 targ, source, subtargets, 0);
2144 if (generate)
2146 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2147 rtx shift = GEN_INT (clear_sign_bit_copies);
2149 emit_insn (gen_ashlsi3 (new_src, source, shift));
2150 emit_insn (gen_lshrsi3 (target, new_src, shift));
2153 return insns + 2;
2156 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
2158 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
2160 if ((remainder | shift_mask) != 0xffffffff)
2162 if (generate)
2164 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2166 insns = arm_gen_constant (AND, mode, cond,
2167 remainder | shift_mask,
2168 new_src, source, subtargets, 1);
2169 source = new_src;
2171 else
2173 rtx targ = subtargets ? NULL_RTX : target;
2175 insns = arm_gen_constant (AND, mode, cond,
2176 remainder | shift_mask,
2177 targ, source, subtargets, 0);
2181 if (generate)
2183 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2184 rtx shift = GEN_INT (clear_zero_bit_copies);
2186 emit_insn (gen_lshrsi3 (new_src, source, shift));
2187 emit_insn (gen_ashlsi3 (target, new_src, shift));
2190 return insns + 2;
2193 break;
2195 default:
2196 break;
2199 for (i = 0; i < 32; i++)
2200 if (remainder & (1 << i))
2201 num_bits_set++;
2203 if (code == AND || (can_invert && num_bits_set > 16))
2204 remainder = (~remainder) & 0xffffffff;
2205 else if (code == PLUS && num_bits_set > 16)
2206 remainder = (-remainder) & 0xffffffff;
2207 else
2209 can_invert = 0;
2210 can_negate = 0;
2213 /* Now try to find a way of doing the job in either two or three
2214 instructions.
2215 We start by looking for the largest block of zeros that is aligned on
2216 a 2-bit boundary; we then fill up the temps, wrapping around to the
2217 top of the word when we drop off the bottom.
2218 In the worst case this code should produce no more than four insns. */
2220 int best_start = 0;
2221 int best_consecutive_zeros = 0;
2223 for (i = 0; i < 32; i += 2)
2225 int consecutive_zeros = 0;
2227 if (!(remainder & (3 << i)))
2229 while ((i < 32) && !(remainder & (3 << i)))
2231 consecutive_zeros += 2;
2232 i += 2;
2234 if (consecutive_zeros > best_consecutive_zeros)
2236 best_consecutive_zeros = consecutive_zeros;
2237 best_start = i - consecutive_zeros;
2239 i -= 2;
2243 /* So long as it won't require any more insns to do so, it's
2244 desirable to emit a small constant (in bits 0...9) in the last
2245 insn. This way there is more chance that it can be combined with
2246 a later addressing insn to form a pre-indexed load or store
2247 operation. Consider:
2249 *((volatile int *)0xe0000100) = 1;
2250 *((volatile int *)0xe0000110) = 2;
2252 We want this to wind up as:
2254 mov rA, #0xe0000000
2255 mov rB, #1
2256 str rB, [rA, #0x100]
2257 mov rB, #2
2258 str rB, [rA, #0x110]
2260 rather than having to synthesize both large constants from scratch.
2262 Therefore, we calculate how many insns would be required to emit
2263 the constant starting from `best_start', and also starting from
2264 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2265 yield a shorter sequence, we may as well use zero. */
2266 if (best_start != 0
2267 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2268 && (count_insns_for_constant (remainder, 0) <=
2269 count_insns_for_constant (remainder, best_start)))
2270 best_start = 0;
2272 /* Now start emitting the insns. */
2273 i = best_start;
2276 int end;
2278 if (i <= 0)
2279 i += 32;
2280 if (remainder & (3 << (i - 2)))
2282 end = i - 8;
2283 if (end < 0)
2284 end += 32;
2285 temp1 = remainder & ((0x0ff << end)
2286 | ((i < end) ? (0xff >> (32 - end)) : 0));
2287 remainder &= ~temp1;
2289 if (generate)
2291 rtx new_src, temp1_rtx;
2293 if (code == SET || code == MINUS)
2295 new_src = (subtargets ? gen_reg_rtx (mode) : target);
2296 if (can_invert && code != MINUS)
2297 temp1 = ~temp1;
2299 else
2301 if (remainder && subtargets)
2302 new_src = gen_reg_rtx (mode);
2303 else
2304 new_src = target;
2305 if (can_invert)
2306 temp1 = ~temp1;
2307 else if (can_negate)
2308 temp1 = -temp1;
2311 temp1 = trunc_int_for_mode (temp1, mode);
2312 temp1_rtx = GEN_INT (temp1);
2314 if (code == SET)
2316 else if (code == MINUS)
2317 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
2318 else
2319 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
2321 emit_constant_insn (cond,
2322 gen_rtx_SET (VOIDmode, new_src,
2323 temp1_rtx));
2324 source = new_src;
2327 if (code == SET)
2329 can_invert = 0;
2330 code = PLUS;
2332 else if (code == MINUS)
2333 code = PLUS;
2335 insns++;
2336 i -= 6;
2338 i -= 2;
2340 while (remainder);
2343 return insns;
2346 /* Canonicalize a comparison so that we are more likely to recognize it.
2347 This can be done for a few constant compares, where we can make the
2348 immediate value easier to load. */
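/* For example, (GT x 0xfff) would be rewritten here as (GE x 0x1000),
   since 0xfff is not a valid ARM immediate but 0x1000 is, so the
   comparison can then be done with a single cmp instruction.  */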
2350 enum rtx_code
2351 arm_canonicalize_comparison (enum rtx_code code, rtx * op1)
2353 unsigned HOST_WIDE_INT i = INTVAL (*op1);
2355 switch (code)
2357 case EQ:
2358 case NE:
2359 return code;
2361 case GT:
2362 case LE:
2363 if (i != ((((unsigned HOST_WIDE_INT) 1) << (HOST_BITS_PER_WIDE_INT - 1)) - 1)
2364 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
2366 *op1 = GEN_INT (i + 1);
2367 return code == GT ? GE : LT;
2369 break;
2371 case GE:
2372 case LT:
2373 if (i != (((unsigned HOST_WIDE_INT) 1) << (HOST_BITS_PER_WIDE_INT - 1))
2374 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
2376 *op1 = GEN_INT (i - 1);
2377 return code == GE ? GT : LE;
2379 break;
2381 case GTU:
2382 case LEU:
2383 if (i != ~((unsigned HOST_WIDE_INT) 0)
2384 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
2386 *op1 = GEN_INT (i + 1);
2387 return code == GTU ? GEU : LTU;
2389 break;
2391 case GEU:
2392 case LTU:
2393 if (i != 0
2394 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
2396 *op1 = GEN_INT (i - 1);
2397 return code == GEU ? GTU : LEU;
2399 break;
2401 default:
2402 gcc_unreachable ();
2405 return code;
2409 /* Define how to find the value returned by a function. */
2412 arm_function_value(tree type, tree func ATTRIBUTE_UNUSED)
2414 enum machine_mode mode;
2415 int unsignedp ATTRIBUTE_UNUSED;
2416 rtx r ATTRIBUTE_UNUSED;
2418 mode = TYPE_MODE (type);
2419 /* Promote integer types. */
2420 if (INTEGRAL_TYPE_P (type))
2421 PROMOTE_FUNCTION_MODE (mode, unsignedp, type);
2423 /* Promote small structs returned in a register to full-word size
2424 for big-endian AAPCS. */
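/* For instance, a 2-byte struct returned in a register would be
   widened here to SImode so that, on a big-endian AAPCS target, the
   value ends up in the most significant half of r0.  */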
2425 if (arm_return_in_msb (type))
2427 HOST_WIDE_INT size = int_size_in_bytes (type);
2428 if (size % UNITS_PER_WORD != 0)
2430 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
2431 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
2435 return LIBCALL_VALUE(mode);
2438 /* Determine the amount of memory needed to store the possible return
2439 registers of an untyped call. */
2441 arm_apply_result_size (void)
2443 int size = 16;
2445 if (TARGET_ARM)
2447 if (TARGET_HARD_FLOAT_ABI)
2449 if (TARGET_FPA)
2450 size += 12;
2451 if (TARGET_MAVERICK)
2452 size += 8;
2454 if (TARGET_IWMMXT_ABI)
2455 size += 8;
2458 return size;
2461 /* Decide whether a type should be returned in memory (true)
2462 or in a register (false). This is called by the macro
2463 RETURN_IN_MEMORY. */
2465 arm_return_in_memory (tree type)
2467 HOST_WIDE_INT size;
2469 if (!AGGREGATE_TYPE_P (type) &&
2470 (TREE_CODE (type) != VECTOR_TYPE) &&
2471 !(TARGET_AAPCS_BASED && TREE_CODE (type) == COMPLEX_TYPE))
2472 /* All simple types are returned in registers.
2473 For AAPCS, complex types are treated the same as aggregates. */
2474 return 0;
2476 size = int_size_in_bytes (type);
2478 if (arm_abi != ARM_ABI_APCS)
2480 /* ATPCS and later return aggregate types in memory only if they are
2481 larger than a word (or are variable size). */
2482 return (size < 0 || size > UNITS_PER_WORD);
2485 /* To maximize backwards compatibility with previous versions of gcc,
2486 return vectors up to 4 words in registers. */
2487 if (TREE_CODE (type) == VECTOR_TYPE)
2488 return (size < 0 || size > (4 * UNITS_PER_WORD));
2490 /* For the arm-wince targets we choose to be compatible with Microsoft's
2491 ARM and Thumb compilers, which always return aggregates in memory. */
2492 #ifndef ARM_WINCE
2493 /* All structures/unions bigger than one word are returned in memory.
2494 Also catch the case where int_size_in_bytes returns -1. In this case
2495 the aggregate is either huge or of variable size, and in either case
2496 we will want to return it via memory and not in a register. */
2497 if (size < 0 || size > UNITS_PER_WORD)
2498 return 1;
2500 if (TREE_CODE (type) == RECORD_TYPE)
2502 tree field;
2504 /* For a struct the APCS says that we only return in a register
2505 if the type is 'integer like' and every addressable element
2506 has an offset of zero. For practical purposes this means
2507 that the structure can have at most one non bit-field element
2508 and that this element must be the first one in the structure. */
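/* So, for example, under the APCS a struct containing a single int is
   returned in r0, whereas a 4-byte struct holding two non-bit-field
   shorts fails the checks below and is returned in memory.  */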
2510 /* Find the first field, ignoring non FIELD_DECL things which will
2511 have been created by C++. */
2512 for (field = TYPE_FIELDS (type);
2513 field && TREE_CODE (field) != FIELD_DECL;
2514 field = TREE_CHAIN (field))
2515 continue;
2517 if (field == NULL)
2518 return 0; /* An empty structure. Allowed by an extension to ANSI C. */
2520 /* Check that the first field is valid for returning in a register. */
2522 /* ... Floats are not allowed */
2523 if (FLOAT_TYPE_P (TREE_TYPE (field)))
2524 return 1;
2526 /* ... Aggregates that are not themselves valid for returning in
2527 a register are not allowed. */
2528 if (RETURN_IN_MEMORY (TREE_TYPE (field)))
2529 return 1;
2531 /* Now check the remaining fields, if any. Only bitfields are allowed,
2532 since they are not addressable. */
2533 for (field = TREE_CHAIN (field);
2534 field;
2535 field = TREE_CHAIN (field))
2537 if (TREE_CODE (field) != FIELD_DECL)
2538 continue;
2540 if (!DECL_BIT_FIELD_TYPE (field))
2541 return 1;
2544 return 0;
2547 if (TREE_CODE (type) == UNION_TYPE)
2549 tree field;
2551 /* Unions can be returned in registers if every element is
2552 integral, or can be returned in an integer register. */
2553 for (field = TYPE_FIELDS (type);
2554 field;
2555 field = TREE_CHAIN (field))
2557 if (TREE_CODE (field) != FIELD_DECL)
2558 continue;
2560 if (FLOAT_TYPE_P (TREE_TYPE (field)))
2561 return 1;
2563 if (RETURN_IN_MEMORY (TREE_TYPE (field)))
2564 return 1;
2567 return 0;
2569 #endif /* not ARM_WINCE */
2571 /* Return all other types in memory. */
2572 return 1;
2575 /* Indicate whether or not words of a double are in big-endian order. */
2578 arm_float_words_big_endian (void)
2580 if (TARGET_MAVERICK)
2581 return 0;
2583 /* For FPA, float words are always big-endian. For VFP, floats words
2584 follow the memory system mode. */
2586 if (TARGET_FPA)
2588 return 1;
2591 if (TARGET_VFP)
2592 return (TARGET_BIG_END ? 1 : 0);
2594 return 1;
2597 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2598 for a call to a function whose data type is FNTYPE.
2599 For a library call, FNTYPE is NULL. */
2600 void
2601 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
2602 rtx libname ATTRIBUTE_UNUSED,
2603 tree fndecl ATTRIBUTE_UNUSED)
2605 /* On the ARM, the offset starts at 0. */
2606 pcum->nregs = ((fntype && aggregate_value_p (TREE_TYPE (fntype), fntype)) ? 1 : 0);
2607 pcum->iwmmxt_nregs = 0;
2608 pcum->can_split = true;
2610 pcum->call_cookie = CALL_NORMAL;
2612 if (TARGET_LONG_CALLS)
2613 pcum->call_cookie = CALL_LONG;
2615 /* Check for long call/short call attributes. The attributes
2616 override any command line option. */
2617 if (fntype)
2619 if (lookup_attribute ("short_call", TYPE_ATTRIBUTES (fntype)))
2620 pcum->call_cookie = CALL_SHORT;
2621 else if (lookup_attribute ("long_call", TYPE_ATTRIBUTES (fntype)))
2622 pcum->call_cookie = CALL_LONG;
2625 /* Varargs vectors are treated the same as long long.
2626 named_count avoids having to change the way arm handles 'named' */
2627 pcum->named_count = 0;
2628 pcum->nargs = 0;
2630 if (TARGET_REALLY_IWMMXT && fntype)
2632 tree fn_arg;
2634 for (fn_arg = TYPE_ARG_TYPES (fntype);
2635 fn_arg;
2636 fn_arg = TREE_CHAIN (fn_arg))
2637 pcum->named_count += 1;
2639 if (! pcum->named_count)
2640 pcum->named_count = INT_MAX;
2645 /* Return true if mode/type need doubleword alignment. */
2646 bool
2647 arm_needs_doubleword_align (enum machine_mode mode, tree type)
2649 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
2650 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
2654 /* Determine where to put an argument to a function.
2655 Value is zero to push the argument on the stack,
2656 or a hard register in which to store the argument.
2658 MODE is the argument's machine mode.
2659 TYPE is the data type of the argument (as a tree).
2660 This is null for libcalls where that information may
2661 not be available.
2662 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2663 the preceding args and about the function being called.
2664 NAMED is nonzero if this argument is a named parameter
2665 (otherwise it is an extra parameter matching an ellipsis). */
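/* As an illustration of the doubleword-alignment rule below: on an
   AAPCS target a call such as f (int, long long) would pass the int
   in r0 and the long long in the even/odd pair r2/r3, leaving r1
   unused.  */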
2668 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
2669 tree type, int named)
2671 int nregs;
2673 /* Varargs vectors are treated the same as long long.
2674 named_count avoids having to change the way arm handles 'named' */
2675 if (TARGET_IWMMXT_ABI
2676 && arm_vector_mode_supported_p (mode)
2677 && pcum->named_count > pcum->nargs + 1)
2679 if (pcum->iwmmxt_nregs <= 9)
2680 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
2681 else
2683 pcum->can_split = false;
2684 return NULL_RTX;
2688 /* Put doubleword aligned quantities in even register pairs. */
2689 if (pcum->nregs & 1
2690 && ARM_DOUBLEWORD_ALIGN
2691 && arm_needs_doubleword_align (mode, type))
2692 pcum->nregs++;
2694 if (mode == VOIDmode)
2695 /* Compute operand 2 of the call insn. */
2696 return GEN_INT (pcum->call_cookie);
2698 /* Only allow splitting an arg between regs and memory if all preceding
2699 args were allocated to regs. For args passed by reference we only count
2700 the reference pointer. */
2701 if (pcum->can_split)
2702 nregs = 1;
2703 else
2704 nregs = ARM_NUM_REGS2 (mode, type);
2706 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
2707 return NULL_RTX;
2709 return gen_rtx_REG (mode, pcum->nregs);
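/* Return the number of bytes of an argument that are passed in
   registers when the remainder of the argument spills to the stack.
   For instance, a long long whose first word lands in r3 would have
   4 bytes in registers and the rest on the stack.  */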
2712 static int
2713 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
2714 tree type, bool named ATTRIBUTE_UNUSED)
2716 int nregs = pcum->nregs;
2718 if (arm_vector_mode_supported_p (mode))
2719 return 0;
2721 if (NUM_ARG_REGS > nregs
2722 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
2723 && pcum->can_split)
2724 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
2726 return 0;
2729 /* Variable sized types are passed by reference. This is a GCC
2730 extension to the ARM ABI. */
2732 static bool
2733 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2734 enum machine_mode mode ATTRIBUTE_UNUSED,
2735 tree type, bool named ATTRIBUTE_UNUSED)
2737 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
2740 /* Encode the current state of the #pragma [no_]long_calls. */
2741 typedef enum
2743 OFF, /* No #pragma [no_]long_calls is in effect. */
2744 LONG, /* #pragma long_calls is in effect. */
2745 SHORT /* #pragma no_long_calls is in effect. */
2746 } arm_pragma_enum;
2748 static arm_pragma_enum arm_pragma_long_calls = OFF;
2750 void
2751 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
2753 arm_pragma_long_calls = LONG;
2756 void
2757 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
2759 arm_pragma_long_calls = SHORT;
2762 void
2763 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
2765 arm_pragma_long_calls = OFF;
2768 /* Table of machine attributes. */
2769 const struct attribute_spec arm_attribute_table[] =
2771 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2772 /* Function calls made to this symbol must be done indirectly, because
2773 it may lie outside of the 26 bit addressing range of a normal function
2774 call. */
2775 { "long_call", 0, 0, false, true, true, NULL },
2776 /* Whereas these functions are always known to reside within the 26 bit
2777 addressing range. */
2778 { "short_call", 0, 0, false, true, true, NULL },
2779 /* Interrupt Service Routines have special prologue and epilogue requirements. */
2780 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
2781 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
2782 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute },
2783 #ifdef ARM_PE
2784 /* ARM/PE has three new attributes:
2785 interfacearm - ?
2786 dllexport - for exporting a function/variable that will live in a dll
2787 dllimport - for importing a function/variable from a dll
2789 Microsoft allows multiple declspecs in one __declspec, separating
2790 them with spaces. We do NOT support this. Instead, use __declspec
2791 multiple times.
2793 { "dllimport", 0, 0, true, false, false, NULL },
2794 { "dllexport", 0, 0, true, false, false, NULL },
2795 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute },
2796 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
2797 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2798 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2799 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
2800 #endif
2801 { NULL, 0, 0, false, false, false, NULL }
2804 /* Handle an attribute requiring a FUNCTION_DECL;
2805 arguments as in struct attribute_spec.handler. */
2806 static tree
2807 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
2808 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
2810 if (TREE_CODE (*node) != FUNCTION_DECL)
2812 warning (0, "%qs attribute only applies to functions",
2813 IDENTIFIER_POINTER (name));
2814 *no_add_attrs = true;
2817 return NULL_TREE;
2820 /* Handle an "interrupt" or "isr" attribute;
2821 arguments as in struct attribute_spec.handler. */
2822 static tree
2823 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
2824 bool *no_add_attrs)
2826 if (DECL_P (*node))
2828 if (TREE_CODE (*node) != FUNCTION_DECL)
2830 warning (0, "%qs attribute only applies to functions",
2831 IDENTIFIER_POINTER (name));
2832 *no_add_attrs = true;
2834 /* FIXME: the argument if any is checked for type attributes;
2835 should it be checked for decl ones? */
2837 else
2839 if (TREE_CODE (*node) == FUNCTION_TYPE
2840 || TREE_CODE (*node) == METHOD_TYPE)
2842 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
2844 warning (0, "%qs attribute ignored", IDENTIFIER_POINTER (name));
2845 *no_add_attrs = true;
2848 else if (TREE_CODE (*node) == POINTER_TYPE
2849 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
2850 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
2851 && arm_isr_value (args) != ARM_FT_UNKNOWN)
2853 *node = build_variant_type_copy (*node);
2854 TREE_TYPE (*node) = build_type_attribute_variant
2855 (TREE_TYPE (*node),
2856 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
2857 *no_add_attrs = true;
2859 else
2861 /* Possibly pass this attribute on from the type to a decl. */
2862 if (flags & ((int) ATTR_FLAG_DECL_NEXT
2863 | (int) ATTR_FLAG_FUNCTION_NEXT
2864 | (int) ATTR_FLAG_ARRAY_NEXT))
2866 *no_add_attrs = true;
2867 return tree_cons (name, args, NULL_TREE);
2869 else
2871 warning (0, "%qs attribute ignored", IDENTIFIER_POINTER (name));
2876 return NULL_TREE;
2879 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2880 /* Handle the "notshared" attribute. This attribute is another way of
2881 requesting hidden visibility. ARM's compiler supports
2882 "__declspec(notshared)"; we support the same thing via an
2883 attribute. */
2885 static tree
2886 arm_handle_notshared_attribute (tree *node,
2887 tree name ATTRIBUTE_UNUSED,
2888 tree args ATTRIBUTE_UNUSED,
2889 int flags ATTRIBUTE_UNUSED,
2890 bool *no_add_attrs)
2892 tree decl = TYPE_NAME (*node);
2894 if (decl)
2896 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
2897 DECL_VISIBILITY_SPECIFIED (decl) = 1;
2898 *no_add_attrs = false;
2900 return NULL_TREE;
2902 #endif
2904 /* Return 0 if the attributes for two types are incompatible, 1 if they
2905 are compatible, and 2 if they are nearly compatible (which causes a
2906 warning to be generated). */
2907 static int
2908 arm_comp_type_attributes (tree type1, tree type2)
2910 int l1, l2, s1, s2;
2912 /* Check for mismatch of non-default calling convention. */
2913 if (TREE_CODE (type1) != FUNCTION_TYPE)
2914 return 1;
2916 /* Check for mismatched call attributes. */
2917 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
2918 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
2919 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
2920 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
2922 /* Only bother to check if an attribute is defined. */
2923 if (l1 | l2 | s1 | s2)
2925 /* If one type has an attribute, the other must have the same attribute. */
2926 if ((l1 != l2) || (s1 != s2))
2927 return 0;
2929 /* Disallow mixed attributes. */
2930 if ((l1 & s2) || (l2 & s1))
2931 return 0;
2934 /* Check for mismatched ISR attribute. */
2935 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
2936 if (! l1)
2937 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
2938 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
2939 if (! l2)
2940 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
2941 if (l1 != l2)
2942 return 0;
2944 return 1;
2947 /* Encode long_call or short_call attribute by prefixing
2948 symbol name in DECL with a special character FLAG. */
2949 void
2950 arm_encode_call_attribute (tree decl, int flag)
2952 const char * str = XSTR (XEXP (DECL_RTL (decl), 0), 0);
2953 int len = strlen (str);
2954 char * newstr;
2956 /* Do not allow weak functions to be treated as short call. */
2957 if (DECL_WEAK (decl) && flag == SHORT_CALL_FLAG_CHAR)
2958 return;
2960 newstr = alloca (len + 2);
2961 newstr[0] = flag;
2962 strcpy (newstr + 1, str);
2964 newstr = (char *) ggc_alloc_string (newstr, len + 1);
2965 XSTR (XEXP (DECL_RTL (decl), 0), 0) = newstr;
2968 /* Assigns default attributes to newly defined type. This is used to
2969 set short_call/long_call attributes for function types of
2970 functions defined inside corresponding #pragma scopes. */
2971 static void
2972 arm_set_default_type_attributes (tree type)
2974 /* Add __attribute__ ((long_call)) to all functions, when
2975 inside #pragma long_calls or __attribute__ ((short_call)),
2976 when inside #pragma no_long_calls. */
2977 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
2979 tree type_attr_list, attr_name;
2980 type_attr_list = TYPE_ATTRIBUTES (type);
2982 if (arm_pragma_long_calls == LONG)
2983 attr_name = get_identifier ("long_call");
2984 else if (arm_pragma_long_calls == SHORT)
2985 attr_name = get_identifier ("short_call");
2986 else
2987 return;
2989 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
2990 TYPE_ATTRIBUTES (type) = type_attr_list;
2994 /* Return 1 if the operand is a SYMBOL_REF for a function known to be
2995 defined within the current compilation unit. If this cannot be
2996 determined, then 0 is returned. */
2997 static int
2998 current_file_function_operand (rtx sym_ref)
3000 /* This is a bit of a fib. A function will have a short call flag
3001 applied to its name if it has the short call attribute, or it has
3002 already been defined within the current compilation unit. */
3003 if (ENCODED_SHORT_CALL_ATTR_P (XSTR (sym_ref, 0)))
3004 return 1;
3006 /* The current function is always defined within the current compilation
3007 unit. If it is a weak definition, however, then this may not be the real
3008 definition of the function, and so we have to say no. */
3009 if (sym_ref == XEXP (DECL_RTL (current_function_decl), 0)
3010 && !DECL_WEAK (current_function_decl))
3011 return 1;
3013 /* We cannot make the determination - default to returning 0. */
3014 return 0;
3017 /* Return nonzero if a 32 bit "long_call" should be generated for
3018 this call. We generate a long_call if the function:
3020 a. has an __attribute__ ((long_call))
3021 or b. is within the scope of a #pragma long_calls
3022 or c. the -mlong-calls command line switch has been specified
3023 and either:
3024 1. -ffunction-sections is in effect
3025 or 2. the current function has __attribute__ ((section))
3026 or 3. the target function has __attribute__ ((section))
3028 However we do not generate a long call if the function:
3030 d. has an __attribute__ ((short_call))
3031 or e. is inside the scope of a #pragma no_long_calls
3032 or f. is defined within the current compilation unit.
3034 This function will be called by C fragments contained in the machine
3035 description file. SYM_REF and CALL_COOKIE correspond to the matched
3036 rtl operands. CALL_SYMBOL is used to distinguish between
3037 two different callers of the function. It is set to 1 in the
3038 "call_symbol" and "call_symbol_value" patterns and to 0 in the "call"
3039 and "call_value" patterns. This is because of the difference in the
3040 SYM_REFs passed by these patterns. */
3042 arm_is_longcall_p (rtx sym_ref, int call_cookie, int call_symbol)
3044 if (!call_symbol)
3046 if (GET_CODE (sym_ref) != MEM)
3047 return 0;
3049 sym_ref = XEXP (sym_ref, 0);
3052 if (GET_CODE (sym_ref) != SYMBOL_REF)
3053 return 0;
3055 if (call_cookie & CALL_SHORT)
3056 return 0;
3058 if (TARGET_LONG_CALLS)
3060 if (flag_function_sections
3061 || DECL_SECTION_NAME (current_function_decl))
3062 /* c.3 is handled by the definition of the
3063 ARM_DECLARE_FUNCTION_SIZE macro. */
3064 return 1;
3067 if (current_file_function_operand (sym_ref))
3068 return 0;
3070 return (call_cookie & CALL_LONG)
3071 || ENCODED_LONG_CALL_ATTR_P (XSTR (sym_ref, 0))
3072 || TARGET_LONG_CALLS;
3075 /* Return nonzero if it is ok to make a tail-call to DECL. */
3076 static bool
3077 arm_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
3079 int call_type = TARGET_LONG_CALLS ? CALL_LONG : CALL_NORMAL;
3081 if (cfun->machine->sibcall_blocked)
3082 return false;
3084 /* Never tailcall something for which we have no decl, or if we
3085 are in Thumb mode. */
3086 if (decl == NULL || TARGET_THUMB)
3087 return false;
3089 /* Get the calling method. */
3090 if (lookup_attribute ("short_call", TYPE_ATTRIBUTES (TREE_TYPE (decl))))
3091 call_type = CALL_SHORT;
3092 else if (lookup_attribute ("long_call", TYPE_ATTRIBUTES (TREE_TYPE (decl))))
3093 call_type = CALL_LONG;
3095 /* Cannot tail-call to long calls, since these are out of range of
3096 a branch instruction. However, if not compiling PIC, we know
3097 we can reach the symbol if it is in this compilation unit. */
3098 if (call_type == CALL_LONG && (flag_pic || !TREE_ASM_WRITTEN (decl)))
3099 return false;
3101 /* If we are interworking and the function is not declared static
3102 then we can't tail-call it unless we know that it exists in this
3103 compilation unit (since it might be a Thumb routine). */
3104 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
3105 return false;
3107 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
3108 if (IS_INTERRUPT (arm_current_func_type ()))
3109 return false;
3111 /* Everything else is ok. */
3112 return true;
3116 /* Addressing mode support functions. */
3118 /* Return nonzero if X is a legitimate immediate operand when compiling
3119 for PIC. */
3121 legitimate_pic_operand_p (rtx x)
3123 if (CONSTANT_P (x)
3124 && flag_pic
3125 && (GET_CODE (x) == SYMBOL_REF
3126 || (GET_CODE (x) == CONST
3127 && GET_CODE (XEXP (x, 0)) == PLUS
3128 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)))
3129 return 0;
3131 return 1;
3135 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
3137 if (GET_CODE (orig) == SYMBOL_REF
3138 || GET_CODE (orig) == LABEL_REF)
3140 #ifndef AOF_ASSEMBLER
3141 rtx pic_ref, address;
3142 #endif
3143 rtx insn;
3144 int subregs = 0;
3146 if (reg == 0)
3148 gcc_assert (!no_new_pseudos);
3149 reg = gen_reg_rtx (Pmode);
3151 subregs = 1;
3154 #ifdef AOF_ASSEMBLER
3155 /* The AOF assembler can generate relocations for these directly, and
3156 understands that the PIC register has to be added into the offset. */
3157 insn = emit_insn (gen_pic_load_addr_based (reg, orig));
3158 #else
3159 if (subregs)
3160 address = gen_reg_rtx (Pmode);
3161 else
3162 address = reg;
3164 if (TARGET_ARM)
3165 emit_insn (gen_pic_load_addr_arm (address, orig));
3166 else
3167 emit_insn (gen_pic_load_addr_thumb (address, orig));
3169 if ((GET_CODE (orig) == LABEL_REF
3170 || (GET_CODE (orig) == SYMBOL_REF &&
3171 SYMBOL_REF_LOCAL_P (orig)))
3172 && NEED_GOT_RELOC)
3173 pic_ref = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, address);
3174 else
3176 pic_ref = gen_const_mem (Pmode,
3177 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
3178 address));
3181 insn = emit_move_insn (reg, pic_ref);
3182 #endif
3183 current_function_uses_pic_offset_table = 1;
3184 /* Put a REG_EQUAL note on this insn, so that it can be optimized
3185 by the loop optimizer. */
3186 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, orig,
3187 REG_NOTES (insn));
3188 return reg;
3190 else if (GET_CODE (orig) == CONST)
3192 rtx base, offset;
3194 if (GET_CODE (XEXP (orig, 0)) == PLUS
3195 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
3196 return orig;
3198 if (reg == 0)
3200 gcc_assert (!no_new_pseudos);
3201 reg = gen_reg_rtx (Pmode);
3204 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
3206 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
3207 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
3208 base == reg ? 0 : reg);
3210 if (GET_CODE (offset) == CONST_INT)
3212 /* The base register doesn't really matter; we only want to
3213 test the index for the appropriate mode. */
3214 if (!arm_legitimate_index_p (mode, offset, SET, 0))
3216 gcc_assert (!no_new_pseudos);
3217 offset = force_reg (Pmode, offset);
3220 if (GET_CODE (offset) == CONST_INT)
3221 return plus_constant (base, INTVAL (offset));
3224 if (GET_MODE_SIZE (mode) > 4
3225 && (GET_MODE_CLASS (mode) == MODE_INT
3226 || TARGET_SOFT_FLOAT))
3228 emit_insn (gen_addsi3 (reg, base, offset));
3229 return reg;
3232 return gen_rtx_PLUS (Pmode, base, offset);
3235 return orig;
3239 /* Find a spare low register to use during the prolog of a function. */
3241 static int
3242 thumb_find_work_register (unsigned long pushed_regs_mask)
3244 int reg;
3246 /* Check the argument registers first as these are call-used. The
3247 register allocation order means that sometimes r3 might be used
3248 but earlier argument registers might not, so check them all. */
3249 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
3250 if (!regs_ever_live[reg])
3251 return reg;
3253 /* Before going on to check the call-saved registers we can try a couple
3254 more ways of deducing that r3 is available. The first is when we are
3255 pushing anonymous arguments onto the stack and we have fewer than 4
3256 registers' worth of fixed arguments(*). In this case r3 will be part of
3257 the variable argument list and so we can be sure that it will be
3258 pushed right at the start of the function. Hence it will be available
3259 for the rest of the prologue.
3260 (*): i.e. current_function_pretend_args_size is greater than 0. */
3261 if (cfun->machine->uses_anonymous_args
3262 && current_function_pretend_args_size > 0)
3263 return LAST_ARG_REGNUM;
3265 /* The other case is when we have fixed arguments but fewer than 4 registers'
3266 worth. In this case r3 might be used in the body of the function, but
3267 it is not being used to convey an argument into the function. In theory
3268 we could just check current_function_args_size to see how many bytes are
3269 being passed in argument registers, but it seems that it is unreliable.
3270 Sometimes it will have the value 0 when in fact arguments are being
3271 passed. (See testcase execute/20021111-1.c for an example). So we also
3272 check the args_info.nregs field as well. The problem with this field is
3273 that it makes no allowances for arguments that are passed to the
3274 function but which are not used. Hence we could miss an opportunity
3275 when a function has an unused argument in r3. But it is better to be
3276 safe than to be sorry. */
3277 if (! cfun->machine->uses_anonymous_args
3278 && current_function_args_size >= 0
3279 && current_function_args_size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
3280 && cfun->args_info.nregs < 4)
3281 return LAST_ARG_REGNUM;
3283 /* Otherwise look for a call-saved register that is going to be pushed. */
3284 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
3285 if (pushed_regs_mask & (1 << reg))
3286 return reg;
3288 /* Something went wrong - thumb_compute_save_reg_mask()
3289 should have arranged for a suitable register to be pushed. */
3290 gcc_unreachable ();
3294 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
3295 low register. */
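/* A rough sketch of the ARM-mode sequence this emits:

   ldr rPIC, Lgotoff @ word holding _GLOBAL_OFFSET_TABLE_ - (Lpic + 8)
   Lpic: add rPIC, pc, rPIC

   leaving rPIC holding the address of the GOT (register and label
   names illustrative only).  */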
3297 void
3298 arm_load_pic_register (unsigned int scratch)
3300 #ifndef AOF_ASSEMBLER
3301 rtx l1, pic_tmp, pic_tmp2, pic_rtx;
3302 rtx global_offset_table;
3304 if (current_function_uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
3305 return;
3307 gcc_assert (flag_pic);
3309 l1 = gen_label_rtx ();
3311 global_offset_table = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3312 /* On the ARM the PC register contains 'dot + 8' at the time of the
3313 addition; on the Thumb it is 'dot + 4'. */
3314 pic_tmp = plus_constant (gen_rtx_LABEL_REF (Pmode, l1), TARGET_ARM ? 8 : 4);
3315 if (GOT_PCREL)
3316 pic_tmp2 = gen_rtx_CONST (VOIDmode,
3317 gen_rtx_PLUS (Pmode, global_offset_table, pc_rtx));
3318 else
3319 pic_tmp2 = gen_rtx_CONST (VOIDmode, global_offset_table);
3321 pic_rtx = gen_rtx_CONST (Pmode, gen_rtx_MINUS (Pmode, pic_tmp2, pic_tmp));
3323 if (TARGET_ARM)
3325 emit_insn (gen_pic_load_addr_arm (pic_offset_table_rtx, pic_rtx));
3326 emit_insn (gen_pic_add_dot_plus_eight (pic_offset_table_rtx, l1));
3328 else
3330 if (REGNO (pic_offset_table_rtx) > LAST_LO_REGNUM)
3332 /* We will have pushed the pic register, so should always be
3333 able to find a work register. */
3334 pic_tmp = gen_rtx_REG (SImode, scratch);
3335 emit_insn (gen_pic_load_addr_thumb (pic_tmp, pic_rtx));
3336 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
3338 else
3339 emit_insn (gen_pic_load_addr_thumb (pic_offset_table_rtx, pic_rtx));
3340 emit_insn (gen_pic_add_dot_plus_four (pic_offset_table_rtx, l1));
3343 /* Need to emit this whether or not we obey regdecls,
3344 since setjmp/longjmp can cause life info to screw up. */
3345 emit_insn (gen_rtx_USE (VOIDmode, pic_offset_table_rtx));
3346 #endif /* AOF_ASSEMBLER */
3350 /* Return nonzero if X is valid as an ARM state addressing register. */
3351 static int
3352 arm_address_register_rtx_p (rtx x, int strict_p)
3354 int regno;
3356 if (GET_CODE (x) != REG)
3357 return 0;
3359 regno = REGNO (x);
3361 if (strict_p)
3362 return ARM_REGNO_OK_FOR_BASE_P (regno);
3364 return (regno <= LAST_ARM_REGNUM
3365 || regno >= FIRST_PSEUDO_REGISTER
3366 || regno == FRAME_POINTER_REGNUM
3367 || regno == ARG_POINTER_REGNUM);
3370 /* Return nonzero if X is a valid ARM state address operand. */
3372 arm_legitimate_address_p (enum machine_mode mode, rtx x, RTX_CODE outer,
3373 int strict_p)
3375 bool use_ldrd;
3376 enum rtx_code code = GET_CODE (x);
3378 if (arm_address_register_rtx_p (x, strict_p))
3379 return 1;
3381 use_ldrd = (TARGET_LDRD
3382 && (mode == DImode
3383 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
3385 if (code == POST_INC || code == PRE_DEC
3386 || ((code == PRE_INC || code == POST_DEC)
3387 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
3388 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
3390 else if ((code == POST_MODIFY || code == PRE_MODIFY)
3391 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
3392 && GET_CODE (XEXP (x, 1)) == PLUS
3393 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
3395 rtx addend = XEXP (XEXP (x, 1), 1);
3397 /* Don't allow ldrd post-increment by register because it's hard
3398 to fix up invalid register choices. */
3399 if (use_ldrd
3400 && GET_CODE (x) == POST_MODIFY
3401 && GET_CODE (addend) == REG)
3402 return 0;
3404 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
3405 && arm_legitimate_index_p (mode, addend, outer, strict_p));
3408 /* After reload constants split into minipools will have addresses
3409 from a LABEL_REF. */
3410 else if (reload_completed
3411 && (code == LABEL_REF
3412 || (code == CONST
3413 && GET_CODE (XEXP (x, 0)) == PLUS
3414 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
3415 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
3416 return 1;
3418 else if (mode == TImode)
3419 return 0;
3421 else if (code == PLUS)
3423 rtx xop0 = XEXP (x, 0);
3424 rtx xop1 = XEXP (x, 1);
3426 return ((arm_address_register_rtx_p (xop0, strict_p)
3427 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
3428 || (arm_address_register_rtx_p (xop1, strict_p)
3429 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
3432 #if 0
3433 /* Reload currently can't handle MINUS, so disable this for now */
3434 else if (GET_CODE (x) == MINUS)
3436 rtx xop0 = XEXP (x, 0);
3437 rtx xop1 = XEXP (x, 1);
3439 return (arm_address_register_rtx_p (xop0, strict_p)
3440 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
3442 #endif
3444 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
3445 && code == SYMBOL_REF
3446 && CONSTANT_POOL_ADDRESS_P (x)
3447 && ! (flag_pic
3448 && symbol_mentioned_p (get_pool_constant (x))))
3449 return 1;
3451 return 0;
3454 /* Return nonzero if INDEX is valid for an address index operand in
3455 ARM state. */
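/* Broadly, the cases below allow: coprocessor (FPA/Maverick) accesses
   a word-aligned offset in (-1024, 1024); DImode/DFmode a +/-255
   offset with LDRD, otherwise roughly +/-4095; HImode and
   sign-extended QImode on ARMv4 a +/-255 offset; and plain word/byte
   accesses a +/-4095 offset or a (possibly shifted) index register.  */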
3456 static int
3457 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
3458 int strict_p)
3460 HOST_WIDE_INT range;
3461 enum rtx_code code = GET_CODE (index);
3463 /* Standard coprocessor addressing modes. */
3464 if (TARGET_HARD_FLOAT
3465 && (TARGET_FPA || TARGET_MAVERICK)
3466 && (GET_MODE_CLASS (mode) == MODE_FLOAT
3467 || (TARGET_MAVERICK && mode == DImode)))
3468 return (code == CONST_INT && INTVAL (index) < 1024
3469 && INTVAL (index) > -1024
3470 && (INTVAL (index) & 3) == 0);
3472 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
3473 return (code == CONST_INT
3474 && INTVAL (index) < 1024
3475 && INTVAL (index) > -1024
3476 && (INTVAL (index) & 3) == 0);
3478 if (arm_address_register_rtx_p (index, strict_p)
3479 && (GET_MODE_SIZE (mode) <= 4))
3480 return 1;
3482 if (mode == DImode || mode == DFmode)
3484 if (code == CONST_INT)
3486 HOST_WIDE_INT val = INTVAL (index);
3488 if (TARGET_LDRD)
3489 return val > -256 && val < 256;
3490 else
3491 return val > -4096 && val < 4092;
3494 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
3497 if (GET_MODE_SIZE (mode) <= 4
3498 && ! (arm_arch4
3499 && (mode == HImode
3500 || (mode == QImode && outer == SIGN_EXTEND))))
3502 if (code == MULT)
3504 rtx xiop0 = XEXP (index, 0);
3505 rtx xiop1 = XEXP (index, 1);
3507 return ((arm_address_register_rtx_p (xiop0, strict_p)
3508 && power_of_two_operand (xiop1, SImode))
3509 || (arm_address_register_rtx_p (xiop1, strict_p)
3510 && power_of_two_operand (xiop0, SImode)));
3512 else if (code == LSHIFTRT || code == ASHIFTRT
3513 || code == ASHIFT || code == ROTATERT)
3515 rtx op = XEXP (index, 1);
3517 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
3518 && GET_CODE (op) == CONST_INT
3519 && INTVAL (op) > 0
3520 && INTVAL (op) <= 31);
3524 /* For ARM v4 we may be doing a sign-extend operation during the
3525 load. */
3526 if (arm_arch4)
3528 if (mode == HImode || (outer == SIGN_EXTEND && mode == QImode))
3529 range = 256;
3530 else
3531 range = 4096;
3533 else
3534 range = (mode == HImode) ? 4095 : 4096;
3536 return (code == CONST_INT
3537 && INTVAL (index) < range
3538 && INTVAL (index) > -range);
3541 /* Return nonzero if X is valid as a Thumb state base register. */
3542 static int
3543 thumb_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
3545 int regno;
3547 if (GET_CODE (x) != REG)
3548 return 0;
3550 regno = REGNO (x);
3552 if (strict_p)
3553 return THUMB_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
3555 return (regno <= LAST_LO_REGNUM
3556 || regno > LAST_VIRTUAL_REGISTER
3557 || regno == FRAME_POINTER_REGNUM
3558 || (GET_MODE_SIZE (mode) >= 4
3559 && (regno == STACK_POINTER_REGNUM
3560 || regno >= FIRST_PSEUDO_REGISTER
3561 || x == hard_frame_pointer_rtx
3562 || x == arg_pointer_rtx)));
3565 /* Return nonzero if x is a legitimate index register. This is the case
3566 for any base register that can access a QImode object. */
3567 inline static int
3568 thumb_index_register_rtx_p (rtx x, int strict_p)
3570 return thumb_base_register_rtx_p (x, QImode, strict_p);
3573 /* Return nonzero if x is a legitimate Thumb-state address.
3575 The AP may be eliminated to either the SP or the FP, so we use the
3576 least common denominator, e.g. SImode, and offsets from 0 to 64.
3578 ??? Verify whether the above is the right approach.
3580 ??? Also, the FP may be eliminated to the SP, so perhaps that
3581 needs special handling also.
3583 ??? Look at how the mips16 port solves this problem. It probably uses
3584 better ways to solve some of these problems.
3586 Although it is not incorrect, we don't accept QImode and HImode
3587 addresses based on the frame pointer or arg pointer until the
3588 reload pass starts. This is so that eliminating such addresses
3589 into stack based ones won't produce impossible code. */
3591 thumb_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
3593 /* ??? Not clear if this is right. Experiment. */
3594 if (GET_MODE_SIZE (mode) < 4
3595 && !(reload_in_progress || reload_completed)
3596 && (reg_mentioned_p (frame_pointer_rtx, x)
3597 || reg_mentioned_p (arg_pointer_rtx, x)
3598 || reg_mentioned_p (virtual_incoming_args_rtx, x)
3599 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
3600 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
3601 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
3602 return 0;
3604 /* Accept any base register. SP only in SImode or larger. */
3605 else if (thumb_base_register_rtx_p (x, mode, strict_p))
3606 return 1;
3608 /* This is PC relative data before arm_reorg runs. */
3609 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
3610 && GET_CODE (x) == SYMBOL_REF
3611 && CONSTANT_POOL_ADDRESS_P (x) && ! flag_pic)
3612 return 1;
3614 /* This is PC relative data after arm_reorg runs. */
3615 else if (GET_MODE_SIZE (mode) >= 4 && reload_completed
3616 && (GET_CODE (x) == LABEL_REF
3617 || (GET_CODE (x) == CONST
3618 && GET_CODE (XEXP (x, 0)) == PLUS
3619 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
3620 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
3621 return 1;
3623 /* Post-inc indexing only supported for SImode and larger. */
3624 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
3625 && thumb_index_register_rtx_p (XEXP (x, 0), strict_p))
3626 return 1;
3628 else if (GET_CODE (x) == PLUS)
3630 /* REG+REG address can be any two index registers. */
3631 /* We disallow FRAME+REG addressing since we know that FRAME
3632 will be replaced with STACK, and SP relative addressing only
3633 permits SP+OFFSET. */
3634 if (GET_MODE_SIZE (mode) <= 4
3635 && XEXP (x, 0) != frame_pointer_rtx
3636 && XEXP (x, 1) != frame_pointer_rtx
3637 && thumb_index_register_rtx_p (XEXP (x, 0), strict_p)
3638 && thumb_index_register_rtx_p (XEXP (x, 1), strict_p))
3639 return 1;
3641 /* REG+const has 5-7 bit offset for non-SP registers. */
3642 else if ((thumb_index_register_rtx_p (XEXP (x, 0), strict_p)
3643 || XEXP (x, 0) == arg_pointer_rtx)
3644 && GET_CODE (XEXP (x, 1)) == CONST_INT
3645 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
3646 return 1;
3648 /* REG+const has a 10-bit offset for SP, but only SImode and
3649 larger are supported. */
3650 /* ??? Should probably check for DI/DFmode overflow here
3651 just like GO_IF_LEGITIMATE_OFFSET does. */
3652 else if (GET_CODE (XEXP (x, 0)) == REG
3653 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
3654 && GET_MODE_SIZE (mode) >= 4
3655 && GET_CODE (XEXP (x, 1)) == CONST_INT
3656 && INTVAL (XEXP (x, 1)) >= 0
3657 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
3658 && (INTVAL (XEXP (x, 1)) & 3) == 0)
3659 return 1;
3661 else if (GET_CODE (XEXP (x, 0)) == REG
3662 && REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
3663 && GET_MODE_SIZE (mode) >= 4
3664 && GET_CODE (XEXP (x, 1)) == CONST_INT
3665 && (INTVAL (XEXP (x, 1)) & 3) == 0)
3666 return 1;
3669 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
3670 && GET_MODE_SIZE (mode) == 4
3671 && GET_CODE (x) == SYMBOL_REF
3672 && CONSTANT_POOL_ADDRESS_P (x)
3673 && !(flag_pic
3674 && symbol_mentioned_p (get_pool_constant (x))))
3675 return 1;
3677 return 0;
3680 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
3681 instruction of mode MODE. */
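/* In other words: byte accesses allow offsets 0..31, halfword accesses
   even offsets 0..62, and word (or larger) accesses offsets that are
   multiples of 4 and keep the whole access within the first 128
   bytes.  */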
3683 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
3685 switch (GET_MODE_SIZE (mode))
3687 case 1:
3688 return val >= 0 && val < 32;
3690 case 2:
3691 return val >= 0 && val < 64 && (val & 1) == 0;
3693 default:
3694 return (val >= 0
3695 && (val + GET_MODE_SIZE (mode)) <= 128
3696 && (val & 3) == 0);
3700 /* Try machine-dependent ways of modifying an illegitimate address
3701 to be legitimate. If we find one, return the new, valid address. */
3703 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
3705 if (GET_CODE (x) == PLUS)
3707 rtx xop0 = XEXP (x, 0);
3708 rtx xop1 = XEXP (x, 1);
3710 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
3711 xop0 = force_reg (SImode, xop0);
3713 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
3714 xop1 = force_reg (SImode, xop1);
3716 if (ARM_BASE_REGISTER_RTX_P (xop0)
3717 && GET_CODE (xop1) == CONST_INT)
3719 HOST_WIDE_INT n, low_n;
3720 rtx base_reg, val;
3721 n = INTVAL (xop1);
3723 /* VFP addressing modes actually allow greater offsets, but for
3724 now we just stick with the lowest common denominator. */
3725 if (mode == DImode
3726 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
3728 low_n = n & 0x0f;
3729 n &= ~0x0f;
3730 if (low_n > 4)
3732 n += 16;
3733 low_n -= 16;
3736 else
3738 low_n = ((mode) == TImode ? 0
3739 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
3740 n -= low_n;
3743 base_reg = gen_reg_rtx (SImode);
3744 val = force_operand (gen_rtx_PLUS (SImode, xop0,
3745 GEN_INT (n)), NULL_RTX);
3746 emit_move_insn (base_reg, val);
3747 x = (low_n == 0 ? base_reg
3748 : gen_rtx_PLUS (SImode, base_reg, GEN_INT (low_n)));
3750 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
3751 x = gen_rtx_PLUS (SImode, xop0, xop1);
3754 /* XXX We don't allow MINUS any more -- see comment in
3755 arm_legitimate_address_p (). */
3756 else if (GET_CODE (x) == MINUS)
3758 rtx xop0 = XEXP (x, 0);
3759 rtx xop1 = XEXP (x, 1);
3761 if (CONSTANT_P (xop0))
3762 xop0 = force_reg (SImode, xop0);
3764 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
3765 xop1 = force_reg (SImode, xop1);
3767 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
3768 x = gen_rtx_MINUS (SImode, xop0, xop1);
3771 if (flag_pic)
3773 /* We need to find and carefully transform any SYMBOL and LABEL
3774 references; so go back to the original address expression. */
3775 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
3777 if (new_x != orig_x)
3778 x = new_x;
3781 return x;
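/* Hand-worked example of the offset splitting above (illustrative,
   not taken from the original sources): legitimizing
   (plus (reg) (const_int 940)) for DImode gives low_n = 12, which is
   greater than 4, so the address is rewritten as base = reg + 944
   with a final offset of -4; 944 is a valid ARM immediate, so the
   add needs only a single insn.  */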
3785 /* Try machine-dependent ways of modifying an illegitimate Thumb address
3786 to be legitimate. If we find one, return the new, valid address. */
3788 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
3790 if (GET_CODE (x) == PLUS
3791 && GET_CODE (XEXP (x, 1)) == CONST_INT
3792 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
3793 || INTVAL (XEXP (x, 1)) < 0))
3795 rtx xop0 = XEXP (x, 0);
3796 rtx xop1 = XEXP (x, 1);
3797 HOST_WIDE_INT offset = INTVAL (xop1);
3799 /* Try and fold the offset into a biasing of the base register and
3800 then offsetting that. Don't do this when optimizing for space
3801 since it can cause too many CSEs. */
3802 if (optimize_size && offset >= 0
3803 && offset < 256 + 31 * GET_MODE_SIZE (mode))
3805 HOST_WIDE_INT delta;
3807 if (offset >= 256)
3808 delta = offset - (256 - GET_MODE_SIZE (mode));
3809 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
3810 delta = 31 * GET_MODE_SIZE (mode);
3811 else
3812 delta = offset & (~31 * GET_MODE_SIZE (mode));
3814 xop0 = force_operand (plus_constant (xop0, offset - delta),
3815 NULL_RTX);
3816 x = plus_constant (xop0, delta);
3818 else if (offset < 0 && offset > -256)
3819 /* Small negative offsets are best done with a subtract before the
3820 	   dereference, since forcing these into a register normally takes two
3821 instructions. */
3822 x = force_operand (x, NULL_RTX);
3823 else
3825 /* For the remaining cases, force the constant into a register. */
3826 xop1 = force_reg (SImode, xop1);
3827 x = gen_rtx_PLUS (SImode, xop0, xop1);
3830 else if (GET_CODE (x) == PLUS
3831 && s_register_operand (XEXP (x, 1), SImode)
3832 && !s_register_operand (XEXP (x, 0), SImode))
3834 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
3836 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
3839 if (flag_pic)
3841 /* We need to find and carefully transform any SYMBOL and LABEL
3842 references; so go back to the original address expression. */
3843 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
3845 if (new_x != orig_x)
3846 x = new_x;
3849 return x;
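/* Illustrative example of the folding above: for an SImode access at
   (reg + 260) when optimizing for size, offset >= 256 so delta
   becomes 260 - (256 - 4) = 8; the base is biased to reg + 252 and
   the access itself uses the small in-range offset #8.  */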
3854 #define REG_OR_SUBREG_REG(X) \
3855 (GET_CODE (X) == REG \
3856 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
3858 #define REG_OR_SUBREG_RTX(X) \
3859 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
3861 #ifndef COSTS_N_INSNS
3862 #define COSTS_N_INSNS(N) ((N) * 4 - 2)
3863 #endif
3864 static inline int
3865 thumb_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
3867 enum machine_mode mode = GET_MODE (x);
3869 switch (code)
3871 case ASHIFT:
3872 case ASHIFTRT:
3873 case LSHIFTRT:
3874 case ROTATERT:
3875 case PLUS:
3876 case MINUS:
3877 case COMPARE:
3878 case NEG:
3879 case NOT:
3880 return COSTS_N_INSNS (1);
3882 case MULT:
3883 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
3885 int cycles = 0;
3886 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
3888 while (i)
3890 i >>= 2;
3891 cycles++;
3893 return COSTS_N_INSNS (2) + cycles;
3895 return COSTS_N_INSNS (1) + 16;
3897 case SET:
3898 return (COSTS_N_INSNS (1)
3899 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
3900 		     + (GET_CODE (SET_DEST (x)) == MEM)));
3902 case CONST_INT:
3903 if (outer == SET)
3905 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
3906 return 0;
3907 if (thumb_shiftable_const (INTVAL (x)))
3908 return COSTS_N_INSNS (2);
3909 return COSTS_N_INSNS (3);
3911 else if ((outer == PLUS || outer == COMPARE)
3912 && INTVAL (x) < 256 && INTVAL (x) > -256)
3913 return 0;
3914 else if (outer == AND
3915 && INTVAL (x) < 256 && INTVAL (x) >= -256)
3916 return COSTS_N_INSNS (1);
3917 else if (outer == ASHIFT || outer == ASHIFTRT
3918 || outer == LSHIFTRT)
3919 return 0;
3920 return COSTS_N_INSNS (2);
3922 case CONST:
3923 case CONST_DOUBLE:
3924 case LABEL_REF:
3925 case SYMBOL_REF:
3926 return COSTS_N_INSNS (3);
3928 case UDIV:
3929 case UMOD:
3930 case DIV:
3931 case MOD:
3932 return 100;
3934 case TRUNCATE:
3935 return 99;
3937 case AND:
3938 case XOR:
3939 case IOR:
3940 /* XXX guess. */
3941 return 8;
3943 case MEM:
3944 /* XXX another guess. */
3945 /* Memory costs quite a lot for the first word, but subsequent words
3946 load at the equivalent of a single insn each. */
3947 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
3948 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
3949 ? 4 : 0));
3951 case IF_THEN_ELSE:
3952 /* XXX a guess. */
3953 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
3954 return 14;
3955 return 2;
3957 case ZERO_EXTEND:
3958 /* XXX still guessing. */
3959 switch (GET_MODE (XEXP (x, 0)))
3961 case QImode:
3962 return (1 + (mode == DImode ? 4 : 0)
3963 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
3965 case HImode:
3966 return (4 + (mode == DImode ? 4 : 0)
3967 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
3969 case SImode:
3970 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
3972 default:
3973 return 99;
3976 default:
3977 return 99;
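/* Illustrative example of the MULT cost loop above: multiplying by
   the constant 100 (seven significant bits) shifts I right by two
   bits per iteration, so the loop runs four times and the reported
   cost is COSTS_N_INSNS (2) + 4.  */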
3982 /* Worker routine for arm_rtx_costs. */
3983 static inline int
3984 arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer)
3986 enum machine_mode mode = GET_MODE (x);
3987 enum rtx_code subcode;
3988 int extra_cost;
3990 switch (code)
3992 case MEM:
3993 /* Memory costs quite a lot for the first word, but subsequent words
3994 load at the equivalent of a single insn each. */
3995 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
3996 + (GET_CODE (x) == SYMBOL_REF
3997 && CONSTANT_POOL_ADDRESS_P (x) ? 4 : 0));
3999 case DIV:
4000 case MOD:
4001 case UDIV:
4002 case UMOD:
4003 return optimize_size ? COSTS_N_INSNS (2) : 100;
4005 case ROTATE:
4006 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
4007 return 4;
4008 /* Fall through */
4009 case ROTATERT:
4010 if (mode != SImode)
4011 return 8;
4012 /* Fall through */
4013 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
4014 if (mode == DImode)
4015 return (8 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : 8)
4016 + ((GET_CODE (XEXP (x, 0)) == REG
4017 || (GET_CODE (XEXP (x, 0)) == SUBREG
4018 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
4019 ? 0 : 8));
4020 return (1 + ((GET_CODE (XEXP (x, 0)) == REG
4021 || (GET_CODE (XEXP (x, 0)) == SUBREG
4022 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
4023 ? 0 : 4)
4024 + ((GET_CODE (XEXP (x, 1)) == REG
4025 || (GET_CODE (XEXP (x, 1)) == SUBREG
4026 && GET_CODE (SUBREG_REG (XEXP (x, 1))) == REG)
4027 || (GET_CODE (XEXP (x, 1)) == CONST_INT))
4028 ? 0 : 4));
4030 case MINUS:
4031 if (mode == DImode)
4032 return (4 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 8)
4033 + ((REG_OR_SUBREG_REG (XEXP (x, 0))
4034 || (GET_CODE (XEXP (x, 0)) == CONST_INT
4035 && const_ok_for_arm (INTVAL (XEXP (x, 0)))))
4036 ? 0 : 8));
4038 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4039 return (2 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4040 || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
4041 && arm_const_double_rtx (XEXP (x, 1))))
4042 ? 0 : 8)
4043 + ((REG_OR_SUBREG_REG (XEXP (x, 0))
4044 || (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
4045 && arm_const_double_rtx (XEXP (x, 0))))
4046 ? 0 : 8));
4048 if (((GET_CODE (XEXP (x, 0)) == CONST_INT
4049 && const_ok_for_arm (INTVAL (XEXP (x, 0)))
4050 && REG_OR_SUBREG_REG (XEXP (x, 1))))
4051 || (((subcode = GET_CODE (XEXP (x, 1))) == ASHIFT
4052 || subcode == ASHIFTRT || subcode == LSHIFTRT
4053 || subcode == ROTATE || subcode == ROTATERT
4054 || (subcode == MULT
4055 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
4056 && ((INTVAL (XEXP (XEXP (x, 1), 1)) &
4057 (INTVAL (XEXP (XEXP (x, 1), 1)) - 1)) == 0)))
4058 && REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 0))
4059 && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 1))
4060 || GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
4061 && REG_OR_SUBREG_REG (XEXP (x, 0))))
4062 return 1;
4063 /* Fall through */
4065 case PLUS:
4066 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4067 return (2 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
4068 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4069 || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
4070 && arm_const_double_rtx (XEXP (x, 1))))
4071 ? 0 : 8));
4073 /* Fall through */
4074 case AND: case XOR: case IOR:
4075 extra_cost = 0;
4077       /* Normally the frame registers will be split into reg+const during
4078 reload, so it is a bad idea to combine them with other instructions,
4079 since then they might not be moved outside of loops. As a compromise
4080 we allow integration with ops that have a constant as their second
4081 operand. */
4082 if ((REG_OR_SUBREG_REG (XEXP (x, 0))
4083 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
4084 && GET_CODE (XEXP (x, 1)) != CONST_INT)
4085 	  || (REG_OR_SUBREG_REG (XEXP (x, 1))
4086 	      && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 1)))))
4087 extra_cost = 4;
4089 if (mode == DImode)
4090 return (4 + extra_cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
4091 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4092 || (GET_CODE (XEXP (x, 1)) == CONST_INT
4093 && const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
4094 ? 0 : 8));
4096 if (REG_OR_SUBREG_REG (XEXP (x, 0)))
4097 return (1 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : extra_cost)
4098 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4099 || (GET_CODE (XEXP (x, 1)) == CONST_INT
4100 && const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
4101 ? 0 : 4));
4103 else if (REG_OR_SUBREG_REG (XEXP (x, 1)))
4104 return (1 + extra_cost
4105 + ((((subcode = GET_CODE (XEXP (x, 0))) == ASHIFT
4106 || subcode == LSHIFTRT || subcode == ASHIFTRT
4107 || subcode == ROTATE || subcode == ROTATERT
4108 || (subcode == MULT
4109 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4110 && ((INTVAL (XEXP (XEXP (x, 0), 1)) &
4111 (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0)))
4112 && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 0)))
4113 && ((REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 1)))
4114 || GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT))
4115 ? 0 : 4));
4117 return 8;
4119 case MULT:
4120 /* This should have been handled by the CPU specific routines. */
4121 gcc_unreachable ();
4123 case TRUNCATE:
4124 if (arm_arch3m && mode == SImode
4125 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
4126 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
4127 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
4128 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
4129 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
4130 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
4131 return 8;
4132 return 99;
4134 case NEG:
4135 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4136 return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 6);
4137 /* Fall through */
4138 case NOT:
4139 if (mode == DImode)
4140 return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
4142 return 1 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
4144 case IF_THEN_ELSE:
4145 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
4146 return 14;
4147 return 2;
4149 case COMPARE:
4150 return 1;
4152 case ABS:
4153 return 4 + (mode == DImode ? 4 : 0);
4155 case SIGN_EXTEND:
4156 if (GET_MODE (XEXP (x, 0)) == QImode)
4157 return (4 + (mode == DImode ? 4 : 0)
4158 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4159 /* Fall through */
4160 case ZERO_EXTEND:
4161 switch (GET_MODE (XEXP (x, 0)))
4163 case QImode:
4164 return (1 + (mode == DImode ? 4 : 0)
4165 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4167 case HImode:
4168 return (4 + (mode == DImode ? 4 : 0)
4169 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4171 case SImode:
4172 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4174 case V8QImode:
4175 case V4HImode:
4176 case V2SImode:
4177 case V4QImode:
4178 case V2HImode:
4179 return 1;
4181 default:
4182 gcc_unreachable ();
4184 gcc_unreachable ();
4186 case CONST_INT:
4187 if (const_ok_for_arm (INTVAL (x)))
4188 return outer == SET ? 2 : -1;
4189 else if (outer == AND
4190 && const_ok_for_arm (~INTVAL (x)))
4191 return -1;
4192 else if ((outer == COMPARE
4193 || outer == PLUS || outer == MINUS)
4194 && const_ok_for_arm (-INTVAL (x)))
4195 return -1;
4196 else
4197 return 5;
4199 case CONST:
4200 case LABEL_REF:
4201 case SYMBOL_REF:
4202 return 6;
4204 case CONST_DOUBLE:
4205 if (arm_const_double_rtx (x))
4206 return outer == SET ? 2 : -1;
4207 else if ((outer == COMPARE || outer == PLUS)
4208 && neg_const_double_rtx_ok_for_fpa (x))
4209 return -1;
4210 return 7;
4212 default:
4213 return 99;
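/* Note on the CONST_INT costs above (illustrative): the negative
   return values mean "cheaper than free", i.e. the constant can be
   folded into the surrounding operation.  For instance, PLUS with
   the constant -1 is costed at -1 because const_ok_for_arm (1)
   holds and the addition can be emitted as a single SUB.  */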
4217 /* RTX costs when optimizing for size. */
4218 static bool
4219 arm_size_rtx_costs (rtx x, int code, int outer_code, int *total)
4221 enum machine_mode mode = GET_MODE (x);
4223 if (TARGET_THUMB)
4225 /* XXX TBD. For now, use the standard costs. */
4226 *total = thumb_rtx_costs (x, code, outer_code);
4227 return true;
4230 switch (code)
4232 case MEM:
4233 /* A memory access costs 1 insn if the mode is small, or the address is
4234 a single register, otherwise it costs one insn per word. */
4235 if (REG_P (XEXP (x, 0)))
4236 *total = COSTS_N_INSNS (1);
4237 else
4238 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
4239 return true;
4241 case DIV:
4242 case MOD:
4243 case UDIV:
4244 case UMOD:
4245 /* Needs a libcall, so it costs about this. */
4246 *total = COSTS_N_INSNS (2);
4247 return false;
4249 case ROTATE:
4250 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
4252 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code);
4253 return true;
4255 /* Fall through */
4256 case ROTATERT:
4257 case ASHIFT:
4258 case LSHIFTRT:
4259 case ASHIFTRT:
4260 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
4262 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code);
4263 return true;
4265 else if (mode == SImode)
4267 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code);
4268 /* Slightly disparage register shifts, but not by much. */
4269 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
4270 *total += 1 + rtx_cost (XEXP (x, 1), code);
4271 return true;
4274 /* Needs a libcall. */
4275 *total = COSTS_N_INSNS (2);
4276 return false;
4278 case MINUS:
4279 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
4281 *total = COSTS_N_INSNS (1);
4282 return false;
4285 if (mode == SImode)
4287 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
4288 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
4290 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
4291 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
4292 || subcode1 == ROTATE || subcode1 == ROTATERT
4293 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
4294 || subcode1 == ASHIFTRT)
4296 /* It's just the cost of the two operands. */
4297 *total = 0;
4298 return false;
4301 *total = COSTS_N_INSNS (1);
4302 return false;
4305 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
4306 return false;
4308 case PLUS:
4309 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
4311 *total = COSTS_N_INSNS (1);
4312 return false;
4315 /* Fall through */
4316 case AND: case XOR: case IOR:
4317 if (mode == SImode)
4319 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
4321 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
4322 || subcode == LSHIFTRT || subcode == ASHIFTRT
4323 || (code == AND && subcode == NOT))
4325 /* It's just the cost of the two operands. */
4326 *total = 0;
4327 return false;
4331 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
4332 return false;
4334 case MULT:
4335 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
4336 return false;
4338 case NEG:
4339 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
4340 *total = COSTS_N_INSNS (1);
4341 /* Fall through */
4342 case NOT:
4343 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
4345 return false;
4347 case IF_THEN_ELSE:
4348 *total = 0;
4349 return false;
4351 case COMPARE:
4352 if (cc_register (XEXP (x, 0), VOIDmode))
4353 * total = 0;
4354 else
4355 *total = COSTS_N_INSNS (1);
4356 return false;
4358 case ABS:
4359 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
4360 *total = COSTS_N_INSNS (1);
4361 else
4362 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
4363 return false;
4365 case SIGN_EXTEND:
4366 *total = 0;
4367 if (GET_MODE_SIZE (GET_MODE (XEXP (x, 0))) < 4)
4369 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
4370 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
4372 if (mode == DImode)
4373 *total += COSTS_N_INSNS (1);
4374 return false;
4376 case ZERO_EXTEND:
4377 *total = 0;
4378 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
4380 switch (GET_MODE (XEXP (x, 0)))
4382 case QImode:
4383 *total += COSTS_N_INSNS (1);
4384 break;
4386 case HImode:
4387 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
4389 case SImode:
4390 break;
4392 default:
4393 *total += COSTS_N_INSNS (2);
4397 if (mode == DImode)
4398 *total += COSTS_N_INSNS (1);
4400 return false;
4402 case CONST_INT:
4403 if (const_ok_for_arm (INTVAL (x)))
4404 *total = COSTS_N_INSNS (outer_code == SET ? 1 : 0);
4405 else if (const_ok_for_arm (~INTVAL (x)))
4406 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
4407 else if (const_ok_for_arm (-INTVAL (x)))
4409 if (outer_code == COMPARE || outer_code == PLUS
4410 || outer_code == MINUS)
4411 *total = 0;
4412 else
4413 *total = COSTS_N_INSNS (1);
4415 else
4416 *total = COSTS_N_INSNS (2);
4417 return true;
4419 case CONST:
4420 case LABEL_REF:
4421 case SYMBOL_REF:
4422 *total = COSTS_N_INSNS (2);
4423 return true;
4425 case CONST_DOUBLE:
4426 *total = COSTS_N_INSNS (4);
4427 return true;
4429 default:
4430 if (mode != VOIDmode)
4431 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
4432 else
4433 	*total = COSTS_N_INSNS (4); /* Who knows?  */
4434 return false;
4438 /* RTX costs for cores with a slow MUL implementation. */
4440 static bool
4441 arm_slowmul_rtx_costs (rtx x, int code, int outer_code, int *total)
4443 enum machine_mode mode = GET_MODE (x);
4445 if (TARGET_THUMB)
4447 *total = thumb_rtx_costs (x, code, outer_code);
4448 return true;
4451 switch (code)
4453 case MULT:
4454 if (GET_MODE_CLASS (mode) == MODE_FLOAT
4455 || mode == DImode)
4457 *total = 30;
4458 return true;
4461 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4463 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
4464 & (unsigned HOST_WIDE_INT) 0xffffffff);
4465 int cost, const_ok = const_ok_for_arm (i);
4466 int j, booth_unit_size;
4468 /* Tune as appropriate. */
4469 cost = const_ok ? 4 : 8;
4470 booth_unit_size = 2;
4471 for (j = 0; i && j < 32; j += booth_unit_size)
4473 i >>= booth_unit_size;
4474 cost += 2;
4477 *total = cost;
4478 return true;
4481 *total = 30 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
4482 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
4483 return true;
4485 default:
4486 *total = arm_rtx_costs_1 (x, code, outer_code);
4487 return true;
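/* Illustrative cost calculation for the slow-multiply loop above:
   for a multiply by 85 (0x55, a valid ARM immediate) the starting
   cost is 4 and the two-bit Booth steps see 85 -> 21 -> 5 -> 1 -> 0,
   i.e. four iterations, giving a total of 4 + 4 * 2 = 12.  */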
4492 /* RTX cost for cores with a fast multiply unit (M variants). */
4494 static bool
4495 arm_fastmul_rtx_costs (rtx x, int code, int outer_code, int *total)
4497 enum machine_mode mode = GET_MODE (x);
4499 if (TARGET_THUMB)
4501 *total = thumb_rtx_costs (x, code, outer_code);
4502 return true;
4505 switch (code)
4507 case MULT:
4508 /* There is no point basing this on the tuning, since it is always the
4509 fast variant if it exists at all. */
4510 if (mode == DImode
4511 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
4512 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4513 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
4515 *total = 8;
4516 return true;
4520 if (GET_MODE_CLASS (mode) == MODE_FLOAT
4521 || mode == DImode)
4523 *total = 30;
4524 return true;
4527 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4529 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
4530 & (unsigned HOST_WIDE_INT) 0xffffffff);
4531 int cost, const_ok = const_ok_for_arm (i);
4532 int j, booth_unit_size;
4534 /* Tune as appropriate. */
4535 cost = const_ok ? 4 : 8;
4536 booth_unit_size = 8;
4537 for (j = 0; i && j < 32; j += booth_unit_size)
4539 i >>= booth_unit_size;
4540 cost += 2;
4543 *total = cost;
4544 return true;
4547 *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
4548 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
4549 return true;
4551 default:
4552 *total = arm_rtx_costs_1 (x, code, outer_code);
4553 return true;
4558 /* RTX cost for XScale CPUs. */
4560 static bool
4561 arm_xscale_rtx_costs (rtx x, int code, int outer_code, int *total)
4563 enum machine_mode mode = GET_MODE (x);
4565 if (TARGET_THUMB)
4567 *total = thumb_rtx_costs (x, code, outer_code);
4568 return true;
4571 switch (code)
4573 case MULT:
4574 /* There is no point basing this on the tuning, since it is always the
4575 fast variant if it exists at all. */
4576 if (mode == DImode
4577 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
4578 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4579 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
4581 *total = 8;
4582 return true;
4586 if (GET_MODE_CLASS (mode) == MODE_FLOAT
4587 || mode == DImode)
4589 *total = 30;
4590 return true;
4593 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4595 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
4596 & (unsigned HOST_WIDE_INT) 0xffffffff);
4597 int cost, const_ok = const_ok_for_arm (i);
4598 unsigned HOST_WIDE_INT masked_const;
4600 /* The cost will be related to two insns.
4601 First a load of the constant (MOV or LDR), then a multiply. */
4602 cost = 2;
4603 if (! const_ok)
4604 cost += 1; /* LDR is probably more expensive because
4605 of longer result latency. */
4606 masked_const = i & 0xffff8000;
4607 if (masked_const != 0 && masked_const != 0xffff8000)
4609 masked_const = i & 0xf8000000;
4610 if (masked_const == 0 || masked_const == 0xf8000000)
4611 cost += 1;
4612 else
4613 cost += 2;
4615 *total = cost;
4616 return true;
4619 *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
4620 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
4621 return true;
4623 case COMPARE:
4624 /* A COMPARE of a MULT is slow on XScale; the muls instruction
4625 will stall until the multiplication is complete. */
4626 if (GET_CODE (XEXP (x, 0)) == MULT)
4627 *total = 4 + rtx_cost (XEXP (x, 0), code);
4628 else
4629 *total = arm_rtx_costs_1 (x, code, outer_code);
4630 return true;
4632 default:
4633 *total = arm_rtx_costs_1 (x, code, outer_code);
4634 return true;
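/* Illustrative XScale multiply costing: for a constant such as
   0x12345678 (not a valid immediate) the base cost of 2 is raised by
   1 for the constant-pool load; the masked tests against 0xffff8000
   and 0xf8000000 both fail to reduce the constant, so a further 2 is
   added, for a total of 5.  */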
4639 /* RTX costs for 9e (and later) cores. */
4641 static bool
4642 arm_9e_rtx_costs (rtx x, int code, int outer_code, int *total)
4644 enum machine_mode mode = GET_MODE (x);
4645 int nonreg_cost;
4646 int cost;
4648 if (TARGET_THUMB)
4650 switch (code)
4652 case MULT:
4653 *total = COSTS_N_INSNS (3);
4654 return true;
4656 default:
4657 *total = thumb_rtx_costs (x, code, outer_code);
4658 return true;
4662 switch (code)
4664 case MULT:
4665 /* There is no point basing this on the tuning, since it is always the
4666 fast variant if it exists at all. */
4667 if (mode == DImode
4668 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
4669 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4670 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
4672 *total = 3;
4673 return true;
4677 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4679 *total = 30;
4680 return true;
4682 if (mode == DImode)
4684 cost = 7;
4685 nonreg_cost = 8;
4687 else
4689 cost = 2;
4690 nonreg_cost = 4;
4694 *total = cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : nonreg_cost)
4695 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : nonreg_cost);
4696 return true;
4698 default:
4699 *total = arm_rtx_costs_1 (x, code, outer_code);
4700 return true;
4703 /* All address computations that can be done are free, but rtx cost returns
4704 the same for practically all of them. So we weight the different types
4705 of address here in the order (most pref first):
4706 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
4707 static inline int
4708 arm_arm_address_cost (rtx x)
4710 enum rtx_code c = GET_CODE (x);
4712 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
4713 return 0;
4714 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
4715 return 10;
4717 if (c == PLUS || c == MINUS)
4719 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
4720 return 2;
4722 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
4723 return 3;
4725 return 4;
4728 return 6;
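/* Examples of the weights above (illustrative): a post-increment
   address such as [r3], #4 costs 0, a register-plus-shifted-register
   address such as [r1, r2, lsl #2] costs 3, a simple [r1, #8] or
   [r1, r2] costs 4, a bare register costs 6, and a constant-pool
   SYMBOL_REF costs 10.  */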
4731 static inline int
4732 arm_thumb_address_cost (rtx x)
4734 enum rtx_code c = GET_CODE (x);
4736 if (c == REG)
4737 return 1;
4738 if (c == PLUS
4739 && GET_CODE (XEXP (x, 0)) == REG
4740 && GET_CODE (XEXP (x, 1)) == CONST_INT)
4741 return 1;
4743 return 2;
4746 static int
4747 arm_address_cost (rtx x)
4749 return TARGET_ARM ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
4752 static int
4753 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
4755 rtx i_pat, d_pat;
4757 /* Some true dependencies can have a higher cost depending
4758 on precisely how certain input operands are used. */
4759 if (arm_tune_xscale
4760 && REG_NOTE_KIND (link) == 0
4761 && recog_memoized (insn) >= 0
4762 && recog_memoized (dep) >= 0)
4764 int shift_opnum = get_attr_shift (insn);
4765 enum attr_type attr_type = get_attr_type (dep);
4767 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
4768 operand for INSN. If we have a shifted input operand and the
4769 instruction we depend on is another ALU instruction, then we may
4770 have to account for an additional stall. */
4771 if (shift_opnum != 0
4772 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
4774 rtx shifted_operand;
4775 int opno;
4777 /* Get the shifted operand. */
4778 extract_insn (insn);
4779 shifted_operand = recog_data.operand[shift_opnum];
4781 /* Iterate over all the operands in DEP. If we write an operand
4782 	   that overlaps with SHIFTED_OPERAND, then we have to increase the
4783 cost of this dependency. */
4784 extract_insn (dep);
4785 preprocess_constraints ();
4786 for (opno = 0; opno < recog_data.n_operands; opno++)
4788 /* We can ignore strict inputs. */
4789 if (recog_data.operand_type[opno] == OP_IN)
4790 continue;
4792 if (reg_overlap_mentioned_p (recog_data.operand[opno],
4793 shifted_operand))
4794 return 2;
4799 /* XXX This is not strictly true for the FPA. */
4800 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
4801 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
4802 return 0;
4804 /* Call insns don't incur a stall, even if they follow a load. */
4805 if (REG_NOTE_KIND (link) == 0
4806 && GET_CODE (insn) == CALL_INSN)
4807 return 1;
4809 if ((i_pat = single_set (insn)) != NULL
4810 && GET_CODE (SET_SRC (i_pat)) == MEM
4811 && (d_pat = single_set (dep)) != NULL
4812 && GET_CODE (SET_DEST (d_pat)) == MEM)
4814 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
4815       /* This is a load after a store; there is no conflict if the load reads
4816 from a cached area. Assume that loads from the stack, and from the
4817 constant pool are cached, and that others will miss. This is a
4818 hack. */
4820 if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem))
4821 || reg_mentioned_p (stack_pointer_rtx, src_mem)
4822 || reg_mentioned_p (frame_pointer_rtx, src_mem)
4823 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
4824 return 1;
4827 return cost;
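/* Illustrative consequence of the store/load rule above: a store to
   [sp, #8] followed by a load from [sp, #16] is given a cost of 1,
   because stack-relative loads are assumed to hit the cache, whereas
   a load through an arbitrary pointer register keeps the original
   cost.  */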
4830 static int fp_consts_inited = 0;
4832 /* Only zero is valid for VFP. Other values are also valid for FPA. */
4833 static const char * const strings_fp[8] =
4835 "0", "1", "2", "3",
4836 "4", "5", "0.5", "10"
4839 static REAL_VALUE_TYPE values_fp[8];
4841 static void
4842 init_fp_table (void)
4844 int i;
4845 REAL_VALUE_TYPE r;
4847 if (TARGET_VFP)
4848 fp_consts_inited = 1;
4849 else
4850 fp_consts_inited = 8;
4852 for (i = 0; i < fp_consts_inited; i++)
4854 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
4855 values_fp[i] = r;
4859 /* Return TRUE if rtx X is a valid immediate FP constant. */
4861 arm_const_double_rtx (rtx x)
4863 REAL_VALUE_TYPE r;
4864 int i;
4866 if (!fp_consts_inited)
4867 init_fp_table ();
4869 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4870 if (REAL_VALUE_MINUS_ZERO (r))
4871 return 0;
4873 for (i = 0; i < fp_consts_inited; i++)
4874 if (REAL_VALUES_EQUAL (r, values_fp[i]))
4875 return 1;
4877 return 0;
4880 /* Return TRUE if rtx X is a valid immediate FPA constant. */
4882 neg_const_double_rtx_ok_for_fpa (rtx x)
4884 REAL_VALUE_TYPE r;
4885 int i;
4887 if (!fp_consts_inited)
4888 init_fp_table ();
4890 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4891 r = REAL_VALUE_NEGATE (r);
4892 if (REAL_VALUE_MINUS_ZERO (r))
4893 return 0;
4895 for (i = 0; i < 8; i++)
4896 if (REAL_VALUES_EQUAL (r, values_fp[i]))
4897 return 1;
4899 return 0;
4902 /* Predicates for `match_operand' and `match_operator'. */
4904 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
4906 cirrus_memory_offset (rtx op)
4908 /* Reject eliminable registers. */
4909 if (! (reload_in_progress || reload_completed)
4910 && ( reg_mentioned_p (frame_pointer_rtx, op)
4911 || reg_mentioned_p (arg_pointer_rtx, op)
4912 || reg_mentioned_p (virtual_incoming_args_rtx, op)
4913 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
4914 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
4915 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
4916 return 0;
4918 if (GET_CODE (op) == MEM)
4920 rtx ind;
4922 ind = XEXP (op, 0);
4924 /* Match: (mem (reg)). */
4925 if (GET_CODE (ind) == REG)
4926 return 1;
4928 /* Match:
4929 (mem (plus (reg)
4930 (const))). */
4931 if (GET_CODE (ind) == PLUS
4932 && GET_CODE (XEXP (ind, 0)) == REG
4933 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
4934 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
4935 return 1;
4938 return 0;
4941 /* Return TRUE if OP is a valid VFP memory address pattern.
4942    WB is true if writeback address modes are allowed.  */
4945 arm_coproc_mem_operand (rtx op, bool wb)
4947 rtx ind;
4949 /* Reject eliminable registers. */
4950 if (! (reload_in_progress || reload_completed)
4951 && ( reg_mentioned_p (frame_pointer_rtx, op)
4952 || reg_mentioned_p (arg_pointer_rtx, op)
4953 || reg_mentioned_p (virtual_incoming_args_rtx, op)
4954 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
4955 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
4956 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
4957 return FALSE;
4959 /* Constants are converted into offsets from labels. */
4960 if (GET_CODE (op) != MEM)
4961 return FALSE;
4963 ind = XEXP (op, 0);
4965 if (reload_completed
4966 && (GET_CODE (ind) == LABEL_REF
4967 || (GET_CODE (ind) == CONST
4968 && GET_CODE (XEXP (ind, 0)) == PLUS
4969 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
4970 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
4971 return TRUE;
4973 /* Match: (mem (reg)). */
4974 if (GET_CODE (ind) == REG)
4975 return arm_address_register_rtx_p (ind, 0);
4977   /* Autoincrement addressing modes.  */
4978 if (wb
4979 && (GET_CODE (ind) == PRE_INC
4980 || GET_CODE (ind) == POST_INC
4981 || GET_CODE (ind) == PRE_DEC
4982 || GET_CODE (ind) == POST_DEC))
4983 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
4985 if (wb
4986 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
4987 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
4988 && GET_CODE (XEXP (ind, 1)) == PLUS
4989 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
4990 ind = XEXP (ind, 1);
4992 /* Match:
4993 (plus (reg)
4994 (const)). */
4995 if (GET_CODE (ind) == PLUS
4996 && GET_CODE (XEXP (ind, 0)) == REG
4997 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
4998 && GET_CODE (XEXP (ind, 1)) == CONST_INT
4999 && INTVAL (XEXP (ind, 1)) > -1024
5000 && INTVAL (XEXP (ind, 1)) < 1024
5001 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
5002 return TRUE;
5004 return FALSE;
5007 /* Return true if X is a register that will be eliminated later on. */
5009 arm_eliminable_register (rtx x)
5011 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
5012 || REGNO (x) == ARG_POINTER_REGNUM
5013 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
5014 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
5017 /* Return GENERAL_REGS if a scratch register required to reload x to/from
5018 VFP registers. Otherwise return NO_REGS. */
5020 enum reg_class
5021 vfp_secondary_reload_class (enum machine_mode mode, rtx x)
5023 if (arm_coproc_mem_operand (x, FALSE) || s_register_operand (x, mode))
5024 return NO_REGS;
5026 return GENERAL_REGS;
5029 /* Values which must be returned in the most-significant end of the return
5030 register. */
5032 static bool
5033 arm_return_in_msb (tree valtype)
5035 return (TARGET_AAPCS_BASED
5036 && BYTES_BIG_ENDIAN
5037 && (AGGREGATE_TYPE_P (valtype)
5038 || TREE_CODE (valtype) == COMPLEX_TYPE));
5041 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
5042    Used by the Cirrus Maverick code, which has to work around
5043 a hardware bug triggered by such instructions. */
5044 static bool
5045 arm_memory_load_p (rtx insn)
5047   rtx body, lhs, rhs;
5049 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
5050 return false;
5052 body = PATTERN (insn);
5054 if (GET_CODE (body) != SET)
5055 return false;
5057 lhs = XEXP (body, 0);
5058 rhs = XEXP (body, 1);
5060 lhs = REG_OR_SUBREG_RTX (lhs);
5062 /* If the destination is not a general purpose
5063 register we do not have to worry. */
5064 if (GET_CODE (lhs) != REG
5065 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
5066 return false;
5068 /* As well as loads from memory we also have to react
5069 to loads of invalid constants which will be turned
5070 into loads from the minipool. */
5071 return (GET_CODE (rhs) == MEM
5072 || GET_CODE (rhs) == SYMBOL_REF
5073 || note_invalid_constants (insn, -1, false));
5076 /* Return TRUE if INSN is a Cirrus instruction. */
5077 static bool
5078 arm_cirrus_insn_p (rtx insn)
5080 enum attr_cirrus attr;
5082 /* get_attr cannot accept USE or CLOBBER. */
5083 if (!insn
5084 || GET_CODE (insn) != INSN
5085 || GET_CODE (PATTERN (insn)) == USE
5086 || GET_CODE (PATTERN (insn)) == CLOBBER)
5087 return 0;
5089 attr = get_attr_cirrus (insn);
5091 return attr != CIRRUS_NOT;
5094 /* Cirrus reorg for invalid instruction combinations. */
5095 static void
5096 cirrus_reorg (rtx first)
5098 enum attr_cirrus attr;
5099 rtx body = PATTERN (first);
5100 rtx t;
5101 int nops;
5103 /* Any branch must be followed by 2 non Cirrus instructions. */
5104 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
5106 nops = 0;
5107 t = next_nonnote_insn (first);
5109 if (arm_cirrus_insn_p (t))
5110 ++ nops;
5112 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
5113 ++ nops;
5115 while (nops --)
5116 emit_insn_after (gen_nop (), first);
5118 return;
5121 /* (float (blah)) is in parallel with a clobber. */
5122 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
5123 body = XVECEXP (body, 0, 0);
5125 if (GET_CODE (body) == SET)
5127 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
5129 /* cfldrd, cfldr64, cfstrd, cfstr64 must
5130 be followed by a non Cirrus insn. */
5131 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
5133 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
5134 emit_insn_after (gen_nop (), first);
5136 return;
5138 else if (arm_memory_load_p (first))
5140 unsigned int arm_regno;
5142 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
5143 ldr/cfmv64hr combination where the Rd field is the same
5144 in both instructions must be split with a non Cirrus
5145 insn. Example:
5147 ldr r0, blah
5149 cfmvsr mvf0, r0. */
5151 /* Get Arm register number for ldr insn. */
5152 if (GET_CODE (lhs) == REG)
5153 arm_regno = REGNO (lhs);
5154 else
5156 gcc_assert (GET_CODE (rhs) == REG);
5157 arm_regno = REGNO (rhs);
5160 /* Next insn. */
5161 first = next_nonnote_insn (first);
5163 if (! arm_cirrus_insn_p (first))
5164 return;
5166 body = PATTERN (first);
5168 /* (float (blah)) is in parallel with a clobber. */
5169 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
5170 body = XVECEXP (body, 0, 0);
5172 if (GET_CODE (body) == FLOAT)
5173 body = XEXP (body, 0);
5175 if (get_attr_cirrus (first) == CIRRUS_MOVE
5176 && GET_CODE (XEXP (body, 1)) == REG
5177 && arm_regno == REGNO (XEXP (body, 1)))
5178 emit_insn_after (gen_nop (), first);
5180 return;
5184 /* get_attr cannot accept USE or CLOBBER. */
5185 if (!first
5186 || GET_CODE (first) != INSN
5187 || GET_CODE (PATTERN (first)) == USE
5188 || GET_CODE (PATTERN (first)) == CLOBBER)
5189 return;
5191 attr = get_attr_cirrus (first);
5193 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
5194 must be followed by a non-coprocessor instruction. */
5195 if (attr == CIRRUS_COMPARE)
5197 nops = 0;
5199 t = next_nonnote_insn (first);
5201 if (arm_cirrus_insn_p (t))
5202 ++ nops;
5204 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
5205 ++ nops;
5207 while (nops --)
5208 emit_insn_after (gen_nop (), first);
5210 return;
5214 /* Return TRUE if X references a SYMBOL_REF. */
5216 symbol_mentioned_p (rtx x)
5218 const char * fmt;
5219 int i;
5221 if (GET_CODE (x) == SYMBOL_REF)
5222 return 1;
5224 fmt = GET_RTX_FORMAT (GET_CODE (x));
5226 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5228 if (fmt[i] == 'E')
5230 int j;
5232 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5233 if (symbol_mentioned_p (XVECEXP (x, i, j)))
5234 return 1;
5236 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
5237 return 1;
5240 return 0;
5243 /* Return TRUE if X references a LABEL_REF. */
5245 label_mentioned_p (rtx x)
5247 const char * fmt;
5248 int i;
5250 if (GET_CODE (x) == LABEL_REF)
5251 return 1;
5253 fmt = GET_RTX_FORMAT (GET_CODE (x));
5254 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5256 if (fmt[i] == 'E')
5258 int j;
5260 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5261 if (label_mentioned_p (XVECEXP (x, i, j)))
5262 return 1;
5264 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
5265 return 1;
5268 return 0;
5271 enum rtx_code
5272 minmax_code (rtx x)
5274 enum rtx_code code = GET_CODE (x);
5276 switch (code)
5278 case SMAX:
5279 return GE;
5280 case SMIN:
5281 return LE;
5282 case UMIN:
5283 return LEU;
5284 case UMAX:
5285 return GEU;
5286 default:
5287 gcc_unreachable ();
5291 /* Return 1 if memory locations are adjacent. */
5293 adjacent_mem_locations (rtx a, rtx b)
5295 /* We don't guarantee to preserve the order of these memory refs. */
5296 if (volatile_refs_p (a) || volatile_refs_p (b))
5297 return 0;
5299 if ((GET_CODE (XEXP (a, 0)) == REG
5300 || (GET_CODE (XEXP (a, 0)) == PLUS
5301 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
5302 && (GET_CODE (XEXP (b, 0)) == REG
5303 || (GET_CODE (XEXP (b, 0)) == PLUS
5304 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
5306 HOST_WIDE_INT val0 = 0, val1 = 0;
5307 rtx reg0, reg1;
5308 int val_diff;
5310 if (GET_CODE (XEXP (a, 0)) == PLUS)
5312 reg0 = XEXP (XEXP (a, 0), 0);
5313 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
5315 else
5316 reg0 = XEXP (a, 0);
5318 if (GET_CODE (XEXP (b, 0)) == PLUS)
5320 reg1 = XEXP (XEXP (b, 0), 0);
5321 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
5323 else
5324 reg1 = XEXP (b, 0);
5326 /* Don't accept any offset that will require multiple
5327 instructions to handle, since this would cause the
5328 arith_adjacentmem pattern to output an overlong sequence. */
5329       if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
5330 return 0;
5332 /* Don't allow an eliminable register: register elimination can make
5333 the offset too large. */
5334 if (arm_eliminable_register (reg0))
5335 return 0;
5337 val_diff = val1 - val0;
5339 if (arm_ld_sched)
5341 /* If the target has load delay slots, then there's no benefit
5342 to using an ldm instruction unless the offset is zero and
5343 we are optimizing for size. */
5344 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
5345 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
5346 && (val_diff == 4 || val_diff == -4));
5349 return ((REGNO (reg0) == REGNO (reg1))
5350 && (val_diff == 4 || val_diff == -4));
5353 return 0;
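/* Illustrative example for the test above: MEMs at [r4] and [r4, #4]
   are considered adjacent (same base register, offsets differing by
   4), so on cores without load scheduling they may be combined into
   a single ldm/stm; [r4] and [r5, #4], or [r4] and [r4, #8], are
   not.  */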
5357 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
5358 HOST_WIDE_INT *load_offset)
5360 int unsorted_regs[4];
5361 HOST_WIDE_INT unsorted_offsets[4];
5362 int order[4];
5363 int base_reg = -1;
5364 int i;
5366 /* Can only handle 2, 3, or 4 insns at present,
5367 though could be easily extended if required. */
5368 gcc_assert (nops >= 2 && nops <= 4);
5370 /* Loop over the operands and check that the memory references are
5371 suitable (i.e. immediate offsets from the same base register). At
5372 the same time, extract the target register, and the memory
5373 offsets. */
5374 for (i = 0; i < nops; i++)
5376 rtx reg;
5377 rtx offset;
5379 /* Convert a subreg of a mem into the mem itself. */
5380 if (GET_CODE (operands[nops + i]) == SUBREG)
5381 operands[nops + i] = alter_subreg (operands + (nops + i));
5383 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
5385 /* Don't reorder volatile memory references; it doesn't seem worth
5386 looking for the case where the order is ok anyway. */
5387 if (MEM_VOLATILE_P (operands[nops + i]))
5388 return 0;
5390 offset = const0_rtx;
5392 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
5393 || (GET_CODE (reg) == SUBREG
5394 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
5395 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
5396 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
5397 == REG)
5398 || (GET_CODE (reg) == SUBREG
5399 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
5400 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
5401 == CONST_INT)))
5403 if (i == 0)
5405 base_reg = REGNO (reg);
5406 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
5407 ? REGNO (operands[i])
5408 : REGNO (SUBREG_REG (operands[i])));
5409 order[0] = 0;
5411 else
5413 if (base_reg != (int) REGNO (reg))
5414 /* Not addressed from the same base register. */
5415 return 0;
5417 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
5418 ? REGNO (operands[i])
5419 : REGNO (SUBREG_REG (operands[i])));
5420 if (unsorted_regs[i] < unsorted_regs[order[0]])
5421 order[0] = i;
5424 /* If it isn't an integer register, or if it overwrites the
5425 base register but isn't the last insn in the list, then
5426 we can't do this. */
5427 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14
5428 || (i != nops - 1 && unsorted_regs[i] == base_reg))
5429 return 0;
5431 unsorted_offsets[i] = INTVAL (offset);
5433 else
5434 /* Not a suitable memory address. */
5435 return 0;
5438 /* All the useful information has now been extracted from the
5439 operands into unsorted_regs and unsorted_offsets; additionally,
5440 order[0] has been set to the lowest numbered register in the
5441 list. Sort the registers into order, and check that the memory
5442 offsets are ascending and adjacent. */
5444 for (i = 1; i < nops; i++)
5446 int j;
5448 order[i] = order[i - 1];
5449 for (j = 0; j < nops; j++)
5450 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
5451 && (order[i] == order[i - 1]
5452 || unsorted_regs[j] < unsorted_regs[order[i]]))
5453 order[i] = j;
5455 	/* Have we found a suitable register?  If not, one must be used more
5456 than once. */
5457 if (order[i] == order[i - 1])
5458 return 0;
5460 /* Is the memory address adjacent and ascending? */
5461 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
5462 return 0;
5465 if (base)
5467 *base = base_reg;
5469 for (i = 0; i < nops; i++)
5470 regs[i] = unsorted_regs[order[i]];
5472 *load_offset = unsorted_offsets[order[0]];
5475 if (unsorted_offsets[order[0]] == 0)
5476 return 1; /* ldmia */
5478 if (unsorted_offsets[order[0]] == 4)
5479 return 2; /* ldmib */
5481 if (unsorted_offsets[order[nops - 1]] == 0)
5482 return 3; /* ldmda */
5484 if (unsorted_offsets[order[nops - 1]] == -4)
5485 return 4; /* ldmdb */
5487 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
5488 if the offset isn't small enough. The reason 2 ldrs are faster
5489 is because these ARMs are able to do more than one cache access
5490 in a single cycle. The ARM9 and StrongARM have Harvard caches,
5491 whilst the ARM8 has a double bandwidth cache. This means that
5492 these cores can do both an instruction fetch and a data fetch in
5493 a single cycle, so the trick of calculating the address into a
5494 scratch register (one of the result regs) and then doing a load
5495 multiple actually becomes slower (and no smaller in code size).
5496 That is the transformation
5498 ldr rd1, [rbase + offset]
5499 ldr rd2, [rbase + offset + 4]
5503 add rd1, rbase, offset
5504 ldmia rd1, {rd1, rd2}
5506 produces worse code -- '3 cycles + any stalls on rd2' instead of
5507 '2 cycles + any stalls on rd2'. On ARMs with only one cache
5508 access per cycle, the first sequence could never complete in less
5509 than 6 cycles, whereas the ldm sequence would only take 5 and
5510 would make better use of sequential accesses if not hitting the
5511 cache.
5513 We cheat here and test 'arm_ld_sched' which we currently know to
5514 only be true for the ARM8, ARM9 and StrongARM. If this ever
5515 changes, then the test below needs to be reworked. */
5516 if (nops == 2 && arm_ld_sched)
5517 return 0;
5519   /* Can't do it without setting up the offset; only do this if it takes
5520 no more than one insn. */
5521 return (const_ok_for_arm (unsorted_offsets[order[0]])
5522 || const_ok_for_arm (-unsorted_offsets[order[0]])) ? 5 : 0;
5525 const char *
5526 emit_ldm_seq (rtx *operands, int nops)
5528 int regs[4];
5529 int base_reg;
5530 HOST_WIDE_INT offset;
5531 char buf[100];
5532 int i;
5534 switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset))
5536 case 1:
5537 strcpy (buf, "ldm%?ia\t");
5538 break;
5540 case 2:
5541 strcpy (buf, "ldm%?ib\t");
5542 break;
5544 case 3:
5545 strcpy (buf, "ldm%?da\t");
5546 break;
5548 case 4:
5549 strcpy (buf, "ldm%?db\t");
5550 break;
5552 case 5:
5553 if (offset >= 0)
5554 sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
5555 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
5556 (long) offset);
5557 else
5558 sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
5559 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
5560 (long) -offset);
5561 output_asm_insn (buf, operands);
5562 base_reg = regs[0];
5563 strcpy (buf, "ldm%?ia\t");
5564 break;
5566 default:
5567 gcc_unreachable ();
5570 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
5571 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
5573 for (i = 1; i < nops; i++)
5574 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
5575 reg_names[regs[i]]);
5577 strcat (buf, "}\t%@ phole ldm");
5579 output_asm_insn (buf, operands);
5580 return "";
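/* For illustration, a two-register case with a zero offset (case 1
   above) ends up emitting something like
       ldmia	r4, {r0, r1}	@ phole ldm
   once the %? and %@ assembler escapes have been processed.  */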
5584 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
5585 HOST_WIDE_INT * load_offset)
5587 int unsorted_regs[4];
5588 HOST_WIDE_INT unsorted_offsets[4];
5589 int order[4];
5590 int base_reg = -1;
5591 int i;
5593 /* Can only handle 2, 3, or 4 insns at present, though could be easily
5594 extended if required. */
5595 gcc_assert (nops >= 2 && nops <= 4);
5597 /* Loop over the operands and check that the memory references are
5598 suitable (i.e. immediate offsets from the same base register). At
5599 the same time, extract the target register, and the memory
5600 offsets. */
5601 for (i = 0; i < nops; i++)
5603 rtx reg;
5604 rtx offset;
5606 /* Convert a subreg of a mem into the mem itself. */
5607 if (GET_CODE (operands[nops + i]) == SUBREG)
5608 operands[nops + i] = alter_subreg (operands + (nops + i));
5610 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
5612 /* Don't reorder volatile memory references; it doesn't seem worth
5613 looking for the case where the order is ok anyway. */
5614 if (MEM_VOLATILE_P (operands[nops + i]))
5615 return 0;
5617 offset = const0_rtx;
5619 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
5620 || (GET_CODE (reg) == SUBREG
5621 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
5622 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
5623 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
5624 == REG)
5625 || (GET_CODE (reg) == SUBREG
5626 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
5627 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
5628 == CONST_INT)))
5630 if (i == 0)
5632 base_reg = REGNO (reg);
5633 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
5634 ? REGNO (operands[i])
5635 : REGNO (SUBREG_REG (operands[i])));
5636 order[0] = 0;
5638 else
5640 if (base_reg != (int) REGNO (reg))
5641 /* Not addressed from the same base register. */
5642 return 0;
5644 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
5645 ? REGNO (operands[i])
5646 : REGNO (SUBREG_REG (operands[i])));
5647 if (unsorted_regs[i] < unsorted_regs[order[0]])
5648 order[0] = i;
5651 /* If it isn't an integer register, then we can't do this. */
5652 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
5653 return 0;
5655 unsorted_offsets[i] = INTVAL (offset);
5657 else
5658 /* Not a suitable memory address. */
5659 return 0;
5662 /* All the useful information has now been extracted from the
5663 operands into unsorted_regs and unsorted_offsets; additionally,
5664 order[0] has been set to the lowest numbered register in the
5665 list. Sort the registers into order, and check that the memory
5666 offsets are ascending and adjacent. */
5668 for (i = 1; i < nops; i++)
5670 int j;
5672 order[i] = order[i - 1];
5673 for (j = 0; j < nops; j++)
5674 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
5675 && (order[i] == order[i - 1]
5676 || unsorted_regs[j] < unsorted_regs[order[i]]))
5677 order[i] = j;
5679 	/* Have we found a suitable register?  If not, one must be used more
5680 than once. */
5681 if (order[i] == order[i - 1])
5682 return 0;
5684 /* Is the memory address adjacent and ascending? */
5685 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
5686 return 0;
5689 if (base)
5691 *base = base_reg;
5693 for (i = 0; i < nops; i++)
5694 regs[i] = unsorted_regs[order[i]];
5696 *load_offset = unsorted_offsets[order[0]];
5699 if (unsorted_offsets[order[0]] == 0)
5700 return 1; /* stmia */
5702 if (unsorted_offsets[order[0]] == 4)
5703 return 2; /* stmib */
5705 if (unsorted_offsets[order[nops - 1]] == 0)
5706 return 3; /* stmda */
5708 if (unsorted_offsets[order[nops - 1]] == -4)
5709 return 4; /* stmdb */
5711 return 0;
5714 const char *
5715 emit_stm_seq (rtx *operands, int nops)
5717 int regs[4];
5718 int base_reg;
5719 HOST_WIDE_INT offset;
5720 char buf[100];
5721 int i;
5723 switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
5725 case 1:
5726 strcpy (buf, "stm%?ia\t");
5727 break;
5729 case 2:
5730 strcpy (buf, "stm%?ib\t");
5731 break;
5733 case 3:
5734 strcpy (buf, "stm%?da\t");
5735 break;
5737 case 4:
5738 strcpy (buf, "stm%?db\t");
5739 break;
5741 default:
5742 gcc_unreachable ();
5745 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
5746 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
5748 for (i = 1; i < nops; i++)
5749 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
5750 reg_names[regs[i]]);
5752 strcat (buf, "}\t%@ phole stm");
5754 output_asm_insn (buf, operands);
5755 return "";
5759 /* Routines for use in generating RTL. */
5762 arm_gen_load_multiple (int base_regno, int count, rtx from, int up,
5763 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
5765 HOST_WIDE_INT offset = *offsetp;
5766 int i = 0, j;
5767 rtx result;
5768 int sign = up ? 1 : -1;
5769 rtx mem, addr;
5771 /* XScale has load-store double instructions, but they have stricter
5772 alignment requirements than load-store multiple, so we cannot
5773 use them.
5775 For XScale ldm requires 2 + NREGS cycles to complete and blocks
5776 the pipeline until completion.
5778 	     NREGS           CYCLES
5779 	       1               3
5780 	       2               4
5781 	       3               5
5782 	       4               6
5784 An ldr instruction takes 1-3 cycles, but does not block the
5785 pipeline.
5787 NREGS CYCLES
5788 1 1-3
5789 2 2-6
5790 3 3-9
5791 4 4-12
5793 Best case ldr will always win. However, the more ldr instructions
5794 we issue, the less likely we are to be able to schedule them well.
5795 Using ldr instructions also increases code size.
5797 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
5798 for counts of 3 or 4 regs. */
5799 if (arm_tune_xscale && count <= 2 && ! optimize_size)
5801 rtx seq;
5803 start_sequence ();
5805 for (i = 0; i < count; i++)
5807 addr = plus_constant (from, i * 4 * sign);
5808 mem = adjust_automodify_address (basemem, SImode, addr, offset);
5809 emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
5810 offset += 4 * sign;
5813 if (write_back)
5815 emit_move_insn (from, plus_constant (from, count * 4 * sign));
5816 *offsetp = offset;
5819 seq = get_insns ();
5820 end_sequence ();
5822 return seq;
5825 result = gen_rtx_PARALLEL (VOIDmode,
5826 rtvec_alloc (count + (write_back ? 1 : 0)));
5827 if (write_back)
5829 XVECEXP (result, 0, 0)
5830 = gen_rtx_SET (GET_MODE (from), from,
5831 plus_constant (from, count * 4 * sign));
5832 i = 1;
5833 count++;
5836 for (j = 0; i < count; i++, j++)
5838 addr = plus_constant (from, j * 4 * sign);
5839 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
5840 XVECEXP (result, 0, i)
5841 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem);
5842 offset += 4 * sign;
5845 if (write_back)
5846 *offsetp = offset;
5848 return result;
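/* For illustration, with base_regno = 0, count = 2, write_back set
   and an ascending FROM, the PARALLEL built above looks roughly like
       (parallel [(set (reg from) (plus (reg from) (const_int 8)))
		  (set (reg:SI 0) (mem:SI (reg from)))
		  (set (reg:SI 1) (mem:SI (plus (reg from) (const_int 4))))])
   which is intended to match the load-multiple patterns in arm.md.  */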
5852 arm_gen_store_multiple (int base_regno, int count, rtx to, int up,
5853 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
5855 HOST_WIDE_INT offset = *offsetp;
5856 int i = 0, j;
5857 rtx result;
5858 int sign = up ? 1 : -1;
5859 rtx mem, addr;
5861 /* See arm_gen_load_multiple for discussion of
5862 the pros/cons of ldm/stm usage for XScale. */
5863 if (arm_tune_xscale && count <= 2 && ! optimize_size)
5865 rtx seq;
5867 start_sequence ();
5869 for (i = 0; i < count; i++)
5871 addr = plus_constant (to, i * 4 * sign);
5872 mem = adjust_automodify_address (basemem, SImode, addr, offset);
5873 emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
5874 offset += 4 * sign;
5877 if (write_back)
5879 emit_move_insn (to, plus_constant (to, count * 4 * sign));
5880 *offsetp = offset;
5883 seq = get_insns ();
5884 end_sequence ();
5886 return seq;
5889 result = gen_rtx_PARALLEL (VOIDmode,
5890 rtvec_alloc (count + (write_back ? 1 : 0)));
5891 if (write_back)
5893 XVECEXP (result, 0, 0)
5894 = gen_rtx_SET (GET_MODE (to), to,
5895 plus_constant (to, count * 4 * sign));
5896 i = 1;
5897 count++;
5900 for (j = 0; i < count; i++, j++)
5902 addr = plus_constant (to, j * 4 * sign);
5903 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
5904 XVECEXP (result, 0, i)
5905 = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j));
5906 offset += 4 * sign;
5909 if (write_back)
5910 *offsetp = offset;
5912 return result;
5916 arm_gen_movmemqi (rtx *operands)
5918 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
5919 HOST_WIDE_INT srcoffset, dstoffset;
5920 int i;
5921 rtx src, dst, srcbase, dstbase;
5922 rtx part_bytes_reg = NULL;
5923 rtx mem;
5925 if (GET_CODE (operands[2]) != CONST_INT
5926 || GET_CODE (operands[3]) != CONST_INT
5927 || INTVAL (operands[2]) > 64
5928 || INTVAL (operands[3]) & 3)
5929 return 0;
5931 dstbase = operands[0];
5932 srcbase = operands[1];
5934 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
5935 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
5937 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
5938 out_words_to_go = INTVAL (operands[2]) / 4;
5939 last_bytes = INTVAL (operands[2]) & 3;
5940 dstoffset = srcoffset = 0;
5942 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
5943 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
5945 for (i = 0; in_words_to_go >= 2; i+=4)
5947 if (in_words_to_go > 4)
5948 emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE,
5949 srcbase, &srcoffset));
5950 else
5951 emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE,
5952 FALSE, srcbase, &srcoffset));
5954 if (out_words_to_go)
5956 if (out_words_to_go > 4)
5957 emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE,
5958 dstbase, &dstoffset));
5959 else if (out_words_to_go != 1)
5960 emit_insn (arm_gen_store_multiple (0, out_words_to_go,
5961 dst, TRUE,
5962 (last_bytes == 0
5963 ? FALSE : TRUE),
5964 dstbase, &dstoffset));
5965 else
5967 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
5968 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
5969 if (last_bytes != 0)
5971 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
5972 dstoffset += 4;
5977 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
5978 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
5981 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
5982 if (out_words_to_go)
5984 rtx sreg;
5986 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
5987 sreg = copy_to_reg (mem);
5989 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
5990 emit_move_insn (mem, sreg);
5991 in_words_to_go--;
5993 gcc_assert (!in_words_to_go); /* Sanity check */
5996 if (in_words_to_go)
5998 gcc_assert (in_words_to_go > 0);
6000 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
6001 part_bytes_reg = copy_to_mode_reg (SImode, mem);
6004 gcc_assert (!last_bytes || part_bytes_reg);
6006 if (BYTES_BIG_ENDIAN && last_bytes)
6008 rtx tmp = gen_reg_rtx (SImode);
6010 /* The bytes we want are in the top end of the word. */
6011 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
6012 GEN_INT (8 * (4 - last_bytes))));
6013 part_bytes_reg = tmp;
6015 while (last_bytes)
6017 mem = adjust_automodify_address (dstbase, QImode,
6018 plus_constant (dst, last_bytes - 1),
6019 dstoffset + last_bytes - 1);
6020 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
6022 if (--last_bytes)
6024 tmp = gen_reg_rtx (SImode);
6025 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
6026 part_bytes_reg = tmp;
6031 else
6033 if (last_bytes > 1)
6035 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
6036 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
6037 last_bytes -= 2;
6038 if (last_bytes)
6040 rtx tmp = gen_reg_rtx (SImode);
6041 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
6042 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
6043 part_bytes_reg = tmp;
6044 dstoffset += 2;
6048 if (last_bytes)
6050 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
6051 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
6055 return 1;
6058 /* Generate a memory reference for a half word, such that it will be loaded
6059 into the top 16 bits of the word. We can assume that the address is
6060 known to be alignable and of the form reg, or plus (reg, const). */
6063 arm_gen_rotated_half_load (rtx memref)
6065 HOST_WIDE_INT offset = 0;
6066 rtx base = XEXP (memref, 0);
6068 if (GET_CODE (base) == PLUS)
6070 offset = INTVAL (XEXP (base, 1));
6071 base = XEXP (base, 0);
6074 /* If we aren't allowed to generate unaligned addresses, then fail. */
6075 if ((BYTES_BIG_ENDIAN ? 1 : 0) ^ ((offset & 2) == 0))
6076 return NULL;
6078 base = gen_rtx_MEM (SImode, plus_constant (base, offset & ~2));
6080 if ((BYTES_BIG_ENDIAN ? 1 : 0) ^ ((offset & 2) == 2))
6081 return base;
6083 return gen_rtx_ROTATE (SImode, base, GEN_INT (16));
6086 /* Select a dominance comparison mode if possible for a test of the general
6087 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
6088 COND_OR == DOM_CC_X_AND_Y => (X && Y)
6089 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
6090 COND_OR == DOM_CC_X_OR_Y => (X || Y)
6091 In all cases OP will be either EQ or NE, but we don't need to know which
6092 here. If we are unable to support a dominance comparison we return
6093 CC mode. This will then fail to match for the RTL expressions that
6094 generate this call. */
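/* An illustrative (non-exhaustive) example: with COND_OR == DOM_CC_X_OR_Y,
   X = (eq r0 r1) and Y = (ge r0 r1) collapse to CC_DGEmode, because EQ
   dominates GE -- if the operands are equal the GE also holds -- so a
   single combined test of the condition codes is enough.  Both comparisons
   are assumed to use the same operands, as the insn patterns that call
   this guarantee.  */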
6095 enum machine_mode
6096 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
6098 enum rtx_code cond1, cond2;
6099 int swapped = 0;
6101 /* Currently we will probably get the wrong result if the individual
6102 comparisons are not simple. This also ensures that it is safe to
6103 reverse a comparison if necessary. */
6104 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
6105 != CCmode)
6106 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
6107 != CCmode))
6108 return CCmode;
6110 /* The if_then_else variant of this tests the second condition if the
6111 first passes, but is true if the first fails. Reverse the first
6112 condition to get a true "inclusive-or" expression. */
6113 if (cond_or == DOM_CC_NX_OR_Y)
6114 cond1 = reverse_condition (cond1);
6116 /* If the comparisons are not equal, and one doesn't dominate the other,
6117 then we can't do this. */
6118 if (cond1 != cond2
6119 && !comparison_dominates_p (cond1, cond2)
6120 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
6121 return CCmode;
6123 if (swapped)
6125 enum rtx_code temp = cond1;
6126 cond1 = cond2;
6127 cond2 = temp;
6130 switch (cond1)
6132 case EQ:
6133 if (cond_or == DOM_CC_X_AND_Y)
6134 return CC_DEQmode;
6136 switch (cond2)
6138 case EQ: return CC_DEQmode;
6139 case LE: return CC_DLEmode;
6140 case LEU: return CC_DLEUmode;
6141 case GE: return CC_DGEmode;
6142 case GEU: return CC_DGEUmode;
6143 default: gcc_unreachable ();
6146 case LT:
6147 if (cond_or == DOM_CC_X_AND_Y)
6148 return CC_DLTmode;
6150 switch (cond2)
6152 case LT:
6153 return CC_DLTmode;
6154 case LE:
6155 return CC_DLEmode;
6156 case NE:
6157 return CC_DNEmode;
6158 default:
6159 gcc_unreachable ();
6162 case GT:
6163 if (cond_or == DOM_CC_X_AND_Y)
6164 return CC_DGTmode;
6166 switch (cond2)
6168 case GT:
6169 return CC_DGTmode;
6170 case GE:
6171 return CC_DGEmode;
6172 case NE:
6173 return CC_DNEmode;
6174 default:
6175 gcc_unreachable ();
6178 case LTU:
6179 if (cond_or == DOM_CC_X_AND_Y)
6180 return CC_DLTUmode;
6182 switch (cond2)
6184 case LTU:
6185 return CC_DLTUmode;
6186 case LEU:
6187 return CC_DLEUmode;
6188 case NE:
6189 return CC_DNEmode;
6190 default:
6191 gcc_unreachable ();
6194 case GTU:
6195 if (cond_or == DOM_CC_X_AND_Y)
6196 return CC_DGTUmode;
6198 switch (cond2)
6200 case GTU:
6201 return CC_DGTUmode;
6202 case GEU:
6203 return CC_DGEUmode;
6204 case NE:
6205 return CC_DNEmode;
6206 default:
6207 gcc_unreachable ();
6210 /* The remaining cases only occur when both comparisons are the
6211 same. */
6212 case NE:
6213 gcc_assert (cond1 == cond2);
6214 return CC_DNEmode;
6216 case LE:
6217 gcc_assert (cond1 == cond2);
6218 return CC_DLEmode;
6220 case GE:
6221 gcc_assert (cond1 == cond2);
6222 return CC_DGEmode;
6224 case LEU:
6225 gcc_assert (cond1 == cond2);
6226 return CC_DLEUmode;
6228 case GEU:
6229 gcc_assert (cond1 == cond2);
6230 return CC_DGEUmode;
6232 default:
6233 gcc_unreachable ();
6237 enum machine_mode
6238 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
6240 /* All floating point compares return CCFP if it is an equality
6241 comparison, and CCFPE otherwise. */
6242 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
6244 switch (op)
6246 case EQ:
6247 case NE:
6248 case UNORDERED:
6249 case ORDERED:
6250 case UNLT:
6251 case UNLE:
6252 case UNGT:
6253 case UNGE:
6254 case UNEQ:
6255 case LTGT:
6256 return CCFPmode;
6258 case LT:
6259 case LE:
6260 case GT:
6261 case GE:
6262 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
6263 return CCFPmode;
6264 return CCFPEmode;
6266 default:
6267 gcc_unreachable ();
6271 /* A compare with a shifted operand. Because of canonicalization, the
6272 comparison will have to be swapped when we emit the assembler. */
6273 if (GET_MODE (y) == SImode && GET_CODE (y) == REG
6274 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
6275 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
6276 || GET_CODE (x) == ROTATERT))
6277 return CC_SWPmode;
6279 /* This operation is performed swapped, but since we only rely on the Z
6280 flag we don't need an additional mode. */
6281 if (GET_MODE (y) == SImode && REG_P (y)
6282 && GET_CODE (x) == NEG
6283 && (op == EQ || op == NE))
6284 return CC_Zmode;
6286 /* This is a special case that is used by combine to allow a
6287 comparison of a shifted byte load to be split into a zero-extend
6288 followed by a comparison of the shifted integer (only valid for
6289 equalities and unsigned inequalities). */
6290 if (GET_MODE (x) == SImode
6291 && GET_CODE (x) == ASHIFT
6292 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
6293 && GET_CODE (XEXP (x, 0)) == SUBREG
6294 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
6295 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
6296 && (op == EQ || op == NE
6297 || op == GEU || op == GTU || op == LTU || op == LEU)
6298 && GET_CODE (y) == CONST_INT)
6299 return CC_Zmode;
6301 /* A construct for a conditional compare: if the false arm contains
6302 0, then both conditions must be true, otherwise either condition
6303 must be true. Not all conditions are possible, so CCmode is
6304 returned if it can't be done. */
6305 if (GET_CODE (x) == IF_THEN_ELSE
6306 && (XEXP (x, 2) == const0_rtx
6307 || XEXP (x, 2) == const1_rtx)
6308 && COMPARISON_P (XEXP (x, 0))
6309 && COMPARISON_P (XEXP (x, 1)))
6310 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
6311 INTVAL (XEXP (x, 2)));
6313 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
6314 if (GET_CODE (x) == AND
6315 && COMPARISON_P (XEXP (x, 0))
6316 && COMPARISON_P (XEXP (x, 1)))
6317 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
6318 DOM_CC_X_AND_Y);
6320 if (GET_CODE (x) == IOR
6321 && COMPARISON_P (XEXP (x, 0))
6322 && COMPARISON_P (XEXP (x, 1)))
6323 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
6324 DOM_CC_X_OR_Y);
6326 /* An operation (on Thumb) where we want to test for a single bit.
6327 This is done by shifting that bit up into the top bit of a
6328 scratch register; we can then branch on the sign bit. */
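/* For example, a test of bit 3 of r0 can be emitted roughly as
       lsl  r3, r0, #28   @ bit 3 -> bit 31, flags set
       bmi  ...           @ (or bpl, depending on EQ/NE)
   so only the N flag is meaningful, hence CC_Nmode.  */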
6329 if (TARGET_THUMB
6330 && GET_MODE (x) == SImode
6331 && (op == EQ || op == NE)
6332 && (GET_CODE (x) == ZERO_EXTRACT))
6333 return CC_Nmode;
6335 /* An operation that sets the condition codes as a side-effect: the
6336 V flag is not set correctly, so we can only use comparisons where
6337 this doesn't matter. (For LT and GE we can use "mi" and "pl"
6338 instead.) */
6339 if (GET_MODE (x) == SImode
6340 && y == const0_rtx
6341 && (op == EQ || op == NE || op == LT || op == GE)
6342 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
6343 || GET_CODE (x) == AND || GET_CODE (x) == IOR
6344 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
6345 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
6346 || GET_CODE (x) == LSHIFTRT
6347 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
6348 || GET_CODE (x) == ROTATERT
6349 || (TARGET_ARM && GET_CODE (x) == ZERO_EXTRACT)))
6350 return CC_NOOVmode;
6352 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
6353 return CC_Zmode;
6355 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
6356 && GET_CODE (x) == PLUS
6357 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
6358 return CC_Cmode;
6360 return CCmode;
6363 /* X and Y are two things to compare using CODE. Emit the compare insn and
6364 return the rtx for register 0 in the proper mode. FP means this is a
6365 floating point compare: I don't think that it is needed on the arm. */
6367 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
6369 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
6370 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
6372 emit_insn (gen_rtx_SET (VOIDmode, cc_reg,
6373 gen_rtx_COMPARE (mode, x, y)));
6375 return cc_reg;
6378 /* Generate a sequence of insns that will generate the correct return
6379 address mask depending on the physical architecture that the program
6380 is running on. */
6382 arm_gen_return_addr_mask (void)
6384 rtx reg = gen_reg_rtx (Pmode);
6386 emit_insn (gen_return_addr_mask (reg));
6387 return reg;
6390 void
6391 arm_reload_in_hi (rtx *operands)
6393 rtx ref = operands[1];
6394 rtx base, scratch;
6395 HOST_WIDE_INT offset = 0;
6397 if (GET_CODE (ref) == SUBREG)
6399 offset = SUBREG_BYTE (ref);
6400 ref = SUBREG_REG (ref);
6403 if (GET_CODE (ref) == REG)
6405 /* We have a pseudo which has been spilt onto the stack; there
6406 are two cases here: the first where there is a simple
6407 stack-slot replacement and a second where the stack-slot is
6408 out of range, or is used as a subreg. */
6409 if (reg_equiv_mem[REGNO (ref)])
6411 ref = reg_equiv_mem[REGNO (ref)];
6412 base = find_replacement (&XEXP (ref, 0));
6414 else
6415 /* The slot is out of range, or was dressed up in a SUBREG. */
6416 base = reg_equiv_address[REGNO (ref)];
6418 else
6419 base = find_replacement (&XEXP (ref, 0));
6421 /* Handle the case where the address is too complex to be offset by 1. */
6422 if (GET_CODE (base) == MINUS
6423 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
6425 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
6427 emit_insn (gen_rtx_SET (VOIDmode, base_plus, base));
6428 base = base_plus;
6430 else if (GET_CODE (base) == PLUS)
6432 /* The addend must be CONST_INT, or we would have dealt with it above. */
6433 HOST_WIDE_INT hi, lo;
6435 offset += INTVAL (XEXP (base, 1));
6436 base = XEXP (base, 0);
6438 /* Rework the address into a legal sequence of insns. */
6439 /* Valid range for lo is -4095 -> 4095 */
6440 lo = (offset >= 0
6441 ? (offset & 0xfff)
6442 : -((-offset) & 0xfff));
6444 /* Corner case, if lo is the max offset then we would be out of range
6445 once we have added the additional 1 below, so bump the msb into the
6446 pre-loading insn(s). */
6447 if (lo == 4095)
6448 lo &= 0x7ff;
6450 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
6451 ^ (HOST_WIDE_INT) 0x80000000)
6452 - (HOST_WIDE_INT) 0x80000000);
6454 gcc_assert (hi + lo == offset);
6456 if (hi != 0)
6458 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
6460 /* Get the base address; addsi3 knows how to handle constants
6461 that require more than one insn. */
6462 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
6463 base = base_plus;
6464 offset = lo;
6468 /* Operands[2] may overlap operands[0] (though it won't overlap
6469 operands[1]), that's why we asked for a DImode reg -- so we can
6470 use the bit that does not overlap. */
6471 if (REGNO (operands[2]) == REGNO (operands[0]))
6472 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
6473 else
6474 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
6476 emit_insn (gen_zero_extendqisi2 (scratch,
6477 gen_rtx_MEM (QImode,
6478 plus_constant (base,
6479 offset))));
6480 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
6481 gen_rtx_MEM (QImode,
6482 plus_constant (base,
6483 offset + 1))));
6484 if (!BYTES_BIG_ENDIAN)
6485 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_SUBREG (SImode, operands[0], 0),
6486 gen_rtx_IOR (SImode,
6487 gen_rtx_ASHIFT
6488 (SImode,
6489 gen_rtx_SUBREG (SImode, operands[0], 0),
6490 GEN_INT (8)),
6491 scratch)));
6492 else
6493 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_SUBREG (SImode, operands[0], 0),
6494 gen_rtx_IOR (SImode,
6495 gen_rtx_ASHIFT (SImode, scratch,
6496 GEN_INT (8)),
6497 gen_rtx_SUBREG (SImode, operands[0],
6498 0))));
6501 /* Handle storing a half-word to memory during reload by synthesizing as two
6502 byte stores. Take care not to clobber the input values until after we
6503 have moved them somewhere safe. This code assumes that if the DImode
6504 scratch in operands[2] overlaps either the input value or output address
6505 in some way, then that value must die in this insn (we absolutely need
6506 two scratch registers for some corner cases). */
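/* On a little-endian target, for example, the store ends up looking
   roughly like
       strb  <outval>,  [<base>, #<offset>]
       mov   <scratch>, <outval>, lsr #8
       strb  <scratch>, [<base>, #<offset> + 1]
   big-endian simply swaps which byte goes to which address.  */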
6507 void
6508 arm_reload_out_hi (rtx *operands)
6510 rtx ref = operands[0];
6511 rtx outval = operands[1];
6512 rtx base, scratch;
6513 HOST_WIDE_INT offset = 0;
6515 if (GET_CODE (ref) == SUBREG)
6517 offset = SUBREG_BYTE (ref);
6518 ref = SUBREG_REG (ref);
6521 if (GET_CODE (ref) == REG)
6523 /* We have a pseudo which has been spilt onto the stack; there
6524 are two cases here: the first where there is a simple
6525 stack-slot replacement and a second where the stack-slot is
6526 out of range, or is used as a subreg. */
6527 if (reg_equiv_mem[REGNO (ref)])
6529 ref = reg_equiv_mem[REGNO (ref)];
6530 base = find_replacement (&XEXP (ref, 0));
6532 else
6533 /* The slot is out of range, or was dressed up in a SUBREG. */
6534 base = reg_equiv_address[REGNO (ref)];
6536 else
6537 base = find_replacement (&XEXP (ref, 0));
6539 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
6541 /* Handle the case where the address is too complex to be offset by 1. */
6542 if (GET_CODE (base) == MINUS
6543 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
6545 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
6547 /* Be careful not to destroy OUTVAL. */
6548 if (reg_overlap_mentioned_p (base_plus, outval))
6550 /* Updating base_plus might destroy outval, see if we can
6551 swap the scratch and base_plus. */
6552 if (!reg_overlap_mentioned_p (scratch, outval))
6554 rtx tmp = scratch;
6555 scratch = base_plus;
6556 base_plus = tmp;
6558 else
6560 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
6562 /* Be conservative and copy OUTVAL into the scratch now,
6563 this should only be necessary if outval is a subreg
6564 of something larger than a word. */
6565 /* XXX Might this clobber base? I can't see how it can,
6566 since scratch is known to overlap with OUTVAL, and
6567 must be wider than a word. */
6568 emit_insn (gen_movhi (scratch_hi, outval));
6569 outval = scratch_hi;
6573 emit_insn (gen_rtx_SET (VOIDmode, base_plus, base));
6574 base = base_plus;
6576 else if (GET_CODE (base) == PLUS)
6578 /* The addend must be CONST_INT, or we would have dealt with it above. */
6579 HOST_WIDE_INT hi, lo;
6581 offset += INTVAL (XEXP (base, 1));
6582 base = XEXP (base, 0);
6584 /* Rework the address into a legal sequence of insns. */
6585 /* Valid range for lo is -4095 -> 4095 */
6586 lo = (offset >= 0
6587 ? (offset & 0xfff)
6588 : -((-offset) & 0xfff));
6590 /* Corner case, if lo is the max offset then we would be out of range
6591 once we have added the additional 1 below, so bump the msb into the
6592 pre-loading insn(s). */
6593 if (lo == 4095)
6594 lo &= 0x7ff;
6596 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
6597 ^ (HOST_WIDE_INT) 0x80000000)
6598 - (HOST_WIDE_INT) 0x80000000);
6600 gcc_assert (hi + lo == offset);
6602 if (hi != 0)
6604 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
6606 /* Be careful not to destroy OUTVAL. */
6607 if (reg_overlap_mentioned_p (base_plus, outval))
6609 /* Updating base_plus might destroy outval, see if we
6610 can swap the scratch and base_plus. */
6611 if (!reg_overlap_mentioned_p (scratch, outval))
6613 rtx tmp = scratch;
6614 scratch = base_plus;
6615 base_plus = tmp;
6617 else
6619 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
6621 /* Be conservative and copy outval into scratch now,
6622 this should only be necessary if outval is a
6623 subreg of something larger than a word. */
6624 /* XXX Might this clobber base? I can't see how it
6625 can, since scratch is known to overlap with
6626 outval. */
6627 emit_insn (gen_movhi (scratch_hi, outval));
6628 outval = scratch_hi;
6632 /* Get the base address; addsi3 knows how to handle constants
6633 that require more than one insn. */
6634 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
6635 base = base_plus;
6636 offset = lo;
6640 if (BYTES_BIG_ENDIAN)
6642 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
6643 plus_constant (base, offset + 1)),
6644 gen_lowpart (QImode, outval)));
6645 emit_insn (gen_lshrsi3 (scratch,
6646 gen_rtx_SUBREG (SImode, outval, 0),
6647 GEN_INT (8)));
6648 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
6649 gen_lowpart (QImode, scratch)));
6651 else
6653 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
6654 gen_lowpart (QImode, outval)));
6655 emit_insn (gen_lshrsi3 (scratch,
6656 gen_rtx_SUBREG (SImode, outval, 0),
6657 GEN_INT (8)));
6658 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
6659 plus_constant (base, offset + 1)),
6660 gen_lowpart (QImode, scratch)));
6664 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
6665 (padded to the size of a word) should be passed in a register. */
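/* For example, under AAPCS a struct of three chars (3 bytes, padded up
   to one word) is not forced onto the stack; it can be passed in a
   single core register.  */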
6667 static bool
6668 arm_must_pass_in_stack (enum machine_mode mode, tree type)
6670 if (TARGET_AAPCS_BASED)
6671 return must_pass_in_stack_var_size (mode, type);
6672 else
6673 return must_pass_in_stack_var_size_or_pad (mode, type);
6677 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
6678 Return true if an argument passed on the stack should be padded upwards,
6679 i.e. if the least-significant byte has useful data. */
6681 bool
6682 arm_pad_arg_upward (enum machine_mode mode, tree type)
6684 if (!TARGET_AAPCS_BASED)
6685 return DEFAULT_FUNCTION_ARG_PADDING(mode, type);
6687 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
6688 return false;
6690 return true;
6694 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
6695 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
6696 byte of the register has useful data, and return the opposite if the
6697 most significant byte does.
6698 For AAPCS, small aggregates and small complex types are always padded
6699 upwards. */
6701 bool
6702 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
6703 tree type, int first ATTRIBUTE_UNUSED)
6705 if (TARGET_AAPCS_BASED
6706 && BYTES_BIG_ENDIAN
6707 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
6708 && int_size_in_bytes (type) <= 4)
6709 return true;
6711 /* Otherwise, use default padding. */
6712 return !BYTES_BIG_ENDIAN;
6717 /* Print a symbolic form of X to the debug file, F. */
6718 static void
6719 arm_print_value (FILE *f, rtx x)
6721 switch (GET_CODE (x))
6723 case CONST_INT:
6724 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
6725 return;
6727 case CONST_DOUBLE:
6728 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
6729 return;
6731 case CONST_VECTOR:
6733 int i;
6735 fprintf (f, "<");
6736 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
6738 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
6739 if (i < (CONST_VECTOR_NUNITS (x) - 1))
6740 fputc (',', f);
6742 fprintf (f, ">");
6744 return;
6746 case CONST_STRING:
6747 fprintf (f, "\"%s\"", XSTR (x, 0));
6748 return;
6750 case SYMBOL_REF:
6751 fprintf (f, "`%s'", XSTR (x, 0));
6752 return;
6754 case LABEL_REF:
6755 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
6756 return;
6758 case CONST:
6759 arm_print_value (f, XEXP (x, 0));
6760 return;
6762 case PLUS:
6763 arm_print_value (f, XEXP (x, 0));
6764 fprintf (f, "+");
6765 arm_print_value (f, XEXP (x, 1));
6766 return;
6768 case PC:
6769 fprintf (f, "pc");
6770 return;
6772 default:
6773 fprintf (f, "????");
6774 return;
6778 /* Routines for manipulation of the constant pool. */
6780 /* Arm instructions cannot load a large constant directly into a
6781 register; they have to come from a pc relative load. The constant
6782 must therefore be placed in the addressable range of the pc
6783 relative load. Depending on the precise pc relative load
6784 instruction the range is somewhere between 256 bytes and 4k. This
6785 means that we often have to dump a constant inside a function, and
6786 generate code to branch around it.
6788 It is important to minimize this, since the branches will slow
6789 things down and make the code larger.
6791 Normally we can hide the table after an existing unconditional
6792 branch so that there is no interruption of the flow, but in the
6793 worst case the code looks like this:
6795 ldr rn, L1
6797 b L2
6798 align
6799 L1: .long value
6803 ldr rn, L3
6805 b L4
6806 align
6807 L3: .long value
6811 We fix this by performing a scan after scheduling, which notices
6812 which instructions need to have their operands fetched from the
6813 constant table and builds the table.
6815 The algorithm starts by building a table of all the constants that
6816 need fixing up and all the natural barriers in the function (places
6817 where a constant table can be dropped without breaking the flow).
6818 For each fixup we note how far the pc-relative replacement will be
6819 able to reach and the offset of the instruction into the function.
6821 Having built the table we then group the fixes together to form
6822 tables that are as large as possible (subject to addressing
6823 constraints) and emit each table of constants after the last
6824 barrier that is within range of all the instructions in the group.
6825 If a group does not contain a barrier, then we forcibly create one
6826 by inserting a jump instruction into the flow. Once the table has
6827 been inserted, the insns are then modified to reference the
6828 relevant entry in the pool.
6830 Possible enhancements to the algorithm (not implemented) are:
6832 1) For some processors and object formats, there may be benefit in
6833 aligning the pools to the start of cache lines; this alignment
6834 would need to be taken into account when calculating addressability
6835 of a pool. */
6837 /* These typedefs are located at the start of this file, so that
6838 they can be used in the prototypes there. This comment is to
6839 remind readers of that fact so that the following structures
6840 can be understood more easily.
6842 typedef struct minipool_node Mnode;
6843 typedef struct minipool_fixup Mfix; */
6845 struct minipool_node
6847 /* Doubly linked chain of entries. */
6848 Mnode * next;
6849 Mnode * prev;
6850 /* The maximum offset into the code at which this entry can be placed. While
6851 pushing fixes for forward references, all entries are sorted in order
6852 of increasing max_address. */
6853 HOST_WIDE_INT max_address;
6854 /* Similarly for an entry inserted for a backwards ref. */
6855 HOST_WIDE_INT min_address;
6856 /* The number of fixes referencing this entry. This can become zero
6857 if we "unpush" an entry. In this case we ignore the entry when we
6858 come to emit the code. */
6859 int refcount;
6860 /* The offset from the start of the minipool. */
6861 HOST_WIDE_INT offset;
6862 /* The value in table. */
6863 rtx value;
6864 /* The mode of value. */
6865 enum machine_mode mode;
6866 /* The size of the value. With iWMMXt enabled
6867 sizes > 4 also imply an alignment of 8 bytes. */
6868 int fix_size;
6871 struct minipool_fixup
6873 Mfix * next;
6874 rtx insn;
6875 HOST_WIDE_INT address;
6876 rtx * loc;
6877 enum machine_mode mode;
6878 int fix_size;
6879 rtx value;
6880 Mnode * minipool;
6881 HOST_WIDE_INT forwards;
6882 HOST_WIDE_INT backwards;
6885 /* Fixes less than a word need padding out to a word boundary. */
6886 #define MINIPOOL_FIX_SIZE(mode) \
6887 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
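/* For example, an HImode fix (2 bytes) still occupies 4 bytes in the
   pool, while a DImode or DFmode fix keeps its natural 8 bytes.  */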
6889 static Mnode * minipool_vector_head;
6890 static Mnode * minipool_vector_tail;
6891 static rtx minipool_vector_label;
6893 /* The linked list of all minipool fixes required for this function. */
6894 Mfix * minipool_fix_head;
6895 Mfix * minipool_fix_tail;
6896 /* The fix entry for the current minipool, once it has been placed. */
6897 Mfix * minipool_barrier;
6899 /* Determines if INSN is the start of a jump table. Returns the end
6900 of the TABLE or NULL_RTX. */
6901 static rtx
6902 is_jump_table (rtx insn)
6904 rtx table;
6906 if (GET_CODE (insn) == JUMP_INSN
6907 && JUMP_LABEL (insn) != NULL
6908 && ((table = next_real_insn (JUMP_LABEL (insn)))
6909 == next_real_insn (insn))
6910 && table != NULL
6911 && GET_CODE (table) == JUMP_INSN
6912 && (GET_CODE (PATTERN (table)) == ADDR_VEC
6913 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
6914 return table;
6916 return NULL_RTX;
6919 #ifndef JUMP_TABLES_IN_TEXT_SECTION
6920 #define JUMP_TABLES_IN_TEXT_SECTION 0
6921 #endif
6923 static HOST_WIDE_INT
6924 get_jump_table_size (rtx insn)
6926 /* ADDR_VECs only take room if read-only data goes into the text
6927 section. */
6928 if (JUMP_TABLES_IN_TEXT_SECTION
6929 #if !defined(READONLY_DATA_SECTION) && !defined(READONLY_DATA_SECTION_ASM_OP)
6930 || 1
6931 #endif
6934 rtx body = PATTERN (insn);
6935 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
6937 return GET_MODE_SIZE (GET_MODE (body)) * XVECLEN (body, elt);
6940 return 0;
6943 /* Move a minipool fix MP from its current location to before MAX_MP.
6944 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
6945 constraints may need updating. */
6946 static Mnode *
6947 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
6948 HOST_WIDE_INT max_address)
6950 /* The code below assumes these are different. */
6951 gcc_assert (mp != max_mp);
6953 if (max_mp == NULL)
6955 if (max_address < mp->max_address)
6956 mp->max_address = max_address;
6958 else
6960 if (max_address > max_mp->max_address - mp->fix_size)
6961 mp->max_address = max_mp->max_address - mp->fix_size;
6962 else
6963 mp->max_address = max_address;
6965 /* Unlink MP from its current position. Since max_mp is non-null,
6966 mp->prev must be non-null. */
6967 mp->prev->next = mp->next;
6968 if (mp->next != NULL)
6969 mp->next->prev = mp->prev;
6970 else
6971 minipool_vector_tail = mp->prev;
6973 /* Re-insert it before MAX_MP. */
6974 mp->next = max_mp;
6975 mp->prev = max_mp->prev;
6976 max_mp->prev = mp;
6978 if (mp->prev != NULL)
6979 mp->prev->next = mp;
6980 else
6981 minipool_vector_head = mp;
6984 /* Save the new entry. */
6985 max_mp = mp;
6987 /* Scan over the preceding entries and adjust their addresses as
6988 required. */
6989 while (mp->prev != NULL
6990 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
6992 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
6993 mp = mp->prev;
6996 return max_mp;
6999 /* Add a constant to the minipool for a forward reference. Returns the
7000 node added or NULL if the constant will not fit in this pool. */
7001 static Mnode *
7002 add_minipool_forward_ref (Mfix *fix)
7004 /* If set, max_mp is the first pool_entry that has a lower
7005 constraint than the one we are trying to add. */
7006 Mnode * max_mp = NULL;
7007 HOST_WIDE_INT max_address = fix->address + fix->forwards;
7008 Mnode * mp;
7010 /* If this fix's address is greater than the address of the first
7011 entry, then we can't put the fix in this pool. We subtract the
7012 size of the current fix to ensure that if the table is fully
7013 packed we still have enough room to insert this value by shuffling
7014 the other fixes forwards. */
7015 if (minipool_vector_head &&
7016 fix->address >= minipool_vector_head->max_address - fix->fix_size)
7017 return NULL;
7019 /* Scan the pool to see if a constant with the same value has
7020 already been added. While we are doing this, also note the
7021 location where we must insert the constant if it doesn't already
7022 exist. */
7023 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
7025 if (GET_CODE (fix->value) == GET_CODE (mp->value)
7026 && fix->mode == mp->mode
7027 && (GET_CODE (fix->value) != CODE_LABEL
7028 || (CODE_LABEL_NUMBER (fix->value)
7029 == CODE_LABEL_NUMBER (mp->value)))
7030 && rtx_equal_p (fix->value, mp->value))
7032 /* More than one fix references this entry. */
7033 mp->refcount++;
7034 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
7037 /* Note the insertion point if necessary. */
7038 if (max_mp == NULL
7039 && mp->max_address > max_address)
7040 max_mp = mp;
7042 /* If we are inserting an 8-byte aligned quantity and
7043 we have not already found an insertion point, then
7044 make sure that all such 8-byte aligned quantities are
7045 placed at the start of the pool. */
7046 if (ARM_DOUBLEWORD_ALIGN
7047 && max_mp == NULL
7048 && fix->fix_size == 8
7049 && mp->fix_size != 8)
7051 max_mp = mp;
7052 max_address = mp->max_address;
7056 /* The value is not currently in the minipool, so we need to create
7057 a new entry for it. If MAX_MP is NULL, the entry will be put on
7058 the end of the list since the placement is less constrained than
7059 any existing entry. Otherwise, we insert the new fix before
7060 MAX_MP and, if necessary, adjust the constraints on the other
7061 entries. */
7062 mp = xmalloc (sizeof (* mp));
7063 mp->fix_size = fix->fix_size;
7064 mp->mode = fix->mode;
7065 mp->value = fix->value;
7066 mp->refcount = 1;
7067 /* Not yet required for a backwards ref. */
7068 mp->min_address = -65536;
7070 if (max_mp == NULL)
7072 mp->max_address = max_address;
7073 mp->next = NULL;
7074 mp->prev = minipool_vector_tail;
7076 if (mp->prev == NULL)
7078 minipool_vector_head = mp;
7079 minipool_vector_label = gen_label_rtx ();
7081 else
7082 mp->prev->next = mp;
7084 minipool_vector_tail = mp;
7086 else
7088 if (max_address > max_mp->max_address - mp->fix_size)
7089 mp->max_address = max_mp->max_address - mp->fix_size;
7090 else
7091 mp->max_address = max_address;
7093 mp->next = max_mp;
7094 mp->prev = max_mp->prev;
7095 max_mp->prev = mp;
7096 if (mp->prev != NULL)
7097 mp->prev->next = mp;
7098 else
7099 minipool_vector_head = mp;
7102 /* Save the new entry. */
7103 max_mp = mp;
7105 /* Scan over the preceding entries and adjust their addresses as
7106 required. */
7107 while (mp->prev != NULL
7108 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
7110 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
7111 mp = mp->prev;
7114 return max_mp;
7117 static Mnode *
7118 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
7119 HOST_WIDE_INT min_address)
7121 HOST_WIDE_INT offset;
7123 /* The code below assumes these are different. */
7124 gcc_assert (mp != min_mp);
7126 if (min_mp == NULL)
7128 if (min_address > mp->min_address)
7129 mp->min_address = min_address;
7131 else
7133 /* We will adjust this below if it is too loose. */
7134 mp->min_address = min_address;
7136 /* Unlink MP from its current position. Since min_mp is non-null,
7137 mp->next must be non-null. */
7138 mp->next->prev = mp->prev;
7139 if (mp->prev != NULL)
7140 mp->prev->next = mp->next;
7141 else
7142 minipool_vector_head = mp->next;
7144 /* Reinsert it after MIN_MP. */
7145 mp->prev = min_mp;
7146 mp->next = min_mp->next;
7147 min_mp->next = mp;
7148 if (mp->next != NULL)
7149 mp->next->prev = mp;
7150 else
7151 minipool_vector_tail = mp;
7154 min_mp = mp;
7156 offset = 0;
7157 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
7159 mp->offset = offset;
7160 if (mp->refcount > 0)
7161 offset += mp->fix_size;
7163 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
7164 mp->next->min_address = mp->min_address + mp->fix_size;
7167 return min_mp;
7170 /* Add a constant to the minipool for a backward reference. Returns the
7171 node added or NULL if the constant will not fit in this pool.
7173 Note that the code for insertion for a backwards reference can be
7174 somewhat confusing because the calculated offsets for each fix do
7175 not take into account the size of the pool (which is still under
7176 construction). */
7177 static Mnode *
7178 add_minipool_backward_ref (Mfix *fix)
7180 /* If set, min_mp is the last pool_entry that has a lower constraint
7181 than the one we are trying to add. */
7182 Mnode *min_mp = NULL;
7183 /* This can be negative, since it is only a constraint. */
7184 HOST_WIDE_INT min_address = fix->address - fix->backwards;
7185 Mnode *mp;
7187 /* If we can't reach the current pool from this insn, or if we can't
7188 insert this entry at the end of the pool without pushing other
7189 fixes out of range, then we don't try. This ensures that we
7190 can't fail later on. */
7191 if (min_address >= minipool_barrier->address
7192 || (minipool_vector_tail->min_address + fix->fix_size
7193 >= minipool_barrier->address))
7194 return NULL;
7196 /* Scan the pool to see if a constant with the same value has
7197 already been added. While we are doing this, also note the
7198 location where we must insert the constant if it doesn't already
7199 exist. */
7200 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
7202 if (GET_CODE (fix->value) == GET_CODE (mp->value)
7203 && fix->mode == mp->mode
7204 && (GET_CODE (fix->value) != CODE_LABEL
7205 || (CODE_LABEL_NUMBER (fix->value)
7206 == CODE_LABEL_NUMBER (mp->value)))
7207 && rtx_equal_p (fix->value, mp->value)
7208 /* Check that there is enough slack to move this entry to the
7209 end of the table (this is conservative). */
7210 && (mp->max_address
7211 > (minipool_barrier->address
7212 + minipool_vector_tail->offset
7213 + minipool_vector_tail->fix_size)))
7215 mp->refcount++;
7216 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
7219 if (min_mp != NULL)
7220 mp->min_address += fix->fix_size;
7221 else
7223 /* Note the insertion point if necessary. */
7224 if (mp->min_address < min_address)
7226 /* For now, we do not allow the insertion of 8-byte alignment
7227 requiring nodes anywhere but at the start of the pool. */
7228 if (ARM_DOUBLEWORD_ALIGN
7229 && fix->fix_size == 8 && mp->fix_size != 8)
7230 return NULL;
7231 else
7232 min_mp = mp;
7234 else if (mp->max_address
7235 < minipool_barrier->address + mp->offset + fix->fix_size)
7237 /* Inserting before this entry would push the fix beyond
7238 its maximum address (which can happen if we have
7239 re-located a forwards fix); force the new fix to come
7240 after it. */
7241 min_mp = mp;
7242 min_address = mp->min_address + fix->fix_size;
7244 /* If we are inserting an 8-byte aligned quantity and
7245 we have not already found an insertion point, then
7246 make sure that all such 8-byte aligned quantities are
7247 placed at the start of the pool. */
7248 else if (ARM_DOUBLEWORD_ALIGN
7249 && min_mp == NULL
7250 && fix->fix_size == 8
7251 && mp->fix_size < 8)
7253 min_mp = mp;
7254 min_address = mp->min_address + fix->fix_size;
7259 /* We need to create a new entry. */
7260 mp = xmalloc (sizeof (* mp));
7261 mp->fix_size = fix->fix_size;
7262 mp->mode = fix->mode;
7263 mp->value = fix->value;
7264 mp->refcount = 1;
7265 mp->max_address = minipool_barrier->address + 65536;
7267 mp->min_address = min_address;
7269 if (min_mp == NULL)
7271 mp->prev = NULL;
7272 mp->next = minipool_vector_head;
7274 if (mp->next == NULL)
7276 minipool_vector_tail = mp;
7277 minipool_vector_label = gen_label_rtx ();
7279 else
7280 mp->next->prev = mp;
7282 minipool_vector_head = mp;
7284 else
7286 mp->next = min_mp->next;
7287 mp->prev = min_mp;
7288 min_mp->next = mp;
7290 if (mp->next != NULL)
7291 mp->next->prev = mp;
7292 else
7293 minipool_vector_tail = mp;
7296 /* Save the new entry. */
7297 min_mp = mp;
7299 if (mp->prev)
7300 mp = mp->prev;
7301 else
7302 mp->offset = 0;
7304 /* Scan over the following entries and adjust their offsets. */
7305 while (mp->next != NULL)
7307 if (mp->next->min_address < mp->min_address + mp->fix_size)
7308 mp->next->min_address = mp->min_address + mp->fix_size;
7310 if (mp->refcount)
7311 mp->next->offset = mp->offset + mp->fix_size;
7312 else
7313 mp->next->offset = mp->offset;
7315 mp = mp->next;
7318 return min_mp;
7321 static void
7322 assign_minipool_offsets (Mfix *barrier)
7324 HOST_WIDE_INT offset = 0;
7325 Mnode *mp;
7327 minipool_barrier = barrier;
7329 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
7331 mp->offset = offset;
7333 if (mp->refcount > 0)
7334 offset += mp->fix_size;
7338 /* Output the literal table */
7339 static void
7340 dump_minipool (rtx scan)
7342 Mnode * mp;
7343 Mnode * nmp;
7344 int align64 = 0;
7346 if (ARM_DOUBLEWORD_ALIGN)
7347 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
7348 if (mp->refcount > 0 && mp->fix_size == 8)
7350 align64 = 1;
7351 break;
7354 if (dump_file)
7355 fprintf (dump_file,
7356 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
7357 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
7359 scan = emit_label_after (gen_label_rtx (), scan);
7360 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
7361 scan = emit_label_after (minipool_vector_label, scan);
7363 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
7365 if (mp->refcount > 0)
7367 if (dump_file)
7369 fprintf (dump_file,
7370 ";; Offset %u, min %ld, max %ld ",
7371 (unsigned) mp->offset, (unsigned long) mp->min_address,
7372 (unsigned long) mp->max_address);
7373 arm_print_value (dump_file, mp->value);
7374 fputc ('\n', dump_file);
7377 switch (mp->fix_size)
7379 #ifdef HAVE_consttable_1
7380 case 1:
7381 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
7382 break;
7384 #endif
7385 #ifdef HAVE_consttable_2
7386 case 2:
7387 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
7388 break;
7390 #endif
7391 #ifdef HAVE_consttable_4
7392 case 4:
7393 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
7394 break;
7396 #endif
7397 #ifdef HAVE_consttable_8
7398 case 8:
7399 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
7400 break;
7402 #endif
7403 default:
7404 gcc_unreachable ();
7408 nmp = mp->next;
7409 free (mp);
7412 minipool_vector_head = minipool_vector_tail = NULL;
7413 scan = emit_insn_after (gen_consttable_end (), scan);
7414 scan = emit_barrier_after (scan);
7417 /* Return the cost of forcibly inserting a barrier after INSN. */
7418 static int
7419 arm_barrier_cost (rtx insn)
7421 /* Basing the location of the pool on the loop depth is preferable,
7422 but at the moment, the basic block information seems to be
7423 corrupt by this stage of the compilation. */
7424 int base_cost = 50;
7425 rtx next = next_nonnote_insn (insn);
7427 if (next != NULL && GET_CODE (next) == CODE_LABEL)
7428 base_cost -= 20;
7430 switch (GET_CODE (insn))
7432 case CODE_LABEL:
7433 /* It will always be better to place the table before the label, rather
7434 than after it. */
7435 return 50;
7437 case INSN:
7438 case CALL_INSN:
7439 return base_cost;
7441 case JUMP_INSN:
7442 return base_cost - 10;
7444 default:
7445 return base_cost + 10;
7449 /* Find the best place in the insn stream in the range
7450 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
7451 Create the barrier by inserting a jump and add a new fix entry for
7452 it. */
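/* The inserted sequence has the form
       <selected insn>
       b     Lnew              @ jump around the future pool
       <barrier>               @ minipool is dumped here later
   Lnew:
   (sketch only; the pool itself is emitted by dump_minipool).  */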
7453 static Mfix *
7454 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
7456 HOST_WIDE_INT count = 0;
7457 rtx barrier;
7458 rtx from = fix->insn;
7459 rtx selected = from;
7460 int selected_cost;
7461 HOST_WIDE_INT selected_address;
7462 Mfix * new_fix;
7463 HOST_WIDE_INT max_count = max_address - fix->address;
7464 rtx label = gen_label_rtx ();
7466 selected_cost = arm_barrier_cost (from);
7467 selected_address = fix->address;
7469 while (from && count < max_count)
7471 rtx tmp;
7472 int new_cost;
7474 /* This code shouldn't have been called if there was a natural barrier
7475 within range. */
7476 gcc_assert (GET_CODE (from) != BARRIER);
7478 /* Count the length of this insn. */
7479 count += get_attr_length (from);
7481 /* If there is a jump table, add its length. */
7482 tmp = is_jump_table (from);
7483 if (tmp != NULL)
7485 count += get_jump_table_size (tmp);
7487 /* Jump tables aren't in a basic block, so base the cost on
7488 the dispatch insn. If we select this location, we will
7489 still put the pool after the table. */
7490 new_cost = arm_barrier_cost (from);
7492 if (count < max_count && new_cost <= selected_cost)
7494 selected = tmp;
7495 selected_cost = new_cost;
7496 selected_address = fix->address + count;
7499 /* Continue after the dispatch table. */
7500 from = NEXT_INSN (tmp);
7501 continue;
7504 new_cost = arm_barrier_cost (from);
7506 if (count < max_count && new_cost <= selected_cost)
7508 selected = from;
7509 selected_cost = new_cost;
7510 selected_address = fix->address + count;
7513 from = NEXT_INSN (from);
7516 /* Create a new JUMP_INSN that branches around a barrier. */
7517 from = emit_jump_insn_after (gen_jump (label), selected);
7518 JUMP_LABEL (from) = label;
7519 barrier = emit_barrier_after (from);
7520 emit_label_after (label, barrier);
7522 /* Create a minipool barrier entry for the new barrier. */
7523 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
7524 new_fix->insn = barrier;
7525 new_fix->address = selected_address;
7526 new_fix->next = fix->next;
7527 fix->next = new_fix;
7529 return new_fix;
7532 /* Record that there is a natural barrier in the insn stream at
7533 ADDRESS. */
7534 static void
7535 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
7537 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
7539 fix->insn = insn;
7540 fix->address = address;
7542 fix->next = NULL;
7543 if (minipool_fix_head != NULL)
7544 minipool_fix_tail->next = fix;
7545 else
7546 minipool_fix_head = fix;
7548 minipool_fix_tail = fix;
7551 /* Record INSN, which will need fixing up to load a value from the
7552 minipool. ADDRESS is the offset of the insn since the start of the
7553 function; LOC is a pointer to the part of the insn which requires
7554 fixing; VALUE is the constant that must be loaded, which is of type
7555 MODE. */
7556 static void
7557 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
7558 enum machine_mode mode, rtx value)
7560 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
7562 #ifdef AOF_ASSEMBLER
7563 /* PIC symbol references need to be converted into offsets into the
7564 based area. */
7565 /* XXX This shouldn't be done here. */
7566 if (flag_pic && GET_CODE (value) == SYMBOL_REF)
7567 value = aof_pic_entry (value);
7568 #endif /* AOF_ASSEMBLER */
7570 fix->insn = insn;
7571 fix->address = address;
7572 fix->loc = loc;
7573 fix->mode = mode;
7574 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
7575 fix->value = value;
7576 fix->forwards = get_attr_pool_range (insn);
7577 fix->backwards = get_attr_neg_pool_range (insn);
7578 fix->minipool = NULL;
7580 /* If an insn doesn't have a range defined for it, then it isn't
7581 expecting to be reworked by this code. Better to stop now than
7582 to generate duff assembly code. */
7583 gcc_assert (fix->forwards || fix->backwards);
7585 /* With AAPCS/iWMMXt enabled, the pool is aligned to an 8-byte boundary.
7586 So there might be an empty word before the start of the pool.
7587 Hence we reduce the forward range by 4 to allow for this
7588 possibility. */
7589 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size == 8)
7590 fix->forwards -= 4;
7592 if (dump_file)
7594 fprintf (dump_file,
7595 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
7596 GET_MODE_NAME (mode),
7597 INSN_UID (insn), (unsigned long) address,
7598 -1 * (long)fix->backwards, (long)fix->forwards);
7599 arm_print_value (dump_file, fix->value);
7600 fprintf (dump_file, "\n");
7603 /* Add it to the chain of fixes. */
7604 fix->next = NULL;
7606 if (minipool_fix_head != NULL)
7607 minipool_fix_tail->next = fix;
7608 else
7609 minipool_fix_head = fix;
7611 minipool_fix_tail = fix;
7614 /* Return the cost of synthesizing a 64-bit constant VAL inline.
7615 Returns the number of insns needed, or 99 if we don't know how to
7616 do it. */
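/* For example, the DImode constant 0x100000001 splits into two SImode
   halves of 1; each half is a single MOV, so the cost returned is 2.  */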
7618 arm_const_double_inline_cost (rtx val)
7620 rtx lowpart, highpart;
7621 enum machine_mode mode;
7623 mode = GET_MODE (val);
7625 if (mode == VOIDmode)
7626 mode = DImode;
7628 gcc_assert (GET_MODE_SIZE (mode) == 8);
7630 lowpart = gen_lowpart (SImode, val);
7631 highpart = gen_highpart_mode (SImode, mode, val);
7633 gcc_assert (GET_CODE (lowpart) == CONST_INT);
7634 gcc_assert (GET_CODE (highpart) == CONST_INT);
7636 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
7637 NULL_RTX, NULL_RTX, 0, 0)
7638 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
7639 NULL_RTX, NULL_RTX, 0, 0));
7642 /* Return true if it is worthwhile to split a 64-bit constant into two
7643 32-bit operations. This is the case if optimizing for size, or
7644 if we have load delay slots, or if one 32-bit part can be done with
7645 a single data operation. */
7646 bool
7647 arm_const_double_by_parts (rtx val)
7649 enum machine_mode mode = GET_MODE (val);
7650 rtx part;
7652 if (optimize_size || arm_ld_sched)
7653 return true;
7655 if (mode == VOIDmode)
7656 mode = DImode;
7658 part = gen_highpart_mode (SImode, mode, val);
7660 gcc_assert (GET_CODE (part) == CONST_INT);
7662 if (const_ok_for_arm (INTVAL (part))
7663 || const_ok_for_arm (~INTVAL (part)))
7664 return true;
7666 part = gen_lowpart (SImode, val);
7668 gcc_assert (GET_CODE (part) == CONST_INT);
7670 if (const_ok_for_arm (INTVAL (part))
7671 || const_ok_for_arm (~INTVAL (part)))
7672 return true;
7674 return false;
7677 /* Scan INSN and note any of its operands that need fixing.
7678 If DO_PUSHES is false we do not actually push any of the fixups
7679 needed. The function returns TRUE if any fixups were needed/pushed.
7680 This is used by arm_memory_load_p() which needs to know about loads
7681 of constants that will be converted into minipool loads. */
7682 static bool
7683 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
7685 bool result = false;
7686 int opno;
7688 extract_insn (insn);
7690 if (!constrain_operands (1))
7691 fatal_insn_not_found (insn);
7693 if (recog_data.n_alternatives == 0)
7694 return false;
7696 /* Fill in recog_op_alt with information about the constraints of
7697 this insn. */
7698 preprocess_constraints ();
7700 for (opno = 0; opno < recog_data.n_operands; opno++)
7702 /* Things we need to fix can only occur in inputs. */
7703 if (recog_data.operand_type[opno] != OP_IN)
7704 continue;
7706 /* If this alternative is a memory reference, then any mention
7707 of constants in this alternative is really to fool reload
7708 into allowing us to accept one there. We need to fix them up
7709 now so that we output the right code. */
7710 if (recog_op_alt[opno][which_alternative].memory_ok)
7712 rtx op = recog_data.operand[opno];
7714 if (CONSTANT_P (op))
7716 if (do_pushes)
7717 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
7718 recog_data.operand_mode[opno], op);
7719 result = true;
7721 else if (GET_CODE (op) == MEM
7722 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
7723 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
7725 if (do_pushes)
7727 rtx cop = avoid_constant_pool_reference (op);
7729 /* Casting the address of something to a mode narrower
7730 than a word can cause avoid_constant_pool_reference()
7731 to return the pool reference itself. That's no good to
7732 us here. Let's just hope that we can use the
7733 constant pool value directly. */
7734 if (op == cop)
7735 cop = get_pool_constant (XEXP (op, 0));
7737 push_minipool_fix (insn, address,
7738 recog_data.operand_loc[opno],
7739 recog_data.operand_mode[opno], cop);
7742 result = true;
7747 return result;
7750 /* GCC puts the pool in the wrong place for ARM, since we can only
7751 load addresses a limited distance around the pc. We do some
7752 special munging to move the constant pool values to the correct
7753 point in the code. */
7754 static void
7755 arm_reorg (void)
7757 rtx insn;
7758 HOST_WIDE_INT address = 0;
7759 Mfix * fix;
7761 minipool_fix_head = minipool_fix_tail = NULL;
7763 /* The first insn must always be a note, or the code below won't
7764 scan it properly. */
7765 insn = get_insns ();
7766 gcc_assert (GET_CODE (insn) == NOTE);
7768 /* Scan all the insns and record the operands that will need fixing. */
7769 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
7771 if (TARGET_CIRRUS_FIX_INVALID_INSNS
7772 && (arm_cirrus_insn_p (insn)
7773 || GET_CODE (insn) == JUMP_INSN
7774 || arm_memory_load_p (insn)))
7775 cirrus_reorg (insn);
7777 if (GET_CODE (insn) == BARRIER)
7778 push_minipool_barrier (insn, address);
7779 else if (INSN_P (insn))
7781 rtx table;
7783 note_invalid_constants (insn, address, true);
7784 address += get_attr_length (insn);
7786 /* If the insn is a vector jump, add the size of the table
7787 and skip the table. */
7788 if ((table = is_jump_table (insn)) != NULL)
7790 address += get_jump_table_size (table);
7791 insn = table;
7796 fix = minipool_fix_head;
7798 /* Now scan the fixups and perform the required changes. */
7799 while (fix)
7801 Mfix * ftmp;
7802 Mfix * fdel;
7803 Mfix * last_added_fix;
7804 Mfix * last_barrier = NULL;
7805 Mfix * this_fix;
7807 /* Skip any further barriers before the next fix. */
7808 while (fix && GET_CODE (fix->insn) == BARRIER)
7809 fix = fix->next;
7811 /* No more fixes. */
7812 if (fix == NULL)
7813 break;
7815 last_added_fix = NULL;
7817 for (ftmp = fix; ftmp; ftmp = ftmp->next)
7819 if (GET_CODE (ftmp->insn) == BARRIER)
7821 if (ftmp->address >= minipool_vector_head->max_address)
7822 break;
7824 last_barrier = ftmp;
7826 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
7827 break;
7829 last_added_fix = ftmp; /* Keep track of the last fix added. */
7832 /* If we found a barrier, drop back to that; any fixes that we
7833 could have reached but come after the barrier will now go in
7834 the next mini-pool. */
7835 if (last_barrier != NULL)
7837 /* Reduce the refcount for those fixes that won't go into this
7838 pool after all. */
7839 for (fdel = last_barrier->next;
7840 fdel && fdel != ftmp;
7841 fdel = fdel->next)
7843 fdel->minipool->refcount--;
7844 fdel->minipool = NULL;
7847 ftmp = last_barrier;
7849 else
7851 /* ftmp is the first fix that we can't fit into this pool and
7852 there are no natural barriers that we could use. Insert a
7853 new barrier in the code somewhere between the previous
7854 fix and this one, and arrange to jump around it. */
7855 HOST_WIDE_INT max_address;
7857 /* The last item on the list of fixes must be a barrier, so
7858 we can never run off the end of the list of fixes without
7859 last_barrier being set. */
7860 gcc_assert (ftmp);
7862 max_address = minipool_vector_head->max_address;
7863 /* Check that there isn't another fix that is in range that
7864 we couldn't fit into this pool because the pool was
7865 already too large: we need to put the pool before such an
7866 instruction. */
7867 if (ftmp->address < max_address)
7868 max_address = ftmp->address;
7870 last_barrier = create_fix_barrier (last_added_fix, max_address);
7873 assign_minipool_offsets (last_barrier);
7875 while (ftmp)
7877 if (GET_CODE (ftmp->insn) != BARRIER
7878 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
7879 == NULL))
7880 break;
7882 ftmp = ftmp->next;
7885 /* Scan over the fixes we have identified for this pool, fixing them
7886 up and adding the constants to the pool itself. */
7887 for (this_fix = fix; this_fix && ftmp != this_fix;
7888 this_fix = this_fix->next)
7889 if (GET_CODE (this_fix->insn) != BARRIER)
7891 rtx addr
7892 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
7893 minipool_vector_label),
7894 this_fix->minipool->offset);
7895 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
7898 dump_minipool (last_barrier->insn);
7899 fix = ftmp;
7902 /* From now on we must synthesize any constants that we can't handle
7903 directly. This can happen if the RTL gets split during final
7904 instruction generation. */
7905 after_arm_reorg = 1;
7907 /* Free the minipool memory. */
7908 obstack_free (&minipool_obstack, minipool_startobj);
7911 /* Routines to output assembly language. */
7913 /* If the rtx is the correct value then return the string of the number.
7914 In this way we can ensure that valid double constants are generated even
7915 when cross compiling. */
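/* The candidate values are the eight FPA immediates built by
   init_fp_table earlier in this file: 0, 1, 2, 3, 4, 5, 0.5 and 10.  */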
7916 const char *
7917 fp_immediate_constant (rtx x)
7919 REAL_VALUE_TYPE r;
7920 int i;
7922 if (!fp_consts_inited)
7923 init_fp_table ();
7925 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7926 for (i = 0; i < 8; i++)
7927 if (REAL_VALUES_EQUAL (r, values_fp[i]))
7928 return strings_fp[i];
7930 gcc_unreachable ();
7933 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
7934 static const char *
7935 fp_const_from_val (REAL_VALUE_TYPE *r)
7937 int i;
7939 if (!fp_consts_inited)
7940 init_fp_table ();
7942 for (i = 0; i < 8; i++)
7943 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
7944 return strings_fp[i];
7946 gcc_unreachable ();
7949 /* Output the operands of a LDM/STM instruction to STREAM.
7950 MASK is the ARM register set mask of which only bits 0-15 are important.
7951 REG is the base register, either the frame pointer or the stack pointer;
7952 INSTR is the possibly suffixed load or store instruction. */
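/* Typical output (illustrative; the mnemonic and writeback come from
   INSTR):
       ldmfd   sp!, {r4, r5, r6, pc}  */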
7954 static void
7955 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
7956 unsigned long mask)
7958 unsigned i;
7959 bool not_first = FALSE;
7961 fputc ('\t', stream);
7962 asm_fprintf (stream, instr, reg);
7963 fputs (", {", stream);
7965 for (i = 0; i <= LAST_ARM_REGNUM; i++)
7966 if (mask & (1 << i))
7968 if (not_first)
7969 fprintf (stream, ", ");
7971 asm_fprintf (stream, "%r", i);
7972 not_first = TRUE;
7975 fprintf (stream, "}\n");
7979 /* Output a FLDMX instruction to STREAM.
7980 BASE is the register containing the address.
7981 REG and COUNT specify the register range.
7982 Extra registers may be added to avoid hardware bugs. */
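/* Typical output, e.g. when reloading three callee-saved VFP pairs:
       fldmfdx sp!, {d8, d9, d10}
   (on pre-ARMv6 cores a two-register transfer is widened to three,
   see the erratum workaround below).  */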
7984 static void
7985 arm_output_fldmx (FILE * stream, unsigned int base, int reg, int count)
7987 int i;
7989 /* Workaround ARM10 VFPr1 bug. */
7990 if (count == 2 && !arm_arch6)
7992 if (reg == 15)
7993 reg--;
7994 count++;
7997 fputc ('\t', stream);
7998 asm_fprintf (stream, "fldmfdx\t%r!, {", base);
8000 for (i = reg; i < reg + count; i++)
8002 if (i > reg)
8003 fputs (", ", stream);
8004 asm_fprintf (stream, "d%d", i);
8006 fputs ("}\n", stream);
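/* For example: on a pre-ARMv6 core, arm_output_fldmx (f, SP_REGNUM, 4, 2)
   widens the two-register transfer to three because of the ARM10 VFPr1
   erratum handled above, and emits

	fldmfdx	sp!, {d4, d5, d6}

   whereas on ARMv6 the same call emits only {d4, d5}.  */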
8011 /* Output the assembly for a VFP store multiple (FSTMX). */
8013 const char *
8014 vfp_output_fstmx (rtx * operands)
8016 char pattern[100];
8017 int p;
8018 int base;
8019 int i;
8021 strcpy (pattern, "fstmfdx\t%m0!, {%P1");
8022 p = strlen (pattern);
8024 gcc_assert (GET_CODE (operands[1]) == REG);
8026 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
8027 for (i = 1; i < XVECLEN (operands[2], 0); i++)
8029 p += sprintf (&pattern[p], ", d%d", base + i);
8031 strcpy (&pattern[p], "}");
8033 output_asm_insn (pattern, operands);
8034 return "";
8038 /* Emit RTL to save a block of VFP register pairs to the stack. Returns the
8039 number of bytes pushed. */
8041 static int
8042 vfp_emit_fstmx (int base_reg, int count)
8044 rtx par;
8045 rtx dwarf;
8046 rtx tmp, reg;
8047 int i;
8049 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
8050 register pairs are stored by a store multiple insn. We avoid this
8051 by pushing an extra pair. */
8052 if (count == 2 && !arm_arch6)
8054 if (base_reg == LAST_VFP_REGNUM - 3)
8055 base_reg -= 2;
8056 count++;
8059 /* ??? The frame layout is implementation defined. We describe
8060 standard format 1 (equivalent to a FSTMD insn and unused pad word).
8061 We really need some way of representing the whole block so that the
8062 unwinder can figure it out at runtime. */
8063 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
8064 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
8066 reg = gen_rtx_REG (DFmode, base_reg);
8067 base_reg += 2;
8069 XVECEXP (par, 0, 0)
8070 = gen_rtx_SET (VOIDmode,
8071 gen_rtx_MEM (BLKmode,
8072 gen_rtx_PRE_DEC (BLKmode, stack_pointer_rtx)),
8073 gen_rtx_UNSPEC (BLKmode,
8074 gen_rtvec (1, reg),
8075 UNSPEC_PUSH_MULT));
8077 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8078 gen_rtx_PLUS (SImode, stack_pointer_rtx,
8079 GEN_INT (-(count * 8 + 4))));
8080 RTX_FRAME_RELATED_P (tmp) = 1;
8081 XVECEXP (dwarf, 0, 0) = tmp;
8083 tmp = gen_rtx_SET (VOIDmode,
8084 gen_rtx_MEM (DFmode, stack_pointer_rtx),
8085 reg);
8086 RTX_FRAME_RELATED_P (tmp) = 1;
8087 XVECEXP (dwarf, 0, 1) = tmp;
8089 for (i = 1; i < count; i++)
8091 reg = gen_rtx_REG (DFmode, base_reg);
8092 base_reg += 2;
8093 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
8095 tmp = gen_rtx_SET (VOIDmode,
8096 gen_rtx_MEM (DFmode,
8097 gen_rtx_PLUS (SImode,
8098 stack_pointer_rtx,
8099 GEN_INT (i * 8))),
8100 reg);
8101 RTX_FRAME_RELATED_P (tmp) = 1;
8102 XVECEXP (dwarf, 0, i + 1) = tmp;
8105 par = emit_insn (par);
8106 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
8107 REG_NOTES (par));
8108 RTX_FRAME_RELATED_P (par) = 1;
8110 return count * 8 + 4;
8114 /* Output a 'call' insn. */
8115 const char *
8116 output_call (rtx *operands)
8118 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
8120 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
8121 if (REGNO (operands[0]) == LR_REGNUM)
8123 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
8124 output_asm_insn ("mov%?\t%0, %|lr", operands);
8127 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
8129 if (TARGET_INTERWORK || arm_arch4t)
8130 output_asm_insn ("bx%?\t%0", operands);
8131 else
8132 output_asm_insn ("mov%?\t%|pc, %0", operands);
8134 return "";
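/* As an illustration (pre-ARMv5 only, since v5 patterns call BLX directly):
   a call through r2 expands to

	mov	lr, pc
	bx	r2		@ TARGET_INTERWORK or arm_arch4t

   or to "mov lr, pc" followed by "mov pc, r2" otherwise.  */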
8137 /* Output a 'call' insn that is a reference in memory. */
8138 const char *
8139 output_call_mem (rtx *operands)
8141 if (TARGET_INTERWORK && !arm_arch5)
8143 output_asm_insn ("ldr%?\t%|ip, %0", operands);
8144 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
8145 output_asm_insn ("bx%?\t%|ip", operands);
8147 else if (regno_use_in (LR_REGNUM, operands[0]))
8149 /* LR is used in the memory address. We load the address in the
8150 first instruction. It's safe to use IP as the target of the
8151 load since the call will kill it anyway. */
8152 output_asm_insn ("ldr%?\t%|ip, %0", operands);
8153 if (arm_arch5)
8154 output_asm_insn ("blx%?\t%|ip", operands);
8155 else
8157 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
8158 if (arm_arch4t)
8159 output_asm_insn ("bx%?\t%|ip", operands);
8160 else
8161 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
8164 else
8166 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
8167 output_asm_insn ("ldr%?\t%|pc, %0", operands);
8170 return "";
8174 /* Output a move from arm registers to an fpa register.
8175 OPERANDS[0] is an fpa register.
8176 OPERANDS[1] is the first of the three consecutive arm registers holding the value.
8177 const char *
8178 output_mov_long_double_fpa_from_arm (rtx *operands)
8180 int arm_reg0 = REGNO (operands[1]);
8181 rtx ops[3];
8183 gcc_assert (arm_reg0 != IP_REGNUM);
8185 ops[0] = gen_rtx_REG (SImode, arm_reg0);
8186 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
8187 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
8189 output_asm_insn ("stm%?fd\t%|sp!, {%0, %1, %2}", ops);
8190 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
8192 return "";
8195 /* Output a move from an fpa register to arm registers.
8196 OPERANDS[0] is the first of the three consecutive arm registers receiving the value.
8197 OPERANDS[1] is an fpa register. */
8198 const char *
8199 output_mov_long_double_arm_from_fpa (rtx *operands)
8201 int arm_reg0 = REGNO (operands[0]);
8202 rtx ops[3];
8204 gcc_assert (arm_reg0 != IP_REGNUM);
8206 ops[0] = gen_rtx_REG (SImode, arm_reg0);
8207 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
8208 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
8210 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
8211 output_asm_insn ("ldm%?fd\t%|sp!, {%0, %1, %2}", ops);
8212 return "";
8215 /* Output a move of a long double from one set of arm registers to another.
8216 OPERANDS[0] is the destination.
8217 OPERANDS[1] is the source. */
8218 const char *
8219 output_mov_long_double_arm_from_arm (rtx *operands)
8221 /* We have to be careful here because the two might overlap. */
8222 int dest_start = REGNO (operands[0]);
8223 int src_start = REGNO (operands[1]);
8224 rtx ops[2];
8225 int i;
8227 if (dest_start < src_start)
8229 for (i = 0; i < 3; i++)
8231 ops[0] = gen_rtx_REG (SImode, dest_start + i);
8232 ops[1] = gen_rtx_REG (SImode, src_start + i);
8233 output_asm_insn ("mov%?\t%0, %1", ops);
8236 else
8238 for (i = 2; i >= 0; i--)
8240 ops[0] = gen_rtx_REG (SImode, dest_start + i);
8241 ops[1] = gen_rtx_REG (SImode, src_start + i);
8242 output_asm_insn ("mov%?\t%0, %1", ops);
8246 return "";
8250 /* Output a move from arm registers to an fpa register.
8251 OPERANDS[0] is an fpa register.
8252 OPERANDS[1] is the first register of an arm register pair.
8253 const char *
8254 output_mov_double_fpa_from_arm (rtx *operands)
8256 int arm_reg0 = REGNO (operands[1]);
8257 rtx ops[2];
8259 gcc_assert (arm_reg0 != IP_REGNUM);
8261 ops[0] = gen_rtx_REG (SImode, arm_reg0);
8262 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
8263 output_asm_insn ("stm%?fd\t%|sp!, {%0, %1}", ops);
8264 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
8265 return "";
8268 /* Output a move from an fpa register to arm registers.
8269 OPERANDS[0] is the first register of an arm register pair.
8270 OPERANDS[1] is an fpa register. */
8271 const char *
8272 output_mov_double_arm_from_fpa (rtx *operands)
8274 int arm_reg0 = REGNO (operands[0]);
8275 rtx ops[2];
8277 gcc_assert (arm_reg0 != IP_REGNUM);
8279 ops[0] = gen_rtx_REG (SImode, arm_reg0);
8280 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
8281 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
8282 output_asm_insn ("ldm%?fd\t%|sp!, {%0, %1}", ops);
8283 return "";
8286 /* Output a move between double words.
8287 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
8288 or MEM<-REG and all MEMs must be offsettable addresses. */
8289 const char *
8290 output_move_double (rtx *operands)
8292 enum rtx_code code0 = GET_CODE (operands[0]);
8293 enum rtx_code code1 = GET_CODE (operands[1]);
8294 rtx otherops[3];
8296 if (code0 == REG)
8298 int reg0 = REGNO (operands[0]);
8300 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
8302 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
8304 switch (GET_CODE (XEXP (operands[1], 0)))
8306 case REG:
8307 output_asm_insn ("ldm%?ia\t%m1, %M0", operands);
8308 break;
8310 case PRE_INC:
8311 gcc_assert (TARGET_LDRD);
8312 output_asm_insn ("ldr%?d\t%0, [%m1, #8]!", operands);
8313 break;
8315 case PRE_DEC:
8316 output_asm_insn ("ldm%?db\t%m1!, %M0", operands);
8317 break;
8319 case POST_INC:
8320 output_asm_insn ("ldm%?ia\t%m1!, %M0", operands);
8321 break;
8323 case POST_DEC:
8324 gcc_assert (TARGET_LDRD);
8325 output_asm_insn ("ldr%?d\t%0, [%m1], #-8", operands);
8326 break;
8328 case PRE_MODIFY:
8329 case POST_MODIFY:
8330 otherops[0] = operands[0];
8331 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
8332 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
8334 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
8336 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
8338 /* Registers overlap so split out the increment. */
8339 output_asm_insn ("add%?\t%1, %1, %2", otherops);
8340 output_asm_insn ("ldr%?d\t%0, [%1] @split", otherops);
8342 else
8343 output_asm_insn ("ldr%?d\t%0, [%1, %2]!", otherops);
8345 else
8347 /* We only allow constant increments, so this is safe. */
8348 output_asm_insn ("ldr%?d\t%0, [%1], %2", otherops);
8350 break;
8352 case LABEL_REF:
8353 case CONST:
8354 output_asm_insn ("adr%?\t%0, %1", operands);
8355 output_asm_insn ("ldm%?ia\t%0, %M0", operands);
8356 break;
8358 default:
8359 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
8360 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
8362 otherops[0] = operands[0];
8363 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
8364 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
8366 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
8368 if (GET_CODE (otherops[2]) == CONST_INT)
8370 switch ((int) INTVAL (otherops[2]))
8372 case -8:
8373 output_asm_insn ("ldm%?db\t%1, %M0", otherops);
8374 return "";
8375 case -4:
8376 output_asm_insn ("ldm%?da\t%1, %M0", otherops);
8377 return "";
8378 case 4:
8379 output_asm_insn ("ldm%?ib\t%1, %M0", otherops);
8380 return "";
8383 if (TARGET_LDRD
8384 && (GET_CODE (otherops[2]) == REG
8385 || (GET_CODE (otherops[2]) == CONST_INT
8386 && INTVAL (otherops[2]) > -256
8387 && INTVAL (otherops[2]) < 256)))
8389 if (reg_overlap_mentioned_p (otherops[0],
8390 otherops[2]))
8392 /* Swap base and index registers over to
8393 avoid a conflict. */
8394 otherops[1] = XEXP (XEXP (operands[1], 0), 1);
8395 otherops[2] = XEXP (XEXP (operands[1], 0), 0);
8398 /* If both registers conflict, it will usually
8399 have been fixed by a splitter. */
8400 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
8402 output_asm_insn ("add%?\t%1, %1, %2", otherops);
8403 output_asm_insn ("ldr%?d\t%0, [%1]",
8404 otherops);
8406 else
8407 output_asm_insn ("ldr%?d\t%0, [%1, %2]", otherops);
8408 return "";
8411 if (GET_CODE (otherops[2]) == CONST_INT)
8413 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
8414 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
8415 else
8416 output_asm_insn ("add%?\t%0, %1, %2", otherops);
8418 else
8419 output_asm_insn ("add%?\t%0, %1, %2", otherops);
8421 else
8422 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
8424 return "ldm%?ia\t%0, %M0";
8426 else
8428 otherops[1] = adjust_address (operands[1], SImode, 4);
8429 /* Take care of overlapping base/data reg. */
8430 if (reg_mentioned_p (operands[0], operands[1]))
8432 output_asm_insn ("ldr%?\t%0, %1", otherops);
8433 output_asm_insn ("ldr%?\t%0, %1", operands);
8435 else
8437 output_asm_insn ("ldr%?\t%0, %1", operands);
8438 output_asm_insn ("ldr%?\t%0, %1", otherops);
8443 else
8445 /* Constraints should ensure this. */
8446 gcc_assert (code0 == MEM && code1 == REG);
8447 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
8449 switch (GET_CODE (XEXP (operands[0], 0)))
8451 case REG:
8452 output_asm_insn ("stm%?ia\t%m0, %M1", operands);
8453 break;
8455 case PRE_INC:
8456 gcc_assert (TARGET_LDRD);
8457 output_asm_insn ("str%?d\t%1, [%m0, #8]!", operands);
8458 break;
8460 case PRE_DEC:
8461 output_asm_insn ("stm%?db\t%m0!, %M1", operands);
8462 break;
8464 case POST_INC:
8465 output_asm_insn ("stm%?ia\t%m0!, %M1", operands);
8466 break;
8468 case POST_DEC:
8469 gcc_assert (TARGET_LDRD);
8470 output_asm_insn ("str%?d\t%1, [%m0], #-8", operands);
8471 break;
8473 case PRE_MODIFY:
8474 case POST_MODIFY:
8475 otherops[0] = operands[1];
8476 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
8477 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
8479 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
8480 output_asm_insn ("str%?d\t%0, [%1, %2]!", otherops);
8481 else
8482 output_asm_insn ("str%?d\t%0, [%1], %2", otherops);
8483 break;
8485 case PLUS:
8486 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
8487 if (GET_CODE (otherops[2]) == CONST_INT)
8489 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
8491 case -8:
8492 output_asm_insn ("stm%?db\t%m0, %M1", operands);
8493 return "";
8495 case -4:
8496 output_asm_insn ("stm%?da\t%m0, %M1", operands);
8497 return "";
8499 case 4:
8500 output_asm_insn ("stm%?ib\t%m0, %M1", operands);
8501 return "";
8504 if (TARGET_LDRD
8505 && (GET_CODE (otherops[2]) == REG
8506 || (GET_CODE (otherops[2]) == CONST_INT
8507 && INTVAL (otherops[2]) > -256
8508 && INTVAL (otherops[2]) < 256)))
8510 otherops[0] = operands[1];
8511 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
8512 output_asm_insn ("str%?d\t%0, [%1, %2]", otherops);
8513 return "";
8515 /* Fall through */
8517 default:
8518 otherops[0] = adjust_address (operands[0], SImode, 4);
8519 otherops[1] = gen_rtx_REG (SImode, 1 + REGNO (operands[1]));
8520 output_asm_insn ("str%?\t%1, %0", operands);
8521 output_asm_insn ("str%?\t%1, %0", otherops);
8525 return "";
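/* For illustration, the simplest cases above reduce to a single load or
   store multiple: a DImode load from a plain base register, say r2, into
   the pair r0/r1 is emitted as roughly

	ldmia	r2, {r0-r1}

   and the corresponding store as "stmia r2, {r0-r1}".  */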
8528 /* Output an ADD r, s, #n where n may be too big for one instruction.
8529 If n is zero and the source and destination are the same register, output nothing. */
8530 const char *
8531 output_add_immediate (rtx *operands)
8533 HOST_WIDE_INT n = INTVAL (operands[2]);
8535 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
8537 if (n < 0)
8538 output_multi_immediate (operands,
8539 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
8540 -n);
8541 else
8542 output_multi_immediate (operands,
8543 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
8547 return "";
8550 /* Output a multiple immediate operation.
8551 OPERANDS is the vector of operands referred to in the output patterns.
8552 INSTR1 is the output pattern to use for the first constant.
8553 INSTR2 is the output pattern to use for subsequent constants.
8554 IMMED_OP is the index of the constant slot in OPERANDS.
8555 N is the constant value. */
8556 static const char *
8557 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
8558 int immed_op, HOST_WIDE_INT n)
8560 #if HOST_BITS_PER_WIDE_INT > 32
8561 n &= 0xffffffff;
8562 #endif
8564 if (n == 0)
8566 /* Quick and easy output. */
8567 operands[immed_op] = const0_rtx;
8568 output_asm_insn (instr1, operands);
8570 else
8572 int i;
8573 const char * instr = instr1;
8575 /* Note that n is never zero here (which would give no output). */
8576 for (i = 0; i < 32; i += 2)
8578 if (n & (3 << i))
8580 operands[immed_op] = GEN_INT (n & (255 << i));
8581 output_asm_insn (instr, operands);
8582 instr = instr2;
8583 i += 6;
8588 return "";
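/* A worked example: r0 = r1 + 0x458 cannot be done with one ADD, because
   0x458 is not an 8-bit value rotated by an even amount.  The loop above
   therefore peels the constant into byte-sized chunks on even bit
   boundaries, so output_add_immediate emits

	add	r0, r1, #88	@ 0x058
	add	r0, r0, #1024	@ 0x400
*/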
8591 /* Return the appropriate ARM instruction for the operation code.
8592 The returned result should not be overwritten. OP is the rtx of the
8593 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
8594 was shifted. */
8595 const char *
8596 arithmetic_instr (rtx op, int shift_first_arg)
8598 switch (GET_CODE (op))
8600 case PLUS:
8601 return "add";
8603 case MINUS:
8604 return shift_first_arg ? "rsb" : "sub";
8606 case IOR:
8607 return "orr";
8609 case XOR:
8610 return "eor";
8612 case AND:
8613 return "and";
8615 default:
8616 gcc_unreachable ();
8620 /* Ensure valid constant shifts and return the appropriate shift mnemonic
8621 for the operation code. The returned result should not be overwritten.
8622 OP is the shift rtx.
8623 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
8624 constant shift amount otherwise. */
8625 static const char *
8626 shift_op (rtx op, HOST_WIDE_INT *amountp)
8628 const char * mnem;
8629 enum rtx_code code = GET_CODE (op);
8631 switch (GET_CODE (XEXP (op, 1)))
8633 case REG:
8634 case SUBREG:
8635 *amountp = -1;
8636 break;
8638 case CONST_INT:
8639 *amountp = INTVAL (XEXP (op, 1));
8640 break;
8642 default:
8643 gcc_unreachable ();
8646 switch (code)
8648 case ASHIFT:
8649 mnem = "asl";
8650 break;
8652 case ASHIFTRT:
8653 mnem = "asr";
8654 break;
8656 case LSHIFTRT:
8657 mnem = "lsr";
8658 break;
8660 case ROTATE:
8661 gcc_assert (*amountp != -1);
8662 *amountp = 32 - *amountp;
8664 /* Fall through. */
8666 case ROTATERT:
8667 mnem = "ror";
8668 break;
8670 case MULT:
8671 /* We never have to worry about the amount being other than a
8672 power of 2, since this case can never be reloaded from a reg. */
8673 gcc_assert (*amountp != -1);
8674 *amountp = int_log2 (*amountp);
8675 return "asl";
8677 default:
8678 gcc_unreachable ();
8681 if (*amountp != -1)
8683 /* This is not 100% correct, but follows from the desire to merge
8684 multiplication by a power of 2 with the recognizer for a
8685 shift. >=32 is not a valid shift for "asl", so we must try and
8686 output a shift that produces the correct arithmetical result.
8687 Using lsr #32 is identical except for the fact that the carry bit
8688 is not set correctly if we set the flags; but we never use the
8689 carry bit from such an operation, so we can ignore that. */
8690 if (code == ROTATERT)
8691 /* Rotate is just modulo 32. */
8692 *amountp &= 31;
8693 else if (*amountp != (*amountp & 31))
8695 if (code == ASHIFT)
8696 mnem = "lsr";
8697 *amountp = 32;
8700 /* Shifts of 0 are no-ops. */
8701 if (*amountp == 0)
8702 return NULL;
8705 return mnem;
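/* Some examples of the mapping performed above:
	(ashiftrt x (const_int 2))	-> "asr", *amountp == 2
	(mult x (const_int 8))		-> "asl", *amountp == 3
	(rotate x (const_int 8))	-> "ror", *amountp == 24
	(ashift x (const_int 40))	-> "lsr", *amountp == 32
   the last being the out-of-range case described in the comment above.  */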
8708 /* Obtain the shift count corresponding to the power of two in POWER. */
8710 static HOST_WIDE_INT
8711 int_log2 (HOST_WIDE_INT power)
8713 HOST_WIDE_INT shift = 0;
8715 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
8717 gcc_assert (shift <= 31);
8718 shift++;
8721 return shift;
8724 /* Output a .ascii pseudo-op, keeping track of lengths. This is
8725 because /bin/as is horribly restrictive. The judgement about
8726 whether or not each character is 'printable' (and can be output as
8727 is) or not (and must be printed with an octal escape) must be made
8728 with reference to the *host* character set -- the situation is
8729 similar to that discussed in the comments above pp_c_char in
8730 c-pretty-print.c. */
8732 #define MAX_ASCII_LEN 51
8734 void
8735 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
8737 int i;
8738 int len_so_far = 0;
8740 fputs ("\t.ascii\t\"", stream);
8742 for (i = 0; i < len; i++)
8744 int c = p[i];
8746 if (len_so_far >= MAX_ASCII_LEN)
8748 fputs ("\"\n\t.ascii\t\"", stream);
8749 len_so_far = 0;
8752 if (ISPRINT (c))
8754 if (c == '\\' || c == '\"')
8756 putc ('\\', stream);
8757 len_so_far++;
8759 putc (c, stream);
8760 len_so_far++;
8762 else
8764 fprintf (stream, "\\%03o", c);
8765 len_so_far += 4;
8769 fputs ("\"\n", stream);
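/* For example, passing the four bytes 'h', 'i', '"', '\n' through this
   routine produces (assuming the first three count as printable on the
   host):

	.ascii "hi\"\012"
*/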
8772 /* Compute the register save mask for registers 0 through 12
8773 inclusive. This code is used by arm_compute_save_reg_mask. */
8775 static unsigned long
8776 arm_compute_save_reg0_reg12_mask (void)
8778 unsigned long func_type = arm_current_func_type ();
8779 unsigned long save_reg_mask = 0;
8780 unsigned int reg;
8782 if (IS_INTERRUPT (func_type))
8784 unsigned int max_reg;
8785 /* Interrupt functions must not corrupt any registers,
8786 even call clobbered ones. If this is a leaf function
8787 we can just examine the registers used by the RTL, but
8788 otherwise we have to assume that whatever function is
8789 called might clobber anything, and so we have to save
8790 all the call-clobbered registers as well. */
8791 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
8792 /* FIQ handlers have registers r8 - r12 banked, so
8793 we only need to check r0 - r7. Normal ISRs only
8794 bank r14 and r15, so we must check up to r12.
8795 r13 is the stack pointer, which is always preserved,
8796 so we do not need to consider it here. */
8797 max_reg = 7;
8798 else
8799 max_reg = 12;
8801 for (reg = 0; reg <= max_reg; reg++)
8802 if (regs_ever_live[reg]
8803 || (! current_function_is_leaf && call_used_regs [reg]))
8804 save_reg_mask |= (1 << reg);
8806 /* Also save the pic base register if necessary. */
8807 if (flag_pic
8808 && !TARGET_SINGLE_PIC_BASE
8809 && current_function_uses_pic_offset_table)
8810 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
8812 else
8814 /* In the normal case we only need to save those registers
8815 which are call saved and which are used by this function. */
8816 for (reg = 0; reg <= 10; reg++)
8817 if (regs_ever_live[reg] && ! call_used_regs [reg])
8818 save_reg_mask |= (1 << reg);
8820 /* Handle the frame pointer as a special case. */
8821 if (! TARGET_APCS_FRAME
8822 && ! frame_pointer_needed
8823 && regs_ever_live[HARD_FRAME_POINTER_REGNUM]
8824 && ! call_used_regs[HARD_FRAME_POINTER_REGNUM])
8825 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
8827 /* If we aren't loading the PIC register,
8828 don't stack it even though it may be live. */
8829 if (flag_pic
8830 && !TARGET_SINGLE_PIC_BASE
8831 && (regs_ever_live[PIC_OFFSET_TABLE_REGNUM]
8832 || current_function_uses_pic_offset_table))
8833 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
8836 /* Save registers so the exception handler can modify them. */
8837 if (current_function_calls_eh_return)
8839 unsigned int i;
8841 for (i = 0; ; i++)
8843 reg = EH_RETURN_DATA_REGNO (i);
8844 if (reg == INVALID_REGNUM)
8845 break;
8846 save_reg_mask |= 1 << reg;
8850 return save_reg_mask;
8853 /* Compute a bit mask of which registers need to be
8854 saved on the stack for the current function. */
8856 static unsigned long
8857 arm_compute_save_reg_mask (void)
8859 unsigned int save_reg_mask = 0;
8860 unsigned long func_type = arm_current_func_type ();
8862 if (IS_NAKED (func_type))
8863 /* This should never really happen. */
8864 return 0;
8866 /* If we are creating a stack frame, then we must save the frame pointer,
8867 IP (which will hold the old stack pointer), LR and the PC. */
8868 if (frame_pointer_needed)
8869 save_reg_mask |=
8870 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
8871 | (1 << IP_REGNUM)
8872 | (1 << LR_REGNUM)
8873 | (1 << PC_REGNUM);
8875 /* Volatile functions do not return, so there
8876 is no need to save any other registers. */
8877 if (IS_VOLATILE (func_type))
8878 return save_reg_mask;
8880 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
8882 /* Decide if we need to save the link register.
8883 Interrupt routines have their own banked link register,
8884 so they never need to save it.
8885 Otherwise if we do not use the link register we do not need to save
8886 it. If we are pushing other registers onto the stack however, we
8887 can save an instruction in the epilogue by pushing the link register
8888 now and then popping it back into the PC. This incurs extra memory
8889 accesses though, so we only do it when optimizing for size, and only
8890 if we know that we will not need a fancy return sequence. */
8891 if (regs_ever_live [LR_REGNUM]
8892 || (save_reg_mask
8893 && optimize_size
8894 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
8895 && !current_function_calls_eh_return))
8896 save_reg_mask |= 1 << LR_REGNUM;
8898 if (cfun->machine->lr_save_eliminated)
8899 save_reg_mask &= ~ (1 << LR_REGNUM);
8901 if (TARGET_REALLY_IWMMXT
8902 && ((bit_count (save_reg_mask)
8903 + ARM_NUM_INTS (current_function_pretend_args_size)) % 2) != 0)
8905 unsigned int reg;
8907 /* The total number of registers that are going to be pushed
8908 onto the stack is odd. We need to ensure that the stack
8909 is 64-bit aligned before we start to save iWMMXt registers,
8910 and also before we start to create locals. (A local variable
8911 might be a double or long long which we will load/store using
8912 an iWMMXt instruction). Therefore we need to push another
8913 ARM register, so that the stack will be 64-bit aligned. We
8914 try to avoid using the arg registers (r0 - r3) as they might be
8915 used to pass values in a tail call. */
8916 for (reg = 4; reg <= 12; reg++)
8917 if ((save_reg_mask & (1 << reg)) == 0)
8918 break;
8920 if (reg <= 12)
8921 save_reg_mask |= (1 << reg);
8922 else
8924 cfun->machine->sibcall_blocked = 1;
8925 save_reg_mask |= (1 << 3);
8929 return save_reg_mask;
8933 /* Compute a bit mask of which registers need to be
8934 saved on the stack for the current function. */
8935 static unsigned long
8936 thumb_compute_save_reg_mask (void)
8938 unsigned long mask;
8939 unsigned reg;
8941 mask = 0;
8942 for (reg = 0; reg < 12; reg ++)
8943 if (regs_ever_live[reg] && !call_used_regs[reg])
8944 mask |= 1 << reg;
8946 if (flag_pic && !TARGET_SINGLE_PIC_BASE)
8947 mask |= (1 << PIC_OFFSET_TABLE_REGNUM);
8949 if (TARGET_SINGLE_PIC_BASE)
8950 mask &= ~(1 << arm_pic_register);
8952 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
8953 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
8954 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
8956 /* LR will also be pushed if any lo regs are pushed. */
8957 if (mask & 0xff || thumb_force_lr_save ())
8958 mask |= (1 << LR_REGNUM);
8960 /* Make sure we have a low work register if we need one.
8961 We will need one if we are going to push a high register,
8962 but we are not currently intending to push a low register. */
8963 if ((mask & 0xff) == 0
8964 && ((mask & 0x0f00) || TARGET_BACKTRACE))
8966 /* Use thumb_find_work_register to choose which register
8967 we will use. If the register is live then we will
8968 have to push it. Use LAST_LO_REGNUM as our fallback
8969 choice for the register to select. */
8970 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
8972 if (! call_used_regs[reg])
8973 mask |= 1 << reg;
8976 return mask;
8980 /* Return the number of bytes required to save VFP registers. */
8981 static int
8982 arm_get_vfp_saved_size (void)
8984 unsigned int regno;
8985 int count;
8986 int saved;
8988 saved = 0;
8989 /* Space for saved VFP registers. */
8990 if (TARGET_HARD_FLOAT && TARGET_VFP)
8992 count = 0;
8993 for (regno = FIRST_VFP_REGNUM;
8994 regno < LAST_VFP_REGNUM;
8995 regno += 2)
8997 if ((!regs_ever_live[regno] || call_used_regs[regno])
8998 && (!regs_ever_live[regno + 1] || call_used_regs[regno + 1]))
9000 if (count > 0)
9002 /* Workaround ARM10 VFPr1 bug. */
9003 if (count == 2 && !arm_arch6)
9004 count++;
9005 saved += count * 8 + 4;
9007 count = 0;
9009 else
9010 count++;
9012 if (count > 0)
9014 if (count == 2 && !arm_arch6)
9015 count++;
9016 saved += count * 8 + 4;
9019 return saved;
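/* A quick worked example: if the live call-saved VFP registers form a
   single contiguous block of two D registers, count reaches 2; on a
   pre-ARMv6 core the ARM10 VFPr1 workaround above bumps it to 3, giving
   3 * 8 + 4 = 28 bytes (the + 4 being the FSTMX pad word), while on ARMv6
   the same block costs 2 * 8 + 4 = 20 bytes.  */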
9023 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
9024 everything bar the final return instruction. */
9025 const char *
9026 output_return_instruction (rtx operand, int really_return, int reverse)
9028 char conditional[10];
9029 char instr[100];
9030 unsigned reg;
9031 unsigned long live_regs_mask;
9032 unsigned long func_type;
9033 arm_stack_offsets *offsets;
9035 func_type = arm_current_func_type ();
9037 if (IS_NAKED (func_type))
9038 return "";
9040 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
9042 /* If this function was declared non-returning, and we have
9043 found a tail call, then we have to trust that the called
9044 function won't return. */
9045 if (really_return)
9047 rtx ops[2];
9049 /* Otherwise, trap an attempted return by aborting. */
9050 ops[0] = operand;
9051 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
9052 : "abort");
9053 assemble_external_libcall (ops[1]);
9054 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
9057 return "";
9060 gcc_assert (!current_function_calls_alloca || really_return);
9062 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
9064 return_used_this_function = 1;
9066 live_regs_mask = arm_compute_save_reg_mask ();
9068 if (live_regs_mask)
9070 const char * return_reg;
9072 /* If we do not have any special requirements for function exit
9073 (e.g. interworking, or ISR) then we can load the return address
9074 directly into the PC. Otherwise we must load it into LR. */
9075 if (really_return
9076 && ! TARGET_INTERWORK)
9077 return_reg = reg_names[PC_REGNUM];
9078 else
9079 return_reg = reg_names[LR_REGNUM];
9081 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
9083 /* There are three possible reasons for the IP register
9084 being saved. 1) a stack frame was created, in which case
9085 IP contains the old stack pointer, or 2) an ISR routine
9086 corrupted it, or 3) it was saved to align the stack on
9087 iWMMXt. In case 1, restore IP into SP, otherwise just
9088 restore IP. */
9089 if (frame_pointer_needed)
9091 live_regs_mask &= ~ (1 << IP_REGNUM);
9092 live_regs_mask |= (1 << SP_REGNUM);
9094 else
9095 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
9098 /* On some ARM architectures it is faster to use LDR rather than
9099 LDM to load a single register. On other architectures, the
9100 cost is the same. In 26 bit mode, or for exception handlers,
9101 we have to use LDM to load the PC so that the CPSR is also
9102 restored. */
9103 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
9104 if (live_regs_mask == (1U << reg))
9105 break;
9107 if (reg <= LAST_ARM_REGNUM
9108 && (reg != LR_REGNUM
9109 || ! really_return
9110 || ! IS_INTERRUPT (func_type)))
9112 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
9113 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
9115 else
9117 char *p;
9118 int first = 1;
9120 /* Generate the load multiple instruction to restore the
9121 registers. Note we can get here, even if
9122 frame_pointer_needed is true, but only if sp already
9123 points to the base of the saved core registers. */
9124 if (live_regs_mask & (1 << SP_REGNUM))
9126 unsigned HOST_WIDE_INT stack_adjust;
9128 offsets = arm_get_frame_offsets ();
9129 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
9130 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
9132 if (stack_adjust && arm_arch5)
9133 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
9134 else
9136 /* If we can't use ldmib (SA110 bug),
9137 then try to pop r3 instead. */
9138 if (stack_adjust)
9139 live_regs_mask |= 1 << 3;
9140 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
9143 else
9144 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
9146 p = instr + strlen (instr);
9148 for (reg = 0; reg <= SP_REGNUM; reg++)
9149 if (live_regs_mask & (1 << reg))
9151 int l = strlen (reg_names[reg]);
9153 if (first)
9154 first = 0;
9155 else
9157 memcpy (p, ", ", 2);
9158 p += 2;
9161 memcpy (p, "%|", 2);
9162 memcpy (p + 2, reg_names[reg], l);
9163 p += l + 2;
9166 if (live_regs_mask & (1 << LR_REGNUM))
9168 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
9169 /* If returning from an interrupt, restore the CPSR. */
9170 if (IS_INTERRUPT (func_type))
9171 strcat (p, "^");
9173 else
9174 strcpy (p, "}");
9177 output_asm_insn (instr, & operand);
9179 /* See if we need to generate an extra instruction to
9180 perform the actual function return. */
9181 if (really_return
9182 && func_type != ARM_FT_INTERWORKED
9183 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
9185 /* The return has already been handled
9186 by loading the LR into the PC. */
9187 really_return = 0;
9191 if (really_return)
9193 switch ((int) ARM_FUNC_TYPE (func_type))
9195 case ARM_FT_ISR:
9196 case ARM_FT_FIQ:
9197 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
9198 break;
9200 case ARM_FT_INTERWORKED:
9201 sprintf (instr, "bx%s\t%%|lr", conditional);
9202 break;
9204 case ARM_FT_EXCEPTION:
9205 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
9206 break;
9208 default:
9209 /* Use bx if it's available. */
9210 if (arm_arch5 || arm_arch4t)
9211 sprintf (instr, "bx%s\t%%|lr", conditional);
9212 else
9213 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
9214 break;
9217 output_asm_insn (instr, & operand);
9220 return "";
9223 /* Write the function name into the code section, directly preceding
9224 the function prologue.
9226 Code will be output similar to this:
9228 .ascii "arm_poke_function_name", 0
9229 .align
9231 .word 0xff000000 + (t1 - t0)
9232 arm_poke_function_name
9233 mov ip, sp
9234 stmfd sp!, {fp, ip, lr, pc}
9235 sub fp, ip, #4
9237 When performing a stack backtrace, code can inspect the value
9238 of 'pc' stored at 'fp' + 0. If the trace function then looks
9239 at location pc - 12 and the top 8 bits are set, then we know
9240 that there is a function name embedded immediately preceding this
9241 location, and that its length is given by ((pc[-3]) & ~0xff000000).
9243 We assume that pc is declared as a pointer to an unsigned long.
9245 It is of no benefit to output the function name if we are assembling
9246 a leaf function. These function types will not contain a stack
9247 backtrace structure, therefore it is not possible to determine the
9248 function name. */
9249 void
9250 arm_poke_function_name (FILE *stream, const char *name)
9252 unsigned long alignlength;
9253 unsigned long length;
9254 rtx x;
9256 length = strlen (name) + 1;
9257 alignlength = ROUND_UP_WORD (length);
9259 ASM_OUTPUT_ASCII (stream, name, length);
9260 ASM_OUTPUT_ALIGN (stream, 2);
9261 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
9262 assemble_aligned_integer (UNITS_PER_WORD, x);
9265 /* Place some comments into the assembler stream
9266 describing the current function. */
9267 static void
9268 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
9270 unsigned long func_type;
9272 if (!TARGET_ARM)
9274 thumb_output_function_prologue (f, frame_size);
9275 return;
9278 /* Sanity check. */
9279 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
9281 func_type = arm_current_func_type ();
9283 switch ((int) ARM_FUNC_TYPE (func_type))
9285 default:
9286 case ARM_FT_NORMAL:
9287 break;
9288 case ARM_FT_INTERWORKED:
9289 asm_fprintf (f, "\t%@ Function supports interworking.\n");
9290 break;
9291 case ARM_FT_ISR:
9292 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
9293 break;
9294 case ARM_FT_FIQ:
9295 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
9296 break;
9297 case ARM_FT_EXCEPTION:
9298 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
9299 break;
9302 if (IS_NAKED (func_type))
9303 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
9305 if (IS_VOLATILE (func_type))
9306 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
9308 if (IS_NESTED (func_type))
9309 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
9311 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
9312 current_function_args_size,
9313 current_function_pretend_args_size, frame_size);
9315 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
9316 frame_pointer_needed,
9317 cfun->machine->uses_anonymous_args);
9319 if (cfun->machine->lr_save_eliminated)
9320 asm_fprintf (f, "\t%@ link register save eliminated.\n");
9322 if (current_function_calls_eh_return)
9323 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
9325 #ifdef AOF_ASSEMBLER
9326 if (flag_pic)
9327 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, PIC_OFFSET_TABLE_REGNUM);
9328 #endif
9330 return_used_this_function = 0;
9333 const char *
9334 arm_output_epilogue (rtx sibling)
9336 int reg;
9337 unsigned long saved_regs_mask;
9338 unsigned long func_type;
9339 /* Floats_offset is the offset from the "virtual" frame. In an APCS
9340 frame that is $fp + 4 for a non-variadic function. */
9341 int floats_offset = 0;
9342 rtx operands[3];
9343 FILE * f = asm_out_file;
9344 unsigned int lrm_count = 0;
9345 int really_return = (sibling == NULL);
9346 int start_reg;
9347 arm_stack_offsets *offsets;
9349 /* If we have already generated the return instruction
9350 then it is futile to generate anything else. */
9351 if (use_return_insn (FALSE, sibling) && return_used_this_function)
9352 return "";
9354 func_type = arm_current_func_type ();
9356 if (IS_NAKED (func_type))
9357 /* Naked functions don't have epilogues. */
9358 return "";
9360 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
9362 rtx op;
9364 /* A volatile function should never return. Call abort. */
9365 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
9366 assemble_external_libcall (op);
9367 output_asm_insn ("bl\t%a0", &op);
9369 return "";
9372 /* If we are throwing an exception, then we really must be doing a
9373 return, so we can't tail-call. */
9374 gcc_assert (!current_function_calls_eh_return || really_return);
9376 offsets = arm_get_frame_offsets ();
9377 saved_regs_mask = arm_compute_save_reg_mask ();
9379 if (TARGET_IWMMXT)
9380 lrm_count = bit_count (saved_regs_mask);
9382 floats_offset = offsets->saved_args;
9383 /* Compute how far away the floats will be. */
9384 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
9385 if (saved_regs_mask & (1 << reg))
9386 floats_offset += 4;
9388 if (frame_pointer_needed)
9390 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
9391 int vfp_offset = offsets->frame;
9393 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
9395 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
9396 if (regs_ever_live[reg] && !call_used_regs[reg])
9398 floats_offset += 12;
9399 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
9400 reg, FP_REGNUM, floats_offset - vfp_offset);
9403 else
9405 start_reg = LAST_FPA_REGNUM;
9407 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
9409 if (regs_ever_live[reg] && !call_used_regs[reg])
9411 floats_offset += 12;
9413 /* We can't unstack more than four registers at once. */
9414 if (start_reg - reg == 3)
9416 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
9417 reg, FP_REGNUM, floats_offset - vfp_offset);
9418 start_reg = reg - 1;
9421 else
9423 if (reg != start_reg)
9424 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
9425 reg + 1, start_reg - reg,
9426 FP_REGNUM, floats_offset - vfp_offset);
9427 start_reg = reg - 1;
9431 /* Just in case the last register checked also needs unstacking. */
9432 if (reg != start_reg)
9433 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
9434 reg + 1, start_reg - reg,
9435 FP_REGNUM, floats_offset - vfp_offset);
9438 if (TARGET_HARD_FLOAT && TARGET_VFP)
9440 int saved_size;
9442 /* The fldmx insn does not have base+offset addressing modes,
9443 so we use IP to hold the address. */
9444 saved_size = arm_get_vfp_saved_size ();
9446 if (saved_size > 0)
9448 floats_offset += saved_size;
9449 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
9450 FP_REGNUM, floats_offset - vfp_offset);
9452 start_reg = FIRST_VFP_REGNUM;
9453 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
9455 if ((!regs_ever_live[reg] || call_used_regs[reg])
9456 && (!regs_ever_live[reg + 1] || call_used_regs[reg + 1]))
9458 if (start_reg != reg)
9459 arm_output_fldmx (f, IP_REGNUM,
9460 (start_reg - FIRST_VFP_REGNUM) / 2,
9461 (reg - start_reg) / 2);
9462 start_reg = reg + 2;
9465 if (start_reg != reg)
9466 arm_output_fldmx (f, IP_REGNUM,
9467 (start_reg - FIRST_VFP_REGNUM) / 2,
9468 (reg - start_reg) / 2);
9471 if (TARGET_IWMMXT)
9473 /* The frame pointer is guaranteed to be non-double-word aligned.
9474 This is because it is set to (old_stack_pointer - 4) and the
9475 old_stack_pointer was double word aligned. Thus the offset to
9476 the iWMMXt registers to be loaded must also be non-double-word
9477 sized, so that the resultant address *is* double-word aligned.
9478 We can ignore floats_offset since that was already included in
9479 the live_regs_mask. */
9480 lrm_count += (lrm_count % 2 ? 2 : 1);
9482 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
9483 if (regs_ever_live[reg] && !call_used_regs[reg])
9485 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
9486 reg, FP_REGNUM, lrm_count * 4);
9487 lrm_count += 2;
9491 /* saved_regs_mask should contain the IP, which at the time of stack
9492 frame generation actually contains the old stack pointer. So a
9493 quick way to unwind the stack is just to pop the IP register directly
9494 into the stack pointer. */
9495 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
9496 saved_regs_mask &= ~ (1 << IP_REGNUM);
9497 saved_regs_mask |= (1 << SP_REGNUM);
9499 /* There are two registers left in saved_regs_mask - LR and PC. We
9500 only need to restore the LR register (the return address), but to
9501 save time we can load it directly into the PC, unless we need a
9502 special function exit sequence, or we are not really returning. */
9503 if (really_return
9504 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
9505 && !current_function_calls_eh_return)
9506 /* Delete the LR from the register mask, so that the LR value saved on
9507 the stack is loaded into the PC, which remains in the mask.
9508 saved_regs_mask &= ~ (1 << LR_REGNUM);
9509 else
9510 saved_regs_mask &= ~ (1 << PC_REGNUM);
9512 /* We must use SP as the base register, because SP is one of the
9513 registers being restored. If an interrupt or page fault
9514 happens in the ldm instruction, the SP might or might not
9515 have been restored. That would be bad, as then SP will no
9516 longer indicate the safe area of stack, and we can get stack
9517 corruption. Using SP as the base register means that it will
9518 be reset correctly to the original value, should an interrupt
9519 occur. If the stack pointer already points at the right
9520 place, then omit the subtraction. */
9521 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
9522 || current_function_calls_alloca)
9523 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
9524 4 * bit_count (saved_regs_mask));
9525 print_multi_reg (f, "ldmfd\t%r", SP_REGNUM, saved_regs_mask);
9527 if (IS_INTERRUPT (func_type))
9528 /* Interrupt handlers will have pushed the
9529 IP onto the stack, so restore it now. */
9530 print_multi_reg (f, "ldmfd\t%r!", SP_REGNUM, 1 << IP_REGNUM);
9532 else
9534 /* Restore stack pointer if necessary. */
9535 if (offsets->outgoing_args != offsets->saved_regs)
9537 operands[0] = operands[1] = stack_pointer_rtx;
9538 operands[2] = GEN_INT (offsets->outgoing_args - offsets->saved_regs);
9539 output_add_immediate (operands);
9542 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
9544 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
9545 if (regs_ever_live[reg] && !call_used_regs[reg])
9546 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
9547 reg, SP_REGNUM);
9549 else
9551 start_reg = FIRST_FPA_REGNUM;
9553 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
9555 if (regs_ever_live[reg] && !call_used_regs[reg])
9557 if (reg - start_reg == 3)
9559 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
9560 start_reg, SP_REGNUM);
9561 start_reg = reg + 1;
9564 else
9566 if (reg != start_reg)
9567 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
9568 start_reg, reg - start_reg,
9569 SP_REGNUM);
9571 start_reg = reg + 1;
9575 /* Just in case the last register checked also needs unstacking. */
9576 if (reg != start_reg)
9577 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
9578 start_reg, reg - start_reg, SP_REGNUM);
9581 if (TARGET_HARD_FLOAT && TARGET_VFP)
9583 start_reg = FIRST_VFP_REGNUM;
9584 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
9586 if ((!regs_ever_live[reg] || call_used_regs[reg])
9587 && (!regs_ever_live[reg + 1] || call_used_regs[reg + 1]))
9589 if (start_reg != reg)
9590 arm_output_fldmx (f, SP_REGNUM,
9591 (start_reg - FIRST_VFP_REGNUM) / 2,
9592 (reg - start_reg) / 2);
9593 start_reg = reg + 2;
9596 if (start_reg != reg)
9597 arm_output_fldmx (f, SP_REGNUM,
9598 (start_reg - FIRST_VFP_REGNUM) / 2,
9599 (reg - start_reg) / 2);
9601 if (TARGET_IWMMXT)
9602 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
9603 if (regs_ever_live[reg] && !call_used_regs[reg])
9604 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
9606 /* If we can, restore the LR into the PC. */
9607 if (ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
9608 && really_return
9609 && current_function_pretend_args_size == 0
9610 && saved_regs_mask & (1 << LR_REGNUM)
9611 && !current_function_calls_eh_return)
9613 saved_regs_mask &= ~ (1 << LR_REGNUM);
9614 saved_regs_mask |= (1 << PC_REGNUM);
9617 /* Load the registers off the stack. If we only have one register
9618 to load use the LDR instruction - it is faster. */
9619 if (saved_regs_mask == (1 << LR_REGNUM))
9621 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
9623 else if (saved_regs_mask)
9625 if (saved_regs_mask & (1 << SP_REGNUM))
9626 /* Note - write back to the stack register is not enabled
9627 (i.e. "ldmfd sp!..."). We know that the stack pointer is
9628 in the list of registers and if we add writeback the
9629 instruction becomes UNPREDICTABLE. */
9630 print_multi_reg (f, "ldmfd\t%r", SP_REGNUM, saved_regs_mask);
9631 else
9632 print_multi_reg (f, "ldmfd\t%r!", SP_REGNUM, saved_regs_mask);
9635 if (current_function_pretend_args_size)
9637 /* Unwind the pre-pushed regs. */
9638 operands[0] = operands[1] = stack_pointer_rtx;
9639 operands[2] = GEN_INT (current_function_pretend_args_size);
9640 output_add_immediate (operands);
9644 /* We may have already restored PC directly from the stack. */
9645 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
9646 return "";
9648 /* Stack adjustment for exception handler. */
9649 if (current_function_calls_eh_return)
9650 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
9651 ARM_EH_STACKADJ_REGNUM);
9653 /* Generate the return instruction. */
9654 switch ((int) ARM_FUNC_TYPE (func_type))
9656 case ARM_FT_ISR:
9657 case ARM_FT_FIQ:
9658 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
9659 break;
9661 case ARM_FT_EXCEPTION:
9662 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
9663 break;
9665 case ARM_FT_INTERWORKED:
9666 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
9667 break;
9669 default:
9670 if (arm_arch5 || arm_arch4t)
9671 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
9672 else
9673 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
9674 break;
9677 return "";
9680 static void
9681 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
9682 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
9684 arm_stack_offsets *offsets;
9686 if (TARGET_THUMB)
9688 int regno;
9690 /* Emit any call-via-reg trampolines that are needed for v4t support
9691 of call_reg and call_value_reg type insns. */
9692 for (regno = 0; regno < LR_REGNUM; regno++)
9694 rtx label = cfun->machine->call_via[regno];
9696 if (label != NULL)
9698 function_section (current_function_decl);
9699 targetm.asm_out.internal_label (asm_out_file, "L",
9700 CODE_LABEL_NUMBER (label));
9701 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
9705 /* ??? Probably not safe to set this here, since it assumes that a
9706 function will be emitted as assembly immediately after we generate
9707 RTL for it. This does not happen for inline functions. */
9708 return_used_this_function = 0;
9710 else
9712 /* We need to take into account any stack-frame rounding. */
9713 offsets = arm_get_frame_offsets ();
9715 gcc_assert (!use_return_insn (FALSE, NULL)
9716 || !return_used_this_function
9717 || offsets->saved_regs == offsets->outgoing_args
9718 || frame_pointer_needed);
9720 /* Reset the ARM-specific per-function variables. */
9721 after_arm_reorg = 0;
9725 /* Generate and emit an insn that we will recognize as a push_multi.
9726 Unfortunately, since this insn does not reflect the actual semantics of
9727 the operation very well, we need to annotate the insn for the benefit
9728 of DWARF2 frame unwind information. */
9729 static rtx
9730 emit_multi_reg_push (unsigned long mask)
9732 int num_regs = 0;
9733 int num_dwarf_regs;
9734 int i, j;
9735 rtx par;
9736 rtx dwarf;
9737 int dwarf_par_index;
9738 rtx tmp, reg;
9740 for (i = 0; i <= LAST_ARM_REGNUM; i++)
9741 if (mask & (1 << i))
9742 num_regs++;
9744 gcc_assert (num_regs && num_regs <= 16);
9746 /* We don't record the PC in the dwarf frame information. */
9747 num_dwarf_regs = num_regs;
9748 if (mask & (1 << PC_REGNUM))
9749 num_dwarf_regs--;
9751 /* For the body of the insn we are going to generate an UNSPEC in
9752 parallel with several USEs. This allows the insn to be recognized
9753 by the push_multi pattern in the arm.md file. The insn looks
9754 something like this:
9756 (parallel [
9757 (set (mem:BLK (pre_dec:BLK (reg:SI sp)))
9758 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
9759 (use (reg:SI 11 fp))
9760 (use (reg:SI 12 ip))
9761 (use (reg:SI 14 lr))
9762 (use (reg:SI 15 pc))
9765 For the frame note however, we try to be more explicit and actually
9766 show each register being stored into the stack frame, plus a (single)
9767 decrement of the stack pointer. We do it this way in order to be
9768 friendly to the stack unwinding code, which only wants to see a single
9769 stack decrement per instruction. The RTL we generate for the note looks
9770 something like this:
9772 (sequence [
9773 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
9774 (set (mem:SI (reg:SI sp)) (reg:SI r4))
9775 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI fp))
9776 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI ip))
9777 (set (mem:SI (plus:SI (reg:SI sp) (const_int 12))) (reg:SI lr))
9780 This sequence is used both by the code to support stack unwinding for
9781 exception handlers and by the code that generates dwarf2 frame debugging information. */
9783 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
9784 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
9785 dwarf_par_index = 1;
9787 for (i = 0; i <= LAST_ARM_REGNUM; i++)
9789 if (mask & (1 << i))
9791 reg = gen_rtx_REG (SImode, i);
9793 XVECEXP (par, 0, 0)
9794 = gen_rtx_SET (VOIDmode,
9795 gen_rtx_MEM (BLKmode,
9796 gen_rtx_PRE_DEC (BLKmode,
9797 stack_pointer_rtx)),
9798 gen_rtx_UNSPEC (BLKmode,
9799 gen_rtvec (1, reg),
9800 UNSPEC_PUSH_MULT));
9802 if (i != PC_REGNUM)
9804 tmp = gen_rtx_SET (VOIDmode,
9805 gen_rtx_MEM (SImode, stack_pointer_rtx),
9806 reg);
9807 RTX_FRAME_RELATED_P (tmp) = 1;
9808 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
9809 dwarf_par_index++;
9812 break;
9816 for (j = 1, i++; j < num_regs; i++)
9818 if (mask & (1 << i))
9820 reg = gen_rtx_REG (SImode, i);
9822 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
9824 if (i != PC_REGNUM)
9826 tmp = gen_rtx_SET (VOIDmode,
9827 gen_rtx_MEM (SImode,
9828 plus_constant (stack_pointer_rtx,
9829 4 * j)),
9830 reg);
9831 RTX_FRAME_RELATED_P (tmp) = 1;
9832 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
9835 j++;
9839 par = emit_insn (par);
9841 tmp = gen_rtx_SET (SImode,
9842 stack_pointer_rtx,
9843 gen_rtx_PLUS (SImode,
9844 stack_pointer_rtx,
9845 GEN_INT (-4 * num_regs)));
9846 RTX_FRAME_RELATED_P (tmp) = 1;
9847 XVECEXP (dwarf, 0, 0) = tmp;
9849 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
9850 REG_NOTES (par));
9851 return par;
9854 static rtx
9855 emit_sfm (int base_reg, int count)
9857 rtx par;
9858 rtx dwarf;
9859 rtx tmp, reg;
9860 int i;
9862 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
9863 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
9865 reg = gen_rtx_REG (XFmode, base_reg++);
9867 XVECEXP (par, 0, 0)
9868 = gen_rtx_SET (VOIDmode,
9869 gen_rtx_MEM (BLKmode,
9870 gen_rtx_PRE_DEC (BLKmode, stack_pointer_rtx)),
9871 gen_rtx_UNSPEC (BLKmode,
9872 gen_rtvec (1, reg),
9873 UNSPEC_PUSH_MULT));
9874 tmp = gen_rtx_SET (VOIDmode,
9875 gen_rtx_MEM (XFmode, stack_pointer_rtx), reg);
9876 RTX_FRAME_RELATED_P (tmp) = 1;
9877 XVECEXP (dwarf, 0, 1) = tmp;
9879 for (i = 1; i < count; i++)
9881 reg = gen_rtx_REG (XFmode, base_reg++);
9882 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
9884 tmp = gen_rtx_SET (VOIDmode,
9885 gen_rtx_MEM (XFmode,
9886 plus_constant (stack_pointer_rtx,
9887 i * 12)),
9888 reg);
9889 RTX_FRAME_RELATED_P (tmp) = 1;
9890 XVECEXP (dwarf, 0, i + 1) = tmp;
9893 tmp = gen_rtx_SET (VOIDmode,
9894 stack_pointer_rtx,
9895 gen_rtx_PLUS (SImode,
9896 stack_pointer_rtx,
9897 GEN_INT (-12 * count)));
9898 RTX_FRAME_RELATED_P (tmp) = 1;
9899 XVECEXP (dwarf, 0, 0) = tmp;
9901 par = emit_insn (par);
9902 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
9903 REG_NOTES (par));
9904 return par;
9908 /* Return true if the current function needs to save/restore LR. */
9910 static bool
9911 thumb_force_lr_save (void)
9913 return !cfun->machine->lr_save_eliminated
9914 && (!leaf_function_p ()
9915 || thumb_far_jump_used_p ()
9916 || regs_ever_live [LR_REGNUM]);
9920 /* Compute the distance from register FROM to register TO.
9921 These can be the arg pointer (26), the soft frame pointer (25),
9922 the stack pointer (13) or the hard frame pointer (11).
9923 In thumb mode r7 is used as the soft frame pointer, if needed.
9924 Typical stack layout looks like this:
9926 old stack pointer -> | |
9927 ----
9928 | | \
9929 | | saved arguments for
9930 | | vararg functions
9931 | | /
9933 hard FP & arg pointer -> | | \
9934 | | stack
9935 | | frame
9936 | | /
9938 | | \
9939 | | call saved
9940 | | registers
9941 soft frame pointer -> | | /
9943 | | \
9944 | | local
9945 | | variables
9946 | | /
9948 | | \
9949 | | outgoing
9950 | | arguments
9951 current stack pointer -> | | /
9954 For a given function some or all of these stack components
9955 may not be needed, giving rise to the possibility of
9956 eliminating some of the registers.
9958 The values returned by this function must reflect the behavior
9959 of arm_expand_prologue() and arm_compute_save_reg_mask().
9961 The sign of the number returned reflects the direction of stack
9962 growth, so the values are positive for all eliminations except
9963 from the soft frame pointer to the hard frame pointer.
9965 SFP may point just inside the local variables block to ensure correct
9966 alignment. */
9969 /* Calculate stack offsets. These are used to calculate register elimination
9970 offsets and in prologue/epilogue code. */
9972 static arm_stack_offsets *
9973 arm_get_frame_offsets (void)
9975 struct arm_stack_offsets *offsets;
9976 unsigned long func_type;
9977 int leaf;
9978 int saved;
9979 HOST_WIDE_INT frame_size;
9981 offsets = &cfun->machine->stack_offsets;
9983 /* We need to know if we are a leaf function. Unfortunately, it
9984 is possible to be called after start_sequence has been called,
9985 which causes get_insns to return the insns for the sequence,
9986 not the function, which will cause leaf_function_p to return
9987 the incorrect result.
9989 To work around this we cache the result; we only need to know about leaf functions once reload has completed, and the
9990 frame size cannot be changed after that time, so we can safely
9991 use the cached value. */
9993 if (reload_completed)
9994 return offsets;
9996 /* Initially this is the size of the local variables. It will be translated
9997 into an offset once we have determined the size of preceding data. */
9998 frame_size = ROUND_UP_WORD (get_frame_size ());
10000 leaf = leaf_function_p ();
10002 /* Space for variadic functions. */
10003 offsets->saved_args = current_function_pretend_args_size;
10005 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0);
10007 if (TARGET_ARM)
10009 unsigned int regno;
10011 saved = bit_count (arm_compute_save_reg_mask ()) * 4;
10013 /* We know that SP will be doubleword aligned on entry, and we must
10014 preserve that condition at any subroutine call. We also require the
10015 soft frame pointer to be doubleword aligned. */
10017 if (TARGET_REALLY_IWMMXT)
10019 /* Check for the call-saved iWMMXt registers. */
10020 for (regno = FIRST_IWMMXT_REGNUM;
10021 regno <= LAST_IWMMXT_REGNUM;
10022 regno++)
10023 if (regs_ever_live [regno] && ! call_used_regs [regno])
10024 saved += 8;
10027 func_type = arm_current_func_type ();
10028 if (! IS_VOLATILE (func_type))
10030 /* Space for saved FPA registers. */
10031 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
10032 if (regs_ever_live[regno] && ! call_used_regs[regno])
10033 saved += 12;
10035 /* Space for saved VFP registers. */
10036 if (TARGET_HARD_FLOAT && TARGET_VFP)
10037 saved += arm_get_vfp_saved_size ();
10040 else /* TARGET_THUMB */
10042 saved = bit_count (thumb_compute_save_reg_mask ()) * 4;
10043 if (TARGET_BACKTRACE)
10044 saved += 16;
10047 /* Saved registers include the stack frame. */
10048 offsets->saved_regs = offsets->saved_args + saved;
10049 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
10050 /* A leaf function does not need any stack alignment if it has nothing
10051 on the stack. */
10052 if (leaf && frame_size == 0)
10054 offsets->outgoing_args = offsets->soft_frame;
10055 return offsets;
10058 /* Ensure SFP has the correct alignment. */
10059 if (ARM_DOUBLEWORD_ALIGN
10060 && (offsets->soft_frame & 7))
10061 offsets->soft_frame += 4;
10063 offsets->outgoing_args = offsets->soft_frame + frame_size
10064 + current_function_outgoing_args_size;
10066 if (ARM_DOUBLEWORD_ALIGN)
10068 /* Ensure SP remains doubleword aligned. */
10069 if (offsets->outgoing_args & 7)
10070 offsets->outgoing_args += 4;
10071 gcc_assert (!(offsets->outgoing_args & 7));
10074 return offsets;
10078 /* Calculate the relative offsets for the different stack pointers. Positive
10079 offsets are in the direction of stack growth. */
10081 HOST_WIDE_INT
10082 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
10084 arm_stack_offsets *offsets;
10086 offsets = arm_get_frame_offsets ();
10088 /* OK, now we have enough information to compute the distances.
10089 There must be an entry in these switch tables for each pair
10090 of registers in ELIMINABLE_REGS, even if some of the entries
10091 seem to be redundant or useless. */
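 /* A worked example may help here.  All of the numbers below are assumed
    for illustration only (they are not taken from any particular function):
    suppose saved_args == 0, four core registers are saved (saved == 16),
    CALLER_INTERWORKING_SLOT_SIZE == 0, there are 8 bytes of local variables,
    no outgoing arguments, and no doubleword-alignment padding is needed.
    arm_get_frame_offsets() then yields saved_regs == 16, soft_frame == 16
    and outgoing_args == 24, so the switch below returns 24 - (0 + 4) == 20
    for the elimination from ARG_POINTER_REGNUM to STACK_POINTER_REGNUM and
    24 - 16 == 8 for the elimination from FRAME_POINTER_REGNUM to
    STACK_POINTER_REGNUM.  */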
10092 switch (from)
10094 case ARG_POINTER_REGNUM:
10095 switch (to)
10097 case THUMB_HARD_FRAME_POINTER_REGNUM:
10098 return 0;
10100 case FRAME_POINTER_REGNUM:
10101 /* This is the reverse of the soft frame pointer
10102 to hard frame pointer elimination below. */
10103 return offsets->soft_frame - offsets->saved_args;
10105 case ARM_HARD_FRAME_POINTER_REGNUM:
10106 /* If there is no stack frame then the hard
10107 frame pointer and the arg pointer coincide. */
10108 if (offsets->frame == offsets->saved_regs)
10109 return 0;
10110 /* FIXME: Not sure about this. Maybe we should always return 0 ? */
10111 return (frame_pointer_needed
10112 && cfun->static_chain_decl != NULL
10113 && ! cfun->machine->uses_anonymous_args) ? 4 : 0;
10115 case STACK_POINTER_REGNUM:
10116 /* If nothing has been pushed on the stack at all
10117 then this will return -4. This *is* correct! */
10118 return offsets->outgoing_args - (offsets->saved_args + 4);
10120 default:
10121 gcc_unreachable ();
10123 gcc_unreachable ();
10125 case FRAME_POINTER_REGNUM:
10126 switch (to)
10128 case THUMB_HARD_FRAME_POINTER_REGNUM:
10129 return 0;
10131 case ARM_HARD_FRAME_POINTER_REGNUM:
10132 /* The hard frame pointer points to the top entry in the
10133 stack frame. The soft frame pointer points to the bottom entry
10134 in the stack frame. If there is no stack frame at all,
10135 then they are identical. */
10137 return offsets->frame - offsets->soft_frame;
10139 case STACK_POINTER_REGNUM:
10140 return offsets->outgoing_args - offsets->soft_frame;
10142 default:
10143 gcc_unreachable ();
10145 gcc_unreachable ();
10147 default:
10148 /* You cannot eliminate from the stack pointer.
10149 In theory you could eliminate from the hard frame
10150 pointer to the stack pointer, but this will never
10151 happen, since if a stack frame is not needed the
10152 hard frame pointer will never be used. */
10153 gcc_unreachable ();
10158 /* Generate the prologue instructions for entry into an ARM function. */
10159 void
10160 arm_expand_prologue (void)
10162 int reg;
10163 rtx amount;
10164 rtx insn;
10165 rtx ip_rtx;
10166 unsigned long live_regs_mask;
10167 unsigned long func_type;
10168 int fp_offset = 0;
10169 int saved_pretend_args = 0;
10170 int saved_regs = 0;
10171 unsigned HOST_WIDE_INT args_to_push;
10172 arm_stack_offsets *offsets;
10174 func_type = arm_current_func_type ();
10176 /* Naked functions don't have prologues. */
10177 if (IS_NAKED (func_type))
10178 return;
10180 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
10181 args_to_push = current_function_pretend_args_size;
10183 /* Compute which registers we will have to save onto the stack. */
10184 live_regs_mask = arm_compute_save_reg_mask ();
10186 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
10188 if (frame_pointer_needed)
10190 if (IS_INTERRUPT (func_type))
10192 /* Interrupt functions must not corrupt any registers.
10193 Creating a frame pointer, however, corrupts the IP
10194 register, so we must push it first. */
10195 insn = emit_multi_reg_push (1 << IP_REGNUM);
10197 /* Do not set RTX_FRAME_RELATED_P on this insn.
10198 The dwarf stack unwinding code only wants to see one
10199 stack decrement per function, and this is not it. If
10200 this instruction is labeled as being part of the frame
10201 creation sequence then dwarf2out_frame_debug_expr will
10202 die when it encounters the assignment of IP to FP
10203 later on, since the use of SP here establishes SP as
10204 the CFA register and not IP.
10206 Anyway this instruction is not really part of the stack
10207 frame creation although it is part of the prologue. */
10209 else if (IS_NESTED (func_type))
10211 /* The Static chain register is the same as the IP register
10212 used as a scratch register during stack frame creation.
10213 To get around this we need to find somewhere to store IP
10214 whilst the frame is being created. We try the following
10215 places in order:
10217 1. The last argument register.
10218 2. A slot on the stack above the frame. (This only
10219 works if the function is not a varargs function).
10220 3. Register r3, after pushing the argument registers
10221 onto the stack.
10223 Note - we only need to tell the dwarf2 backend about the SP
10224 adjustment in the second variant; the static chain register
10225 doesn't need to be unwound, as it doesn't contain a value
10226 inherited from the caller. */
10228 if (regs_ever_live[3] == 0)
10230 insn = gen_rtx_REG (SImode, 3);
10231 insn = gen_rtx_SET (SImode, insn, ip_rtx);
10232 insn = emit_insn (insn);
10234 else if (args_to_push == 0)
10236 rtx dwarf;
10237 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
10238 insn = gen_rtx_MEM (SImode, insn);
10239 insn = gen_rtx_SET (VOIDmode, insn, ip_rtx);
10240 insn = emit_insn (insn);
10242 fp_offset = 4;
10244 /* Just tell the dwarf backend that we adjusted SP. */
10245 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10246 gen_rtx_PLUS (SImode, stack_pointer_rtx,
10247 GEN_INT (-fp_offset)));
10248 RTX_FRAME_RELATED_P (insn) = 1;
10249 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
10250 dwarf, REG_NOTES (insn));
10252 else
10254 /* Store the args on the stack. */
10255 if (cfun->machine->uses_anonymous_args)
10256 insn = emit_multi_reg_push
10257 ((0xf0 >> (args_to_push / 4)) & 0xf);
10258 else
10259 insn = emit_insn
10260 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10261 GEN_INT (- args_to_push)));
10263 RTX_FRAME_RELATED_P (insn) = 1;
10265 saved_pretend_args = 1;
10266 fp_offset = args_to_push;
10267 args_to_push = 0;
10269 /* Now reuse r3 to preserve IP. */
10270 insn = gen_rtx_REG (SImode, 3);
10271 insn = gen_rtx_SET (SImode, insn, ip_rtx);
10272 (void) emit_insn (insn);
10276 if (fp_offset)
10278 insn = gen_rtx_PLUS (SImode, stack_pointer_rtx, GEN_INT (fp_offset));
10279 insn = gen_rtx_SET (SImode, ip_rtx, insn);
10281 else
10282 insn = gen_movsi (ip_rtx, stack_pointer_rtx);
10284 insn = emit_insn (insn);
10285 RTX_FRAME_RELATED_P (insn) = 1;
10288 if (args_to_push)
10290 /* Push the argument registers, or reserve space for them. */
10291 if (cfun->machine->uses_anonymous_args)
10292 insn = emit_multi_reg_push
10293 ((0xf0 >> (args_to_push / 4)) & 0xf);
10294 else
10295 insn = emit_insn
10296 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10297 GEN_INT (- args_to_push)));
10298 RTX_FRAME_RELATED_P (insn) = 1;
10301 /* If this is an interrupt service routine, and the link register
10302 is going to be pushed, and we are not creating a stack frame
10303 (which would involve an extra push of IP and a pop in the epilogue),
10304 subtracting four from LR now will mean that the function return
10305 can be done with a single instruction. */
10306 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
10307 && (live_regs_mask & (1 << LR_REGNUM)) != 0
10308 && ! frame_pointer_needed)
10309 emit_insn (gen_rtx_SET (SImode,
10310 gen_rtx_REG (SImode, LR_REGNUM),
10311 gen_rtx_PLUS (SImode,
10312 gen_rtx_REG (SImode, LR_REGNUM),
10313 GEN_INT (-4))));
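 /* For illustration (register list assumed): with LR pre-adjusted as above,
    an IRQ handler that pushed {r0-r3, lr} can return with the single
    instruction

 	ldmfd	sp!, {r0-r3, pc}^

    instead of having to correct the return address with a separate
    "sub lr, lr, #4" before returning.  */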
10315 if (live_regs_mask)
10317 insn = emit_multi_reg_push (live_regs_mask);
10318 saved_regs += bit_count (live_regs_mask) * 4;
10319 RTX_FRAME_RELATED_P (insn) = 1;
10322 if (TARGET_IWMMXT)
10323 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
10324 if (regs_ever_live[reg] && ! call_used_regs [reg])
10326 insn = gen_rtx_PRE_DEC (V2SImode, stack_pointer_rtx);
10327 insn = gen_rtx_MEM (V2SImode, insn);
10328 insn = emit_insn (gen_rtx_SET (VOIDmode, insn,
10329 gen_rtx_REG (V2SImode, reg)));
10330 RTX_FRAME_RELATED_P (insn) = 1;
10331 saved_regs += 8;
10334 if (! IS_VOLATILE (func_type))
10336 int start_reg;
10338 /* Save any floating point call-saved registers used by this
10339 function. */
10340 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
10342 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
10343 if (regs_ever_live[reg] && !call_used_regs[reg])
10345 insn = gen_rtx_PRE_DEC (XFmode, stack_pointer_rtx);
10346 insn = gen_rtx_MEM (XFmode, insn);
10347 insn = emit_insn (gen_rtx_SET (VOIDmode, insn,
10348 gen_rtx_REG (XFmode, reg)));
10349 RTX_FRAME_RELATED_P (insn) = 1;
10350 saved_regs += 12;
10353 else
10355 start_reg = LAST_FPA_REGNUM;
10357 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
10359 if (regs_ever_live[reg] && !call_used_regs[reg])
10361 if (start_reg - reg == 3)
10363 insn = emit_sfm (reg, 4);
10364 RTX_FRAME_RELATED_P (insn) = 1;
10365 saved_regs += 48;
10366 start_reg = reg - 1;
10369 else
10371 if (start_reg != reg)
10373 insn = emit_sfm (reg + 1, start_reg - reg);
10374 RTX_FRAME_RELATED_P (insn) = 1;
10375 saved_regs += (start_reg - reg) * 12;
10377 start_reg = reg - 1;
10381 if (start_reg != reg)
10383 insn = emit_sfm (reg + 1, start_reg - reg);
10384 saved_regs += (start_reg - reg) * 12;
10385 RTX_FRAME_RELATED_P (insn) = 1;
10388 if (TARGET_HARD_FLOAT && TARGET_VFP)
10390 start_reg = FIRST_VFP_REGNUM;
10392 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
10394 if ((!regs_ever_live[reg] || call_used_regs[reg])
10395 && (!regs_ever_live[reg + 1] || call_used_regs[reg + 1]))
10397 if (start_reg != reg)
10398 saved_regs += vfp_emit_fstmx (start_reg,
10399 (reg - start_reg) / 2);
10400 start_reg = reg + 2;
10403 if (start_reg != reg)
10404 saved_regs += vfp_emit_fstmx (start_reg,
10405 (reg - start_reg) / 2);
10409 if (frame_pointer_needed)
10411 /* Create the new frame pointer. */
10412 insn = GEN_INT (-(4 + args_to_push + fp_offset));
10413 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
10414 RTX_FRAME_RELATED_P (insn) = 1;
10416 if (IS_NESTED (func_type))
10418 /* Recover the static chain register. */
10419 if (regs_ever_live [3] == 0
10420 || saved_pretend_args)
10421 insn = gen_rtx_REG (SImode, 3);
10422 else /* if (current_function_pretend_args_size == 0) */
10424 insn = gen_rtx_PLUS (SImode, hard_frame_pointer_rtx,
10425 GEN_INT (4));
10426 insn = gen_rtx_MEM (SImode, insn);
10429 emit_insn (gen_rtx_SET (SImode, ip_rtx, insn));
10430 /* Add a USE to stop propagate_one_insn() from barfing. */
10431 emit_insn (gen_prologue_use (ip_rtx));
10435 offsets = arm_get_frame_offsets ();
10436 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
10438 /* This add can produce multiple insns for a large constant, so we
10439 need to get tricky. */
10440 rtx last = get_last_insn ();
10442 amount = GEN_INT (offsets->saved_args + saved_regs
10443 - offsets->outgoing_args);
10445 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10446 amount));
10449 last = last ? NEXT_INSN (last) : get_insns ();
10450 RTX_FRAME_RELATED_P (last) = 1;
10452 while (last != insn);
10454 /* If the frame pointer is needed, emit a special barrier that
10455 will prevent the scheduler from moving stores to the frame
10456 before the stack adjustment. */
10457 if (frame_pointer_needed)
10458 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
10459 hard_frame_pointer_rtx));
10463 if (flag_pic)
10464 arm_load_pic_register (INVALID_REGNUM);
10466 /* If we are profiling, make sure no instructions are scheduled before
10467 the call to mcount. Similarly if the user has requested no
10468 scheduling in the prolog. */
10469 if (current_function_profile || !TARGET_SCHED_PROLOG)
10470 emit_insn (gen_blockage ());
10472 /* If the link register is being kept alive, with the return address in it,
10473 then make sure that it does not get reused by the ce2 pass. */
10474 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
10476 emit_insn (gen_prologue_use (gen_rtx_REG (SImode, LR_REGNUM)));
10477 cfun->machine->lr_save_eliminated = 1;
10481 /* If CODE is 'd', then X is a condition operand and the instruction
10482 should only be executed if the condition is true.
10483 If CODE is 'D', then X is a condition operand and the instruction
10484 should only be executed if the condition is false: however, if the mode
10485 of the comparison is CCFPEmode, then always execute the instruction -- we
10486 do this because in these circumstances !GE does not necessarily imply LT;
10487 in these cases the instruction pattern will take care to make sure that
10488 an instruction containing %d will follow, thereby undoing the effects of
10489 doing this instruction unconditionally.
10490 If CODE is 'N' then X is a floating point operand that must be negated
10491 before output.
10492 If CODE is 'B' then output a bitwise inverted value of X (a const int).
10493 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
10494 void
10495 arm_print_operand (FILE *stream, rtx x, int code)
10497 switch (code)
10499 case '@':
10500 fputs (ASM_COMMENT_START, stream);
10501 return;
10503 case '_':
10504 fputs (user_label_prefix, stream);
10505 return;
10507 case '|':
10508 fputs (REGISTER_PREFIX, stream);
10509 return;
10511 case '?':
10512 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
10514 if (TARGET_THUMB)
10516 output_operand_lossage ("predicated Thumb instruction");
10517 break;
10519 if (current_insn_predicate != NULL)
10521 output_operand_lossage
10522 ("predicated instruction in conditional sequence");
10523 break;
10526 fputs (arm_condition_codes[arm_current_cc], stream);
10528 else if (current_insn_predicate)
10530 enum arm_cond_code code;
10532 if (TARGET_THUMB)
10534 output_operand_lossage ("predicated Thumb instruction");
10535 break;
10538 code = get_arm_condition_code (current_insn_predicate);
10539 fputs (arm_condition_codes[code], stream);
10541 return;
10543 case 'N':
10545 REAL_VALUE_TYPE r;
10546 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
10547 r = REAL_VALUE_NEGATE (r);
10548 fprintf (stream, "%s", fp_const_from_val (&r));
10550 return;
10552 case 'B':
10553 if (GET_CODE (x) == CONST_INT)
10555 HOST_WIDE_INT val;
10556 val = ARM_SIGN_EXTEND (~INTVAL (x));
10557 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
10559 else
10561 putc ('~', stream);
10562 output_addr_const (stream, x);
10564 return;
10566 case 'i':
10567 fprintf (stream, "%s", arithmetic_instr (x, 1));
10568 return;
10570 /* Truncate Cirrus shift counts. */
10571 case 's':
10572 if (GET_CODE (x) == CONST_INT)
10574 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
10575 return;
10577 arm_print_operand (stream, x, 0);
10578 return;
10580 case 'I':
10581 fprintf (stream, "%s", arithmetic_instr (x, 0));
10582 return;
10584 case 'S':
10586 HOST_WIDE_INT val;
10587 const char * shift = shift_op (x, &val);
10589 if (shift)
10591 fprintf (stream, ", %s ", shift);
10592 if (val == -1)
10593 arm_print_operand (stream, XEXP (x, 1), 0);
10594 else
10595 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
10598 return;
10600 /* An explanation of the 'Q', 'R' and 'H' register operands:
10602 In a pair of registers containing a DI or DF value the 'Q'
10603 operand returns the register number of the register containing
10604 the least significant part of the value. The 'R' operand returns
10605 the register number of the register containing the most
10606 significant part of the value.
10608 The 'H' operand returns the higher of the two register numbers.
10609 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
10610 same as the 'Q' operand, since the most significant part of the
10611 value is held in the lower number register. The reverse is true
10612 on systems where WORDS_BIG_ENDIAN is false.
10614 The purpose of these operands is to distinguish between cases
10615 where the endian-ness of the values is important (for example
10616 when they are added together), and cases where the endian-ness
10617 is irrelevant, but the order of register operations is important.
10618 For example when loading a value from memory into a register
10619 pair, the endian-ness does not matter. Provided that the value
10620 from the lower memory address is put into the lower numbered
10621 register, and the value from the higher address is put into the
10622 higher numbered register, the load will work regardless of whether
10623 the value being loaded is big-wordian or little-wordian. The
10624 order of the two register loads can matter however, if the address
10625 of the memory location is actually held in one of the registers
10626 being overwritten by the load. */
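 /* As an illustration (the template below is assumed, not quoted from
    arm.md): a DImode addition can be emitted with a template such as

 	"adds\t%Q0, %Q1, %Q2\;adc\t%R0, %R1, %R2"

    because the carry must be produced from the least significant halves
    first, whichever word order is in use; %Q and %R pick the correct
    registers on both big-wordian and little-wordian targets.  */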
10627 case 'Q':
10628 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
10630 output_operand_lossage ("invalid operand for code '%c'", code);
10631 return;
10634 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
10635 return;
10637 case 'R':
10638 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
10640 output_operand_lossage ("invalid operand for code '%c'", code);
10641 return;
10644 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
10645 return;
10647 case 'H':
10648 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
10650 output_operand_lossage ("invalid operand for code '%c'", code);
10651 return;
10654 asm_fprintf (stream, "%r", REGNO (x) + 1);
10655 return;
10657 case 'm':
10658 asm_fprintf (stream, "%r",
10659 GET_CODE (XEXP (x, 0)) == REG
10660 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
10661 return;
10663 case 'M':
10664 asm_fprintf (stream, "{%r-%r}",
10665 REGNO (x),
10666 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
10667 return;
10669 case 'd':
10670 /* CONST_TRUE_RTX means always -- that's the default. */
10671 if (x == const_true_rtx)
10672 return;
10674 if (!COMPARISON_P (x))
10676 output_operand_lossage ("invalid operand for code '%c'", code);
10677 return;
10680 fputs (arm_condition_codes[get_arm_condition_code (x)],
10681 stream);
10682 return;
10684 case 'D':
10685 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
10686 want to do that. */
10687 if (x == const_true_rtx)
10689 output_operand_lossage ("instruction never executed");
10690 return;
10692 if (!COMPARISON_P (x))
10694 output_operand_lossage ("invalid operand for code '%c'", code);
10695 return;
10698 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
10699 (get_arm_condition_code (x))],
10700 stream);
10701 return;
10703 /* Cirrus registers can be accessed in a variety of ways:
10704 single floating point (f)
10705 double floating point (d)
10706 32bit integer (fx)
10707 64bit integer (dx). */
10708 case 'W': /* Cirrus register in F mode. */
10709 case 'X': /* Cirrus register in D mode. */
10710 case 'Y': /* Cirrus register in FX mode. */
10711 case 'Z': /* Cirrus register in DX mode. */
10712 gcc_assert (GET_CODE (x) == REG
10713 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
10715 fprintf (stream, "mv%s%s",
10716 code == 'W' ? "f"
10717 : code == 'X' ? "d"
10718 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
10720 return;
10722 /* Print cirrus register in the mode specified by the register's mode. */
10723 case 'V':
10725 int mode = GET_MODE (x);
10727 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
10729 output_operand_lossage ("invalid operand for code '%c'", code);
10730 return;
10733 fprintf (stream, "mv%s%s",
10734 mode == DFmode ? "d"
10735 : mode == SImode ? "fx"
10736 : mode == DImode ? "dx"
10737 : "f", reg_names[REGNO (x)] + 2);
10739 return;
10742 case 'U':
10743 if (GET_CODE (x) != REG
10744 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
10745 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
10746 /* Bad value for wCG register number. */
10748 output_operand_lossage ("invalid operand for code '%c'", code);
10749 return;
10752 else
10753 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
10754 return;
10756 /* Print an iWMMXt control register name. */
10757 case 'w':
10758 if (GET_CODE (x) != CONST_INT
10759 || INTVAL (x) < 0
10760 || INTVAL (x) >= 16)
10761 /* Bad value for wC register number. */
10763 output_operand_lossage ("invalid operand for code '%c'", code);
10764 return;
10767 else
10769 static const char * wc_reg_names [16] =
10771 "wCID", "wCon", "wCSSF", "wCASF",
10772 "wC4", "wC5", "wC6", "wC7",
10773 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
10774 "wC12", "wC13", "wC14", "wC15"
10777 fprintf (stream, wc_reg_names [INTVAL (x)]);
10779 return;
10781 /* Print a VFP double precision register name. */
10782 case 'P':
10784 int mode = GET_MODE (x);
10785 int num;
10787 if (mode != DImode && mode != DFmode)
10789 output_operand_lossage ("invalid operand for code '%c'", code);
10790 return;
10793 if (GET_CODE (x) != REG
10794 || !IS_VFP_REGNUM (REGNO (x)))
10796 output_operand_lossage ("invalid operand for code '%c'", code);
10797 return;
10800 num = REGNO(x) - FIRST_VFP_REGNUM;
10801 if (num & 1)
10803 output_operand_lossage ("invalid operand for code '%c'", code);
10804 return;
10807 fprintf (stream, "d%d", num >> 1);
10809 return;
10811 default:
10812 if (x == 0)
10814 output_operand_lossage ("missing operand");
10815 return;
10818 switch (GET_CODE (x))
10820 case REG:
10821 asm_fprintf (stream, "%r", REGNO (x));
10822 break;
10824 case MEM:
10825 output_memory_reference_mode = GET_MODE (x);
10826 output_address (XEXP (x, 0));
10827 break;
10829 case CONST_DOUBLE:
10830 fprintf (stream, "#%s", fp_immediate_constant (x));
10831 break;
10833 default:
10834 gcc_assert (GET_CODE (x) != NEG);
10835 fputc ('#', stream);
10836 output_addr_const (stream, x);
10837 break;
10842 #ifndef AOF_ASSEMBLER
10843 /* Target hook for assembling integer objects. The ARM version needs to
10844 handle word-sized values specially. */
10845 static bool
10846 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
10848 if (size == UNITS_PER_WORD && aligned_p)
10850 fputs ("\t.word\t", asm_out_file);
10851 output_addr_const (asm_out_file, x);
10853 /* Mark symbols as position independent. We only do this in the
10854 .text segment, not in the .data segment. */
10855 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
10856 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
10858 if (GET_CODE (x) == SYMBOL_REF
10859 && (CONSTANT_POOL_ADDRESS_P (x)
10860 || SYMBOL_REF_LOCAL_P (x)))
10861 fputs ("(GOTOFF)", asm_out_file);
10862 else if (GET_CODE (x) == LABEL_REF)
10863 fputs ("(GOTOFF)", asm_out_file);
10864 else
10865 fputs ("(GOT)", asm_out_file);
10867 fputc ('\n', asm_out_file);
10868 return true;
10871 if (arm_vector_mode_supported_p (GET_MODE (x)))
10873 int i, units;
10875 gcc_assert (GET_CODE (x) == CONST_VECTOR);
10877 units = CONST_VECTOR_NUNITS (x);
10879 switch (GET_MODE (x))
10881 case V2SImode: size = 4; break;
10882 case V4HImode: size = 2; break;
10883 case V8QImode: size = 1; break;
10884 default:
10885 gcc_unreachable ();
10888 for (i = 0; i < units; i++)
10890 rtx elt;
10892 elt = CONST_VECTOR_ELT (x, i);
10893 assemble_integer
10894 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
10897 return true;
10900 return default_assemble_integer (x, size, aligned_p);
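 /* For example (symbol names assumed): with -fpic, a constant table entry
    referring to a local symbol is emitted by the code above as

 	.word	.LC3(GOTOFF)

    while a reference to a non-local symbol becomes

 	.word	foo(GOT)

    so that it is resolved through the global offset table.  */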
10904 /* Add a function to the list of static constructors. */
10906 static void
10907 arm_elf_asm_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
10909 if (!TARGET_AAPCS_BASED)
10911 default_named_section_asm_out_constructor (symbol, priority);
10912 return;
10915 /* Put these in the .init_array section, using a special relocation. */
10916 ctors_section ();
10917 assemble_align (POINTER_SIZE);
10918 fputs ("\t.word\t", asm_out_file);
10919 output_addr_const (asm_out_file, symbol);
10920 fputs ("(target1)\n", asm_out_file);
10922 #endif
10924 /* A finite state machine takes care of noticing whether or not instructions
10925 can be conditionally executed, and thus decreases execution time and code
10926 size by deleting branch instructions. The fsm is controlled by
10927 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
10929 /* The states of the fsm controlling condition codes are:
10930 0: normal, do nothing special
10931 1: make ASM_OUTPUT_OPCODE not output this instruction
10932 2: make ASM_OUTPUT_OPCODE not output this instruction
10933 3: make instructions conditional
10934 4: make instructions conditional
10936 State transitions (state->state by whom under condition):
10937 0 -> 1 final_prescan_insn if the `target' is a label
10938 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
10939 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
10940 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
10941 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
10942 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
10943 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
10944 (the target insn is arm_target_insn).
10946 If the jump clobbers the conditions then we use states 2 and 4.
10948 A similar thing can be done with conditional return insns.
10950 XXX In case the `target' is an unconditional branch, this conditionalising
10951 of the instructions always reduces code size, but not always execution
10952 time. But then, I want to reduce the code size to somewhere near what
10953 /bin/cc produces. */
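 /* For illustration (instruction sequence assumed), the fsm transforms

 	cmp	r0, #0
 	beq	.L1
 	add	r1, r1, #1
    .L1:

    into

 	cmp	r0, #0
 	addne	r1, r1, #1

    by suppressing the branch in ASM_OUTPUT_OPCODE and emitting the skipped
    instruction with the inverse of the branch condition.  */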
10955 /* Returns the index of the ARM condition code string in
10956 `arm_condition_codes'. COMPARISON should be an rtx like
10957 `(eq (...) (...))'. */
10958 static enum arm_cond_code
10959 get_arm_condition_code (rtx comparison)
10961 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
10962 int code;
10963 enum rtx_code comp_code = GET_CODE (comparison);
10965 if (GET_MODE_CLASS (mode) != MODE_CC)
10966 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
10967 XEXP (comparison, 1));
10969 switch (mode)
10971 case CC_DNEmode: code = ARM_NE; goto dominance;
10972 case CC_DEQmode: code = ARM_EQ; goto dominance;
10973 case CC_DGEmode: code = ARM_GE; goto dominance;
10974 case CC_DGTmode: code = ARM_GT; goto dominance;
10975 case CC_DLEmode: code = ARM_LE; goto dominance;
10976 case CC_DLTmode: code = ARM_LT; goto dominance;
10977 case CC_DGEUmode: code = ARM_CS; goto dominance;
10978 case CC_DGTUmode: code = ARM_HI; goto dominance;
10979 case CC_DLEUmode: code = ARM_LS; goto dominance;
10980 case CC_DLTUmode: code = ARM_CC;
10982 dominance:
10983 gcc_assert (comp_code == EQ || comp_code == NE);
10985 if (comp_code == EQ)
10986 return ARM_INVERSE_CONDITION_CODE (code);
10987 return code;
10989 case CC_NOOVmode:
10990 switch (comp_code)
10992 case NE: return ARM_NE;
10993 case EQ: return ARM_EQ;
10994 case GE: return ARM_PL;
10995 case LT: return ARM_MI;
10996 default: gcc_unreachable ();
10999 case CC_Zmode:
11000 switch (comp_code)
11002 case NE: return ARM_NE;
11003 case EQ: return ARM_EQ;
11004 default: gcc_unreachable ();
11007 case CC_Nmode:
11008 switch (comp_code)
11010 case NE: return ARM_MI;
11011 case EQ: return ARM_PL;
11012 default: gcc_unreachable ();
11015 case CCFPEmode:
11016 case CCFPmode:
11017 /* These encodings assume that AC=1 in the FPA system control
11018 byte. This allows us to handle all cases except UNEQ and
11019 LTGT. */
11020 switch (comp_code)
11022 case GE: return ARM_GE;
11023 case GT: return ARM_GT;
11024 case LE: return ARM_LS;
11025 case LT: return ARM_MI;
11026 case NE: return ARM_NE;
11027 case EQ: return ARM_EQ;
11028 case ORDERED: return ARM_VC;
11029 case UNORDERED: return ARM_VS;
11030 case UNLT: return ARM_LT;
11031 case UNLE: return ARM_LE;
11032 case UNGT: return ARM_HI;
11033 case UNGE: return ARM_PL;
11034 /* UNEQ and LTGT do not have a representation. */
11035 case UNEQ: /* Fall through. */
11036 case LTGT: /* Fall through. */
11037 default: gcc_unreachable ();
11040 case CC_SWPmode:
11041 switch (comp_code)
11043 case NE: return ARM_NE;
11044 case EQ: return ARM_EQ;
11045 case GE: return ARM_LE;
11046 case GT: return ARM_LT;
11047 case LE: return ARM_GE;
11048 case LT: return ARM_GT;
11049 case GEU: return ARM_LS;
11050 case GTU: return ARM_CC;
11051 case LEU: return ARM_CS;
11052 case LTU: return ARM_HI;
11053 default: gcc_unreachable ();
11056 case CC_Cmode:
11057 switch (comp_code)
11059 case LTU: return ARM_CS;
11060 case GEU: return ARM_CC;
11061 default: gcc_unreachable ();
11064 case CCmode:
11065 switch (comp_code)
11067 case NE: return ARM_NE;
11068 case EQ: return ARM_EQ;
11069 case GE: return ARM_GE;
11070 case GT: return ARM_GT;
11071 case LE: return ARM_LE;
11072 case LT: return ARM_LT;
11073 case GEU: return ARM_CS;
11074 case GTU: return ARM_HI;
11075 case LEU: return ARM_LS;
11076 case LTU: return ARM_CC;
11077 default: gcc_unreachable ();
11080 default: gcc_unreachable ();
11084 void
11085 arm_final_prescan_insn (rtx insn)
11087 /* BODY will hold the body of INSN. */
11088 rtx body = PATTERN (insn);
11090 /* This will be 1 if trying to repeat the trick, and things need to be
11091 reversed if it appears to fail. */
11092 int reverse = 0;
11094 /* JUMP_CLOBBERS will be one if the condition codes are clobbered when the
11095 branch is taken, even if the rtl suggests otherwise. It also
11096 means that we have to grub around within the jump expression to find
11097 out what the conditions are when the jump isn't taken. */
11098 int jump_clobbers = 0;
11100 /* If we start with a return insn, we only succeed if we find another one. */
11101 int seeking_return = 0;
11103 /* START_INSN will hold the insn from where we start looking. This is the
11104 first insn after the following code_label if REVERSE is true. */
11105 rtx start_insn = insn;
11107 /* If in state 4, check if the target branch is reached, in order to
11108 change back to state 0. */
11109 if (arm_ccfsm_state == 4)
11111 if (insn == arm_target_insn)
11113 arm_target_insn = NULL;
11114 arm_ccfsm_state = 0;
11116 return;
11119 /* If in state 3, it is possible to repeat the trick, if this insn is an
11120 unconditional branch to a label, and immediately following this branch
11121 is the previous target label which is only used once, and the label this
11122 branch jumps to is not too far off. */
11123 if (arm_ccfsm_state == 3)
11125 if (simplejump_p (insn))
11127 start_insn = next_nonnote_insn (start_insn);
11128 if (GET_CODE (start_insn) == BARRIER)
11130 /* XXX Isn't this always a barrier? */
11131 start_insn = next_nonnote_insn (start_insn);
11133 if (GET_CODE (start_insn) == CODE_LABEL
11134 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
11135 && LABEL_NUSES (start_insn) == 1)
11136 reverse = TRUE;
11137 else
11138 return;
11140 else if (GET_CODE (body) == RETURN)
11142 start_insn = next_nonnote_insn (start_insn);
11143 if (GET_CODE (start_insn) == BARRIER)
11144 start_insn = next_nonnote_insn (start_insn);
11145 if (GET_CODE (start_insn) == CODE_LABEL
11146 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
11147 && LABEL_NUSES (start_insn) == 1)
11149 reverse = TRUE;
11150 seeking_return = 1;
11152 else
11153 return;
11155 else
11156 return;
11159 gcc_assert (!arm_ccfsm_state || reverse);
11160 if (GET_CODE (insn) != JUMP_INSN)
11161 return;
11163 /* This jump might be paralleled with a clobber of the condition codes;
11164 the jump should always come first. */
11165 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
11166 body = XVECEXP (body, 0, 0);
11168 if (reverse
11169 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
11170 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
11172 int insns_skipped;
11173 int fail = FALSE, succeed = FALSE;
11174 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
11175 int then_not_else = TRUE;
11176 rtx this_insn = start_insn, label = 0;
11178 /* If the jump cannot be done with one instruction, we cannot
11179 conditionally execute the instruction in the inverse case. */
11180 if (get_attr_conds (insn) == CONDS_JUMP_CLOB)
11182 jump_clobbers = 1;
11183 return;
11186 /* Register the insn jumped to. */
11187 if (reverse)
11189 if (!seeking_return)
11190 label = XEXP (SET_SRC (body), 0);
11192 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
11193 label = XEXP (XEXP (SET_SRC (body), 1), 0);
11194 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
11196 label = XEXP (XEXP (SET_SRC (body), 2), 0);
11197 then_not_else = FALSE;
11199 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
11200 seeking_return = 1;
11201 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
11203 seeking_return = 1;
11204 then_not_else = FALSE;
11206 else
11207 gcc_unreachable ();
11209 /* See how many insns this branch skips, and what kind of insns. If all
11210 insns are okay, and the label or unconditional branch to the same
11211 label is not too far away, succeed. */
11212 for (insns_skipped = 0;
11213 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
11215 rtx scanbody;
11217 this_insn = next_nonnote_insn (this_insn);
11218 if (!this_insn)
11219 break;
11221 switch (GET_CODE (this_insn))
11223 case CODE_LABEL:
11224 /* Succeed if it is the target label, otherwise fail since
11225 control falls in from somewhere else. */
11226 if (this_insn == label)
11228 if (jump_clobbers)
11230 arm_ccfsm_state = 2;
11231 this_insn = next_nonnote_insn (this_insn);
11233 else
11234 arm_ccfsm_state = 1;
11235 succeed = TRUE;
11237 else
11238 fail = TRUE;
11239 break;
11241 case BARRIER:
11242 /* Succeed if the following insn is the target label.
11243 Otherwise fail.
11244 If return insns are used then the last insn in a function
11245 will be a barrier. */
11246 this_insn = next_nonnote_insn (this_insn);
11247 if (this_insn && this_insn == label)
11249 if (jump_clobbers)
11251 arm_ccfsm_state = 2;
11252 this_insn = next_nonnote_insn (this_insn);
11254 else
11255 arm_ccfsm_state = 1;
11256 succeed = TRUE;
11258 else
11259 fail = TRUE;
11260 break;
11262 case CALL_INSN:
11263 /* The AAPCS says that conditional calls should not be
11264 used since they make interworking inefficient (the
11265 linker can't transform BL<cond> into BLX). That's
11266 only a problem if the machine has BLX. */
11267 if (arm_arch5)
11269 fail = TRUE;
11270 break;
11273 /* Succeed if the following insn is the target label, or
11274 if the following two insns are a barrier and the
11275 target label. */
11276 this_insn = next_nonnote_insn (this_insn);
11277 if (this_insn && GET_CODE (this_insn) == BARRIER)
11278 this_insn = next_nonnote_insn (this_insn);
11280 if (this_insn && this_insn == label
11281 && insns_skipped < max_insns_skipped)
11283 if (jump_clobbers)
11285 arm_ccfsm_state = 2;
11286 this_insn = next_nonnote_insn (this_insn);
11288 else
11289 arm_ccfsm_state = 1;
11290 succeed = TRUE;
11292 else
11293 fail = TRUE;
11294 break;
11296 case JUMP_INSN:
11297 /* If this is an unconditional branch to the same label, succeed.
11298 If it is to another label, do nothing. If it is conditional,
11299 fail. */
11300 /* XXX Probably, the tests for SET and the PC are
11301 unnecessary. */
11303 scanbody = PATTERN (this_insn);
11304 if (GET_CODE (scanbody) == SET
11305 && GET_CODE (SET_DEST (scanbody)) == PC)
11307 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
11308 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
11310 arm_ccfsm_state = 2;
11311 succeed = TRUE;
11313 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
11314 fail = TRUE;
11316 /* Fail if a conditional return is undesirable (e.g. on a
11317 StrongARM), but still allow this if optimizing for size. */
11318 else if (GET_CODE (scanbody) == RETURN
11319 && !use_return_insn (TRUE, NULL)
11320 && !optimize_size)
11321 fail = TRUE;
11322 else if (GET_CODE (scanbody) == RETURN
11323 && seeking_return)
11325 arm_ccfsm_state = 2;
11326 succeed = TRUE;
11328 else if (GET_CODE (scanbody) == PARALLEL)
11330 switch (get_attr_conds (this_insn))
11332 case CONDS_NOCOND:
11333 break;
11334 default:
11335 fail = TRUE;
11336 break;
11339 else
11340 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
11342 break;
11344 case INSN:
11345 /* Instructions using or affecting the condition codes make it
11346 fail. */
11347 scanbody = PATTERN (this_insn);
11348 if (!(GET_CODE (scanbody) == SET
11349 || GET_CODE (scanbody) == PARALLEL)
11350 || get_attr_conds (this_insn) != CONDS_NOCOND)
11351 fail = TRUE;
11353 /* A conditional Cirrus instruction must be followed by
11354 a non-Cirrus instruction. However, this function
11355 conditionalizes instructions, and by the time we get
11356 here we cannot add instructions (nops) because
11357 shorten_branches() has already been called, so we
11358 simply disable conditionalizing Cirrus
11359 instructions to be safe. */
11360 if (GET_CODE (scanbody) != USE
11361 && GET_CODE (scanbody) != CLOBBER
11362 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
11363 fail = TRUE;
11364 break;
11366 default:
11367 break;
11370 if (succeed)
11372 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
11373 arm_target_label = CODE_LABEL_NUMBER (label);
11374 else
11376 gcc_assert (seeking_return || arm_ccfsm_state == 2);
11378 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
11380 this_insn = next_nonnote_insn (this_insn);
11381 gcc_assert (!this_insn
11382 || (GET_CODE (this_insn) != BARRIER
11383 && GET_CODE (this_insn) != CODE_LABEL));
11385 if (!this_insn)
11387 /* Oh dear!  We ran off the end; give up. */
11388 recog (PATTERN (insn), insn, NULL);
11389 arm_ccfsm_state = 0;
11390 arm_target_insn = NULL;
11391 return;
11393 arm_target_insn = this_insn;
11395 if (jump_clobbers)
11397 gcc_assert (!reverse);
11398 arm_current_cc =
11399 get_arm_condition_code (XEXP (XEXP (XEXP (SET_SRC (body),
11400 0), 0), 1));
11401 if (GET_CODE (XEXP (XEXP (SET_SRC (body), 0), 0)) == AND)
11402 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
11403 if (GET_CODE (XEXP (SET_SRC (body), 0)) == NE)
11404 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
11406 else
11408 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
11409 what it was. */
11410 if (!reverse)
11411 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body),
11412 0));
11415 if (reverse || then_not_else)
11416 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
11419 /* Restore recog_data (getting the attributes of other insns can
11420 destroy this array, but final.c assumes that it remains intact
11421 across this call; since the insn has been recognized already we
11422 call recog directly). */
11423 recog (PATTERN (insn), insn, NULL);
11427 /* Returns true if REGNO is a valid register
11428 for holding a quantity of type MODE. */
11430 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
11432 if (GET_MODE_CLASS (mode) == MODE_CC)
11433 return regno == CC_REGNUM || regno == VFPCC_REGNUM;
11435 if (TARGET_THUMB)
11436 /* For the Thumb we only allow values bigger than SImode in
11437 registers 0 - 6, so that there is always a second low
11438 register available to hold the upper part of the value.
11439 We probably ought to ensure that the register is the
11440 start of an even numbered register pair. */
11441 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
11443 if (IS_CIRRUS_REGNUM (regno))
11444 /* We have outlawed SI values in Cirrus registers because they
11445 reside in the lower 32 bits, but SF values reside in the
11446 upper 32 bits. This causes gcc all sorts of grief. We can't
11447 even split the registers into pairs because Cirrus SI values
11448 get sign extended to 64bits-- aldyh. */
11449 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
11451 if (IS_VFP_REGNUM (regno))
11453 if (mode == SFmode || mode == SImode)
11454 return TRUE;
11456 /* DFmode values are only valid in even register pairs. */
11457 if (mode == DFmode)
11458 return ((regno - FIRST_VFP_REGNUM) & 1) == 0;
11459 return FALSE;
11462 if (IS_IWMMXT_GR_REGNUM (regno))
11463 return mode == SImode;
11465 if (IS_IWMMXT_REGNUM (regno))
11466 return VALID_IWMMXT_REG_MODE (mode);
11468 /* We allow any value to be stored in the general registers.
11469 Restrict doubleword quantities to even register pairs so that we can
11470 use ldrd. */
11471 if (regno <= LAST_ARM_REGNUM)
11472 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
11474 if ( regno == FRAME_POINTER_REGNUM
11475 || regno == ARG_POINTER_REGNUM)
11476 /* We only allow integers in the fake hard registers. */
11477 return GET_MODE_CLASS (mode) == MODE_INT;
11479 /* The only registers left are the FPA registers
11480 which we only allow to hold FP values. */
11481 return GET_MODE_CLASS (mode) == MODE_FLOAT
11482 && regno >= FIRST_FPA_REGNUM
11483 && regno <= LAST_FPA_REGNUM;
11487 arm_regno_class (int regno)
11489 if (TARGET_THUMB)
11491 if (regno == STACK_POINTER_REGNUM)
11492 return STACK_REG;
11493 if (regno == CC_REGNUM)
11494 return CC_REG;
11495 if (regno < 8)
11496 return LO_REGS;
11497 return HI_REGS;
11500 if ( regno <= LAST_ARM_REGNUM
11501 || regno == FRAME_POINTER_REGNUM
11502 || regno == ARG_POINTER_REGNUM)
11503 return GENERAL_REGS;
11505 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
11506 return NO_REGS;
11508 if (IS_CIRRUS_REGNUM (regno))
11509 return CIRRUS_REGS;
11511 if (IS_VFP_REGNUM (regno))
11512 return VFP_REGS;
11514 if (IS_IWMMXT_REGNUM (regno))
11515 return IWMMXT_REGS;
11517 if (IS_IWMMXT_GR_REGNUM (regno))
11518 return IWMMXT_GR_REGS;
11520 return FPA_REGS;
11523 /* Handle a special case when computing the offset
11524 of an argument from the frame pointer. */
11526 arm_debugger_arg_offset (int value, rtx addr)
11528 rtx insn;
11530 /* We are only interested if dbxout_parms() failed to compute the offset. */
11531 if (value != 0)
11532 return 0;
11534 /* We can only cope with the case where the address is held in a register. */
11535 if (GET_CODE (addr) != REG)
11536 return 0;
11538 /* If we are using the frame pointer to point at the argument, then
11539 an offset of 0 is correct. */
11540 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
11541 return 0;
11543 /* If we are using the stack pointer to point at the
11544 argument, then an offset of 0 is correct. */
11545 if ((TARGET_THUMB || !frame_pointer_needed)
11546 && REGNO (addr) == SP_REGNUM)
11547 return 0;
11549 /* Oh dear. The argument is pointed to by a register rather
11550 than being held in a register, or being stored at a known
11551 offset from the frame pointer. Since GDB only understands
11552 those two kinds of argument we must translate the address
11553 held in the register into an offset from the frame pointer.
11554 We do this by searching through the insns for the function
11555 looking to see where this register gets its value. If the
11556 register is initialized from the frame pointer plus an offset
11557 then we are in luck and we can continue, otherwise we give up.
11559 This code is exercised by producing debugging information
11560 for a function with arguments like this:
11562 double func (double a, double b, int c, double d) {return d;}
11564 Without this code the stab for parameter 'd' will be set to
11565 an offset of 0 from the frame pointer, rather than 8. */
11567 /* The if() statement says:
11569 If the insn is a normal instruction
11570 and if the insn is setting the value in a register
11571 and if the register being set is the register holding the address of the argument
11572 and if the address is computed by an addition
11573 that involves adding to a register
11574 which is the frame pointer
11575 a constant integer
11577 then... */
11579 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
11581 if ( GET_CODE (insn) == INSN
11582 && GET_CODE (PATTERN (insn)) == SET
11583 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
11584 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
11585 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
11586 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
11587 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
11590 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
11592 break;
11596 if (value == 0)
11598 debug_rtx (addr);
11599 warning (0, "unable to compute real location of stacked parameter");
11600 value = 8; /* XXX magic hack */
11603 return value;
11606 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
11607 do \
11609 if ((MASK) & insn_flags) \
11610 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), \
11611 BUILT_IN_MD, NULL, NULL_TREE); \
11613 while (0)
11615 struct builtin_description
11617 const unsigned int mask;
11618 const enum insn_code icode;
11619 const char * const name;
11620 const enum arm_builtins code;
11621 const enum rtx_code comparison;
11622 const unsigned int flag;
11625 static const struct builtin_description bdesc_2arg[] =
11627 #define IWMMXT_BUILTIN(code, string, builtin) \
11628 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
11629 ARM_BUILTIN_##builtin, 0, 0 },
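 /* Each entry below ties an insn pattern to a two-operand builtin.  As an
    illustrative (assumed) use from C source compiled with iWMMXt enabled:

 	typedef signed char v8qi __attribute__ ((vector_size (8)));

 	v8qi add_bytes (v8qi a, v8qi b)
 	{
 	  return __builtin_arm_waddb (a, b);
 	}

    The argument and return types come from the mode-driven selection in
    arm_init_iwmmxt_builtins() below; a V8QImode pattern such as addv8qi3
    gets the v8qi_ftype_v8qi_v8qi signature.  */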
11631 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
11632 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
11633 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
11634 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
11635 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
11636 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
11637 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
11638 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
11639 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
11640 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
11641 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
11642 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
11643 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
11644 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
11645 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
11646 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
11647 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
11648 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
11649 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
11650 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
11651 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
11652 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
11653 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
11654 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
11655 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
11656 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
11657 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
11658 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
11659 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
11660 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
11661 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
11662 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
11663 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
11664 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
11665 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
11666 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
11667 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
11668 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
11669 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
11670 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
11671 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
11672 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
11673 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
11674 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
11675 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
11676 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
11677 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
11678 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
11679 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
11680 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
11681 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
11682 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
11683 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
11684 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
11685 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
11686 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
11687 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
11688 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
11690 #define IWMMXT_BUILTIN2(code, builtin) \
11691 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, 0, 0 },
11693 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
11694 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
11695 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
11696 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
11697 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
11698 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
11699 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
11700 IWMMXT_BUILTIN2 (ashlv4hi3, WSLLHI)
11701 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
11702 IWMMXT_BUILTIN2 (ashlv2si3, WSLLWI)
11703 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
11704 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
11705 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
11706 IWMMXT_BUILTIN2 (lshrv4hi3, WSRLHI)
11707 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
11708 IWMMXT_BUILTIN2 (lshrv2si3, WSRLWI)
11709 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
11710 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
11711 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
11712 IWMMXT_BUILTIN2 (ashrv4hi3, WSRAHI)
11713 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
11714 IWMMXT_BUILTIN2 (ashrv2si3, WSRAWI)
11715 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
11716 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
11717 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
11718 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
11719 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
11720 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
11721 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
11722 IWMMXT_BUILTIN2 (rordi3, WRORDI)
11723 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
11724 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
11727 static const struct builtin_description bdesc_1arg[] =
11729 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
11730 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
11731 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
11732 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
11733 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
11734 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
11735 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
11736 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
11737 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
11738 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
11739 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
11740 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
11741 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
11742 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
11743 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
11744 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
11745 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
11746 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
11749 /* Set up all the iWMMXt builtins. This is
11750 not called if TARGET_IWMMXT is zero. */
11752 static void
11753 arm_init_iwmmxt_builtins (void)
11755 const struct builtin_description * d;
11756 size_t i;
11757 tree endlink = void_list_node;
11759 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
11760 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
11761 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
11763 tree int_ftype_int
11764 = build_function_type (integer_type_node,
11765 tree_cons (NULL_TREE, integer_type_node, endlink));
11766 tree v8qi_ftype_v8qi_v8qi_int
11767 = build_function_type (V8QI_type_node,
11768 tree_cons (NULL_TREE, V8QI_type_node,
11769 tree_cons (NULL_TREE, V8QI_type_node,
11770 tree_cons (NULL_TREE,
11771 integer_type_node,
11772 endlink))));
11773 tree v4hi_ftype_v4hi_int
11774 = build_function_type (V4HI_type_node,
11775 tree_cons (NULL_TREE, V4HI_type_node,
11776 tree_cons (NULL_TREE, integer_type_node,
11777 endlink)));
11778 tree v2si_ftype_v2si_int
11779 = build_function_type (V2SI_type_node,
11780 tree_cons (NULL_TREE, V2SI_type_node,
11781 tree_cons (NULL_TREE, integer_type_node,
11782 endlink)));
11783 tree v2si_ftype_di_di
11784 = build_function_type (V2SI_type_node,
11785 tree_cons (NULL_TREE, long_long_integer_type_node,
11786 tree_cons (NULL_TREE, long_long_integer_type_node,
11787 endlink)));
11788 tree di_ftype_di_int
11789 = build_function_type (long_long_integer_type_node,
11790 tree_cons (NULL_TREE, long_long_integer_type_node,
11791 tree_cons (NULL_TREE, integer_type_node,
11792 endlink)));
11793 tree di_ftype_di_int_int
11794 = build_function_type (long_long_integer_type_node,
11795 tree_cons (NULL_TREE, long_long_integer_type_node,
11796 tree_cons (NULL_TREE, integer_type_node,
11797 tree_cons (NULL_TREE,
11798 integer_type_node,
11799 endlink))));
11800 tree int_ftype_v8qi
11801 = build_function_type (integer_type_node,
11802 tree_cons (NULL_TREE, V8QI_type_node,
11803 endlink));
11804 tree int_ftype_v4hi
11805 = build_function_type (integer_type_node,
11806 tree_cons (NULL_TREE, V4HI_type_node,
11807 endlink));
11808 tree int_ftype_v2si
11809 = build_function_type (integer_type_node,
11810 tree_cons (NULL_TREE, V2SI_type_node,
11811 endlink));
11812 tree int_ftype_v8qi_int
11813 = build_function_type (integer_type_node,
11814 tree_cons (NULL_TREE, V8QI_type_node,
11815 tree_cons (NULL_TREE, integer_type_node,
11816 endlink)));
11817 tree int_ftype_v4hi_int
11818 = build_function_type (integer_type_node,
11819 tree_cons (NULL_TREE, V4HI_type_node,
11820 tree_cons (NULL_TREE, integer_type_node,
11821 endlink)));
11822 tree int_ftype_v2si_int
11823 = build_function_type (integer_type_node,
11824 tree_cons (NULL_TREE, V2SI_type_node,
11825 tree_cons (NULL_TREE, integer_type_node,
11826 endlink)));
11827 tree v8qi_ftype_v8qi_int_int
11828 = build_function_type (V8QI_type_node,
11829 tree_cons (NULL_TREE, V8QI_type_node,
11830 tree_cons (NULL_TREE, integer_type_node,
11831 tree_cons (NULL_TREE,
11832 integer_type_node,
11833 endlink))));
11834 tree v4hi_ftype_v4hi_int_int
11835 = build_function_type (V4HI_type_node,
11836 tree_cons (NULL_TREE, V4HI_type_node,
11837 tree_cons (NULL_TREE, integer_type_node,
11838 tree_cons (NULL_TREE,
11839 integer_type_node,
11840 endlink))));
11841 tree v2si_ftype_v2si_int_int
11842 = build_function_type (V2SI_type_node,
11843 tree_cons (NULL_TREE, V2SI_type_node,
11844 tree_cons (NULL_TREE, integer_type_node,
11845 tree_cons (NULL_TREE,
11846 integer_type_node,
11847 endlink))));
11848 /* Miscellaneous. */
11849 tree v8qi_ftype_v4hi_v4hi
11850 = build_function_type (V8QI_type_node,
11851 tree_cons (NULL_TREE, V4HI_type_node,
11852 tree_cons (NULL_TREE, V4HI_type_node,
11853 endlink)));
11854 tree v4hi_ftype_v2si_v2si
11855 = build_function_type (V4HI_type_node,
11856 tree_cons (NULL_TREE, V2SI_type_node,
11857 tree_cons (NULL_TREE, V2SI_type_node,
11858 endlink)));
11859 tree v2si_ftype_v4hi_v4hi
11860 = build_function_type (V2SI_type_node,
11861 tree_cons (NULL_TREE, V4HI_type_node,
11862 tree_cons (NULL_TREE, V4HI_type_node,
11863 endlink)));
11864 tree v2si_ftype_v8qi_v8qi
11865 = build_function_type (V2SI_type_node,
11866 tree_cons (NULL_TREE, V8QI_type_node,
11867 tree_cons (NULL_TREE, V8QI_type_node,
11868 endlink)));
11869 tree v4hi_ftype_v4hi_di
11870 = build_function_type (V4HI_type_node,
11871 tree_cons (NULL_TREE, V4HI_type_node,
11872 tree_cons (NULL_TREE,
11873 long_long_integer_type_node,
11874 endlink)));
11875 tree v2si_ftype_v2si_di
11876 = build_function_type (V2SI_type_node,
11877 tree_cons (NULL_TREE, V2SI_type_node,
11878 tree_cons (NULL_TREE,
11879 long_long_integer_type_node,
11880 endlink)));
11881 tree void_ftype_int_int
11882 = build_function_type (void_type_node,
11883 tree_cons (NULL_TREE, integer_type_node,
11884 tree_cons (NULL_TREE, integer_type_node,
11885 endlink)));
11886 tree di_ftype_void
11887 = build_function_type (long_long_unsigned_type_node, endlink);
11888 tree di_ftype_v8qi
11889 = build_function_type (long_long_integer_type_node,
11890 tree_cons (NULL_TREE, V8QI_type_node,
11891 endlink));
11892 tree di_ftype_v4hi
11893 = build_function_type (long_long_integer_type_node,
11894 tree_cons (NULL_TREE, V4HI_type_node,
11895 endlink));
11896 tree di_ftype_v2si
11897 = build_function_type (long_long_integer_type_node,
11898 tree_cons (NULL_TREE, V2SI_type_node,
11899 endlink));
11900 tree v2si_ftype_v4hi
11901 = build_function_type (V2SI_type_node,
11902 tree_cons (NULL_TREE, V4HI_type_node,
11903 endlink));
11904 tree v4hi_ftype_v8qi
11905 = build_function_type (V4HI_type_node,
11906 tree_cons (NULL_TREE, V8QI_type_node,
11907 endlink));
11909 tree di_ftype_di_v4hi_v4hi
11910 = build_function_type (long_long_unsigned_type_node,
11911 tree_cons (NULL_TREE,
11912 long_long_unsigned_type_node,
11913 tree_cons (NULL_TREE, V4HI_type_node,
11914 tree_cons (NULL_TREE,
11915 V4HI_type_node,
11916 endlink))));
11918 tree di_ftype_v4hi_v4hi
11919 = build_function_type (long_long_unsigned_type_node,
11920 tree_cons (NULL_TREE, V4HI_type_node,
11921 tree_cons (NULL_TREE, V4HI_type_node,
11922 endlink)));
11924 /* Normal vector binops. */
11925 tree v8qi_ftype_v8qi_v8qi
11926 = build_function_type (V8QI_type_node,
11927 tree_cons (NULL_TREE, V8QI_type_node,
11928 tree_cons (NULL_TREE, V8QI_type_node,
11929 endlink)));
11930 tree v4hi_ftype_v4hi_v4hi
11931 = build_function_type (V4HI_type_node,
11932 tree_cons (NULL_TREE, V4HI_type_node,
11933 tree_cons (NULL_TREE, V4HI_type_node,
11934 endlink)));
11935 tree v2si_ftype_v2si_v2si
11936 = build_function_type (V2SI_type_node,
11937 tree_cons (NULL_TREE, V2SI_type_node,
11938 tree_cons (NULL_TREE, V2SI_type_node,
11939 endlink)));
11940 tree di_ftype_di_di
11941 = build_function_type (long_long_unsigned_type_node,
11942 tree_cons (NULL_TREE, long_long_unsigned_type_node,
11943 tree_cons (NULL_TREE,
11944 long_long_unsigned_type_node,
11945 endlink)));
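/* Added note (editor, not part of the original file): every signature
   above is built the same way.  build_function_type (RET, tree_cons
   (NULL_TREE, ARG1, tree_cons (NULL_TREE, ARG2, endlink))) produces the
   type "RET f (ARG1, ARG2)"; di_ftype_di_di, for example, stands for
   "unsigned long long f (unsigned long long, unsigned long long)".  */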
11947 /* Add all builtins that are more or less simple operations on two
11948 operands. */
11949 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
11951 /* Use one of the operands; the target can have a different mode for
11952 mask-generating compares. */
11953 enum machine_mode mode;
11954 tree type;
11956 if (d->name == 0)
11957 continue;
11959 mode = insn_data[d->icode].operand[1].mode;
11961 switch (mode)
11963 case V8QImode:
11964 type = v8qi_ftype_v8qi_v8qi;
11965 break;
11966 case V4HImode:
11967 type = v4hi_ftype_v4hi_v4hi;
11968 break;
11969 case V2SImode:
11970 type = v2si_ftype_v2si_v2si;
11971 break;
11972 case DImode:
11973 type = di_ftype_di_di;
11974 break;
11976 default:
11977 gcc_unreachable ();
11980 def_mbuiltin (d->mask, d->name, type, d->code);
11983 /* Add the remaining MMX insns with somewhat more complicated types. */
11984 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
11985 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
11986 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
11988 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
11989 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
11990 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
11991 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
11992 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
11993 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
11995 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
11996 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
11997 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
11998 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
11999 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
12000 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
12002 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
12003 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
12004 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
12005 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
12006 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
12007 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
12009 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
12010 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
12011 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
12012 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
12013 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
12014 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
12016 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
12018 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
12019 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
12020 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
12021 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
12023 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
12024 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
12025 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
12026 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
12027 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
12028 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
12029 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
12030 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
12031 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
12033 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
12034 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
12035 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
12037 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
12038 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
12039 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
12041 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
12042 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
12043 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
12044 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
12045 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
12046 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
12048 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
12049 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
12050 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
12051 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
12052 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
12053 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
12054 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
12055 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
12056 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
12057 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
12058 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
12059 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
12061 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
12062 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
12063 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
12064 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
12066 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
12067 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
12068 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
12069 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
12070 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
12071 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
12072 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
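/* Illustrative sketch (editor's addition, not part of arm.c): how user
   code might reach two of the builtins registered above.  The typedef is
   an assumption -- it relies on the vector_size attribute mapping onto
   V4HImode; real programs normally use the iWMMXt mmintrin.h wrappers.

     typedef short v4hi __attribute__ ((vector_size (8)));

     v4hi shift_lanes_left (v4hi x)
     {
       return __builtin_arm_wsllhi (x, 3);    // v4hi_ftype_v4hi_int
     }

     unsigned long long zero_wreg (void)
     {
       return __builtin_arm_wzero ();         // di_ftype_void
     }
*/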
12075 static void
12076 arm_init_builtins (void)
12078 if (TARGET_REALLY_IWMMXT)
12079 arm_init_iwmmxt_builtins ();
12082 /* Errors in the source file can cause expand_expr to return const0_rtx
12083 where we expect a vector. To avoid crashing, use one of the vector
12084 clear instructions. */
12086 static rtx
12087 safe_vector_operand (rtx x, enum machine_mode mode)
12089 if (x != const0_rtx)
12090 return x;
12091 x = gen_reg_rtx (mode);
12093 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
12094 : gen_rtx_SUBREG (DImode, x, 0)));
12095 return x;
12098 /* Subroutine of arm_expand_builtin to take care of binop insns. */
12100 static rtx
12101 arm_expand_binop_builtin (enum insn_code icode,
12102 tree arglist, rtx target)
12104 rtx pat;
12105 tree arg0 = TREE_VALUE (arglist);
12106 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12107 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12108 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12109 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12110 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12111 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12113 if (VECTOR_MODE_P (mode0))
12114 op0 = safe_vector_operand (op0, mode0);
12115 if (VECTOR_MODE_P (mode1))
12116 op1 = safe_vector_operand (op1, mode1);
12118 if (! target
12119 || GET_MODE (target) != tmode
12120 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12121 target = gen_reg_rtx (tmode);
12123 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
12125 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12126 op0 = copy_to_mode_reg (mode0, op0);
12127 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12128 op1 = copy_to_mode_reg (mode1, op1);
12130 pat = GEN_FCN (icode) (target, op0, op1);
12131 if (! pat)
12132 return 0;
12133 emit_insn (pat);
12134 return target;
12137 /* Subroutine of arm_expand_builtin to take care of unop insns. */
12139 static rtx
12140 arm_expand_unop_builtin (enum insn_code icode,
12141 tree arglist, rtx target, int do_load)
12143 rtx pat;
12144 tree arg0 = TREE_VALUE (arglist);
12145 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12146 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12147 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12149 if (! target
12150 || GET_MODE (target) != tmode
12151 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12152 target = gen_reg_rtx (tmode);
12153 if (do_load)
12154 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12155 else
12157 if (VECTOR_MODE_P (mode0))
12158 op0 = safe_vector_operand (op0, mode0);
12160 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12161 op0 = copy_to_mode_reg (mode0, op0);
12164 pat = GEN_FCN (icode) (target, op0);
12165 if (! pat)
12166 return 0;
12167 emit_insn (pat);
12168 return target;
12171 /* Expand an expression EXP that calls a built-in function,
12172 with result going to TARGET if that's convenient
12173 (and in mode MODE if that's convenient).
12174 SUBTARGET may be used as the target for computing one of EXP's operands.
12175 IGNORE is nonzero if the value is to be ignored. */
12177 static rtx
12178 arm_expand_builtin (tree exp,
12179 rtx target,
12180 rtx subtarget ATTRIBUTE_UNUSED,
12181 enum machine_mode mode ATTRIBUTE_UNUSED,
12182 int ignore ATTRIBUTE_UNUSED)
12184 const struct builtin_description * d;
12185 enum insn_code icode;
12186 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
12187 tree arglist = TREE_OPERAND (exp, 1);
12188 tree arg0;
12189 tree arg1;
12190 tree arg2;
12191 rtx op0;
12192 rtx op1;
12193 rtx op2;
12194 rtx pat;
12195 int fcode = DECL_FUNCTION_CODE (fndecl);
12196 size_t i;
12197 enum machine_mode tmode;
12198 enum machine_mode mode0;
12199 enum machine_mode mode1;
12200 enum machine_mode mode2;
12202 switch (fcode)
12204 case ARM_BUILTIN_TEXTRMSB:
12205 case ARM_BUILTIN_TEXTRMUB:
12206 case ARM_BUILTIN_TEXTRMSH:
12207 case ARM_BUILTIN_TEXTRMUH:
12208 case ARM_BUILTIN_TEXTRMSW:
12209 case ARM_BUILTIN_TEXTRMUW:
12210 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
12211 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
12212 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
12213 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
12214 : CODE_FOR_iwmmxt_textrmw);
12216 arg0 = TREE_VALUE (arglist);
12217 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12218 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12219 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12220 tmode = insn_data[icode].operand[0].mode;
12221 mode0 = insn_data[icode].operand[1].mode;
12222 mode1 = insn_data[icode].operand[2].mode;
12224 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12225 op0 = copy_to_mode_reg (mode0, op0);
12226 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12228 /* @@@ better error message */
12229 error ("selector must be an immediate");
12230 return gen_reg_rtx (tmode);
12232 if (target == 0
12233 || GET_MODE (target) != tmode
12234 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12235 target = gen_reg_rtx (tmode);
12236 pat = GEN_FCN (icode) (target, op0, op1);
12237 if (! pat)
12238 return 0;
12239 emit_insn (pat);
12240 return target;
12242 case ARM_BUILTIN_TINSRB:
12243 case ARM_BUILTIN_TINSRH:
12244 case ARM_BUILTIN_TINSRW:
12245 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
12246 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
12247 : CODE_FOR_iwmmxt_tinsrw);
12248 arg0 = TREE_VALUE (arglist);
12249 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12250 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12251 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12252 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12253 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12254 tmode = insn_data[icode].operand[0].mode;
12255 mode0 = insn_data[icode].operand[1].mode;
12256 mode1 = insn_data[icode].operand[2].mode;
12257 mode2 = insn_data[icode].operand[3].mode;
12259 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12260 op0 = copy_to_mode_reg (mode0, op0);
12261 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12262 op1 = copy_to_mode_reg (mode1, op1);
12263 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12265 /* @@@ better error message */
12266 error ("selector must be an immediate");
12267 return const0_rtx;
12269 if (target == 0
12270 || GET_MODE (target) != tmode
12271 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12272 target = gen_reg_rtx (tmode);
12273 pat = GEN_FCN (icode) (target, op0, op1, op2);
12274 if (! pat)
12275 return 0;
12276 emit_insn (pat);
12277 return target;
12279 case ARM_BUILTIN_SETWCX:
12280 arg0 = TREE_VALUE (arglist);
12281 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12282 op0 = force_reg (SImode, expand_expr (arg0, NULL_RTX, VOIDmode, 0));
12283 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12284 emit_insn (gen_iwmmxt_tmcr (op1, op0));
12285 return 0;
12287 case ARM_BUILTIN_GETWCX:
12288 arg0 = TREE_VALUE (arglist);
12289 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12290 target = gen_reg_rtx (SImode);
12291 emit_insn (gen_iwmmxt_tmrc (target, op0));
12292 return target;
12294 case ARM_BUILTIN_WSHUFH:
12295 icode = CODE_FOR_iwmmxt_wshufh;
12296 arg0 = TREE_VALUE (arglist);
12297 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12298 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12299 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12300 tmode = insn_data[icode].operand[0].mode;
12301 mode1 = insn_data[icode].operand[1].mode;
12302 mode2 = insn_data[icode].operand[2].mode;
12304 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
12305 op0 = copy_to_mode_reg (mode1, op0);
12306 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
12308 /* @@@ better error message */
12309 error ("mask must be an immediate");
12310 return const0_rtx;
12312 if (target == 0
12313 || GET_MODE (target) != tmode
12314 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12315 target = gen_reg_rtx (tmode);
12316 pat = GEN_FCN (icode) (target, op0, op1);
12317 if (! pat)
12318 return 0;
12319 emit_insn (pat);
12320 return target;
12322 case ARM_BUILTIN_WSADB:
12323 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, arglist, target);
12324 case ARM_BUILTIN_WSADH:
12325 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, arglist, target);
12326 case ARM_BUILTIN_WSADBZ:
12327 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, arglist, target);
12328 case ARM_BUILTIN_WSADHZ:
12329 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, arglist, target);
12331 /* Several three-argument builtins. */
12332 case ARM_BUILTIN_WMACS:
12333 case ARM_BUILTIN_WMACU:
12334 case ARM_BUILTIN_WALIGN:
12335 case ARM_BUILTIN_TMIA:
12336 case ARM_BUILTIN_TMIAPH:
12337 case ARM_BUILTIN_TMIATT:
12338 case ARM_BUILTIN_TMIATB:
12339 case ARM_BUILTIN_TMIABT:
12340 case ARM_BUILTIN_TMIABB:
12341 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
12342 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
12343 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
12344 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
12345 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
12346 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
12347 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
12348 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
12349 : CODE_FOR_iwmmxt_walign);
12350 arg0 = TREE_VALUE (arglist);
12351 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12352 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12353 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12354 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12355 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12356 tmode = insn_data[icode].operand[0].mode;
12357 mode0 = insn_data[icode].operand[1].mode;
12358 mode1 = insn_data[icode].operand[2].mode;
12359 mode2 = insn_data[icode].operand[3].mode;
12361 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12362 op0 = copy_to_mode_reg (mode0, op0);
12363 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12364 op1 = copy_to_mode_reg (mode1, op1);
12365 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12366 op2 = copy_to_mode_reg (mode2, op2);
12367 if (target == 0
12368 || GET_MODE (target) != tmode
12369 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12370 target = gen_reg_rtx (tmode);
12371 pat = GEN_FCN (icode) (target, op0, op1, op2);
12372 if (! pat)
12373 return 0;
12374 emit_insn (pat);
12375 return target;
12377 case ARM_BUILTIN_WZERO:
12378 target = gen_reg_rtx (DImode);
12379 emit_insn (gen_iwmmxt_clrdi (target));
12380 return target;
12382 default:
12383 break;
12386 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12387 if (d->code == (const enum arm_builtins) fcode)
12388 return arm_expand_binop_builtin (d->icode, arglist, target);
12390 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
12391 if (d->code == (const enum arm_builtins) fcode)
12392 return arm_expand_unop_builtin (d->icode, arglist, target, 0);
12394 /* @@@ Should really do something sensible here. */
12395 return NULL_RTX;
12398 /* Return the number (counting from 0) of
12399 the least significant set bit in MASK. */
12401 inline static int
12402 number_of_first_bit_set (unsigned mask)
12404 int bit;
12406 for (bit = 0;
12407 (mask & (1 << bit)) == 0;
12408 ++bit)
12409 continue;
12411 return bit;
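/* Illustrative note (editor's addition): number_of_first_bit_set (0x18)
   tests bits 0..2, stops at bit 3 and returns 3.  Assuming GCC's own
   __builtin_ctz may be used here, the loop above is equivalent to

     return __builtin_ctz (mask);

   and, like the loop, it is only well defined for mask != 0.  */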
12414 /* Emit code to push or pop registers to or from the stack. F is the
12415 assembly file. MASK is the registers to push or pop. PUSH is
12416 nonzero if we should push, and zero if we should pop. For debugging
12417 output, if pushing, adjust CFA_OFFSET by the amount of space added
12418 to the stack. REAL_REGS should have the same number of bits set as
12419 MASK, and will be used instead (in the same order) to describe which
12420 registers were saved - this is used to mark the save slots when we
12421 push high registers after moving them to low registers. */
12422 static void
12423 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
12424 unsigned long real_regs)
12426 int regno;
12427 int lo_mask = mask & 0xFF;
12428 int pushed_words = 0;
12430 gcc_assert (mask);
12432 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
12434 /* Special case.  Do not generate a POP PC statement here; do it in
12435 thumb_exit().  */
12436 thumb_exit (f, -1);
12437 return;
12440 fprintf (f, "\t%s\t{", push ? "push" : "pop");
12442 /* Look at the low registers first. */
12443 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
12445 if (lo_mask & 1)
12447 asm_fprintf (f, "%r", regno);
12449 if ((lo_mask & ~1) != 0)
12450 fprintf (f, ", ");
12452 pushed_words++;
12456 if (push && (mask & (1 << LR_REGNUM)))
12458 /* Catch pushing the LR. */
12459 if (mask & 0xFF)
12460 fprintf (f, ", ");
12462 asm_fprintf (f, "%r", LR_REGNUM);
12464 pushed_words++;
12466 else if (!push && (mask & (1 << PC_REGNUM)))
12468 /* Catch popping the PC. */
12469 if (TARGET_INTERWORK || TARGET_BACKTRACE
12470 || current_function_calls_eh_return)
12472 /* The PC is never popped directly; instead
12473 it is popped into r3 and then BX is used. */
12474 fprintf (f, "}\n");
12476 thumb_exit (f, -1);
12478 return;
12480 else
12482 if (mask & 0xFF)
12483 fprintf (f, ", ");
12485 asm_fprintf (f, "%r", PC_REGNUM);
12489 fprintf (f, "}\n");
12491 if (push && pushed_words && dwarf2out_do_frame ())
12493 char *l = dwarf2out_cfi_label ();
12494 int pushed_mask = real_regs;
12496 *cfa_offset += pushed_words * 4;
12497 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
12499 pushed_words = 0;
12500 pushed_mask = real_regs;
12501 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
12503 if (pushed_mask & 1)
12504 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
12509 /* Generate code to return from a thumb function.
12510 If 'reg_containing_return_addr' is -1, then the return address is
12511 actually on the stack, at the stack pointer. */
12512 static void
12513 thumb_exit (FILE *f, int reg_containing_return_addr)
12515 unsigned regs_available_for_popping;
12516 unsigned regs_to_pop;
12517 int pops_needed;
12518 unsigned available;
12519 unsigned required;
12520 int mode;
12521 int size;
12522 int restore_a4 = FALSE;
12524 /* Compute the registers we need to pop. */
12525 regs_to_pop = 0;
12526 pops_needed = 0;
12528 if (reg_containing_return_addr == -1)
12530 regs_to_pop |= 1 << LR_REGNUM;
12531 ++pops_needed;
12534 if (TARGET_BACKTRACE)
12536 /* Restore the (ARM) frame pointer and stack pointer. */
12537 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
12538 pops_needed += 2;
12541 /* If there is nothing to pop then just emit the BX instruction and
12542 return. */
12543 if (pops_needed == 0)
12545 if (current_function_calls_eh_return)
12546 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
12548 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
12549 return;
12551 /* Otherwise if we are not supporting interworking and we have not created
12552 a backtrace structure and the function was not entered in ARM mode then
12553 just pop the return address straight into the PC. */
12554 else if (!TARGET_INTERWORK
12555 && !TARGET_BACKTRACE
12556 && !is_called_in_ARM_mode (current_function_decl)
12557 && !current_function_calls_eh_return)
12559 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
12560 return;
12563 /* Find out how many of the (return) argument registers we can corrupt. */
12564 regs_available_for_popping = 0;
12566 /* If returning via __builtin_eh_return, the bottom three registers
12567 all contain information needed for the return. */
12568 if (current_function_calls_eh_return)
12569 size = 12;
12570 else
12572 /* If possible, deduce the registers used from the function's
12573 return value. This is more reliable than examining
12574 regs_ever_live[] because that will be set if the register is
12575 ever used in the function, not just if the register is used
12576 to hold a return value. */
12578 if (current_function_return_rtx != 0)
12579 mode = GET_MODE (current_function_return_rtx);
12580 else
12581 mode = DECL_MODE (DECL_RESULT (current_function_decl));
12583 size = GET_MODE_SIZE (mode);
12585 if (size == 0)
12587 /* In a void function we can use any argument register.
12588 In a function that returns a structure on the stack
12589 we can use the second and third argument registers. */
12590 if (mode == VOIDmode)
12591 regs_available_for_popping =
12592 (1 << ARG_REGISTER (1))
12593 | (1 << ARG_REGISTER (2))
12594 | (1 << ARG_REGISTER (3));
12595 else
12596 regs_available_for_popping =
12597 (1 << ARG_REGISTER (2))
12598 | (1 << ARG_REGISTER (3));
12600 else if (size <= 4)
12601 regs_available_for_popping =
12602 (1 << ARG_REGISTER (2))
12603 | (1 << ARG_REGISTER (3));
12604 else if (size <= 8)
12605 regs_available_for_popping =
12606 (1 << ARG_REGISTER (3));
12609 /* Match registers to be popped with registers into which we pop them. */
12610 for (available = regs_available_for_popping,
12611 required = regs_to_pop;
12612 required != 0 && available != 0;
12613 available &= ~(available & - available),
12614 required &= ~(required & - required))
12615 -- pops_needed;
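/* Worked example (editor's addition): "x &= ~(x & -x)" clears the least
   significant set bit of x.  With regs_available_for_popping == 0b0110
   and regs_to_pop == 0b0011 the loop above runs twice (0b0110 -> 0b0100
   -> 0, 0b0011 -> 0b0010 -> 0), so pops_needed is reduced by two.  */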
12617 /* If we have any popping registers left over, remove them. */
12618 if (available > 0)
12619 regs_available_for_popping &= ~available;
12621 /* Otherwise if we need another popping register we can use
12622 the fourth argument register. */
12623 else if (pops_needed)
12625 /* If we have not found any free argument registers and
12626 reg a4 contains the return address, we must move it. */
12627 if (regs_available_for_popping == 0
12628 && reg_containing_return_addr == LAST_ARG_REGNUM)
12630 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
12631 reg_containing_return_addr = LR_REGNUM;
12633 else if (size > 12)
12635 /* Register a4 is being used to hold part of the return value,
12636 but we have dire need of a free, low register. */
12637 restore_a4 = TRUE;
12639 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
12642 if (reg_containing_return_addr != LAST_ARG_REGNUM)
12644 /* The fourth argument register is available. */
12645 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
12647 --pops_needed;
12651 /* Pop as many registers as we can. */
12652 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
12653 regs_available_for_popping);
12655 /* Process the registers we popped. */
12656 if (reg_containing_return_addr == -1)
12658 /* The return address was popped into the lowest numbered register. */
12659 regs_to_pop &= ~(1 << LR_REGNUM);
12661 reg_containing_return_addr =
12662 number_of_first_bit_set (regs_available_for_popping);
12664 /* Remove this register from the mask of available registers, so that
12665 the return address will not be corrupted by further pops. */
12666 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
12669 /* If we popped other registers then handle them here. */
12670 if (regs_available_for_popping)
12672 int frame_pointer;
12674 /* Work out which register currently contains the frame pointer. */
12675 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
12677 /* Move it into the correct place. */
12678 asm_fprintf (f, "\tmov\t%r, %r\n",
12679 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
12681 /* (Temporarily) remove it from the mask of popped registers. */
12682 regs_available_for_popping &= ~(1 << frame_pointer);
12683 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
12685 if (regs_available_for_popping)
12687 int stack_pointer;
12689 /* We popped the stack pointer as well,
12690 find the register that contains it. */
12691 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
12693 /* Move it into the stack register. */
12694 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
12696 /* At this point we have popped all necessary registers, so
12697 do not worry about restoring regs_available_for_popping
12698 to its correct value:
12700 assert (pops_needed == 0)
12701 assert (regs_available_for_popping == (1 << frame_pointer))
12702 assert (regs_to_pop == (1 << STACK_POINTER)) */
12704 else
12706 /* Since we have just moved the popped value into the frame
12707 pointer, the popping register is available for reuse, and
12708 we know that we still have the stack pointer left to pop. */
12709 regs_available_for_popping |= (1 << frame_pointer);
12713 /* If we still have registers left on the stack, but we no longer have
12714 any registers into which we can pop them, then we must move the return
12715 address into the link register and make available the register that
12716 contained it. */
12717 if (regs_available_for_popping == 0 && pops_needed > 0)
12719 regs_available_for_popping |= 1 << reg_containing_return_addr;
12721 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
12722 reg_containing_return_addr);
12724 reg_containing_return_addr = LR_REGNUM;
12727 /* If we have registers left on the stack then pop some more.
12728 We know that at most we will want to pop FP and SP. */
12729 if (pops_needed > 0)
12731 int popped_into;
12732 int move_to;
12734 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
12735 regs_available_for_popping);
12737 /* We have popped either FP or SP.
12738 Move whichever one it is into the correct register. */
12739 popped_into = number_of_first_bit_set (regs_available_for_popping);
12740 move_to = number_of_first_bit_set (regs_to_pop);
12742 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
12744 regs_to_pop &= ~(1 << move_to);
12746 --pops_needed;
12749 /* If we still have not popped everything then we must have only
12750 had one register available to us and we are now popping the SP. */
12751 if (pops_needed > 0)
12753 int popped_into;
12755 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
12756 regs_available_for_popping);
12758 popped_into = number_of_first_bit_set (regs_available_for_popping);
12760 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
12762 assert (regs_to_pop == (1 << STACK_POINTER))
12763 assert (pops_needed == 1)
12767 /* If necessary restore the a4 register. */
12768 if (restore_a4)
12770 if (reg_containing_return_addr != LR_REGNUM)
12772 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
12773 reg_containing_return_addr = LR_REGNUM;
12776 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
12779 if (current_function_calls_eh_return)
12780 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
12782 /* Return to caller. */
12783 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
12787 void
12788 thumb_final_prescan_insn (rtx insn)
12790 if (flag_print_asm_name)
12791 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
12792 INSN_ADDRESSES (INSN_UID (insn)));
12796 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
12798 unsigned HOST_WIDE_INT mask = 0xff;
12799 int i;
12801 if (val == 0) /* XXX */
12802 return 0;
12804 for (i = 0; i < 25; i++)
12805 if ((val & (mask << i)) == val)
12806 return 1;
12808 return 0;
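/* Illustrative note (editor's addition): 0x0003FC00 is 0xFF << 10, so all
   of its set bits fit inside one shifted byte and the function returns 1;
   0x101 has significant bits spanning more than eight positions, so it
   returns 0.  */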
12811 /* Returns nonzero if the current function contains,
12812 or might contain a far jump. */
12813 static int
12814 thumb_far_jump_used_p (void)
12816 rtx insn;
12818 /* This test is only important for leaf functions. */
12819 /* assert (!leaf_function_p ()); */
12821 /* If we have already decided that far jumps may be used,
12822 do not bother checking again, and always return true even if
12823 it turns out that they are not being used. Once we have made
12824 the decision that far jumps are present (and that hence the link
12825 register will be pushed onto the stack) we cannot go back on it. */
12826 if (cfun->machine->far_jump_used)
12827 return 1;
12829 /* If this function is not being called from the prologue/epilogue
12830 generation code then it must be being called from the
12831 INITIAL_ELIMINATION_OFFSET macro. */
12832 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
12834 /* In this case we know that we are being asked about the elimination
12835 of the arg pointer register. If that register is not being used,
12836 then there are no arguments on the stack, and we do not have to
12837 worry that a far jump might force the prologue to push the link
12838 register, changing the stack offsets. In this case we can just
12839 return false, since the presence of far jumps in the function will
12840 not affect stack offsets.
12842 If the arg pointer is live (or if it was live, but has now been
12843 eliminated and so set to dead) then we do have to test to see if
12844 the function might contain a far jump. This test can lead to some
12845 false positives, since before reload is completed the length of
12846 branch instructions is not known, so gcc defaults to returning their
12847 longest length, which in turn sets the far jump attribute to true.
12849 A false positive will not result in bad code being generated, but it
12850 will result in a needless push and pop of the link register. We
12851 hope that this does not occur too often.
12853 If we need doubleword stack alignment this could affect the other
12854 elimination offsets so we can't risk getting it wrong. */
12855 if (regs_ever_live [ARG_POINTER_REGNUM])
12856 cfun->machine->arg_pointer_live = 1;
12857 else if (!cfun->machine->arg_pointer_live)
12858 return 0;
12861 /* Check to see if the function contains a branch
12862 insn with the far jump attribute set. */
12863 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
12865 if (GET_CODE (insn) == JUMP_INSN
12866 /* Ignore tablejump patterns. */
12867 && GET_CODE (PATTERN (insn)) != ADDR_VEC
12868 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
12869 && get_attr_far_jump (insn) == FAR_JUMP_YES
12872 /* Record the fact that we have decided that
12873 the function does use far jumps. */
12874 cfun->machine->far_jump_used = 1;
12875 return 1;
12879 return 0;
12882 /* Return nonzero if FUNC must be entered in ARM mode. */
12884 is_called_in_ARM_mode (tree func)
12886 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
12888 /* Ignore the problem of functions whose address is taken. */
12889 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
12890 return TRUE;
12892 #ifdef ARM_PE
12893 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
12894 #else
12895 return FALSE;
12896 #endif
12899 /* The bits which aren't usefully expanded as rtl. */
12900 const char *
12901 thumb_unexpanded_epilogue (void)
12903 int regno;
12904 unsigned long live_regs_mask = 0;
12905 int high_regs_pushed = 0;
12906 int had_to_push_lr;
12907 int size;
12908 int mode;
12910 if (return_used_this_function)
12911 return "";
12913 if (IS_NAKED (arm_current_func_type ()))
12914 return "";
12916 live_regs_mask = thumb_compute_save_reg_mask ();
12917 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
12919 /* If possible, deduce the registers used from the function's return value.
12920 This is more reliable than examining regs_ever_live[] because that
12921 will be set if the register is ever used in the function, not just if
12922 the register is used to hold a return value. */
12924 if (current_function_return_rtx != 0)
12925 mode = GET_MODE (current_function_return_rtx);
12926 else
12927 mode = DECL_MODE (DECL_RESULT (current_function_decl));
12929 size = GET_MODE_SIZE (mode);
12931 /* The prologue may have pushed some high registers to use as
12932 work registers, e.g. the testsuite file:
12933 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
12934 compiles to produce:
12935 push {r4, r5, r6, r7, lr}
12936 mov r7, r9
12937 mov r6, r8
12938 push {r6, r7}
12939 as part of the prologue. We have to undo that pushing here. */
12941 if (high_regs_pushed)
12943 unsigned long mask = live_regs_mask & 0xff;
12944 int next_hi_reg;
12946 /* The available low registers depend on the size of the value we are
12947 returning. */
12948 if (size <= 12)
12949 mask |= 1 << 3;
12950 if (size <= 8)
12951 mask |= 1 << 2;
12953 if (mask == 0)
12954 /* Oh dear! We have no low registers into which we can pop
12955 high registers! */
12956 internal_error
12957 ("no low registers available for popping high registers");
12959 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
12960 if (live_regs_mask & (1 << next_hi_reg))
12961 break;
12963 while (high_regs_pushed)
12965 /* Find lo register(s) into which the high register(s) can
12966 be popped. */
12967 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
12969 if (mask & (1 << regno))
12970 high_regs_pushed--;
12971 if (high_regs_pushed == 0)
12972 break;
12975 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
12977 /* Pop the values into the low register(s). */
12978 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
12980 /* Move the value(s) into the high registers. */
12981 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
12983 if (mask & (1 << regno))
12985 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
12986 regno);
12988 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
12989 if (live_regs_mask & (1 << next_hi_reg))
12990 break;
12994 live_regs_mask &= ~0x0f00;
12997 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
12998 live_regs_mask &= 0xff;
13000 if (current_function_pretend_args_size == 0 || TARGET_BACKTRACE)
13002 /* Pop the return address into the PC. */
13003 if (had_to_push_lr)
13004 live_regs_mask |= 1 << PC_REGNUM;
13006 /* Either no argument registers were pushed or a backtrace
13007 structure was created which includes an adjusted stack
13008 pointer, so just pop everything. */
13009 if (live_regs_mask)
13010 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
13011 live_regs_mask);
13013 /* We have either just popped the return address into the
13014 PC or it was kept in LR for the entire function. */
13015 if (!had_to_push_lr)
13016 thumb_exit (asm_out_file, LR_REGNUM);
13018 else
13020 /* Pop everything but the return address. */
13021 if (live_regs_mask)
13022 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
13023 live_regs_mask);
13025 if (had_to_push_lr)
13027 if (size > 12)
13029 /* We have no free low regs, so save one. */
13030 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
13031 LAST_ARG_REGNUM);
13034 /* Get the return address into a temporary register. */
13035 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
13036 1 << LAST_ARG_REGNUM);
13038 if (size > 12)
13040 /* Move the return address to lr. */
13041 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
13042 LAST_ARG_REGNUM);
13043 /* Restore the low register. */
13044 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
13045 IP_REGNUM);
13046 regno = LR_REGNUM;
13048 else
13049 regno = LAST_ARG_REGNUM;
13051 else
13052 regno = LR_REGNUM;
13054 /* Remove the argument registers that were pushed onto the stack. */
13055 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
13056 SP_REGNUM, SP_REGNUM,
13057 current_function_pretend_args_size);
13059 thumb_exit (asm_out_file, regno);
13062 return "";
13065 /* Functions to save and restore machine-specific function data. */
13066 static struct machine_function *
13067 arm_init_machine_status (void)
13069 struct machine_function *machine;
13070 machine = (machine_function *) ggc_alloc_cleared (sizeof (machine_function));
13072 #if ARM_FT_UNKNOWN != 0
13073 machine->func_type = ARM_FT_UNKNOWN;
13074 #endif
13075 return machine;
13078 /* Return an RTX indicating where the return address to the
13079 calling function can be found. */
13081 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
13083 if (count != 0)
13084 return NULL_RTX;
13086 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
13089 /* Do anything needed before RTL is emitted for each function. */
13090 void
13091 arm_init_expanders (void)
13093 /* Arrange to initialize and mark the machine per-function status. */
13094 init_machine_status = arm_init_machine_status;
13096 /* This is to stop the combine pass optimizing away the alignment
13097 adjustment of va_arg. */
13098 /* ??? It is claimed that this should not be necessary. */
13099 if (cfun)
13100 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
13104 /* Like arm_compute_initial_elimination_offset. Simpler because
13105 THUMB_HARD_FRAME_POINTER isn't actually the ABI specified frame pointer. */
13107 HOST_WIDE_INT
13108 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
13110 arm_stack_offsets *offsets;
13112 offsets = arm_get_frame_offsets ();
13114 switch (from)
13116 case ARG_POINTER_REGNUM:
13117 switch (to)
13119 case STACK_POINTER_REGNUM:
13120 return offsets->outgoing_args - offsets->saved_args;
13122 case FRAME_POINTER_REGNUM:
13123 return offsets->soft_frame - offsets->saved_args;
13125 case THUMB_HARD_FRAME_POINTER_REGNUM:
13126 case ARM_HARD_FRAME_POINTER_REGNUM:
13127 return offsets->saved_regs - offsets->saved_args;
13129 default:
13130 gcc_unreachable ();
13132 break;
13134 case FRAME_POINTER_REGNUM:
13135 switch (to)
13137 case STACK_POINTER_REGNUM:
13138 return offsets->outgoing_args - offsets->soft_frame;
13140 case THUMB_HARD_FRAME_POINTER_REGNUM:
13141 case ARM_HARD_FRAME_POINTER_REGNUM:
13142 return offsets->saved_regs - offsets->soft_frame;
13144 default:
13145 gcc_unreachable ();
13147 break;
13149 default:
13150 gcc_unreachable ();
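/* Worked example (editor's addition, hypothetical offsets): if
   saved_args == 0, saved_regs == 16, soft_frame == 16 and
   outgoing_args == 32, then eliminating ARG_POINTER_REGNUM to
   STACK_POINTER_REGNUM gives 32 - 0 = 32 bytes, while eliminating
   FRAME_POINTER_REGNUM to either hard frame pointer gives 16 - 16 = 0.  */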
13155 /* Generate the rest of a function's prologue. */
13156 void
13157 thumb_expand_prologue (void)
13159 rtx insn, dwarf;
13161 HOST_WIDE_INT amount;
13162 arm_stack_offsets *offsets;
13163 unsigned long func_type;
13164 int regno;
13165 unsigned long live_regs_mask;
13167 func_type = arm_current_func_type ();
13169 /* Naked functions don't have prologues. */
13170 if (IS_NAKED (func_type))
13171 return;
13173 if (IS_INTERRUPT (func_type))
13175 error ("interrupt Service Routines cannot be coded in Thumb mode");
13176 return;
13179 live_regs_mask = thumb_compute_save_reg_mask ();
13180 /* Load the pic register before setting the frame pointer,
13181 so we can use r7 as a temporary work register. */
13182 if (flag_pic)
13183 arm_load_pic_register (thumb_find_work_register (live_regs_mask));
13185 offsets = arm_get_frame_offsets ();
13187 if (frame_pointer_needed)
13189 insn = emit_insn (gen_movsi (hard_frame_pointer_rtx,
13190 stack_pointer_rtx));
13191 RTX_FRAME_RELATED_P (insn) = 1;
13193 else if (CALLER_INTERWORKING_SLOT_SIZE > 0)
13194 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
13195 stack_pointer_rtx);
13197 amount = offsets->outgoing_args - offsets->saved_regs;
13198 if (amount)
13200 if (amount < 512)
13202 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
13203 GEN_INT (- amount)));
13204 RTX_FRAME_RELATED_P (insn) = 1;
13206 else
13208 rtx reg;
13210 /* The stack decrement is too big for an immediate value in a single
13211 insn. In theory we could issue multiple subtracts, but after
13212 three of them it becomes more space efficient to place the full
13213 value in the constant pool and load into a register. (Also the
13214 ARM debugger really likes to see only one stack decrement per
13215 function). So instead we look for a scratch register into which
13216 we can load the decrement, and then we subtract this from the
13217 stack pointer. Unfortunately on the thumb the only available
13218 scratch registers are the argument registers, and we cannot use
13219 these as they may hold arguments to the function. Instead we
13220 attempt to locate a call preserved register which is used by this
13221 function. If we can find one, then we know that it will have
13222 been pushed at the start of the prologue and so we can corrupt
13223 it now. */
13224 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
13225 if (live_regs_mask & (1 << regno)
13226 && !(frame_pointer_needed
13227 && (regno == THUMB_HARD_FRAME_POINTER_REGNUM)))
13228 break;
13230 if (regno > LAST_LO_REGNUM) /* Very unlikely. */
13232 rtx spare = gen_rtx_REG (SImode, IP_REGNUM);
13234 /* Choose an arbitrary, non-argument low register. */
13235 reg = gen_rtx_REG (SImode, LAST_LO_REGNUM);
13237 /* Save it by copying it into a high, scratch register. */
13238 emit_insn (gen_movsi (spare, reg));
13239 /* Add a USE to stop propagate_one_insn() from barfing. */
13240 emit_insn (gen_prologue_use (spare));
13242 /* Decrement the stack. */
13243 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
13244 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
13245 stack_pointer_rtx, reg));
13246 RTX_FRAME_RELATED_P (insn) = 1;
13247 dwarf = gen_rtx_SET (SImode, stack_pointer_rtx,
13248 plus_constant (stack_pointer_rtx,
13249 -amount));
13250 RTX_FRAME_RELATED_P (dwarf) = 1;
13251 REG_NOTES (insn)
13252 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
13253 REG_NOTES (insn));
13255 /* Restore the low register's original value. */
13256 emit_insn (gen_movsi (reg, spare));
13258 /* Emit a USE of the restored scratch register, so that flow
13259 analysis will not consider the restore redundant. The
13260 register won't be used again in this function and isn't
13261 restored by the epilogue. */
13262 emit_insn (gen_prologue_use (reg));
13264 else
13266 reg = gen_rtx_REG (SImode, regno);
13268 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
13270 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
13271 stack_pointer_rtx, reg));
13272 RTX_FRAME_RELATED_P (insn) = 1;
13273 dwarf = gen_rtx_SET (SImode, stack_pointer_rtx,
13274 plus_constant (stack_pointer_rtx,
13275 -amount));
13276 RTX_FRAME_RELATED_P (dwarf) = 1;
13277 REG_NOTES (insn)
13278 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
13279 REG_NOTES (insn));
13282 /* If the frame pointer is needed, emit a special barrier that
13283 will prevent the scheduler from moving stores to the frame
13284 before the stack adjustment. */
13285 if (frame_pointer_needed)
13286 emit_insn (gen_stack_tie (stack_pointer_rtx,
13287 hard_frame_pointer_rtx));
13290 if (current_function_profile || !TARGET_SCHED_PROLOG)
13291 emit_insn (gen_blockage ());
13293 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
13294 if (live_regs_mask & 0xff)
13295 cfun->machine->lr_save_eliminated = 0;
13297 /* If the link register is being kept alive, with the return address in it,
13298 then make sure that it does not get reused by the ce2 pass. */
13299 if (cfun->machine->lr_save_eliminated)
13300 emit_insn (gen_prologue_use (gen_rtx_REG (SImode, LR_REGNUM)));
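/* Illustrative sketch (editor's addition): for the large-decrement path
   above, a function with a 2 KB frame that has already pushed r4 would
   conceptually get

       ldr   r4, =-2048      @ movsi of the constant, via the literal pool
       add   sp, sp, r4

   reusing r4 because the prologue saved it; the exact instructions depend
   on what the movsi pattern emits for the constant.  */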
13304 void
13305 thumb_expand_epilogue (void)
13307 HOST_WIDE_INT amount;
13308 arm_stack_offsets *offsets;
13309 int regno;
13311 /* Naked functions don't have epilogues. */
13312 if (IS_NAKED (arm_current_func_type ()))
13313 return;
13315 offsets = arm_get_frame_offsets ();
13316 amount = offsets->outgoing_args - offsets->saved_regs;
13318 if (frame_pointer_needed)
13319 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
13320 else if (amount)
13322 if (amount < 512)
13323 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
13324 GEN_INT (amount)));
13325 else
13327 /* r3 is always free in the epilogue. */
13328 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
13330 emit_insn (gen_movsi (reg, GEN_INT (amount)));
13331 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
13335 /* Emit a USE (stack_pointer_rtx), so that
13336 the stack adjustment will not be deleted. */
13337 emit_insn (gen_prologue_use (stack_pointer_rtx));
13339 if (current_function_profile || !TARGET_SCHED_PROLOG)
13340 emit_insn (gen_blockage ());
13342 /* Emit a clobber for each register that will be restored in the epilogue,
13343 so that flow2 will get register lifetimes correct. */
13344 for (regno = 0; regno < 13; regno++)
13345 if (regs_ever_live[regno] && !call_used_regs[regno])
13346 emit_insn (gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, regno)));
13348 if (! regs_ever_live[LR_REGNUM])
13349 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, LR_REGNUM)));
13352 static void
13353 thumb_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
13355 unsigned long live_regs_mask = 0;
13356 unsigned long l_mask;
13357 unsigned high_regs_pushed = 0;
13358 int cfa_offset = 0;
13359 int regno;
13361 if (IS_NAKED (arm_current_func_type ()))
13362 return;
13364 if (is_called_in_ARM_mode (current_function_decl))
13366 const char * name;
13368 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
13369 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
13370 == SYMBOL_REF);
13371 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
13373 /* Generate code sequence to switch us into Thumb mode. */
13374 /* The .code 32 directive has already been emitted by
13375 ASM_DECLARE_FUNCTION_NAME. */
13376 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
13377 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
13379 /* Generate a label, so that the debugger will notice the
13380 change in instruction sets. This label is also used by
13381 the assembler to bypass the ARM code when this function
13382 is called from a Thumb encoded function elsewhere in the
13383 same file. Hence the definition of STUB_NAME here must
13384 agree with the definition in gas/config/tc-arm.c. */
13386 #define STUB_NAME ".real_start_of"
13388 fprintf (f, "\t.code\t16\n");
13389 #ifdef ARM_PE
13390 if (arm_dllexport_name_p (name))
13391 name = arm_strip_name_encoding (name);
13392 #endif
13393 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
13394 fprintf (f, "\t.thumb_func\n");
13395 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
13398 if (current_function_pretend_args_size)
13400 if (cfun->machine->uses_anonymous_args)
13402 int num_pushes;
13404 fprintf (f, "\tpush\t{");
13406 num_pushes = ARM_NUM_INTS (current_function_pretend_args_size);
13408 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
13409 regno <= LAST_ARG_REGNUM;
13410 regno++)
13411 asm_fprintf (f, "%r%s", regno,
13412 regno == LAST_ARG_REGNUM ? "" : ", ");
13414 fprintf (f, "}\n");
13416 else
13417 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
13418 SP_REGNUM, SP_REGNUM,
13419 current_function_pretend_args_size);
13421 /* We don't need to record the stores for unwinding (would it
13422 help the debugger any if we did?), but record the change in
13423 the stack pointer. */
13424 if (dwarf2out_do_frame ())
13426 char *l = dwarf2out_cfi_label ();
13428 cfa_offset = cfa_offset + current_function_pretend_args_size;
13429 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
13433 /* Get the registers we are going to push. */
13434 live_regs_mask = thumb_compute_save_reg_mask ();
13435 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
13436 l_mask = live_regs_mask & 0x40ff;
13437 /* Then count how many other high registers will need to be pushed. */
13438 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
13440 if (TARGET_BACKTRACE)
13442 unsigned offset;
13443 unsigned work_register;
13445 /* We have been asked to create a stack backtrace structure.
13446 The code looks like this:
13448 0 .align 2
13449 0 func:
13450 0 sub SP, #16 Reserve space for 4 registers.
13451 2 push {R7} Push low registers.
13452 4 add R7, SP, #20 Get the stack pointer before the push.
13453 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
13454 8 mov R7, PC Get hold of the start of this code plus 12.
13455 10 str R7, [SP, #16] Store it.
13456 12 mov R7, FP Get hold of the current frame pointer.
13457 14 str R7, [SP, #4] Store it.
13458 16 mov R7, LR Get hold of the current return address.
13459 18 str R7, [SP, #12] Store it.
13460 20 add R7, SP, #16 Point at the start of the backtrace structure.
13461 22 mov FP, R7 Put this value into the frame pointer. */
13463 work_register = thumb_find_work_register (live_regs_mask);
13465 asm_fprintf
13466 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
13467 SP_REGNUM, SP_REGNUM);
13469 if (dwarf2out_do_frame ())
13471 char *l = dwarf2out_cfi_label ();
13473 cfa_offset = cfa_offset + 16;
13474 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
13477 if (l_mask)
13479 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
13480 offset = bit_count (l_mask);
13482 else
13483 offset = 0;
13485 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
13486 offset + 16 + current_function_pretend_args_size);
13488 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
13489 offset + 4);
13491 /* Make sure that the instruction fetching the PC is in the right place
13492 to calculate "start of backtrace creation code + 12". */
13493 if (l_mask)
13495 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
13496 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
13497 offset + 12);
13498 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
13499 ARM_HARD_FRAME_POINTER_REGNUM);
13500 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
13501 offset);
13503 else
13505 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
13506 ARM_HARD_FRAME_POINTER_REGNUM);
13507 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
13508 offset);
13509 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
13510 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
13511 offset + 12);
13514 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
13515 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
13516 offset + 8);
13517 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
13518 offset + 12);
13519 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
13520 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
13522 /* Optimisation: If we are not pushing any low registers but we are going
13523 to push some high registers then delay our first push. This will just
13524 be a push of LR and we can combine it with the push of the first high
13525 register. */
13526 else if ((l_mask & 0xff) != 0
13527 || (high_regs_pushed == 0 && l_mask))
13528 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
13530 if (high_regs_pushed)
13532 unsigned pushable_regs;
13533 unsigned next_hi_reg;
13535 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
13536 if (live_regs_mask & (1 << next_hi_reg))
13537 break;
13539 pushable_regs = l_mask & 0xff;
13541 if (pushable_regs == 0)
13542 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
13544 while (high_regs_pushed > 0)
13546 unsigned long real_regs_mask = 0;
13548 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
13550 if (pushable_regs & (1 << regno))
13552 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
13554 high_regs_pushed --;
13555 real_regs_mask |= (1 << next_hi_reg);
13557 if (high_regs_pushed)
13559 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
13560 next_hi_reg --)
13561 if (live_regs_mask & (1 << next_hi_reg))
13562 break;
13564 else
13566 pushable_regs &= ~((1 << regno) - 1);
13567 break;
13572 /* If we had to find a work register and we have not yet
13573 saved the LR then add it to the list of regs to push. */
13574 if (l_mask == (1 << LR_REGNUM))
13576 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
13577 1, &cfa_offset,
13578 real_regs_mask | (1 << LR_REGNUM));
13579 l_mask = 0;
13581 else
13582 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
13587 /* Handle the case of a double word load into a low register from
13588 a computed memory address. The computed address may involve a
13589 register which is overwritten by the load. */
13590 const char *
13591 thumb_load_double_from_address (rtx *operands)
13593 rtx addr;
13594 rtx base;
13595 rtx offset;
13596 rtx arg1;
13597 rtx arg2;
13599 gcc_assert (GET_CODE (operands[0]) == REG);
13600 gcc_assert (GET_CODE (operands[1]) == MEM);
13602 /* Get the memory address. */
13603 addr = XEXP (operands[1], 0);
13605 /* Work out how the memory address is computed. */
13606 switch (GET_CODE (addr))
13608 case REG:
13609 operands[2] = gen_rtx_MEM (SImode,
13610 plus_constant (XEXP (operands[1], 0), 4));
13612 if (REGNO (operands[0]) == REGNO (addr))
13614 output_asm_insn ("ldr\t%H0, %2", operands);
13615 output_asm_insn ("ldr\t%0, %1", operands);
13617 else
13619 output_asm_insn ("ldr\t%0, %1", operands);
13620 output_asm_insn ("ldr\t%H0, %2", operands);
13622 break;
13624 case CONST:
13625 /* Compute <address> + 4 for the high order load. */
13626 operands[2] = gen_rtx_MEM (SImode,
13627 plus_constant (XEXP (operands[1], 0), 4));
13629 output_asm_insn ("ldr\t%0, %1", operands);
13630 output_asm_insn ("ldr\t%H0, %2", operands);
13631 break;
13633 case PLUS:
13634 arg1 = XEXP (addr, 0);
13635 arg2 = XEXP (addr, 1);
13637 if (CONSTANT_P (arg1))
13638 base = arg2, offset = arg1;
13639 else
13640 base = arg1, offset = arg2;
13642 gcc_assert (GET_CODE (base) == REG);
13644 /* Catch the case of <address> = <reg> + <reg> */
13645 if (GET_CODE (offset) == REG)
13647 int reg_offset = REGNO (offset);
13648 int reg_base = REGNO (base);
13649 int reg_dest = REGNO (operands[0]);
13651 /* Add the base and offset registers together into the
13652 higher destination register. */
13653 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
13654 reg_dest + 1, reg_base, reg_offset);
13656 /* Load the lower destination register from the address in
13657 the higher destination register. */
13658 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
13659 reg_dest, reg_dest + 1);
13661 /* Load the higher destination register from its own address
13662 plus 4. */
13663 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
13664 reg_dest + 1, reg_dest + 1);
13666 else
13668 /* Compute <address> + 4 for the high order load. */
13669 operands[2] = gen_rtx_MEM (SImode,
13670 plus_constant (XEXP (operands[1], 0), 4));
13672 /* If the computed address is held in the low order register
13673 then load the high order register first, otherwise always
13674 load the low order register first. */
13675 if (REGNO (operands[0]) == REGNO (base))
13677 output_asm_insn ("ldr\t%H0, %2", operands);
13678 output_asm_insn ("ldr\t%0, %1", operands);
13680 else
13682 output_asm_insn ("ldr\t%0, %1", operands);
13683 output_asm_insn ("ldr\t%H0, %2", operands);
13686 break;
13688 case LABEL_REF:
13689 /* With no registers to worry about we can just load the value
13690 directly. */
13691 operands[2] = gen_rtx_MEM (SImode,
13692 plus_constant (XEXP (operands[1], 0), 4));
13694 output_asm_insn ("ldr\t%H0, %2", operands);
13695 output_asm_insn ("ldr\t%0, %1", operands);
13696 break;
13698 default:
13699 gcc_unreachable ();
13702 return "";
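/* Output assembler to copy N (2 or 3) words from the address in operands[1]
   to the address in operands[0], post-incrementing both pointers.  The
   scratch registers (operands[4] onwards) are sorted into ascending order
   first so that the ldmia/stmia register lists are valid.  */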
13705 const char *
13706 thumb_output_move_mem_multiple (int n, rtx *operands)
13708 rtx tmp;
13710 switch (n)
13712 case 2:
13713 if (REGNO (operands[4]) > REGNO (operands[5]))
13715 tmp = operands[4];
13716 operands[4] = operands[5];
13717 operands[5] = tmp;
13719 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
13720 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
13721 break;
13723 case 3:
13724 if (REGNO (operands[4]) > REGNO (operands[5]))
13726 tmp = operands[4];
13727 operands[4] = operands[5];
13728 operands[5] = tmp;
13730 if (REGNO (operands[5]) > REGNO (operands[6]))
13732 tmp = operands[5];
13733 operands[5] = operands[6];
13734 operands[6] = tmp;
13736 if (REGNO (operands[4]) > REGNO (operands[5]))
13738 tmp = operands[4];
13739 operands[4] = operands[5];
13740 operands[5] = tmp;
13743 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
13744 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
13745 break;
13747 default:
13748 gcc_unreachable ();
13751 return "";
13754 /* Output a call-via instruction for thumb state. */
13755 const char *
13756 thumb_call_via_reg (rtx reg)
13758 int regno = REGNO (reg);
13759 rtx *labelp;
13761 gcc_assert (regno < LR_REGNUM);
13763 /* If we are in the normal text section we can use a single instance
13764 per compilation unit. If we are doing function sections, then we need
13765 an entry per section, since we can't rely on reachability. */
13766 if (in_text_section ())
13768 thumb_call_reg_needed = 1;
13770 if (thumb_call_via_label[regno] == NULL)
13771 thumb_call_via_label[regno] = gen_label_rtx ();
13772 labelp = thumb_call_via_label + regno;
13774 else
13776 if (cfun->machine->call_via[regno] == NULL)
13777 cfun->machine->call_via[regno] = gen_label_rtx ();
13778 labelp = cfun->machine->call_via + regno;
13781 output_asm_insn ("bl\t%a0", labelp);
13782 return "";
13785 /* Routines for generating rtl. */
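/* Expand a constant-length block copy for Thumb.  The length in operands[2]
   is consumed in 12- and 8-byte chunks via the movmem patterns, and the
   remainder in word, half-word and byte moves.  */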
13786 void
13787 thumb_expand_movmemqi (rtx *operands)
13789 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
13790 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
13791 HOST_WIDE_INT len = INTVAL (operands[2]);
13792 HOST_WIDE_INT offset = 0;
13794 while (len >= 12)
13796 emit_insn (gen_movmem12b (out, in, out, in));
13797 len -= 12;
13800 if (len >= 8)
13802 emit_insn (gen_movmem8b (out, in, out, in));
13803 len -= 8;
13806 if (len >= 4)
13808 rtx reg = gen_reg_rtx (SImode);
13809 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
13810 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
13811 len -= 4;
13812 offset += 4;
13815 if (len >= 2)
13817 rtx reg = gen_reg_rtx (HImode);
13818 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
13819 plus_constant (in, offset))));
13820 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
13821 reg));
13822 len -= 2;
13823 offset += 2;
13826 if (len)
13828 rtx reg = gen_reg_rtx (QImode);
13829 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
13830 plus_constant (in, offset))));
13831 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
13832 reg));
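/* Handle storing a half-word to memory during reload via the
   thumb_movhi_clobber pattern.  */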
13836 void
13837 thumb_reload_out_hi (rtx *operands)
13839 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
13842 /* Handle reading a half-word from memory during reload. */
13843 void
13844 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
13846 gcc_unreachable ();
13849 /* Return the length of a function name prefix
13850 that starts with the character 'c'. */
13851 static int
13852 arm_get_strip_length (int c)
13854 switch (c)
13856 ARM_NAME_ENCODING_LENGTHS
13857 default: return 0;
13861 /* Return a pointer to a function's name with any
13862 and all prefix encodings stripped from it. */
13863 const char *
13864 arm_strip_name_encoding (const char *name)
13866 int skip;
13868 while ((skip = arm_get_strip_length (* name)))
13869 name += skip;
13871 return name;
13874 /* If there is a '*' anywhere in the name's prefix, then
13875 emit the stripped name verbatim, otherwise prepend an
13876 underscore if leading underscores are being used. */
13877 void
13878 arm_asm_output_labelref (FILE *stream, const char *name)
13880 int skip;
13881 int verbatim = 0;
13883 while ((skip = arm_get_strip_length (* name)))
13885 verbatim |= (*name == '*');
13886 name += skip;
13889 if (verbatim)
13890 fputs (name, stream);
13891 else
13892 asm_fprintf (stream, "%U%s", name);
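/* At the end of the file, emit any call-via-register stubs (a label followed
   by a "bx" instruction) that Thumb code in the text section required.  */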
13895 static void
13896 arm_file_end (void)
13898 int regno;
13900 if (! thumb_call_reg_needed)
13901 return;
13903 text_section ();
13904 asm_fprintf (asm_out_file, "\t.code 16\n");
13905 ASM_OUTPUT_ALIGN (asm_out_file, 1);
13907 for (regno = 0; regno < LR_REGNUM; regno++)
13909 rtx label = thumb_call_via_label[regno];
13911 if (label != 0)
13913 targetm.asm_out.internal_label (asm_out_file, "L",
13914 CODE_LABEL_NUMBER (label));
13915 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
13920 rtx aof_pic_label;
13922 #ifdef AOF_ASSEMBLER
13923 /* Special functions only needed when producing AOF syntax assembler. */
13925 struct pic_chain
13927 struct pic_chain * next;
13928 const char * symname;
13931 static struct pic_chain * aof_pic_chain = NULL;
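/* Return the address of the PIC constant-pool slot for symbol X, expressed as
   aof_pic_label plus an offset, adding a new entry to the chain if X has not
   been seen before.  */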
13934 aof_pic_entry (rtx x)
13936 struct pic_chain ** chainp;
13937 int offset;
13939 if (aof_pic_label == NULL_RTX)
13941 aof_pic_label = gen_rtx_SYMBOL_REF (Pmode, "x$adcons");
13944 for (offset = 0, chainp = &aof_pic_chain; *chainp;
13945 offset += 4, chainp = &(*chainp)->next)
13946 if ((*chainp)->symname == XSTR (x, 0))
13947 return plus_constant (aof_pic_label, offset);
13949 *chainp = (struct pic_chain *) xmalloc (sizeof (struct pic_chain));
13950 (*chainp)->next = NULL;
13951 (*chainp)->symname = XSTR (x, 0);
13952 return plus_constant (aof_pic_label, offset);
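/* Write the accumulated PIC address constants (the x$adcons AREA) out to F.  */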
13955 void
13956 aof_dump_pic_table (FILE *f)
13958 struct pic_chain * chain;
13960 if (aof_pic_chain == NULL)
13961 return;
13963 asm_fprintf (f, "\tAREA |%r$$adcons|, BASED %r\n",
13964 PIC_OFFSET_TABLE_REGNUM,
13965 PIC_OFFSET_TABLE_REGNUM);
13966 fputs ("|x$adcons|\n", f);
13968 for (chain = aof_pic_chain; chain; chain = chain->next)
13970 fputs ("\tDCD\t", f);
13971 assemble_name (f, chain->symname);
13972 fputs ("\n", f);
13976 int arm_text_section_count = 1;
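/* Return the AREA directive that opens a new, uniquely numbered code section,
   marked PIC and REENTRANT when compiling position-independent code.  */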
13978 char *
13979 aof_text_section (void)
13981 static char buf[100];
13982 sprintf (buf, "\tAREA |C$$code%d|, CODE, READONLY",
13983 arm_text_section_count++);
13984 if (flag_pic)
13985 strcat (buf, ", PIC, REENTRANT");
13986 return buf;
13989 static int arm_data_section_count = 1;
13991 char *
13992 aof_data_section (void)
13994 static char buf[100];
13995 sprintf (buf, "\tAREA |C$$data%d|, DATA", arm_data_section_count++);
13996 return buf;
13999 /* The AOF assembler is religiously strict about declarations of
14000 imported and exported symbols, so that it is impossible to declare
14001 a function as imported near the beginning of the file, and then to
14002 export it later on. It is, however, possible to delay the decision
14003 until all the functions in the file have been compiled. To get
14004 around this, we maintain a list of the imports and exports, and
14005 delete from it any that are subsequently defined. At the end of
14006 compilation we spit the remainder of the list out before the END
14007 directive. */
14009 struct import
14011 struct import * next;
14012 const char * name;
14015 static struct import * imports_list = NULL;
14017 void
14018 aof_add_import (const char *name)
14020 struct import * new;
14022 for (new = imports_list; new; new = new->next)
14023 if (new->name == name)
14024 return;
14026 new = (struct import *) xmalloc (sizeof (struct import));
14027 new->next = imports_list;
14028 imports_list = new;
14029 new->name = name;
14032 void
14033 aof_delete_import (const char *name)
14035 struct import ** old;
14037 for (old = &imports_list; *old; old = & (*old)->next)
14039 if ((*old)->name == name)
14041 *old = (*old)->next;
14042 return;
14047 int arm_main_function = 0;
14049 static void
14050 aof_dump_imports (FILE *f)
14052 /* The AOF assembler needs this to cause the startup code to be extracted
14053 from the library. Bringing in __main causes the whole thing to work
14054 automagically. */
14055 if (arm_main_function)
14057 text_section ();
14058 fputs ("\tIMPORT __main\n", f);
14059 fputs ("\tDCD __main\n", f);
14062 /* Now dump the remaining imports. */
14063 while (imports_list)
14065 fprintf (f, "\tIMPORT\t");
14066 assemble_name (f, imports_list->name);
14067 fputc ('\n', f);
14068 imports_list = imports_list->next;
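/* Globalize a label as usual, but also note when `main' is defined so that
   aof_dump_imports can pull in the startup code.  */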
14072 static void
14073 aof_globalize_label (FILE *stream, const char *name)
14075 default_globalize_label (stream, name);
14076 if (! strcmp (name, "main"))
14077 arm_main_function = 1;
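/* Emit the standard AOF register-name (RN/FN) definitions at the start of the
   assembly file and switch to the text section.  */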
14080 static void
14081 aof_file_start (void)
14083 fputs ("__r0\tRN\t0\n", asm_out_file);
14084 fputs ("__a1\tRN\t0\n", asm_out_file);
14085 fputs ("__a2\tRN\t1\n", asm_out_file);
14086 fputs ("__a3\tRN\t2\n", asm_out_file);
14087 fputs ("__a4\tRN\t3\n", asm_out_file);
14088 fputs ("__v1\tRN\t4\n", asm_out_file);
14089 fputs ("__v2\tRN\t5\n", asm_out_file);
14090 fputs ("__v3\tRN\t6\n", asm_out_file);
14091 fputs ("__v4\tRN\t7\n", asm_out_file);
14092 fputs ("__v5\tRN\t8\n", asm_out_file);
14093 fputs ("__v6\tRN\t9\n", asm_out_file);
14094 fputs ("__sl\tRN\t10\n", asm_out_file);
14095 fputs ("__fp\tRN\t11\n", asm_out_file);
14096 fputs ("__ip\tRN\t12\n", asm_out_file);
14097 fputs ("__sp\tRN\t13\n", asm_out_file);
14098 fputs ("__lr\tRN\t14\n", asm_out_file);
14099 fputs ("__pc\tRN\t15\n", asm_out_file);
14100 fputs ("__f0\tFN\t0\n", asm_out_file);
14101 fputs ("__f1\tFN\t1\n", asm_out_file);
14102 fputs ("__f2\tFN\t2\n", asm_out_file);
14103 fputs ("__f3\tFN\t3\n", asm_out_file);
14104 fputs ("__f4\tFN\t4\n", asm_out_file);
14105 fputs ("__f5\tFN\t5\n", asm_out_file);
14106 fputs ("__f6\tFN\t6\n", asm_out_file);
14107 fputs ("__f7\tFN\t7\n", asm_out_file);
14108 text_section ();
14111 static void
14112 aof_file_end (void)
14114 if (flag_pic)
14115 aof_dump_pic_table (asm_out_file);
14116 arm_file_end ();
14117 aof_dump_imports (asm_out_file);
14118 fputs ("\tEND\n", asm_out_file);
14120 #endif /* AOF_ASSEMBLER */
14122 #ifndef ARM_PE
14123 /* Symbols in the text segment can be accessed without indirecting via the
14124 constant pool; it may take an extra binary operation, but this is still
14125 faster than indirecting via memory. Don't do this when not optimizing,
14126 since we won't be calculating all of the offsets necessary to do this
14127 simplification. */
14129 static void
14130 arm_encode_section_info (tree decl, rtx rtl, int first)
14132 /* This doesn't work with AOF syntax, since the string table may be in
14133 a different AREA. */
14134 #ifndef AOF_ASSEMBLER
14135 if (optimize > 0 && TREE_CONSTANT (decl))
14136 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
14137 #endif
14139 /* If we are referencing a function that is weak then encode a long call
14140 flag in the function name, otherwise if the function is static or
14141 known to be defined in this file then encode a short call flag. */
14142 if (first && DECL_P (decl))
14144 if (TREE_CODE (decl) == FUNCTION_DECL && DECL_WEAK (decl))
14145 arm_encode_call_attribute (decl, LONG_CALL_FLAG_CHAR);
14146 else if (! TREE_PUBLIC (decl))
14147 arm_encode_call_attribute (decl, SHORT_CALL_FLAG_CHAR);
14150 #endif /* !ARM_PE */
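/* Output an internal label, first resetting the conditional execution state
   machine if this label is the branch target it was tracking.  */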
14152 static void
14153 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
14155 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
14156 && !strcmp (prefix, "L"))
14158 arm_ccfsm_state = 0;
14159 arm_target_insn = NULL;
14161 default_internal_label (stream, prefix, labelno);
14164 /* Output code to add DELTA to the first argument, and then jump
14165 to FUNCTION. Used for C++ multiple inheritance. */
14166 static void
14167 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
14168 HOST_WIDE_INT delta,
14169 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
14170 tree function)
14172 static int thunk_label = 0;
14173 char label[256];
14174 int mi_delta = delta;
14175 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
14176 int shift = 0;
14177 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
14178 ? 1 : 0);
14179 if (mi_delta < 0)
14180 mi_delta = - mi_delta;
14181 if (TARGET_THUMB)
14183 int labelno = thunk_label++;
14184 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
14185 fputs ("\tldr\tr12, ", file);
14186 assemble_name (file, label);
14187 fputc ('\n', file);
14189 while (mi_delta != 0)
14191 if ((mi_delta & (3 << shift)) == 0)
14192 shift += 2;
14193 else
14195 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
14196 mi_op, this_regno, this_regno,
14197 mi_delta & (0xff << shift));
14198 mi_delta &= ~(0xff << shift);
14199 shift += 8;
14202 if (TARGET_THUMB)
14204 fprintf (file, "\tbx\tr12\n");
14205 ASM_OUTPUT_ALIGN (file, 2);
14206 assemble_name (file, label);
14207 fputs (":\n", file);
14208 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
14210 else
14212 fputs ("\tb\t", file);
14213 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
14214 if (NEED_PLT_RELOC)
14215 fputs ("(PLT)", file);
14216 fputc ('\n', file);
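/* Output the CONST_VECTOR X as a single hexadecimal constant, highest-numbered
   element first.  Always returns 1.  */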
14221 arm_emit_vector_const (FILE *file, rtx x)
14223 int i;
14224 const char * pattern;
14226 gcc_assert (GET_CODE (x) == CONST_VECTOR);
14228 switch (GET_MODE (x))
14230 case V2SImode: pattern = "%08x"; break;
14231 case V4HImode: pattern = "%04x"; break;
14232 case V8QImode: pattern = "%02x"; break;
14233 default: gcc_unreachable ();
14236 fprintf (file, "0x");
14237 for (i = CONST_VECTOR_NUNITS (x); i--;)
14239 rtx element;
14241 element = CONST_VECTOR_ELT (x, i);
14242 fprintf (file, pattern, INTVAL (element));
14245 return 1;
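/* Output a load into an iWMMXt GR register.  A single wldrw suffices unless
   the address is a register plus an out-of-range constant; in that case the
   base register is saved on the stack, used to load the value, the value is
   transferred with tmcr, and the base register is then restored.  */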
14248 const char *
14249 arm_output_load_gr (rtx *operands)
14251 rtx reg;
14252 rtx offset;
14253 rtx wcgr;
14254 rtx sum;
14256 if (GET_CODE (operands [1]) != MEM
14257 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
14258 || GET_CODE (reg = XEXP (sum, 0)) != REG
14259 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
14260 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
14261 return "wldrw%?\t%0, %1";
14263 /* Fix up an out-of-range load of a GR register. */
14264 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
14265 wcgr = operands[0];
14266 operands[0] = reg;
14267 output_asm_insn ("ldr%?\t%0, %1", operands);
14269 operands[0] = wcgr;
14270 operands[1] = reg;
14271 output_asm_insn ("tmcr%?\t%0, %1", operands);
14272 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
14274 return "";
14277 static rtx
14278 arm_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
14279 int incoming ATTRIBUTE_UNUSED)
14281 #if 0
14282 /* FIXME: The ARM backend has special code to handle structure
14283 returns, and will reserve its own hidden first argument. So
14284 if this macro is enabled a *second* hidden argument will be
14285 reserved, which will break binary compatibility with old
14286 toolchains and also thunk handling. One day this should be
14287 fixed. */
14288 return 0;
14289 #else
14290 /* Register in which the address to store a structure value
14291 is passed to a function. */
14292 return gen_rtx_REG (Pmode, ARG_REGISTER (1));
14293 #endif
14296 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
14298 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
14299 named arg and all anonymous args onto the stack.
14300 XXX I know the prologue shouldn't be pushing registers, but it is faster
14301 that way. */
14303 static void
14304 arm_setup_incoming_varargs (CUMULATIVE_ARGS *cum,
14305 enum machine_mode mode ATTRIBUTE_UNUSED,
14306 tree type ATTRIBUTE_UNUSED,
14307 int *pretend_size,
14308 int second_time ATTRIBUTE_UNUSED)
14310 cfun->machine->uses_anonymous_args = 1;
14311 if (cum->nregs < NUM_ARG_REGS)
14312 *pretend_size = (NUM_ARG_REGS - cum->nregs) * UNITS_PER_WORD;
14315 /* Return nonzero if the CONSUMER instruction (a store) does not need
14316 PRODUCER's value to calculate the address. */
14319 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
14321 rtx value = PATTERN (producer);
14322 rtx addr = PATTERN (consumer);
14324 if (GET_CODE (value) == COND_EXEC)
14325 value = COND_EXEC_CODE (value);
14326 if (GET_CODE (value) == PARALLEL)
14327 value = XVECEXP (value, 0, 0);
14328 value = XEXP (value, 0);
14329 if (GET_CODE (addr) == COND_EXEC)
14330 addr = COND_EXEC_CODE (addr);
14331 if (GET_CODE (addr) == PARALLEL)
14332 addr = XVECEXP (addr, 0, 0);
14333 addr = XEXP (addr, 0);
14335 return !reg_overlap_mentioned_p (value, addr);
14338 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
14339 have an early register shift value or amount dependency on the
14340 result of PRODUCER. */
14343 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
14345 rtx value = PATTERN (producer);
14346 rtx op = PATTERN (consumer);
14347 rtx early_op;
14349 if (GET_CODE (value) == COND_EXEC)
14350 value = COND_EXEC_CODE (value);
14351 if (GET_CODE (value) == PARALLEL)
14352 value = XVECEXP (value, 0, 0);
14353 value = XEXP (value, 0);
14354 if (GET_CODE (op) == COND_EXEC)
14355 op = COND_EXEC_CODE (op);
14356 if (GET_CODE (op) == PARALLEL)
14357 op = XVECEXP (op, 0, 0);
14358 op = XEXP (op, 1);
14360 early_op = XEXP (op, 0);
14361 /* This is either an actual independent shift, or a shift applied to
14362 the first operand of another operation. We want the whole shift
14363 operation. */
14364 if (GET_CODE (early_op) == REG)
14365 early_op = op;
14367 return !reg_overlap_mentioned_p (value, early_op);
14370 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
14371 have an early register shift value dependency on the result of
14372 PRODUCER. */
14375 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
14377 rtx value = PATTERN (producer);
14378 rtx op = PATTERN (consumer);
14379 rtx early_op;
14381 if (GET_CODE (value) == COND_EXEC)
14382 value = COND_EXEC_CODE (value);
14383 if (GET_CODE (value) == PARALLEL)
14384 value = XVECEXP (value, 0, 0);
14385 value = XEXP (value, 0);
14386 if (GET_CODE (op) == COND_EXEC)
14387 op = COND_EXEC_CODE (op);
14388 if (GET_CODE (op) == PARALLEL)
14389 op = XVECEXP (op, 0, 0);
14390 op = XEXP (op, 1);
14392 early_op = XEXP (op, 0);
14394 /* This is either an actual independent shift, or a shift applied to
14395 the first operand of another operation. We want the value being
14396 shifted, in either case. */
14397 if (GET_CODE (early_op) != REG)
14398 early_op = XEXP (early_op, 0);
14400 return !reg_overlap_mentioned_p (value, early_op);
14403 /* Return nonzero if the CONSUMER (a mul or mac op) does not
14404 have an early register mult dependency on the result of
14405 PRODUCER. */
14408 arm_no_early_mul_dep (rtx producer, rtx consumer)
14410 rtx value = PATTERN (producer);
14411 rtx op = PATTERN (consumer);
14413 if (GET_CODE (value) == COND_EXEC)
14414 value = COND_EXEC_CODE (value);
14415 if (GET_CODE (value) == PARALLEL)
14416 value = XVECEXP (value, 0, 0);
14417 value = XEXP (value, 0);
14418 if (GET_CODE (op) == COND_EXEC)
14419 op = COND_EXEC_CODE (op);
14420 if (GET_CODE (op) == PARALLEL)
14421 op = XVECEXP (op, 0, 0);
14422 op = XEXP (op, 1);
14424 return (GET_CODE (op) == PLUS
14425 && !reg_overlap_mentioned_p (value, XEXP (op, 0)));
14429 /* We can't rely on the caller doing the proper promotion when
14430 using APCS or ATPCS. */
14432 static bool
14433 arm_promote_prototypes (tree t ATTRIBUTE_UNUSED)
14435 return !TARGET_AAPCS_BASED;
14439 /* AAPCS based ABIs use short enums by default. */
14441 static bool
14442 arm_default_short_enums (void)
14444 return TARGET_AAPCS_BASED;
14448 /* AAPCS requires that anonymous bitfields affect structure alignment. */
14450 static bool
14451 arm_align_anon_bitfield (void)
14453 return TARGET_AAPCS_BASED;
14457 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
14459 static tree
14460 arm_cxx_guard_type (void)
14462 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
14466 /* The EABI says to test the least significant bit of a guard variable. */
14468 static bool
14469 arm_cxx_guard_mask_bit (void)
14471 return TARGET_AAPCS_BASED;
14475 /* The EABI specifies that all array cookies are 8 bytes long. */
14477 static tree
14478 arm_get_cookie_size (tree type)
14480 tree size;
14482 if (!TARGET_AAPCS_BASED)
14483 return default_cxx_get_cookie_size (type);
14485 size = build_int_cst (sizetype, 8);
14486 return size;
14490 /* The EABI says that array cookies should also contain the element size. */
14492 static bool
14493 arm_cookie_has_size (void)
14495 return TARGET_AAPCS_BASED;
14499 /* The EABI says constructors and destructors should return a pointer to
14500 the object constructed/destroyed. */
14502 static bool
14503 arm_cxx_cdtor_returns_this (void)
14505 return TARGET_AAPCS_BASED;
14508 /* The EABI says that an inline function may never be the key
14509 method. */
14511 static bool
14512 arm_cxx_key_method_may_be_inline (void)
14514 return !TARGET_AAPCS_BASED;
14517 static void
14518 arm_cxx_determine_class_data_visibility (tree decl)
14520 if (!TARGET_AAPCS_BASED)
14521 return;
14523 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
14524 is exported. However, on systems without dynamic vague linkage,
14525 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
14526 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
14527 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
14528 else
14529 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
14530 DECL_VISIBILITY_SPECIFIED (decl) = 1;
14533 static bool
14534 arm_cxx_class_data_always_comdat (void)
14536 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
14537 vague linkage if the class has no key function. */
14538 return !TARGET_AAPCS_BASED;
14542 /* The EABI says __aeabi_atexit should be used to register static
14543 destructors. */
14545 static bool
14546 arm_cxx_use_aeabi_atexit (void)
14548 return TARGET_AAPCS_BASED;
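/* Set the current function's return address to SOURCE, either directly in LR
   if LR was not saved, or by storing SOURCE into the stack slot holding the
   saved LR, using SCRATCH to form the address when the offset is out of
   range.  */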
14552 void
14553 arm_set_return_address (rtx source, rtx scratch)
14555 arm_stack_offsets *offsets;
14556 HOST_WIDE_INT delta;
14557 rtx addr;
14558 unsigned long saved_regs;
14560 saved_regs = arm_compute_save_reg_mask ();
14562 if ((saved_regs & (1 << LR_REGNUM)) == 0)
14563 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
14564 else
14566 if (frame_pointer_needed)
14567 addr = plus_constant(hard_frame_pointer_rtx, -4);
14568 else
14570 /* LR will be the first saved register. */
14571 offsets = arm_get_frame_offsets ();
14572 delta = offsets->outgoing_args - (offsets->frame + 4);
14575 if (delta >= 4096)
14577 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
14578 GEN_INT (delta & ~4095)));
14579 addr = scratch;
14580 delta &= 4095;
14582 else
14583 addr = stack_pointer_rtx;
14585 addr = plus_constant (addr, delta);
14587 emit_move_insn (gen_rtx_MEM (Pmode, addr), source);
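/* Thumb counterpart of arm_set_return_address: store SOURCE into the saved-LR
   stack slot (allowing for the backtrace structure), or move it directly into
   LR if LR was not saved.  */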
14592 void
14593 thumb_set_return_address (rtx source, rtx scratch)
14595 arm_stack_offsets *offsets;
14596 HOST_WIDE_INT delta;
14597 int reg;
14598 rtx addr;
14599 unsigned long mask;
14601 emit_insn (gen_rtx_USE (VOIDmode, source));
14603 mask = thumb_compute_save_reg_mask ();
14604 if (mask & (1 << LR_REGNUM))
14606 offsets = arm_get_frame_offsets ();
14608 /* Find the saved regs. */
14609 if (frame_pointer_needed)
14611 delta = offsets->soft_frame - offsets->saved_args;
14612 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
14614 else
14616 delta = offsets->outgoing_args - offsets->saved_args;
14617 reg = SP_REGNUM;
14619 /* Allow for the stack frame. */
14620 if (TARGET_BACKTRACE)
14621 delta -= 16;
14622 /* The link register is always the first saved register. */
14623 delta -= 4;
14625 /* Construct the address. */
14626 addr = gen_rtx_REG (SImode, reg);
14627 if ((reg != SP_REGNUM && delta >= 128)
14628 || delta >= 1024)
14630 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
14631 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
14632 addr = scratch;
14634 else
14635 addr = plus_constant (addr, delta);
14637 emit_move_insn (gen_rtx_MEM (Pmode, addr), source);
14639 else
14640 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
14643 /* Implements target hook vector_mode_supported_p. */
14644 bool
14645 arm_vector_mode_supported_p (enum machine_mode mode)
14647 if ((mode == V2SImode)
14648 || (mode == V4HImode)
14649 || (mode == V8QImode))
14650 return true;
14652 return false;
14655 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
14656 ARM insns and therefore guarantee that the shift count is modulo 256.
14657 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
14658 guarantee no particular behavior for out-of-range counts. */
14660 static unsigned HOST_WIDE_INT
14661 arm_shift_truncation_mask (enum machine_mode mode)
14663 return mode == SImode ? 255 : 0;
14667 /* Map internal gcc register numbers to DWARF2 register numbers. */
14669 unsigned int
14670 arm_dbx_register_number (unsigned int regno)
14672 if (regno < 16)
14673 return regno;
14675 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
14676 compatibility. The EABI defines them as registers 96-103. */
14677 if (IS_FPA_REGNUM (regno))
14678 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
14680 if (IS_VFP_REGNUM (regno))
14681 return 64 + regno - FIRST_VFP_REGNUM;
14683 if (IS_IWMMXT_GR_REGNUM (regno))
14684 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
14686 if (IS_IWMMXT_REGNUM (regno))
14687 return 112 + regno - FIRST_IWMMXT_REGNUM;
14689 gcc_unreachable ();