* arm.c (arm_gen_constant): Add new heuristic for generating
gcc/config/arm/arm.c (blob 6b84d77b2c65dc80cf45ea88bea0eaaa623942d8)
/* Output routines for GCC for ARM.
   Copyright (C) 1991, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
   2002, 2003, 2004, 2005 Free Software Foundation, Inc.
   Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
   and Martin Simmons (@harleqn.co.uk).
   More major hacks by Richard Earnshaw (rearnsha@arm.com).

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 2, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING.  If not, write to
   the Free Software Foundation, 59 Temple Place - Suite 330,
   Boston, MA 02111-1307, USA.  */
#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "tree.h"
#include "obstack.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "reload.h"
#include "function.h"
#include "expr.h"
#include "optabs.h"
#include "toplev.h"
#include "recog.h"
#include "ggc.h"
#include "except.h"
#include "c-pragma.h"
#include "integrate.h"
#include "tm_p.h"
#include "target.h"
#include "target-def.h"
#include "debug.h"
#include "langhooks.h"

/* Forward definitions of types.  */
typedef struct minipool_node    Mnode;
typedef struct minipool_fixup   Mfix;

const struct attribute_spec arm_attribute_table[];

/* Forward function declarations.  */
static arm_stack_offsets *arm_get_frame_offsets (void);
static void arm_add_gc_roots (void);
static int arm_gen_constant (enum rtx_code, enum machine_mode, rtx,
			     HOST_WIDE_INT, rtx, rtx, int, int);
static unsigned bit_count (unsigned long);
static int arm_address_register_rtx_p (rtx, int);
static int arm_legitimate_index_p (enum machine_mode, rtx, RTX_CODE, int);
static int thumb_base_register_rtx_p (rtx, enum machine_mode, int);
inline static int thumb_index_register_rtx_p (rtx, int);
static int thumb_far_jump_used_p (void);
static bool thumb_force_lr_save (void);
static int const_ok_for_op (HOST_WIDE_INT, enum rtx_code);
static rtx emit_sfm (int, int);
#ifndef AOF_ASSEMBLER
static bool arm_assemble_integer (rtx, unsigned int, int);
#endif
static const char *fp_const_from_val (REAL_VALUE_TYPE *);
static arm_cc get_arm_condition_code (rtx);
static HOST_WIDE_INT int_log2 (HOST_WIDE_INT);
static rtx is_jump_table (rtx);
static const char *output_multi_immediate (rtx *, const char *, const char *,
					   int, HOST_WIDE_INT);
static const char *shift_op (rtx, HOST_WIDE_INT *);
static struct machine_function *arm_init_machine_status (void);
static void thumb_exit (FILE *, int);
static rtx is_jump_table (rtx);
static HOST_WIDE_INT get_jump_table_size (rtx);
static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_forward_ref (Mfix *);
static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
static Mnode *add_minipool_backward_ref (Mfix *);
static void assign_minipool_offsets (Mfix *);
static void arm_print_value (FILE *, rtx);
static void dump_minipool (rtx);
static int arm_barrier_cost (rtx);
static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
static void push_minipool_barrier (rtx, HOST_WIDE_INT);
static void push_minipool_fix (rtx, HOST_WIDE_INT, rtx *, enum machine_mode,
			       rtx);
static void arm_reorg (void);
static bool note_invalid_constants (rtx, HOST_WIDE_INT, int);
static int current_file_function_operand (rtx);
static unsigned long arm_compute_save_reg0_reg12_mask (void);
static unsigned long arm_compute_save_reg_mask (void);
static unsigned long arm_isr_value (tree);
static unsigned long arm_compute_func_type (void);
static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
#endif
static void arm_output_function_epilogue (FILE *, HOST_WIDE_INT);
static void arm_output_function_prologue (FILE *, HOST_WIDE_INT);
static void thumb_output_function_prologue (FILE *, HOST_WIDE_INT);
static int arm_comp_type_attributes (tree, tree);
static void arm_set_default_type_attributes (tree);
static int arm_adjust_cost (rtx, rtx, rtx, int);
static int count_insns_for_constant (HOST_WIDE_INT, int);
static int arm_get_strip_length (int);
static bool arm_function_ok_for_sibcall (tree, tree);
static void arm_internal_label (FILE *, const char *, unsigned long);
static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
				 tree);
static int arm_rtx_costs_1 (rtx, enum rtx_code, enum rtx_code);
static bool arm_size_rtx_costs (rtx, int, int, int *);
static bool arm_slowmul_rtx_costs (rtx, int, int, int *);
static bool arm_fastmul_rtx_costs (rtx, int, int, int *);
static bool arm_xscale_rtx_costs (rtx, int, int, int *);
static bool arm_9e_rtx_costs (rtx, int, int, int *);
static int arm_address_cost (rtx);
static bool arm_memory_load_p (rtx);
static bool arm_cirrus_insn_p (rtx);
static void cirrus_reorg (rtx);
static void arm_init_builtins (void);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void arm_init_iwmmxt_builtins (void);
static rtx safe_vector_operand (rtx, enum machine_mode);
static rtx arm_expand_binop_builtin (enum insn_code, tree, rtx);
static rtx arm_expand_unop_builtin (enum insn_code, tree, rtx, int);
static rtx arm_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
static void emit_constant_insn (rtx cond, rtx pattern);
static int arm_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
				  tree, bool);

#ifdef OBJECT_FORMAT_ELF
static void arm_elf_asm_constructor (rtx, int);
#endif
#ifndef ARM_PE
static void arm_encode_section_info (tree, rtx, int);
#endif

static void arm_file_end (void);

#ifdef AOF_ASSEMBLER
static void aof_globalize_label (FILE *, const char *);
static void aof_dump_imports (FILE *);
static void aof_dump_pic_table (FILE *);
static void aof_file_start (void);
static void aof_file_end (void);
#endif
static rtx arm_struct_value_rtx (tree, int);
static void arm_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
					tree, int *, int);
static bool arm_pass_by_reference (CUMULATIVE_ARGS *,
				   enum machine_mode, tree, bool);
static bool arm_promote_prototypes (tree);
static bool arm_default_short_enums (void);
static bool arm_align_anon_bitfield (void);
static bool arm_return_in_msb (tree);
static bool arm_must_pass_in_stack (enum machine_mode, tree);

static tree arm_cxx_guard_type (void);
static bool arm_cxx_guard_mask_bit (void);
static tree arm_get_cookie_size (tree);
static bool arm_cookie_has_size (void);
static bool arm_cxx_cdtor_returns_this (void);
static bool arm_cxx_key_method_may_be_inline (void);
static void arm_cxx_determine_class_data_visibility (tree);
static bool arm_cxx_class_data_always_comdat (void);
static bool arm_cxx_use_aeabi_atexit (void);
static void arm_init_libfuncs (void);
static bool arm_handle_option (size_t, const char *, int);
static unsigned HOST_WIDE_INT arm_shift_truncation_mask (enum machine_mode);

/* Initialize the GCC target structure.  */
#if TARGET_DLLIMPORT_DECL_ATTRIBUTES
#undef  TARGET_MERGE_DECL_ATTRIBUTES
#define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
#endif

#undef  TARGET_ATTRIBUTE_TABLE
#define TARGET_ATTRIBUTE_TABLE arm_attribute_table

#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END arm_file_end

#ifdef AOF_ASSEMBLER
#undef  TARGET_ASM_BYTE_OP
#define TARGET_ASM_BYTE_OP "\tDCB\t"
#undef  TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\tDCW\t"
#undef  TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP "\tDCD\t"
#undef TARGET_ASM_GLOBALIZE_LABEL
#define TARGET_ASM_GLOBALIZE_LABEL aof_globalize_label
#undef TARGET_ASM_FILE_START
#define TARGET_ASM_FILE_START aof_file_start
#undef TARGET_ASM_FILE_END
#define TARGET_ASM_FILE_END aof_file_end
#else
#undef  TARGET_ASM_ALIGNED_SI_OP
#define TARGET_ASM_ALIGNED_SI_OP NULL
#undef  TARGET_ASM_INTEGER
#define TARGET_ASM_INTEGER arm_assemble_integer
#endif

#undef  TARGET_ASM_FUNCTION_PROLOGUE
#define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue

#undef  TARGET_ASM_FUNCTION_EPILOGUE
#define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue

#undef  TARGET_DEFAULT_TARGET_FLAGS
#define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT | MASK_SCHED_PROLOG)
#undef  TARGET_HANDLE_OPTION
#define TARGET_HANDLE_OPTION arm_handle_option

#undef  TARGET_COMP_TYPE_ATTRIBUTES
#define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes

#undef  TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
#define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes

#undef  TARGET_SCHED_ADJUST_COST
#define TARGET_SCHED_ADJUST_COST arm_adjust_cost

#undef TARGET_ENCODE_SECTION_INFO
#ifdef ARM_PE
#define TARGET_ENCODE_SECTION_INFO  arm_pe_encode_section_info
#else
#define TARGET_ENCODE_SECTION_INFO  arm_encode_section_info
#endif

#undef  TARGET_STRIP_NAME_ENCODING
#define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding

#undef  TARGET_ASM_INTERNAL_LABEL
#define TARGET_ASM_INTERNAL_LABEL arm_internal_label

#undef  TARGET_FUNCTION_OK_FOR_SIBCALL
#define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall

#undef  TARGET_ASM_OUTPUT_MI_THUNK
#define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
#undef  TARGET_ASM_CAN_OUTPUT_MI_THUNK
#define TARGET_ASM_CAN_OUTPUT_MI_THUNK default_can_output_mi_thunk_no_vcall

/* This will be overridden in arm_override_options.  */
#undef  TARGET_RTX_COSTS
#define TARGET_RTX_COSTS arm_slowmul_rtx_costs
#undef  TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST arm_address_cost

#undef TARGET_SHIFT_TRUNCATION_MASK
#define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
#undef TARGET_VECTOR_MODE_SUPPORTED_P
#define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p

#undef  TARGET_MACHINE_DEPENDENT_REORG
#define TARGET_MACHINE_DEPENDENT_REORG arm_reorg

#undef  TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS  arm_init_builtins
#undef  TARGET_EXPAND_BUILTIN
#define TARGET_EXPAND_BUILTIN arm_expand_builtin

#undef TARGET_INIT_LIBFUNCS
#define TARGET_INIT_LIBFUNCS arm_init_libfuncs

#undef TARGET_PROMOTE_FUNCTION_ARGS
#define TARGET_PROMOTE_FUNCTION_ARGS hook_bool_tree_true
#undef TARGET_PROMOTE_FUNCTION_RETURN
#define TARGET_PROMOTE_FUNCTION_RETURN hook_bool_tree_true
#undef TARGET_PROMOTE_PROTOTYPES
#define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
#undef TARGET_PASS_BY_REFERENCE
#define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
#undef TARGET_ARG_PARTIAL_BYTES
#define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes

#undef  TARGET_STRUCT_VALUE_RTX
#define TARGET_STRUCT_VALUE_RTX arm_struct_value_rtx

#undef  TARGET_SETUP_INCOMING_VARARGS
#define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs

#undef TARGET_DEFAULT_SHORT_ENUMS
#define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums

#undef TARGET_ALIGN_ANON_BITFIELD
#define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield

#undef TARGET_CXX_GUARD_TYPE
#define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type

#undef TARGET_CXX_GUARD_MASK_BIT
#define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit

#undef TARGET_CXX_GET_COOKIE_SIZE
#define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size

#undef TARGET_CXX_COOKIE_HAS_SIZE
#define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size

#undef TARGET_CXX_CDTOR_RETURNS_THIS
#define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this

#undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
#define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline

#undef TARGET_CXX_USE_AEABI_ATEXIT
#define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit

#undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
#define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
  arm_cxx_determine_class_data_visibility

#undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
#define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat

#undef TARGET_RETURN_IN_MSB
#define TARGET_RETURN_IN_MSB arm_return_in_msb

#undef TARGET_MUST_PASS_IN_STACK
#define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack

struct gcc_target targetm = TARGET_INITIALIZER;

/* Obstack for minipool constant handling.  */
static struct obstack minipool_obstack;
static char *         minipool_startobj;

/* The maximum number of insns skipped which
   will be conditionalised if possible.  */
static int max_insns_skipped = 5;

extern FILE * asm_out_file;

/* True if we are currently building a constant table.  */
int making_const_table;

/* Define the information needed to generate branch insns.  This is
   stored from the compare operation.  */
rtx arm_compare_op0, arm_compare_op1;

/* The processor for which instructions should be scheduled.  */
enum processor_type arm_tune = arm_none;

/* Which floating point model to use.  */
enum arm_fp_model arm_fp_model;

/* Which floating point hardware is available.  */
enum fputype arm_fpu_arch;

/* Which floating point hardware to schedule for.  */
enum fputype arm_fpu_tune;

/* Whether to use floating point hardware.  */
enum float_abi_type arm_float_abi;

/* Which ABI to use.  */
enum arm_abi_type arm_abi;

/* Set by the -mfpu=... option.  */
static const char * target_fpu_name = NULL;

/* Set by the -mfpe=... option.  */
static const char * target_fpe_name = NULL;

/* Set by the -mfloat-abi=... option.  */
static const char * target_float_abi_name = NULL;

/* Set by the -mabi=... option.  */
static const char * target_abi_name = NULL;

/* Used to parse -mstructure_size_boundary command line option.  */
static const char * structure_size_string = NULL;
int arm_structure_size_boundary = DEFAULT_STRUCTURE_SIZE_BOUNDARY;

/* Used for Thumb call_via trampolines.  */
rtx thumb_call_via_label[14];
static int thumb_call_reg_needed;

/* Bit values used to identify processor capabilities.  */
#define FL_CO_PROC    (1 << 0)        /* Has external co-processor bus */
#define FL_ARCH3M     (1 << 1)        /* Extended multiply */
#define FL_MODE26     (1 << 2)        /* 26-bit mode support */
#define FL_MODE32     (1 << 3)        /* 32-bit mode support */
#define FL_ARCH4      (1 << 4)        /* Architecture rel 4 */
#define FL_ARCH5      (1 << 5)        /* Architecture rel 5 */
#define FL_THUMB      (1 << 6)        /* Thumb aware */
#define FL_LDSCHED    (1 << 7)        /* Load scheduling necessary */
#define FL_STRONG     (1 << 8)        /* StrongARM */
#define FL_ARCH5E     (1 << 9)        /* DSP extensions to v5 */
#define FL_XSCALE     (1 << 10)       /* XScale */
#define FL_CIRRUS     (1 << 11)       /* Cirrus/DSP.  */
#define FL_ARCH6      (1 << 12)       /* Architecture rel 6.  Adds
					 media instructions.  */
#define FL_VFPV2      (1 << 13)       /* Vector Floating Point V2.  */
#define FL_WBUF       (1 << 14)       /* Schedule for write buffer ops.
					 Note: ARM6 & 7 derivatives only.  */

#define FL_IWMMXT     (1 << 29)       /* XScale v2 or "Intel Wireless MMX technology".  */

#define FL_FOR_ARCH2	0
#define FL_FOR_ARCH3	FL_MODE32
#define FL_FOR_ARCH3M	(FL_FOR_ARCH3 | FL_ARCH3M)
#define FL_FOR_ARCH4	(FL_FOR_ARCH3M | FL_ARCH4)
#define FL_FOR_ARCH4T	(FL_FOR_ARCH4 | FL_THUMB)
#define FL_FOR_ARCH5	(FL_FOR_ARCH4 | FL_ARCH5)
#define FL_FOR_ARCH5T	(FL_FOR_ARCH5 | FL_THUMB)
#define FL_FOR_ARCH5E	(FL_FOR_ARCH5 | FL_ARCH5E)
#define FL_FOR_ARCH5TE	(FL_FOR_ARCH5E | FL_THUMB)
#define FL_FOR_ARCH5TEJ	FL_FOR_ARCH5TE
#define FL_FOR_ARCH6	(FL_FOR_ARCH5TE | FL_ARCH6)
#define FL_FOR_ARCH6J	FL_FOR_ARCH6
#define FL_FOR_ARCH6K	FL_FOR_ARCH6
#define FL_FOR_ARCH6Z	FL_FOR_ARCH6
#define FL_FOR_ARCH6ZK	FL_FOR_ARCH6
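
/* The FL_FOR_ARCHn macros above accumulate: for example, FL_FOR_ARCH5TE
   expands to
   FL_MODE32 | FL_ARCH3M | FL_ARCH4 | FL_ARCH5 | FL_ARCH5E | FL_THUMB.  */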

/* The bits in this mask specify which
   instructions we are allowed to generate.  */
static unsigned long insn_flags = 0;

/* The bits in this mask specify which instruction scheduling options should
   be used.  */
static unsigned long tune_flags = 0;

/* The following are used in the arm.md file as equivalents to bits
   in the above two flag variables.  */

/* Nonzero if this chip supports the ARM Architecture 3M extensions.  */
int arm_arch3m = 0;

/* Nonzero if this chip supports the ARM Architecture 4 extensions.  */
int arm_arch4 = 0;

/* Nonzero if this chip supports the ARM Architecture 4t extensions.  */
int arm_arch4t = 0;

/* Nonzero if this chip supports the ARM Architecture 5 extensions.  */
int arm_arch5 = 0;

/* Nonzero if this chip supports the ARM Architecture 5E extensions.  */
int arm_arch5e = 0;

/* Nonzero if this chip supports the ARM Architecture 6 extensions.  */
int arm_arch6 = 0;

/* Nonzero if this chip can benefit from load scheduling.  */
int arm_ld_sched = 0;

/* Nonzero if this chip is a StrongARM.  */
int arm_tune_strongarm = 0;

/* Nonzero if this chip is a Cirrus variant.  */
int arm_arch_cirrus = 0;

/* Nonzero if this chip supports Intel Wireless MMX technology.  */
int arm_arch_iwmmxt = 0;

/* Nonzero if this chip is an XScale.  */
int arm_arch_xscale = 0;

/* Nonzero if tuning for XScale.  */
int arm_tune_xscale = 0;

/* Nonzero if we want to tune for stores that access the write-buffer.
   This typically means an ARM6 or ARM7 with MMU or MPU.  */
int arm_tune_wbuf = 0;

/* Nonzero if generating Thumb instructions.  */
int thumb_code = 0;

/* Nonzero if we should define __THUMB_INTERWORK__ in the
   preprocessor.
   XXX This is a bit of a hack, it's intended to help work around
   problems in GLD which doesn't understand that armv5t code is
   interworking clean.  */
int arm_cpp_interwork = 0;

/* In case of a PRE_INC, POST_INC, PRE_DEC, POST_DEC memory reference, we
   must report the mode of the memory reference from PRINT_OPERAND to
   PRINT_OPERAND_ADDRESS.  */
enum machine_mode output_memory_reference_mode;

/* The register number to be used for the PIC offset register.  */
static const char * arm_pic_register_string = NULL;
int arm_pic_register = INVALID_REGNUM;

/* Set to 1 when a return insn is output, this means that the epilogue
   is not needed.  */
int return_used_this_function;

/* Set to 1 after arm_reorg has started.  Reset to start at the start of
   the next function.  */
static int after_arm_reorg = 0;

/* The maximum number of insns to be used when loading a constant.  */
static int arm_constant_limit = 3;

/* For an explanation of these variables, see final_prescan_insn below.  */
int arm_ccfsm_state;
enum arm_cond_code arm_current_cc;
rtx arm_target_insn;
int arm_target_label;

/* The condition codes of the ARM, and the inverse function.  */
static const char * const arm_condition_codes[] =
{
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
};

#define streq(string1, string2) (strcmp (string1, string2) == 0)

/* Initialization code.  */

struct processors
{
  const char *const name;
  enum processor_type core;
  const char *arch;
  const unsigned long flags;
  bool (* rtx_costs) (rtx, int, int, int *);
};

/* Not all of these give usefully different compilation alternatives,
   but there is no simple way of generalizing them.  */
static const struct processors all_cores[] =
{
  /* ARM Cores */
#define ARM_CORE(NAME, IDENT, ARCH, FLAGS, COSTS) \
  {NAME, arm_none, #ARCH, FLAGS | FL_FOR_ARCH##ARCH, arm_##COSTS##_rtx_costs},
#include "arm-cores.def"
#undef ARM_CORE
  {NULL, arm_none, NULL, 0, NULL}
};

static const struct processors all_architectures[] =
{
  /* ARM Architectures */
  /* We don't specify rtx_costs here as it will be figured out
     from the core.  */

  {"armv2",   arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv2a",  arm2,       "2",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH2, NULL},
  {"armv3",   arm6,       "3",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3, NULL},
  {"armv3m",  arm7m,      "3M",  FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH3M, NULL},
  {"armv4",   arm7tdmi,   "4",   FL_CO_PROC | FL_MODE26 | FL_FOR_ARCH4, NULL},
  /* Strictly, FL_MODE26 is a permitted option for v4t, but there are no
     implementations that support it, so we will leave it out for now.  */
  {"armv4t",  arm7tdmi,   "4T",  FL_CO_PROC |             FL_FOR_ARCH4T, NULL},
  {"armv5",   arm10tdmi,  "5",   FL_CO_PROC |             FL_FOR_ARCH5, NULL},
  {"armv5t",  arm10tdmi,  "5T",  FL_CO_PROC |             FL_FOR_ARCH5T, NULL},
  {"armv5e",  arm1026ejs, "5E",  FL_CO_PROC |             FL_FOR_ARCH5E, NULL},
  {"armv5te", arm1026ejs, "5TE", FL_CO_PROC |             FL_FOR_ARCH5TE, NULL},
  {"armv6",   arm1136js,  "6",   FL_CO_PROC |             FL_FOR_ARCH6, NULL},
  {"armv6j",  arm1136js,  "6J",  FL_CO_PROC |             FL_FOR_ARCH6J, NULL},
  {"armv6k",  mpcore,     "6K",  FL_CO_PROC |             FL_FOR_ARCH6K, NULL},
  {"armv6z",  arm1176jzs, "6Z",  FL_CO_PROC |             FL_FOR_ARCH6Z, NULL},
  {"armv6zk", arm1176jzs, "6ZK", FL_CO_PROC |             FL_FOR_ARCH6ZK, NULL},
  {"ep9312",  ep9312,     "4T",  FL_LDSCHED | FL_CIRRUS | FL_FOR_ARCH4, NULL},
  {"iwmmxt",  iwmmxt,     "5TE", FL_LDSCHED | FL_STRONG | FL_FOR_ARCH5TE | FL_XSCALE | FL_IWMMXT , NULL},
  {NULL, arm_none, NULL, 0 , NULL}
};

struct arm_cpu_select
{
  const char *              string;
  const char *              name;
  const struct processors * processors;
};

/* This is a magic structure.  The 'string' field is magically filled in
   with a pointer to the value specified by the user on the command line
   assuming that the user has specified such a value.  */

static struct arm_cpu_select arm_select[] =
{
  /* string	  name		  processors  */
  { NULL,	"-mcpu=",	all_cores  },
  { NULL,	"-march=",	all_architectures },
  { NULL,	"-mtune=",	all_cores }
};

/* Defines representing the indexes into the above table.  */
#define ARM_OPT_SET_CPU 0
#define ARM_OPT_SET_ARCH 1
#define ARM_OPT_SET_TUNE 2

/* The name of the preprocessor macro to define for this architecture.  */

char arm_arch_name[] = "__ARM_ARCH_0UNK__";

struct fpu_desc
{
  const char * name;
  enum fputype fpu;
};


/* Available values for -mfpu=.  */

static const struct fpu_desc all_fpus[] =
{
  {"fpa",	FPUTYPE_FPA},
  {"fpe2",	FPUTYPE_FPA_EMU2},
  {"fpe3",	FPUTYPE_FPA_EMU3},
  {"maverick",	FPUTYPE_MAVERICK},
  {"vfp",	FPUTYPE_VFP}
};


/* Floating point models used by the different hardware.
   See fputype in arm.h.  */

static const enum fputype fp_model_for_fpu[] =
{
  /* No FP hardware.  */
  ARM_FP_MODEL_UNKNOWN,		/* FPUTYPE_NONE  */
  ARM_FP_MODEL_FPA,		/* FPUTYPE_FPA  */
  ARM_FP_MODEL_FPA,		/* FPUTYPE_FPA_EMU2  */
  ARM_FP_MODEL_FPA,		/* FPUTYPE_FPA_EMU3  */
  ARM_FP_MODEL_MAVERICK,	/* FPUTYPE_MAVERICK  */
  ARM_FP_MODEL_VFP		/* FPUTYPE_VFP  */
};


struct float_abi
{
  const char * name;
  enum float_abi_type abi_type;
};


/* Available values for -mfloat-abi=.  */

static const struct float_abi all_float_abis[] =
{
  {"soft",	ARM_FLOAT_ABI_SOFT},
  {"softfp",	ARM_FLOAT_ABI_SOFTFP},
  {"hard",	ARM_FLOAT_ABI_HARD}
};


struct abi_name
{
  const char *name;
  enum arm_abi_type abi_type;
};


/* Available values for -mabi=.  */

static const struct abi_name arm_all_abis[] =
{
  {"apcs-gnu",	ARM_ABI_APCS},
  {"atpcs",	ARM_ABI_ATPCS},
  {"aapcs",	ARM_ABI_AAPCS},
  {"iwmmxt",	ARM_ABI_IWMMXT}
};

/* Return the number of bits set in VALUE.  */
static unsigned
bit_count (unsigned long value)
{
  unsigned long count = 0;

  while (value)
    {
      count++;
      value &= value - 1;  /* Clear the least-significant set bit.  */
    }

  return count;
}
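
/* For example, bit_count (0x29) == 3: each pass through the loop above
   clears the lowest set bit (value &= value - 1), so the loop body runs
   exactly once per set bit.  */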

/* Set up library functions unique to ARM.  */

static void
arm_init_libfuncs (void)
{
  /* There are no special library functions unless we are using the
     ARM BPABI.  */
  if (!TARGET_BPABI)
    return;

  /* The functions below are described in Section 4 of the "Run-Time
     ABI for the ARM architecture", Version 1.0.  */

  /* Double-precision floating-point arithmetic.  Table 2.  */
  set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
  set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
  set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
  set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
  set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");

  /* Double-precision comparisons.  Table 3.  */
  set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
  set_optab_libfunc (ne_optab, DFmode, NULL);
  set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
  set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
  set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
  set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
  set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");

  /* Single-precision floating-point arithmetic.  Table 4.  */
  set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
  set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
  set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
  set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
  set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");

  /* Single-precision comparisons.  Table 5.  */
  set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
  set_optab_libfunc (ne_optab, SFmode, NULL);
  set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
  set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
  set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
  set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
  set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");

  /* Floating-point to integer conversions.  Table 6.  */
  set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
  set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
  set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
  set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
  set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
  set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
  set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
  set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");

  /* Conversions between floating types.  Table 7.  */
  set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
  set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");

  /* Integer to floating-point conversions.  Table 8.  */
  set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
  set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
  set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
  set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
  set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
  set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
  set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
  set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");

  /* Long long.  Table 9.  */
  set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
  set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
  set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
  set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
  set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
  set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");

  /* Integer (32/32->32) division.  \S 4.3.1.  */
  set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");

  /* The divmod functions are designed so that they can be used for
     plain division, even though they return both the quotient and the
     remainder.  The quotient is returned in the usual location (i.e.,
     r0 for SImode, {r0, r1} for DImode), just as would be expected
     for an ordinary division routine.  Because the AAPCS calling
     conventions specify that all of { r0, r1, r2, r3 } are
     call-clobbered registers, there is no need to tell the compiler
     explicitly that those registers are clobbered by these
     routines.  */
  set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
  set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
  set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idivmod");
  set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidivmod");

  /* We don't have mod libcalls.  Fortunately gcc knows how to use the
     divmod libcalls instead.  */
  set_optab_libfunc (smod_optab, DImode, NULL);
  set_optab_libfunc (umod_optab, DImode, NULL);
  set_optab_libfunc (smod_optab, SImode, NULL);
  set_optab_libfunc (umod_optab, SImode, NULL);
}
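
/* A sketch of what the divmod mapping above means in practice (example
   code, not part of the original file): given

       int quot (int a, int b) { return a / b; }

   the compiler emits a call to __aeabi_idivmod and simply uses the
   quotient that comes back in r0, ignoring the remainder in r1.  */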

/* Implement TARGET_HANDLE_OPTION.  */

static bool
arm_handle_option (size_t code, const char *arg, int value ATTRIBUTE_UNUSED)
{
  switch (code)
    {
    case OPT_mabi_:
      target_abi_name = arg;
      return true;

    case OPT_march_:
      arm_select[1].string = arg;
      return true;

    case OPT_mcpu_:
      arm_select[0].string = arg;
      return true;

    case OPT_mfloat_abi_:
      target_float_abi_name = arg;
      return true;

    case OPT_mfp_:
    case OPT_mfpe_:
      target_fpe_name = arg;
      return true;

    case OPT_mfpu_:
      target_fpu_name = arg;
      return true;

    case OPT_mhard_float:
      target_float_abi_name = "hard";
      return true;

    case OPT_mpic_register_:
      arm_pic_register_string = arg;
      return true;

    case OPT_msoft_float:
      target_float_abi_name = "soft";
      return true;

    case OPT_mstructure_size_boundary_:
      structure_size_string = arg;
      return true;

    case OPT_mtune_:
      arm_select[2].string = arg;
      return true;

    default:
      return true;
    }
}

/* Fix up any incompatible options that the user has specified.
   This has now turned into a maze.  */
void
arm_override_options (void)
{
  unsigned i;
  enum processor_type target_arch_cpu = arm_none;

  /* Set up the flags based on the cpu/architecture selected by the user.  */
  for (i = ARRAY_SIZE (arm_select); i--;)
    {
      struct arm_cpu_select * ptr = arm_select + i;

      if (ptr->string != NULL && ptr->string[0] != '\0')
	{
	  const struct processors * sel;

	  for (sel = ptr->processors; sel->name != NULL; sel++)
	    if (streq (ptr->string, sel->name))
	      {
		/* Set the architecture define.  */
		if (i != ARM_OPT_SET_TUNE)
		  sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);

		/* Determine the processor core for which we should
		   tune code-generation.  */
		if (/* -mcpu= is a sensible default.  */
		    i == ARM_OPT_SET_CPU
		    /* -mtune= overrides -mcpu= and -march=.  */
		    || i == ARM_OPT_SET_TUNE)
		  arm_tune = (enum processor_type) (sel - ptr->processors);

		/* Remember the CPU associated with this architecture.
		   If no other option is used to set the CPU type,
		   we'll use this to guess the most suitable tuning
		   options.  */
		if (i == ARM_OPT_SET_ARCH)
		  target_arch_cpu = sel->core;

		if (i != ARM_OPT_SET_TUNE)
		  {
		    /* If we have been given an architecture and a processor
		       make sure that they are compatible.  We only generate
		       a warning though, and we prefer the CPU over the
		       architecture.  */
		    if (insn_flags != 0 && (insn_flags ^ sel->flags))
		      warning (0, "switch -mcpu=%s conflicts with -march= switch",
			       ptr->string);

		    insn_flags = sel->flags;
		  }

		break;
	      }

	  if (sel->name == NULL)
	    error ("bad value (%s) for %s switch", ptr->string, ptr->name);
	}
    }

  /* Guess the tuning options from the architecture if necessary.  */
  if (arm_tune == arm_none)
    arm_tune = target_arch_cpu;

  /* If the user did not specify a processor, choose one for them.  */
  if (insn_flags == 0)
    {
      const struct processors * sel;
      unsigned int        sought;
      enum processor_type cpu;

      cpu = TARGET_CPU_DEFAULT;
      if (cpu == arm_none)
	{
#ifdef SUBTARGET_CPU_DEFAULT
	  /* Use the subtarget default CPU if none was specified by
	     configure.  */
	  cpu = SUBTARGET_CPU_DEFAULT;
#endif
	  /* Default to ARM6.  */
	  if (cpu == arm_none)
	    cpu = arm6;
	}
      sel = &all_cores[cpu];

      insn_flags = sel->flags;

      /* Now check to see if the user has specified some command line
	 switch that require certain abilities from the cpu.  */
      sought = 0;

      if (TARGET_INTERWORK || TARGET_THUMB)
	{
	  sought |= (FL_THUMB | FL_MODE32);

	  /* There are no ARM processors that support both APCS-26 and
	     interworking.  Therefore we force FL_MODE26 to be removed
	     from insn_flags here (if it was set), so that the search
	     below will always be able to find a compatible processor.  */
	  insn_flags &= ~FL_MODE26;
	}

      if (sought != 0 && ((sought & insn_flags) != sought))
	{
	  /* Try to locate a CPU type that supports all of the abilities
	     of the default CPU, plus the extra abilities requested by
	     the user.  */
	  for (sel = all_cores; sel->name != NULL; sel++)
	    if ((sel->flags & sought) == (sought | insn_flags))
	      break;

	  if (sel->name == NULL)
	    {
	      unsigned current_bit_count = 0;
	      const struct processors * best_fit = NULL;

	      /* Ideally we would like to issue an error message here
		 saying that it was not possible to find a CPU compatible
		 with the default CPU, but which also supports the command
		 line options specified by the programmer, and so they
		 ought to use the -mcpu=<name> command line option to
		 override the default CPU type.

		 If we cannot find a cpu that has both the
		 characteristics of the default cpu and the given
		 command line options we scan the array again looking
		 for a best match.  */
	      for (sel = all_cores; sel->name != NULL; sel++)
		if ((sel->flags & sought) == sought)
		  {
		    unsigned count;

		    count = bit_count (sel->flags & insn_flags);

		    if (count >= current_bit_count)
		      {
			best_fit = sel;
			current_bit_count = count;
		      }
		  }

	      gcc_assert (best_fit);
	      sel = best_fit;
	    }

	  insn_flags = sel->flags;
	}
      sprintf (arm_arch_name, "__ARM_ARCH_%s__", sel->arch);
      if (arm_tune == arm_none)
	arm_tune = (enum processor_type) (sel - all_cores);
    }

  /* The processor for which we should tune should now have been
     chosen.  */
  gcc_assert (arm_tune != arm_none);

  tune_flags = all_cores[(int)arm_tune].flags;
  if (optimize_size)
    targetm.rtx_costs = arm_size_rtx_costs;
  else
    targetm.rtx_costs = all_cores[(int)arm_tune].rtx_costs;

  /* Make sure that the processor choice does not conflict with any of the
     other command line choices.  */
  if (TARGET_INTERWORK && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support interworking" );
      target_flags &= ~MASK_INTERWORK;
    }

  if (TARGET_THUMB && !(insn_flags & FL_THUMB))
    {
      warning (0, "target CPU does not support THUMB instructions");
      target_flags &= ~MASK_THUMB;
    }

  if (TARGET_APCS_FRAME && TARGET_THUMB)
    {
      /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
      target_flags &= ~MASK_APCS_FRAME;
    }

  /* TARGET_BACKTRACE calls leaf_function_p, which causes a crash if done
     from here where no function is being compiled currently.  */
  if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM)
    warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLEE_INTERWORKING)
    warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_ARM && TARGET_CALLER_INTERWORKING)
    warning (0, "enabling caller interworking support is only meaningful when compiling for the Thumb");

  if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
    {
      warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
      target_flags |= MASK_APCS_FRAME;
    }

  if (TARGET_POKE_FUNCTION_NAME)
    target_flags |= MASK_APCS_FRAME;

  if (TARGET_APCS_REENT && flag_pic)
    error ("-fpic and -mapcs-reent are incompatible");

  if (TARGET_APCS_REENT)
    warning (0, "APCS reentrant code not supported.  Ignored");

  /* If this target is normally configured to use APCS frames, warn if they
     are turned off and debugging is turned on.  */
  if (TARGET_ARM
      && write_symbols != NO_DEBUG
      && !TARGET_APCS_FRAME
      && (TARGET_DEFAULT & MASK_APCS_FRAME))
    warning (0, "-g with -mno-apcs-frame may not give sensible debugging");

  /* If stack checking is disabled, we can use r10 as the PIC register,
     which keeps r9 available.  */
  if (flag_pic)
    arm_pic_register = TARGET_APCS_STACK ? 9 : 10;

  if (TARGET_APCS_FLOAT)
    warning (0, "passing floating point arguments in fp regs not yet supported");

  /* Initialize boolean versions of the flags, for use in the arm.md file.  */
  arm_arch3m = (insn_flags & FL_ARCH3M) != 0;
  arm_arch4 = (insn_flags & FL_ARCH4) != 0;
  arm_arch4t = arm_arch4 & ((insn_flags & FL_THUMB) != 0);
  arm_arch5 = (insn_flags & FL_ARCH5) != 0;
  arm_arch5e = (insn_flags & FL_ARCH5E) != 0;
  arm_arch6 = (insn_flags & FL_ARCH6) != 0;
  arm_arch_xscale = (insn_flags & FL_XSCALE) != 0;
  arm_arch_cirrus = (insn_flags & FL_CIRRUS) != 0;

  arm_ld_sched = (tune_flags & FL_LDSCHED) != 0;
  arm_tune_strongarm = (tune_flags & FL_STRONG) != 0;
  thumb_code = (TARGET_ARM == 0);
  arm_tune_wbuf = (tune_flags & FL_WBUF) != 0;
  arm_tune_xscale = (tune_flags & FL_XSCALE) != 0;
  arm_arch_iwmmxt = (insn_flags & FL_IWMMXT) != 0;

  /* V5 code we generate is completely interworking capable, so we turn off
     TARGET_INTERWORK here to avoid many tests later on.  */

  /* XXX However, we must pass the right pre-processor defines to CPP
     or GLD can get confused.  This is a hack.  */
  if (TARGET_INTERWORK)
    arm_cpp_interwork = 1;

  if (arm_arch5)
    target_flags &= ~MASK_INTERWORK;

  if (target_abi_name)
    {
      for (i = 0; i < ARRAY_SIZE (arm_all_abis); i++)
	{
	  if (streq (arm_all_abis[i].name, target_abi_name))
	    {
	      arm_abi = arm_all_abis[i].abi_type;
	      break;
	    }
	}
      if (i == ARRAY_SIZE (arm_all_abis))
	error ("invalid ABI option: -mabi=%s", target_abi_name);
    }
  else
    arm_abi = ARM_DEFAULT_ABI;

  if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
    error ("iwmmxt requires an AAPCS compatible ABI for proper operation");

  if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
    error ("iwmmxt abi requires an iwmmxt capable cpu");

  arm_fp_model = ARM_FP_MODEL_UNKNOWN;
  if (target_fpu_name == NULL && target_fpe_name != NULL)
    {
      if (streq (target_fpe_name, "2"))
	target_fpu_name = "fpe2";
      else if (streq (target_fpe_name, "3"))
	target_fpu_name = "fpe3";
      else
	error ("invalid floating point emulation option: -mfpe=%s",
	       target_fpe_name);
    }
  if (target_fpu_name != NULL)
    {
      /* The user specified a FPU.  */
      for (i = 0; i < ARRAY_SIZE (all_fpus); i++)
	{
	  if (streq (all_fpus[i].name, target_fpu_name))
	    {
	      arm_fpu_arch = all_fpus[i].fpu;
	      arm_fpu_tune = arm_fpu_arch;
	      arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
	      break;
	    }
	}
      if (arm_fp_model == ARM_FP_MODEL_UNKNOWN)
	error ("invalid floating point option: -mfpu=%s", target_fpu_name);
    }
  else
    {
#ifdef FPUTYPE_DEFAULT
      /* Use the default if it is specified for this platform.  */
      arm_fpu_arch = FPUTYPE_DEFAULT;
      arm_fpu_tune = FPUTYPE_DEFAULT;
#else
      /* Pick one based on CPU type.  */
      /* ??? Some targets assume FPA is the default.
      if ((insn_flags & FL_VFP) != 0)
	arm_fpu_arch = FPUTYPE_VFP;
      else
      */
      if (arm_arch_cirrus)
	arm_fpu_arch = FPUTYPE_MAVERICK;
      else
	arm_fpu_arch = FPUTYPE_FPA_EMU2;
#endif
      if (tune_flags & FL_CO_PROC && arm_fpu_arch == FPUTYPE_FPA_EMU2)
	arm_fpu_tune = FPUTYPE_FPA;
      else
	arm_fpu_tune = arm_fpu_arch;
      arm_fp_model = fp_model_for_fpu[arm_fpu_arch];
      gcc_assert (arm_fp_model != ARM_FP_MODEL_UNKNOWN);
    }

  if (target_float_abi_name != NULL)
    {
      /* The user specified a FP ABI.  */
      for (i = 0; i < ARRAY_SIZE (all_float_abis); i++)
	{
	  if (streq (all_float_abis[i].name, target_float_abi_name))
	    {
	      arm_float_abi = all_float_abis[i].abi_type;
	      break;
	    }
	}
      if (i == ARRAY_SIZE (all_float_abis))
	error ("invalid floating point abi: -mfloat-abi=%s",
	       target_float_abi_name);
    }
  else
    arm_float_abi = TARGET_DEFAULT_FLOAT_ABI;

  if (arm_float_abi == ARM_FLOAT_ABI_HARD && TARGET_VFP)
    sorry ("-mfloat-abi=hard and VFP");

  /* If soft-float is specified then don't use FPU.  */
  if (TARGET_SOFT_FLOAT)
    arm_fpu_arch = FPUTYPE_NONE;

  /* For arm2/3 there is no need to do any scheduling if there is only
     a floating point emulator, or we are doing software floating-point.  */
  if ((TARGET_SOFT_FLOAT
       || arm_fpu_tune == FPUTYPE_FPA_EMU2
       || arm_fpu_tune == FPUTYPE_FPA_EMU3)
      && (tune_flags & FL_MODE32) == 0)
    flag_schedule_insns = flag_schedule_insns_after_reload = 0;

  /* Override the default structure alignment for AAPCS ABI.  */
  if (arm_abi == ARM_ABI_AAPCS)
    arm_structure_size_boundary = 8;

  if (structure_size_string != NULL)
    {
      int size = strtol (structure_size_string, NULL, 0);

      if (size == 8 || size == 32
	  || (ARM_DOUBLEWORD_ALIGN && size == 64))
	arm_structure_size_boundary = size;
      else
	warning (0, "structure size boundary can only be set to %s",
		 ARM_DOUBLEWORD_ALIGN ? "8, 32 or 64": "8 or 32");
    }

  if (arm_pic_register_string != NULL)
    {
      int pic_register = decode_reg_name (arm_pic_register_string);

      if (!flag_pic)
	warning (0, "-mpic-register= is useless without -fpic");

      /* Prevent the user from choosing an obviously stupid PIC register.  */
      else if (pic_register < 0 || call_used_regs[pic_register]
	       || pic_register == HARD_FRAME_POINTER_REGNUM
	       || pic_register == STACK_POINTER_REGNUM
	       || pic_register >= PC_REGNUM)
	error ("unable to use '%s' for PIC register", arm_pic_register_string);
      else
	arm_pic_register = pic_register;
    }

  if (TARGET_THUMB && flag_schedule_insns)
    {
      /* Don't warn since it's on by default in -O2.  */
      flag_schedule_insns = 0;
    }

  if (optimize_size)
    {
      /* There's some dispute as to whether this should be 1 or 2.  However,
	 experiments seem to show that in pathological cases a setting of
	 1 degrades less severely than a setting of 2.  This could change if
	 other parts of the compiler change their behavior.  */
      arm_constant_limit = 1;

      /* If optimizing for size, bump the number of instructions that we
	 are prepared to conditionally execute (even on a StrongARM).  */
      max_insns_skipped = 6;
    }
  else
    {
      /* For processors with load scheduling, it never costs more than
	 2 cycles to load a constant, and the load scheduler may well
	 reduce that to 1.  */
      if (arm_ld_sched)
	arm_constant_limit = 1;

      /* On XScale the longer latency of a load makes it more difficult
	 to achieve a good schedule, so it's faster to synthesize
	 constants that can be done in two insns.  */
      if (arm_tune_xscale)
	arm_constant_limit = 2;

      /* StrongARM has early execution of branches, so a sequence
	 that is worth skipping is shorter.  */
      if (arm_tune_strongarm)
	max_insns_skipped = 3;
    }

  /* Register global variables with the garbage collector.  */
  arm_add_gc_roots ();
}

static void
arm_add_gc_roots (void)
{
  gcc_obstack_init(&minipool_obstack);
  minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
}

/* A table of known ARM exception types.
   For use with the interrupt function attribute.  */

typedef struct
{
  const char *const  arg;
  const unsigned long return_value;
}
isr_attribute_arg;

static const isr_attribute_arg isr_attribute_args [] =
{
  { "IRQ",   ARM_FT_ISR },
  { "irq",   ARM_FT_ISR },
  { "FIQ",   ARM_FT_FIQ },
  { "fiq",   ARM_FT_FIQ },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "ABORT", ARM_FT_ISR },
  { "abort", ARM_FT_ISR },
  { "UNDEF", ARM_FT_EXCEPTION },
  { "undef", ARM_FT_EXCEPTION },
  { "SWI",   ARM_FT_EXCEPTION },
  { "swi",   ARM_FT_EXCEPTION },
  { NULL,    ARM_FT_NORMAL }
};

/* Returns the (interrupt) function type of the current
   function, or ARM_FT_UNKNOWN if the type cannot be determined.  */

static unsigned long
arm_isr_value (tree argument)
{
  const isr_attribute_arg * ptr;
  const char *              arg;

  /* No argument - default to IRQ.  */
  if (argument == NULL_TREE)
    return ARM_FT_ISR;

  /* Get the value of the argument.  */
  if (TREE_VALUE (argument) == NULL_TREE
      || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
    return ARM_FT_UNKNOWN;

  arg = TREE_STRING_POINTER (TREE_VALUE (argument));

  /* Check it against the list of known arguments.  */
  for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
    if (streq (arg, ptr->arg))
      return ptr->return_value;

  /* An unrecognized interrupt type.  */
  return ARM_FT_UNKNOWN;
}

/* Computes the type of the current function.  */

static unsigned long
arm_compute_func_type (void)
{
  unsigned long type = ARM_FT_UNKNOWN;
  tree a;
  tree attr;

  gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);

  /* Decide if the current function is volatile.  Such functions
     never return, and many memory cycles can be saved by not storing
     register values that will never be needed again.  This optimization
     was added to speed up context switching in a kernel application.  */
  if (optimize > 0
      && TREE_NOTHROW (current_function_decl)
      && TREE_THIS_VOLATILE (current_function_decl))
    type |= ARM_FT_VOLATILE;

  if (cfun->static_chain_decl != NULL)
    type |= ARM_FT_NESTED;

  attr = DECL_ATTRIBUTES (current_function_decl);

  a = lookup_attribute ("naked", attr);
  if (a != NULL_TREE)
    type |= ARM_FT_NAKED;

  a = lookup_attribute ("isr", attr);
  if (a == NULL_TREE)
    a = lookup_attribute ("interrupt", attr);

  if (a == NULL_TREE)
    type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
  else
    type |= arm_isr_value (TREE_VALUE (a));

  return type;
}

/* Returns the type of the current function.  */

unsigned long
arm_current_func_type (void)
{
  if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
    cfun->machine->func_type = arm_compute_func_type ();

  return cfun->machine->func_type;
}

/* Return 1 if it is possible to return using a single instruction.
   If SIBLING is non-null, this is a test for a return before a sibling
   call.  SIBLING is the call insn, so we can examine its register usage.  */

int
use_return_insn (int iscond, rtx sibling)
{
  int regno;
  unsigned int func_type;
  unsigned long saved_int_regs;
  unsigned HOST_WIDE_INT stack_adjust;
  arm_stack_offsets *offsets;

  /* Never use a return instruction before reload has run.  */
  if (!reload_completed)
    return 0;

  func_type = arm_current_func_type ();

  /* Naked functions and volatile functions need special
     consideration.  */
  if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED))
    return 0;

  /* So do interrupt functions that use the frame pointer.  */
  if (IS_INTERRUPT (func_type) && frame_pointer_needed)
    return 0;

  offsets = arm_get_frame_offsets ();
  stack_adjust = offsets->outgoing_args - offsets->saved_regs;

  /* As do variadic functions.  */
  if (current_function_pretend_args_size
      || cfun->machine->uses_anonymous_args
      /* Or if the function calls __builtin_eh_return () */
      || current_function_calls_eh_return
      /* Or if the function calls alloca */
      || current_function_calls_alloca
      /* Or if there is a stack adjustment.  However, if the stack pointer
	 is saved on the stack, we can use a pre-incrementing stack load.  */
      || !(stack_adjust == 0 || (frame_pointer_needed && stack_adjust == 4)))
    return 0;

  saved_int_regs = arm_compute_save_reg_mask ();

  /* Unfortunately, the insn

       ldmib sp, {..., sp, ...}

     triggers a bug on most SA-110 based devices, such that the stack
     pointer won't be correctly restored if the instruction takes a
     page fault.  We work around this problem by popping r3 along with
     the other registers, since that is never slower than executing
     another instruction.

     We test for !arm_arch5 here, because code for any architecture
     less than this could potentially be run on one of the buggy
     chips.  */
  if (stack_adjust == 4 && !arm_arch5)
    {
      /* Validate that r3 is a call-clobbered register (always true in
	 the default abi) ...  */
      if (!call_used_regs[3])
	return 0;

      /* ... that it isn't being used for a return value (always true
	 until we implement return-in-regs), or for a tail-call
	 argument ...  */
      if (sibling)
	{
	  gcc_assert (GET_CODE (sibling) == CALL_INSN);

	  if (find_regno_fusage (sibling, USE, 3))
	    return 0;
	}

      /* ... and that there are no call-saved registers in r0-r2
	 (always true in the default ABI).  */
      if (saved_int_regs & 0x7)
	return 0;
    }

  /* Can't be done if interworking with Thumb, and any registers have been
     stacked.  */
  if (TARGET_INTERWORK && saved_int_regs != 0)
    return 0;

  /* On StrongARM, conditional returns are expensive if they aren't
     taken and multiple registers have been stacked.  */
  if (iscond && arm_tune_strongarm)
    {
      /* Conditional return when just the LR is stored is a simple
	 conditional-load instruction, that's not expensive.  */
      if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
	return 0;

      if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
	return 0;
    }

  /* If there are saved registers but the LR isn't saved, then we need
     two instructions for the return.  */
  if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
    return 0;

  /* Can't be done if any of the FPA regs are pushed,
     since this also requires an insn.  */
  if (TARGET_HARD_FLOAT && TARGET_FPA)
    for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
      if (regs_ever_live[regno] && !call_used_regs[regno])
	return 0;

  /* Likewise VFP regs.  */
  if (TARGET_HARD_FLOAT && TARGET_VFP)
    for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
      if (regs_ever_live[regno] && !call_used_regs[regno])
	return 0;

  if (TARGET_REALLY_IWMMXT)
    for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
      if (regs_ever_live[regno] && ! call_used_regs [regno])
	return 0;

  return 1;
}
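
/* Illustrative sketch of the SA-110 workaround above (example only,
   not taken from the original sources): rather than returning with

       ldmib sp, {fp, sp, pc}

   when stack_adjust == 4, the epilogue pops a dead register to soak
   up the extra word, e.g.

       ldmfd sp!, {r3, fp, pc}

   which is never slower than executing a separate stack adjustment.  */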

/* Return TRUE if int I is a valid immediate ARM constant.  */

int
const_ok_for_arm (HOST_WIDE_INT i)
{
  unsigned HOST_WIDE_INT mask = ~(unsigned HOST_WIDE_INT)0xFF;

  /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
     be all zero, or all one.  */
  if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
      && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
	  != ((~(unsigned HOST_WIDE_INT) 0)
	      & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
    return FALSE;

  /* Fast return for 0 and powers of 2 */
  if ((i & (i - 1)) == 0)
    return TRUE;

  do
    {
      if ((i & mask & (unsigned HOST_WIDE_INT) 0xffffffff) == 0)
	return TRUE;
      mask =
	(mask << 2) | ((mask & (unsigned HOST_WIDE_INT) 0xffffffff)
		       >> (32 - 2)) | ~(unsigned HOST_WIDE_INT) 0xffffffff;
    }
  while (mask != ~(unsigned HOST_WIDE_INT) 0xFF);

  return FALSE;
}
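
/* Worked examples for the loop above (illustrative): an ARM data
   processing immediate is an 8-bit value rotated right by an even
   amount, so 0x2FC00 (0xBF rotated right by 22) is accepted, while
   0x00000101 is rejected because no even rotation can fit both of its
   set bits into eight contiguous bits.  */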

/* Return true if I is a valid constant for the operation CODE.  */
static int
const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
{
  if (const_ok_for_arm (i))
    return 1;

  switch (code)
    {
    case PLUS:
      return const_ok_for_arm (ARM_SIGN_EXTEND (-i));

    case MINUS:		/* Should only occur with (MINUS I reg) => rsb */
    case XOR:
    case IOR:
      return 0;

    case AND:
      return const_ok_for_arm (ARM_SIGN_EXTEND (~i));

    default:
      gcc_unreachable ();
    }
}
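
/* Examples (illustrative): for PLUS, i = -66 is accepted because the
   negated value 66 is a valid immediate (the add becomes a sub); for
   AND, i = 0xFFFFFF00 is accepted because ~i == 0xFF is valid (the
   and becomes a bic).  */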

/* Emit a sequence of insns to handle a large constant.
   CODE is the code of the operation required, it can be any of SET, PLUS,
   IOR, AND, XOR, MINUS;
   MODE is the mode in which the operation is being performed;
   VAL is the integer to operate on;
   SOURCE is the other operand (a register, or a null-pointer for SET);
   SUBTARGETS means it is safe to create scratch registers if that will
   either produce a simpler sequence, or we will want to cse the values.
   Return value is the number of insns emitted.  */

int
arm_split_constant (enum rtx_code code, enum machine_mode mode, rtx insn,
		    HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
{
  rtx cond;

  if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
    cond = COND_EXEC_TEST (PATTERN (insn));
  else
    cond = NULL_RTX;

  if (subtargets || code == SET
      || (GET_CODE (target) == REG && GET_CODE (source) == REG
	  && REGNO (target) != REGNO (source)))
    {
      /* After arm_reorg has been called, we can't fix up expensive
	 constants by pushing them into memory so we must synthesize
	 them in-line, regardless of the cost.  This is only likely to
	 be more costly on chips that have load delay slots and we are
	 compiling without running the scheduler (so no splitting
	 occurred before the final instruction emission).

	 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
      */
      if (!after_arm_reorg
	  && !cond
	  && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
				1, 0)
	      > arm_constant_limit + (code != SET)))
	{
	  if (code == SET)
	    {
	      /* Currently SET is the only monadic value for CODE, all
		 the rest are diadic.  */
	      emit_insn (gen_rtx_SET (VOIDmode, target, GEN_INT (val)));
	      return 1;
	    }
	  else
	    {
	      rtx temp = subtargets ? gen_reg_rtx (mode) : target;

	      emit_insn (gen_rtx_SET (VOIDmode, temp, GEN_INT (val)));
	      /* For MINUS, the value is subtracted from, since we never
		 have subtraction of a constant.  */
	      if (code == MINUS)
		emit_insn (gen_rtx_SET (VOIDmode, target,
					gen_rtx_MINUS (mode, temp, source)));
	      else
		emit_insn (gen_rtx_SET (VOIDmode, target,
					gen_rtx_fmt_ee (code, mode, source, temp)));
	      return 2;
	    }
	}
    }

  return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
			   1);
}

static int
count_insns_for_constant (HOST_WIDE_INT remainder, int i)
{
  HOST_WIDE_INT temp1;
  int num_insns = 0;
  do
    {
      int end;

      if (i <= 0)
	i += 32;
      if (remainder & (3 << (i - 2)))
	{
	  end = i - 8;
	  if (end < 0)
	    end += 32;
	  temp1 = remainder & ((0x0ff << end)
			       | ((i < end) ? (0xff >> (32 - end)) : 0));
	  remainder &= ~temp1;
	  num_insns++;
	  i -= 6;
	}
      i -= 2;
    } while (remainder);
  return num_insns;
}
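
/* The loop above mirrors the chunking strategy used by arm_gen_constant:
   scanning two bits at a time downward from position I, each counted
   instruction consumes one 8-bit (even-rotation) chunk of REMAINDER.
   E.g. (illustrative), remainder == 0x00FF00FF counts as two insns,
   one per 0xFF chunk.  */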

/* Emit an instruction with the indicated PATTERN.  If COND is
   non-NULL, conditionalize the execution of the instruction on COND
   being true.  */

static void
emit_constant_insn (rtx cond, rtx pattern)
{
  if (cond)
    pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
  emit_insn (pattern);
}
1694 /* As above, but extra parameter GENERATE which, if clear, suppresses
1695 RTL generation. */
1697 static int
1698 arm_gen_constant (enum rtx_code code, enum machine_mode mode, rtx cond,
1699 HOST_WIDE_INT val, rtx target, rtx source, int subtargets,
1700 int generate)
1702 int can_invert = 0;
1703 int can_negate = 0;
1704 int can_negate_initial = 0;
1705 int can_shift = 0;
1706 int i;
1707 int num_bits_set = 0;
1708 int set_sign_bit_copies = 0;
1709 int clear_sign_bit_copies = 0;
1710 int clear_zero_bit_copies = 0;
1711 int set_zero_bit_copies = 0;
1712 int insns = 0;
1713 unsigned HOST_WIDE_INT temp1, temp2;
1714 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
1716 /* Find out which operations are safe for a given CODE. Also do a quick
1717 check for degenerate cases; these can occur when DImode operations
1718 are split. */
1719 switch (code)
1721 case SET:
1722 can_invert = 1;
1723 can_shift = 1;
1724 can_negate = 1;
1725 break;
1727 case PLUS:
1728 can_negate = 1;
1729 can_negate_initial = 1;
1730 break;
1732 case IOR:
1733 if (remainder == 0xffffffff)
1735 if (generate)
1736 emit_constant_insn (cond,
1737 gen_rtx_SET (VOIDmode, target,
1738 GEN_INT (ARM_SIGN_EXTEND (val))));
1739 return 1;
1741 if (remainder == 0)
1743 if (reload_completed && rtx_equal_p (target, source))
1744 return 0;
1745 if (generate)
1746 emit_constant_insn (cond,
1747 gen_rtx_SET (VOIDmode, target, source));
1748 return 1;
1750 break;
1752 case AND:
1753 if (remainder == 0)
1755 if (generate)
1756 emit_constant_insn (cond,
1757 gen_rtx_SET (VOIDmode, target, const0_rtx));
1758 return 1;
1760 if (remainder == 0xffffffff)
1762 if (reload_completed && rtx_equal_p (target, source))
1763 return 0;
1764 if (generate)
1765 emit_constant_insn (cond,
1766 gen_rtx_SET (VOIDmode, target, source));
1767 return 1;
1769 can_invert = 1;
1770 break;
1772 case XOR:
1773 if (remainder == 0)
1775 if (reload_completed && rtx_equal_p (target, source))
1776 return 0;
1777 if (generate)
1778 emit_constant_insn (cond,
1779 gen_rtx_SET (VOIDmode, target, source));
1780 return 1;
1783 /* We don't know how to handle other cases yet. */
1784 gcc_assert (remainder == 0xffffffff);
1786 if (generate)
1787 emit_constant_insn (cond,
1788 gen_rtx_SET (VOIDmode, target,
1789 gen_rtx_NOT (mode, source)));
1790 return 1;
1792 case MINUS:
1793 /* We treat MINUS as (val - source), since (source - val) is always
1794 passed as (source + (-val)). */
1795 if (remainder == 0)
1797 if (generate)
1798 emit_constant_insn (cond,
1799 gen_rtx_SET (VOIDmode, target,
1800 gen_rtx_NEG (mode, source)));
1801 return 1;
1803 if (const_ok_for_arm (val))
1805 if (generate)
1806 emit_constant_insn (cond,
1807 gen_rtx_SET (VOIDmode, target,
1808 gen_rtx_MINUS (mode, GEN_INT (val),
1809 source)));
1810 return 1;
1812 can_negate = 1;
1814 break;
1816 default:
1817 gcc_unreachable ();
1820 /* If we can do it in one insn get out quickly. */
1821 if (const_ok_for_arm (val)
1822 || (can_negate_initial && const_ok_for_arm (-val))
1823 || (can_invert && const_ok_for_arm (~val)))
1825 if (generate)
1826 emit_constant_insn (cond,
1827 gen_rtx_SET (VOIDmode, target,
1828 (source
1829 ? gen_rtx_fmt_ee (code, mode, source,
1830 GEN_INT (val))
1831 : GEN_INT (val))));
1832 return 1;
1835 /* Calculate a few attributes that may be useful for specific
1836 optimizations. */
1837 for (i = 31; i >= 0; i--)
1839 if ((remainder & (1 << i)) == 0)
1840 clear_sign_bit_copies++;
1841 else
1842 break;
1845 for (i = 31; i >= 0; i--)
1847 if ((remainder & (1 << i)) != 0)
1848 set_sign_bit_copies++;
1849 else
1850 break;
1853 for (i = 0; i <= 31; i++)
1855 if ((remainder & (1 << i)) == 0)
1856 clear_zero_bit_copies++;
1857 else
1858 break;
1861 for (i = 0; i <= 31; i++)
1863 if ((remainder & (1 << i)) != 0)
1864 set_zero_bit_copies++;
1865 else
1866 break;
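/* Worked example (values chosen for exposition): REMAINDER =
   0x0000fff0 gives clear_sign_bit_copies = 16 (bits 31..16 clear),
   set_sign_bit_copies = 0, clear_zero_bit_copies = 4 (bits 3..0
   clear) and set_zero_bit_copies = 0.  */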
1869 switch (code)
1871 case SET:
1872 /* See if we can do this by sign_extending a constant that is known
1873 to be negative. This is a good way of doing it, since the shift
1874 may well merge into a subsequent insn. */
1875 if (set_sign_bit_copies > 1)
1877 if (const_ok_for_arm
1878 (temp1 = ARM_SIGN_EXTEND (remainder
1879 << (set_sign_bit_copies - 1))))
1881 if (generate)
1883 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
1884 emit_constant_insn (cond,
1885 gen_rtx_SET (VOIDmode, new_src,
1886 GEN_INT (temp1)));
1887 emit_constant_insn (cond,
1888 gen_ashrsi3 (target, new_src,
1889 GEN_INT (set_sign_bit_copies - 1)));
1891 return 2;
1893 /* For an inverted constant, we will need to set the low bits;
1894 these will be shifted out of harm's way. */
1895 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
1896 if (const_ok_for_arm (~temp1))
1898 if (generate)
1900 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
1901 emit_constant_insn (cond,
1902 gen_rtx_SET (VOIDmode, new_src,
1903 GEN_INT (temp1)));
1904 emit_constant_insn (cond,
1905 gen_ashrsi3 (target, new_src,
1906 GEN_INT (set_sign_bit_copies - 1)));
1908 return 2;
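/* Worked example (registers rt/rd are illustrative): VAL =
   0xffffffe1 has set_sign_bit_copies = 27, and 0xffffffe1 << 26
   is 0x84000000, a valid immediate (0x84 rotated right by 8), so
   the first branch above emits

       mov rt, #0x84000000
       mov rd, rt, asr #26     @ rd = 0xffffffe1

   rather than synthesizing the constant chunk by chunk.  */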
1912 /* See if we can calculate the value as the difference between two
1913 valid immediates. */
1914 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
1916 int topshift = clear_sign_bit_copies & ~1;
1918 temp1 = ((remainder + (0x00800000 >> topshift))
1919 & (0xff000000 >> topshift));
1921 /* If temp1 is zero, then that means the 9 most significant
1922 bits of remainder were 1 and we've caused it to overflow.
1923 When topshift is 0 we don't need to do anything since we
1924 can borrow from 'bit 32'. */
1925 if (temp1 == 0 && topshift != 0)
1926 temp1 = 0x80000000 >> (topshift - 1);
1928 temp2 = temp1 - remainder;
1930 if (const_ok_for_arm (temp2))
1932 if (generate)
1934 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
1935 emit_constant_insn (cond,
1936 gen_rtx_SET (VOIDmode, new_src,
1937 GEN_INT (temp1)));
1938 emit_constant_insn (cond,
1939 gen_addsi3 (target, new_src,
1940 GEN_INT (-temp2)));
1943 return 2;
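/* Worked example (registers illustrative): VAL = 0x00fffff0.  The
   rounding above overflows the window (temp1 computes to zero), so
   we borrow one bit higher: temp1 = 0x01000000 and temp2 = temp1 -
   VAL = 0x10, both valid immediates, giving

       mov rt, #0x01000000
       add rd, rt, #-16        @ assembled as sub rd, rt, #16  */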
1947 /* See if we can generate this by setting the bottom (or the top)
1948 16 bits, and then shifting these into the other half of the
1949 word. We only look for the simplest cases; to do more would cost
1950 too much. Be careful, however, not to generate this when the
1951 alternative would take fewer insns. */
1952 if (val & 0xffff0000)
1954 temp1 = remainder & 0xffff0000;
1955 temp2 = remainder & 0x0000ffff;
1957 /* Overlaps outside this range are best done using other methods. */
1958 for (i = 9; i < 24; i++)
1960 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
1961 && !const_ok_for_arm (temp2))
1963 rtx new_src = (subtargets
1964 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
1965 : target);
1966 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
1967 source, subtargets, generate);
1968 source = new_src;
1969 if (generate)
1970 emit_constant_insn
1971 (cond,
1972 gen_rtx_SET
1973 (VOIDmode, target,
1974 gen_rtx_IOR (mode,
1975 gen_rtx_ASHIFT (mode, source,
1976 GEN_INT (i)),
1977 source)));
1978 return insns + 1;
1982 /* Don't duplicate cases already considered. */
1983 for (i = 17; i < 24; i++)
1985 if (((temp1 | (temp1 >> i)) == remainder)
1986 && !const_ok_for_arm (temp1))
1988 rtx new_src = (subtargets
1989 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
1990 : target);
1991 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
1992 source, subtargets, generate);
1993 source = new_src;
1994 if (generate)
1995 emit_constant_insn
1996 (cond,
1997 gen_rtx_SET (VOIDmode, target,
1998 gen_rtx_IOR
1999 (mode,
2000 gen_rtx_LSHIFTRT (mode, source,
2001 GEN_INT (i)),
2002 source)));
2003 return insns + 1;
2007 break;
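/* Worked example (registers illustrative): VAL = 0x0fff0fff matches
   with temp2 = 0x0fff and i = 16; we synthesize 0x0fff into a
   scratch register (two insns) and then emit

       orr rd, rs, rs, lsl #16

   for three insns in total, instead of the four needed to build
   0x0fff0fff one 8-bit chunk at a time.  */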
2009 case IOR:
2010 case XOR:
2011 /* If we have IOR or XOR, and the constant can be loaded in a
2012 single instruction, and we can find a temporary to put it in,
2013 then this can be done in two instructions instead of 3-4. */
2014 if (subtargets
2015 /* TARGET can't be NULL if SUBTARGETS is 0. */
2016 || (reload_completed && !reg_mentioned_p (target, source)))
2018 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
2020 if (generate)
2022 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2024 emit_constant_insn (cond,
2025 gen_rtx_SET (VOIDmode, sub,
2026 GEN_INT (val)));
2027 emit_constant_insn (cond,
2028 gen_rtx_SET (VOIDmode, target,
2029 gen_rtx_fmt_ee (code, mode,
2030 source, sub)));
2032 return 2;
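/* Worked example for the two-insn branch above (registers
   illustrative): (x | 0xfffffff0).  VAL itself is not a valid
   immediate, but ~VAL = 0xf is, so the move pattern loads it as a
   single mvn and the whole operation takes two insns:

       mvn rt, #0xf
       orr rd, rs, rt  */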
2036 if (code == XOR)
2037 break;
2039 if (set_sign_bit_copies > 8
2040 && (val & (-1 << (32 - set_sign_bit_copies))) == val)
2042 if (generate)
2044 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2045 rtx shift = GEN_INT (set_sign_bit_copies);
2047 emit_constant_insn
2048 (cond,
2049 gen_rtx_SET (VOIDmode, sub,
2050 gen_rtx_NOT (mode,
2051 gen_rtx_ASHIFT (mode,
2052 source,
2053 shift))));
2054 emit_constant_insn
2055 (cond,
2056 gen_rtx_SET (VOIDmode, target,
2057 gen_rtx_NOT (mode,
2058 gen_rtx_LSHIFTRT (mode, sub,
2059 shift))));
2061 return 2;
2064 if (set_zero_bit_copies > 8
2065 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
2067 if (generate)
2069 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2070 rtx shift = GEN_INT (set_zero_bit_copies);
2072 emit_constant_insn
2073 (cond,
2074 gen_rtx_SET (VOIDmode, sub,
2075 gen_rtx_NOT (mode,
2076 gen_rtx_LSHIFTRT (mode,
2077 source,
2078 shift))));
2079 emit_constant_insn
2080 (cond,
2081 gen_rtx_SET (VOIDmode, target,
2082 gen_rtx_NOT (mode,
2083 gen_rtx_ASHIFT (mode, sub,
2084 shift))));
2086 return 2;
2089 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
2091 if (generate)
2093 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
2094 emit_constant_insn (cond,
2095 gen_rtx_SET (VOIDmode, sub,
2096 gen_rtx_NOT (mode, source)));
2097 source = sub;
2098 if (subtargets)
2099 sub = gen_reg_rtx (mode);
2100 emit_constant_insn (cond,
2101 gen_rtx_SET (VOIDmode, sub,
2102 gen_rtx_AND (mode, source,
2103 GEN_INT (temp1))));
2104 emit_constant_insn (cond,
2105 gen_rtx_SET (VOIDmode, target,
2106 gen_rtx_NOT (mode, sub)));
2108 return 3;
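/* The three insns above are De Morgan's law in action (registers
   illustrative):

       mvn rd, rs              @ rd = ~source
       and rd, rd, #~VAL       @ rd = ~source & ~VAL
       mvn rd, rd              @ rd = source | VAL

   which works whenever ~VAL is a valid immediate even though VAL
   itself is not.  */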
2110 break;
2112 case AND:
2113 /* See if two shifts will do 2 or more insns' worth of work. */
2114 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
2116 HOST_WIDE_INT shift_mask = ((0xffffffff
2117 << (32 - clear_sign_bit_copies))
2118 & 0xffffffff);
2120 if ((remainder | shift_mask) != 0xffffffff)
2122 if (generate)
2124 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2125 insns = arm_gen_constant (AND, mode, cond,
2126 remainder | shift_mask,
2127 new_src, source, subtargets, 1);
2128 source = new_src;
2130 else
2132 rtx targ = subtargets ? NULL_RTX : target;
2133 insns = arm_gen_constant (AND, mode, cond,
2134 remainder | shift_mask,
2135 targ, source, subtargets, 0);
2139 if (generate)
2141 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2142 rtx shift = GEN_INT (clear_sign_bit_copies);
2144 emit_insn (gen_ashlsi3 (new_src, source, shift));
2145 emit_insn (gen_lshrsi3 (target, new_src, shift));
2148 return insns + 2;
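/* Worked example for the shift pair above (registers illustrative):
   (x & 0x0000ffff).  The mask is not a valid immediate, but
   clear_sign_bit_copies = 16, so

       mov rd, rx, lsl #16
       mov rd, rd, lsr #16

   clears the top sixteen bits in two insns.  */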
2151 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
2153 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
2155 if ((remainder | shift_mask) != 0xffffffff)
2157 if (generate)
2159 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2161 insns = arm_gen_constant (AND, mode, cond,
2162 remainder | shift_mask,
2163 new_src, source, subtargets, 1);
2164 source = new_src;
2166 else
2168 rtx targ = subtargets ? NULL_RTX : target;
2170 insns = arm_gen_constant (AND, mode, cond,
2171 remainder | shift_mask,
2172 targ, source, subtargets, 0);
2176 if (generate)
2178 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
2179 rtx shift = GEN_INT (clear_zero_bit_copies);
2181 emit_insn (gen_lshrsi3 (new_src, source, shift));
2182 emit_insn (gen_ashlsi3 (target, new_src, shift));
2185 return insns + 2;
2188 break;
2190 default:
2191 break;
2194 for (i = 0; i < 32; i++)
2195 if (remainder & (1 << i))
2196 num_bits_set++;
2198 if (code == AND || (can_invert && num_bits_set > 16))
2199 remainder = (~remainder) & 0xffffffff;
2200 else if (code == PLUS && num_bits_set > 16)
2201 remainder = (-remainder) & 0xffffffff;
2202 else
2204 can_invert = 0;
2205 can_negate = 0;
2208 /* Now try and find a way of doing the job in either two or three
2209 instructions.
2210 We start by looking for the largest block of zeros that are aligned on
2211 a 2-bit boundary, we then fill up the temps, wrapping around to the
2212 top of the word when we drop off the bottom.
2213 In the worst case this code should produce no more than four insns. */
2215 int best_start = 0;
2216 int best_consecutive_zeros = 0;
2218 for (i = 0; i < 32; i += 2)
2220 int consecutive_zeros = 0;
2222 if (!(remainder & (3 << i)))
2224 while ((i < 32) && !(remainder & (3 << i)))
2226 consecutive_zeros += 2;
2227 i += 2;
2229 if (consecutive_zeros > best_consecutive_zeros)
2231 best_consecutive_zeros = consecutive_zeros;
2232 best_start = i - consecutive_zeros;
2234 i -= 2;
2238 /* So long as it won't require any more insns to do so, it's
2239 desirable to emit a small constant (in bits 0...9) in the last
2240 insn. This way there is more chance that it can be combined with
2241 a later addressing insn to form a pre-indexed load or store
2242 operation. Consider:
2244 *((volatile int *)0xe0000100) = 1;
2245 *((volatile int *)0xe0000110) = 2;
2247 We want this to wind up as:
2249 mov rA, #0xe0000000
2250 mov rB, #1
2251 str rB, [rA, #0x100]
2252 mov rB, #2
2253 str rB, [rA, #0x110]
2255 rather than having to synthesize both large constants from scratch.
2257 Therefore, we calculate how many insns would be required to emit
2258 the constant starting from `best_start', and also starting from
2259 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
2260 yield a shorter sequence, we may as well use zero. */
2261 if (best_start != 0
2262 && ((((unsigned HOST_WIDE_INT) 1) << best_start) < remainder)
2263 && (count_insns_for_constant (remainder, 0) <=
2264 count_insns_for_constant (remainder, best_start)))
2265 best_start = 0;
2267 /* Now start emitting the insns. */
2268 i = best_start;
2271 int end;
2273 if (i <= 0)
2274 i += 32;
2275 if (remainder & (3 << (i - 2)))
2277 end = i - 8;
2278 if (end < 0)
2279 end += 32;
2280 temp1 = remainder & ((0x0ff << end)
2281 | ((i < end) ? (0xff >> (32 - end)) : 0));
2282 remainder &= ~temp1;
2284 if (generate)
2286 rtx new_src, temp1_rtx;
2288 if (code == SET || code == MINUS)
2290 new_src = (subtargets ? gen_reg_rtx (mode) : target);
2291 if (can_invert && code != MINUS)
2292 temp1 = ~temp1;
2294 else
2296 if (remainder && subtargets)
2297 new_src = gen_reg_rtx (mode);
2298 else
2299 new_src = target;
2300 if (can_invert)
2301 temp1 = ~temp1;
2302 else if (can_negate)
2303 temp1 = -temp1;
2306 temp1 = trunc_int_for_mode (temp1, mode);
2307 temp1_rtx = GEN_INT (temp1);
2309 if (code == SET)
2311 else if (code == MINUS)
2312 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
2313 else
2314 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
2316 emit_constant_insn (cond,
2317 gen_rtx_SET (VOIDmode, new_src,
2318 temp1_rtx));
2319 source = new_src;
2322 if (code == SET)
2324 can_invert = 0;
2325 code = PLUS;
2327 else if (code == MINUS)
2328 code = PLUS;
2330 insns++;
2331 i -= 6;
2333 i -= 2;
2335 while (remainder);
2338 return insns;
2341 /* Canonicalize a comparison so that we are more likely to recognize it.
2342 This can be done for a few constant compares, where we can make the
2343 immediate value easier to load. */
2345 enum rtx_code
2346 arm_canonicalize_comparison (enum rtx_code code, rtx * op1)
2348 unsigned HOST_WIDE_INT i = INTVAL (*op1);
2350 switch (code)
2352 case EQ:
2353 case NE:
2354 return code;
2356 case GT:
2357 case LE:
2358 if (i != ((((unsigned HOST_WIDE_INT) 1) << (HOST_BITS_PER_WIDE_INT - 1)) - 1)
2359 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
2361 *op1 = GEN_INT (i + 1);
2362 return code == GT ? GE : LT;
2364 break;
2366 case GE:
2367 case LT:
2368 if (i != (((unsigned HOST_WIDE_INT) 1) << (HOST_BITS_PER_WIDE_INT - 1))
2369 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
2371 *op1 = GEN_INT (i - 1);
2372 return code == GE ? GT : LE;
2374 break;
2376 case GTU:
2377 case LEU:
2378 if (i != ~((unsigned HOST_WIDE_INT) 0)
2379 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
2381 *op1 = GEN_INT (i + 1);
2382 return code == GTU ? GEU : LTU;
2384 break;
2386 case GEU:
2387 case LTU:
2388 if (i != 0
2389 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
2391 *op1 = GEN_INT (i - 1);
2392 return code == GEU ? GTU : LEU;
2394 break;
2396 default:
2397 gcc_unreachable ();
2400 return code;
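/* Worked example (values chosen for exposition): (x > 0xffff)
   cannot be tested directly, since 0xffff is not a valid compare
   immediate, but it is rewritten here as (x >= 0x10000), and
   0x10000 (1 rotated right by 16) needs only a single cmp.  */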
2404 /* Define how to find the value returned by a function. */
2407 arm_function_value (tree type, tree func ATTRIBUTE_UNUSED)
2409 enum machine_mode mode;
2410 int unsignedp ATTRIBUTE_UNUSED;
2411 rtx r ATTRIBUTE_UNUSED;
2413 mode = TYPE_MODE (type);
2414 /* Promote integer types. */
2415 if (INTEGRAL_TYPE_P (type))
2416 PROMOTE_FUNCTION_MODE (mode, unsignedp, type);
2418 /* Promote small structs returned in a register to full-word size
2419 for big-endian AAPCS. */
2420 if (arm_return_in_msb (type))
2422 HOST_WIDE_INT size = int_size_in_bytes (type);
2423 if (size % UNITS_PER_WORD != 0)
2425 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
2426 mode = mode_for_size (size * BITS_PER_UNIT, MODE_INT, 0);
2430 return LIBCALL_VALUE (mode);
2433 /* Determine the amount of memory needed to store the possible return
2434 registers of an untyped call. */
2436 arm_apply_result_size (void)
2438 int size = 16;
2440 if (TARGET_ARM)
2442 if (TARGET_HARD_FLOAT_ABI)
2444 if (TARGET_FPA)
2445 size += 12;
2446 if (TARGET_MAVERICK)
2447 size += 8;
2449 if (TARGET_IWMMXT_ABI)
2450 size += 8;
2453 return size;
2456 /* Decide whether a type should be returned in memory (true)
2457 or in a register (false). This is called by the macro
2458 RETURN_IN_MEMORY. */
2460 arm_return_in_memory (tree type)
2462 HOST_WIDE_INT size;
2464 if (!AGGREGATE_TYPE_P (type)
2465 && !(TARGET_AAPCS_BASED && TREE_CODE (type) == COMPLEX_TYPE))
2466 /* All simple types are returned in registers.
2467 For AAPCS, complex types are treated the same as aggregates. */
2468 return 0;
2470 size = int_size_in_bytes (type);
2472 if (arm_abi != ARM_ABI_APCS)
2474 /* ATPCS and later return aggregate types in memory only if they are
2475 larger than a word (or are variable size). */
2476 return (size < 0 || size > UNITS_PER_WORD);
2479 /* For the arm-wince targets we choose to be compatible with Microsoft's
2480 ARM and Thumb compilers, which always return aggregates in memory. */
2481 #ifndef ARM_WINCE
2482 /* All structures/unions bigger than one word are returned in memory.
2483 Also catch the case where int_size_in_bytes returns -1. In this case
2484 the aggregate is either huge or of variable size, and in either case
2485 we will want to return it via memory and not in a register. */
2486 if (size < 0 || size > UNITS_PER_WORD)
2487 return 1;
2489 if (TREE_CODE (type) == RECORD_TYPE)
2491 tree field;
2493 /* For a struct the APCS says that we only return in a register
2494 if the type is 'integer like' and every addressable element
2495 has an offset of zero. For practical purposes this means
2496 that the structure can have at most one non bit-field element
2497 and that this element must be the first one in the structure. */
2499 /* Find the first field, ignoring non FIELD_DECL things which will
2500 have been created by C++. */
2501 for (field = TYPE_FIELDS (type);
2502 field && TREE_CODE (field) != FIELD_DECL;
2503 field = TREE_CHAIN (field))
2504 continue;
2506 if (field == NULL)
2507 return 0; /* An empty structure. Allowed by an extension to ANSI C. */
2509 /* Check that the first field is valid for returning in a register. */
2511 /* ... Floats are not allowed */
2512 if (FLOAT_TYPE_P (TREE_TYPE (field)))
2513 return 1;
2515 /* ... Aggregates that are not themselves valid for returning in
2516 a register are not allowed. */
2517 if (RETURN_IN_MEMORY (TREE_TYPE (field)))
2518 return 1;
2520 /* Now check the remaining fields, if any. Only bitfields are allowed,
2521 since they are not addressable. */
2522 for (field = TREE_CHAIN (field);
2523 field;
2524 field = TREE_CHAIN (field))
2526 if (TREE_CODE (field) != FIELD_DECL)
2527 continue;
2529 if (!DECL_BIT_FIELD_TYPE (field))
2530 return 1;
2533 return 0;
2536 if (TREE_CODE (type) == UNION_TYPE)
2538 tree field;
2540 /* Unions can be returned in registers if every element is
2541 integral, or can be returned in an integer register. */
2542 for (field = TYPE_FIELDS (type);
2543 field;
2544 field = TREE_CHAIN (field))
2546 if (TREE_CODE (field) != FIELD_DECL)
2547 continue;
2549 if (FLOAT_TYPE_P (TREE_TYPE (field)))
2550 return 1;
2552 if (RETURN_IN_MEMORY (TREE_TYPE (field)))
2553 return 1;
2556 return 0;
2558 #endif /* not ARM_WINCE */
2560 /* Return all other types in memory. */
2561 return 1;
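/* Illustrative examples under the old APCS rules above:

       struct ok  { int i; };     returned in a register
       struct big { int i, j; };  returned in memory (two words)
       struct flt { float f; };   returned in memory (float member)  */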
2564 /* Indicate whether or not words of a double are in big-endian order. */
2567 arm_float_words_big_endian (void)
2569 if (TARGET_MAVERICK)
2570 return 0;
2572 /* For FPA, float words are always big-endian. For VFP, float words
2573 follow the memory system mode. */
2575 if (TARGET_FPA)
2577 return 1;
2580 if (TARGET_VFP)
2581 return (TARGET_BIG_END ? 1 : 0);
2583 return 1;
2586 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2587 for a call to a function whose data type is FNTYPE.
2588 For a library call, FNTYPE is NULL. */
2589 void
2590 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
2591 rtx libname ATTRIBUTE_UNUSED,
2592 tree fndecl ATTRIBUTE_UNUSED)
2594 /* On the ARM, the offset starts at 0. */
2595 pcum->nregs = ((fntype && aggregate_value_p (TREE_TYPE (fntype), fntype)) ? 1 : 0);
2596 pcum->iwmmxt_nregs = 0;
2597 pcum->can_split = true;
2599 pcum->call_cookie = CALL_NORMAL;
2601 if (TARGET_LONG_CALLS)
2602 pcum->call_cookie = CALL_LONG;
2604 /* Check for long call/short call attributes. The attributes
2605 override any command line option. */
2606 if (fntype)
2608 if (lookup_attribute ("short_call", TYPE_ATTRIBUTES (fntype)))
2609 pcum->call_cookie = CALL_SHORT;
2610 else if (lookup_attribute ("long_call", TYPE_ATTRIBUTES (fntype)))
2611 pcum->call_cookie = CALL_LONG;
2614 /* Varargs vectors are treated the same as long long.
2615 named_count avoids having to change the way ARM handles 'named'. */
2616 pcum->named_count = 0;
2617 pcum->nargs = 0;
2619 if (TARGET_REALLY_IWMMXT && fntype)
2621 tree fn_arg;
2623 for (fn_arg = TYPE_ARG_TYPES (fntype);
2624 fn_arg;
2625 fn_arg = TREE_CHAIN (fn_arg))
2626 pcum->named_count += 1;
2628 if (! pcum->named_count)
2629 pcum->named_count = INT_MAX;
2634 /* Return true if mode/type need doubleword alignment. */
2635 bool
2636 arm_needs_doubleword_align (enum machine_mode mode, tree type)
2638 return (GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY
2639 || (type && TYPE_ALIGN (type) > PARM_BOUNDARY));
2643 /* Determine where to put an argument to a function.
2644 Value is zero to push the argument on the stack,
2645 or a hard register in which to store the argument.
2647 MODE is the argument's machine mode.
2648 TYPE is the data type of the argument (as a tree).
2649 This is null for libcalls where that information may
2650 not be available.
2651 CUM is a variable of type CUMULATIVE_ARGS which gives info about
2652 the preceding args and about the function being called.
2653 NAMED is nonzero if this argument is a named parameter
2654 (otherwise it is an extra parameter matching an ellipsis). */
2657 arm_function_arg (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
2658 tree type, int named)
2660 int nregs;
2662 /* Varargs vectors are treated the same as long long.
2663 named_count avoids having to change the way ARM handles 'named'. */
2664 if (TARGET_IWMMXT_ABI
2665 && arm_vector_mode_supported_p (mode)
2666 && pcum->named_count > pcum->nargs + 1)
2668 if (pcum->iwmmxt_nregs <= 9)
2669 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
2670 else
2672 pcum->can_split = false;
2673 return NULL_RTX;
2677 /* Put doubleword aligned quantities in even register pairs. */
2678 if (pcum->nregs & 1
2679 && ARM_DOUBLEWORD_ALIGN
2680 && arm_needs_doubleword_align (mode, type))
2681 pcum->nregs++;
2683 if (mode == VOIDmode)
2684 /* Compute operand 2 of the call insn. */
2685 return GEN_INT (pcum->call_cookie);
2687 /* Only allow splitting an arg between regs and memory if all preceding
2688 args were allocated to regs. For args passed by reference we only count
2689 the reference pointer. */
2690 if (pcum->can_split)
2691 nregs = 1;
2692 else
2693 nregs = ARM_NUM_REGS2 (mode, type);
2695 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
2696 return NULL_RTX;
2698 return gen_rtx_REG (mode, pcum->nregs);
2701 static int
2702 arm_arg_partial_bytes (CUMULATIVE_ARGS *pcum, enum machine_mode mode,
2703 tree type, bool named ATTRIBUTE_UNUSED)
2705 int nregs = pcum->nregs;
2707 if (arm_vector_mode_supported_p (mode))
2708 return 0;
2710 if (NUM_ARG_REGS > nregs
2711 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
2712 && pcum->can_split)
2713 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
2715 return 0;
2718 /* Variable sized types are passed by reference. This is a GCC
2719 extension to the ARM ABI. */
2721 static bool
2722 arm_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2723 enum machine_mode mode ATTRIBUTE_UNUSED,
2724 tree type, bool named ATTRIBUTE_UNUSED)
2726 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
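/* An illustrative GNU C case (this relies on the variable-length
   array extension and nested functions; the names are made up):

       void outer (int n)
       {
         struct vla { char c[n]; };         TYPE_SIZE is not an
         void inner (struct vla v) { ... }  INTEGER_CST, so V is
         ...                                passed by reference
       }  */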
2729 /* Encode the current state of the #pragma [no_]long_calls. */
2730 typedef enum
2732 OFF, /* No #pragma [no_]long_calls is in effect. */
2733 LONG, /* #pragma long_calls is in effect. */
2734 SHORT /* #pragma no_long_calls is in effect. */
2735 } arm_pragma_enum;
2737 static arm_pragma_enum arm_pragma_long_calls = OFF;
2739 void
2740 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
2742 arm_pragma_long_calls = LONG;
2745 void
2746 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
2748 arm_pragma_long_calls = SHORT;
2751 void
2752 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
2754 arm_pragma_long_calls = OFF;
2757 /* Table of machine attributes. */
2758 const struct attribute_spec arm_attribute_table[] =
2760 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2761 /* Function calls made to this symbol must be done indirectly, because
2762 it may lie outside of the 26 bit addressing range of a normal function
2763 call. */
2764 { "long_call", 0, 0, false, true, true, NULL },
2765 /* Whereas these functions are always known to reside within the 26 bit
2766 addressing range. */
2767 { "short_call", 0, 0, false, true, true, NULL },
2768 /* Interrupt Service Routines have special prologue and epilogue requirements. */
2769 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute },
2770 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute },
2771 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute },
2772 #ifdef ARM_PE
2773 /* ARM/PE has three new attributes:
2774 interfacearm - ?
2775 dllexport - for exporting a function/variable that will live in a dll
2776 dllimport - for importing a function/variable from a dll
2778 Microsoft allows multiple declspecs in one __declspec, separating
2779 them with spaces. We do NOT support this. Instead, use __declspec
2780 multiple times.
2782 { "dllimport", 0, 0, true, false, false, NULL },
2783 { "dllexport", 0, 0, true, false, false, NULL },
2784 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute },
2785 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
2786 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2787 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2788 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute },
2789 #endif
2790 { NULL, 0, 0, false, false, false, NULL }
2793 /* Handle an attribute requiring a FUNCTION_DECL;
2794 arguments as in struct attribute_spec.handler. */
2795 static tree
2796 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
2797 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
2799 if (TREE_CODE (*node) != FUNCTION_DECL)
2801 warning (0, "%qs attribute only applies to functions",
2802 IDENTIFIER_POINTER (name));
2803 *no_add_attrs = true;
2806 return NULL_TREE;
2809 /* Handle an "interrupt" or "isr" attribute;
2810 arguments as in struct attribute_spec.handler. */
2811 static tree
2812 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
2813 bool *no_add_attrs)
2815 if (DECL_P (*node))
2817 if (TREE_CODE (*node) != FUNCTION_DECL)
2819 warning (0, "%qs attribute only applies to functions",
2820 IDENTIFIER_POINTER (name));
2821 *no_add_attrs = true;
2823 /* FIXME: the argument, if any, is checked for type attributes;
2824 should it be checked for decl ones? */
2826 else
2828 if (TREE_CODE (*node) == FUNCTION_TYPE
2829 || TREE_CODE (*node) == METHOD_TYPE)
2831 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
2833 warning (0, "%qs attribute ignored", IDENTIFIER_POINTER (name));
2834 *no_add_attrs = true;
2837 else if (TREE_CODE (*node) == POINTER_TYPE
2838 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
2839 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
2840 && arm_isr_value (args) != ARM_FT_UNKNOWN)
2842 *node = build_variant_type_copy (*node);
2843 TREE_TYPE (*node) = build_type_attribute_variant
2844 (TREE_TYPE (*node),
2845 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
2846 *no_add_attrs = true;
2848 else
2850 /* Possibly pass this attribute on from the type to a decl. */
2851 if (flags & ((int) ATTR_FLAG_DECL_NEXT
2852 | (int) ATTR_FLAG_FUNCTION_NEXT
2853 | (int) ATTR_FLAG_ARRAY_NEXT))
2855 *no_add_attrs = true;
2856 return tree_cons (name, args, NULL_TREE);
2858 else
2860 warning (0, "%qs attribute ignored", IDENTIFIER_POINTER (name));
2865 return NULL_TREE;
2868 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2869 /* Handle the "notshared" attribute. This attribute is another way of
2870 requesting hidden visibility. ARM's compiler supports
2871 "__declspec(notshared)"; we support the same thing via an
2872 attribute. */
2874 static tree
2875 arm_handle_notshared_attribute (tree *node,
2876 tree name ATTRIBUTE_UNUSED,
2877 tree args ATTRIBUTE_UNUSED,
2878 int flags ATTRIBUTE_UNUSED,
2879 bool *no_add_attrs)
2881 tree decl = TYPE_NAME (*node);
2883 if (decl)
2885 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
2886 DECL_VISIBILITY_SPECIFIED (decl) = 1;
2887 *no_add_attrs = false;
2889 return NULL_TREE;
2891 #endif
2893 /* Return 0 if the attributes for two types are incompatible, 1 if they
2894 are compatible, and 2 if they are nearly compatible (which causes a
2895 warning to be generated). */
2896 static int
2897 arm_comp_type_attributes (tree type1, tree type2)
2899 int l1, l2, s1, s2;
2901 /* Check for mismatch of non-default calling convention. */
2902 if (TREE_CODE (type1) != FUNCTION_TYPE)
2903 return 1;
2905 /* Check for mismatched call attributes. */
2906 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
2907 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
2908 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
2909 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
2911 /* Only bother to check if an attribute is defined. */
2912 if (l1 | l2 | s1 | s2)
2914 /* If one type has an attribute, the other must have the same attribute. */
2915 if ((l1 != l2) || (s1 != s2))
2916 return 0;
2918 /* Disallow mixed attributes. */
2919 if ((l1 & s2) || (l2 & s1))
2920 return 0;
2923 /* Check for mismatched ISR attribute. */
2924 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
2925 if (! l1)
2926 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
2927 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
2928 if (! l2)
2929 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
2930 if (l1 != l2)
2931 return 0;
2933 return 1;
2936 /* Encode long_call or short_call attribute by prefixing
2937 symbol name in DECL with a special character FLAG. */
2938 void
2939 arm_encode_call_attribute (tree decl, int flag)
2941 const char * str = XSTR (XEXP (DECL_RTL (decl), 0), 0);
2942 int len = strlen (str);
2943 char * newstr;
2945 /* Do not allow weak functions to be treated as short call. */
2946 if (DECL_WEAK (decl) && flag == SHORT_CALL_FLAG_CHAR)
2947 return;
2949 newstr = alloca (len + 2);
2950 newstr[0] = flag;
2951 strcpy (newstr + 1, str);
2953 newstr = (char *) ggc_alloc_string (newstr, len + 1);
2954 XSTR (XEXP (DECL_RTL (decl), 0), 0) = newstr;
2957 /* Assigns default attributes to newly defined type. This is used to
2958 set short_call/long_call attributes for function types of
2959 functions defined inside corresponding #pragma scopes. */
2960 static void
2961 arm_set_default_type_attributes (tree type)
2963 /* Add __attribute__ ((long_call)) to all functions when
2964 inside #pragma long_calls, or __attribute__ ((short_call))
2965 when inside #pragma no_long_calls. */
2966 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
2968 tree type_attr_list, attr_name;
2969 type_attr_list = TYPE_ATTRIBUTES (type);
2971 if (arm_pragma_long_calls == LONG)
2972 attr_name = get_identifier ("long_call");
2973 else if (arm_pragma_long_calls == SHORT)
2974 attr_name = get_identifier ("short_call");
2975 else
2976 return;
2978 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
2979 TYPE_ATTRIBUTES (type) = type_attr_list;
2983 /* Return 1 if the operand is a SYMBOL_REF for a function known to be
2984 defined within the current compilation unit. If this cannot be
2985 determined, then 0 is returned. */
2986 static int
2987 current_file_function_operand (rtx sym_ref)
2989 /* This is a bit of a fib. A function will have a short call flag
2990 applied to its name if it has the short call attribute, or it has
2991 already been defined within the current compilation unit. */
2992 if (ENCODED_SHORT_CALL_ATTR_P (XSTR (sym_ref, 0)))
2993 return 1;
2995 /* The current function is always defined within the current compilation
2996 unit. If it is a weak definition, however, then this may not be the real
2997 definition of the function, and so we have to say no. */
2998 if (sym_ref == XEXP (DECL_RTL (current_function_decl), 0)
2999 && !DECL_WEAK (current_function_decl))
3000 return 1;
3002 /* We cannot make the determination - default to returning 0. */
3003 return 0;
3006 /* Return nonzero if a 32 bit "long_call" should be generated for
3007 this call. We generate a long_call if the function:
3009 a. has an __attribute__ ((long_call))
3010 or b. is within the scope of a #pragma long_calls
3011 or c. the -mlong-calls command line switch has been specified,
3012 and either:
3013 1. -ffunction-sections is in effect
3014 or 2. the current function has __attribute__ ((section))
3015 or 3. the target function has __attribute__ ((section))
3017 However we do not generate a long call if the function:
3019 d. has an __attribute__ ((short_call))
3020 or e. is inside the scope of a #pragma no_long_calls
3021 or f. is defined within the current compilation unit.
3023 This function will be called by C fragments contained in the machine
3024 description file. SYM_REF and CALL_COOKIE correspond to the matched
3025 rtl operands. CALL_SYMBOL is used to distinguish between
3026 two different callers of the function. It is set to 1 in the
3027 "call_symbol" and "call_symbol_value" patterns and to 0 in the "call"
3028 and "call_value" patterns. This is because of the difference in the
3029 SYM_REFs passed by these patterns. */
3031 arm_is_longcall_p (rtx sym_ref, int call_cookie, int call_symbol)
3033 if (!call_symbol)
3035 if (GET_CODE (sym_ref) != MEM)
3036 return 0;
3038 sym_ref = XEXP (sym_ref, 0);
3041 if (GET_CODE (sym_ref) != SYMBOL_REF)
3042 return 0;
3044 if (call_cookie & CALL_SHORT)
3045 return 0;
3047 if (TARGET_LONG_CALLS)
3049 if (flag_function_sections
3050 || DECL_SECTION_NAME (current_function_decl))
3051 /* c.3 is handled by the definition of the
3052 ARM_DECLARE_FUNCTION_SIZE macro. */
3053 return 1;
3056 if (current_file_function_operand (sym_ref))
3057 return 0;
3059 return (call_cookie & CALL_LONG)
3060 || ENCODED_LONG_CALL_ATTR_P (XSTR (sym_ref, 0))
3061 || TARGET_LONG_CALLS;
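/* Illustrative usage (declarations made up for exposition):

       extern void far_away (void) __attribute__ ((long_call));
       extern void nearby (void)   __attribute__ ((short_call));

   Calls to far_away load the full 32-bit address into a register,
   while calls to nearby remain ordinary bl instructions.  */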
3064 /* Return nonzero if it is ok to make a tail-call to DECL. */
3065 static bool
3066 arm_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
3068 int call_type = TARGET_LONG_CALLS ? CALL_LONG : CALL_NORMAL;
3070 if (cfun->machine->sibcall_blocked)
3071 return false;
3073 /* Never tailcall something for which we have no decl, or if we
3074 are in Thumb mode. */
3075 if (decl == NULL || TARGET_THUMB)
3076 return false;
3078 /* Get the calling method. */
3079 if (lookup_attribute ("short_call", TYPE_ATTRIBUTES (TREE_TYPE (decl))))
3080 call_type = CALL_SHORT;
3081 else if (lookup_attribute ("long_call", TYPE_ATTRIBUTES (TREE_TYPE (decl))))
3082 call_type = CALL_LONG;
3084 /* Cannot tail-call to long calls, since these are out of range of
3085 a branch instruction. However, if not compiling PIC, we know
3086 we can reach the symbol if it is in this compilation unit. */
3087 if (call_type == CALL_LONG && (flag_pic || !TREE_ASM_WRITTEN (decl)))
3088 return false;
3090 /* If we are interworking and the function is not declared static
3091 then we can't tail-call it unless we know that it exists in this
3092 compilation unit (since it might be a Thumb routine). */
3093 if (TARGET_INTERWORK && TREE_PUBLIC (decl) && !TREE_ASM_WRITTEN (decl))
3094 return false;
3096 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
3097 if (IS_INTERRUPT (arm_current_func_type ()))
3098 return false;
3100 /* Everything else is ok. */
3101 return true;
3105 /* Addressing mode support functions. */
3107 /* Return nonzero if X is a legitimate immediate operand when compiling
3108 for PIC. */
3110 legitimate_pic_operand_p (rtx x)
3112 if (CONSTANT_P (x)
3113 && flag_pic
3114 && (GET_CODE (x) == SYMBOL_REF
3115 || (GET_CODE (x) == CONST
3116 && GET_CODE (XEXP (x, 0)) == PLUS
3117 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)))
3118 return 0;
3120 return 1;
3124 legitimize_pic_address (rtx orig, enum machine_mode mode, rtx reg)
3126 if (GET_CODE (orig) == SYMBOL_REF
3127 || GET_CODE (orig) == LABEL_REF)
3129 #ifndef AOF_ASSEMBLER
3130 rtx pic_ref, address;
3131 #endif
3132 rtx insn;
3133 int subregs = 0;
3135 if (reg == 0)
3137 gcc_assert (!no_new_pseudos);
3138 reg = gen_reg_rtx (Pmode);
3140 subregs = 1;
3143 #ifdef AOF_ASSEMBLER
3144 /* The AOF assembler can generate relocations for these directly, and
3145 understands that the PIC register has to be added into the offset. */
3146 insn = emit_insn (gen_pic_load_addr_based (reg, orig));
3147 #else
3148 if (subregs)
3149 address = gen_reg_rtx (Pmode);
3150 else
3151 address = reg;
3153 if (TARGET_ARM)
3154 emit_insn (gen_pic_load_addr_arm (address, orig));
3155 else
3156 emit_insn (gen_pic_load_addr_thumb (address, orig));
3158 if ((GET_CODE (orig) == LABEL_REF
3159 || (GET_CODE (orig) == SYMBOL_REF &&
3160 SYMBOL_REF_LOCAL_P (orig)))
3161 && NEED_GOT_RELOC)
3162 pic_ref = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, address);
3163 else
3165 pic_ref = gen_const_mem (Pmode,
3166 gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
3167 address));
3170 insn = emit_move_insn (reg, pic_ref);
3171 #endif
3172 current_function_uses_pic_offset_table = 1;
3173 /* Put a REG_EQUAL note on this insn, so that it can be optimized
3174 by loop. */
3175 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_EQUAL, orig,
3176 REG_NOTES (insn));
3177 return reg;
3179 else if (GET_CODE (orig) == CONST)
3181 rtx base, offset;
3183 if (GET_CODE (XEXP (orig, 0)) == PLUS
3184 && XEXP (XEXP (orig, 0), 0) == pic_offset_table_rtx)
3185 return orig;
3187 if (reg == 0)
3189 gcc_assert (!no_new_pseudos);
3190 reg = gen_reg_rtx (Pmode);
3193 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
3195 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
3196 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
3197 base == reg ? 0 : reg);
3199 if (GET_CODE (offset) == CONST_INT)
3201 /* The base register doesn't really matter, we only want to
3202 test the index for the appropriate mode. */
3203 if (!arm_legitimate_index_p (mode, offset, SET, 0))
3205 gcc_assert (!no_new_pseudos);
3206 offset = force_reg (Pmode, offset);
3209 if (GET_CODE (offset) == CONST_INT)
3210 return plus_constant (base, INTVAL (offset));
3213 if (GET_MODE_SIZE (mode) > 4
3214 && (GET_MODE_CLASS (mode) == MODE_INT
3215 || TARGET_SOFT_FLOAT))
3217 emit_insn (gen_addsi3 (reg, base, offset));
3218 return reg;
3221 return gen_rtx_PLUS (Pmode, base, offset);
3224 return orig;
3228 /* Find a spare low register to use during the prolog of a function. */
3230 static int
3231 thumb_find_work_register (unsigned long pushed_regs_mask)
3233 int reg;
3235 /* Check the argument registers first as these are call-used. The
3236 register allocation order means that sometimes r3 might be used
3237 but earlier argument registers might not, so check them all. */
3238 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
3239 if (!regs_ever_live[reg])
3240 return reg;
3242 /* Before going on to check the call-saved registers we can try a couple
3243 more ways of deducing that r3 is available. The first is when we are
3244 pushing anonymous arguments onto the stack and we have less than 4
3245 registers' worth of fixed arguments (*). In this case r3 will be part of
3246 the variable argument list and so we can be sure that it will be
3247 pushed right at the start of the function. Hence it will be available
3248 for the rest of the prologue.
3249 (*): i.e. current_function_pretend_args_size is greater than 0. */
3250 if (cfun->machine->uses_anonymous_args
3251 && current_function_pretend_args_size > 0)
3252 return LAST_ARG_REGNUM;
3254 /* The other case is when we have fixed arguments but less than 4 registers
3255 worth. In this case r3 might be used in the body of the function, but
3256 it is not being used to convey an argument into the function. In theory
3257 we could just check current_function_args_size to see how many bytes are
3258 being passed in argument registers, but it seems that it is unreliable.
3259 Sometimes it will have the value 0 when in fact arguments are being
3260 passed. (See testcase execute/20021111-1.c for an example). So we also
3261 check the args_info.nregs field as well. The problem with this field is
3262 that it makes no allowances for arguments that are passed to the
3263 function but which are not used. Hence we could miss an opportunity
3264 when a function has an unused argument in r3. But it is better to be
3265 safe than sorry. */
3266 if (! cfun->machine->uses_anonymous_args
3267 && current_function_args_size >= 0
3268 && current_function_args_size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
3269 && cfun->args_info.nregs < 4)
3270 return LAST_ARG_REGNUM;
3272 /* Otherwise look for a call-saved register that is going to be pushed. */
3273 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
3274 if (pushed_regs_mask & (1 << reg))
3275 return reg;
3277 /* Something went wrong - thumb_compute_save_reg_mask()
3278 should have arranged for a suitable register to be pushed. */
3279 gcc_unreachable ();
3283 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
3284 low register. */
3286 void
3287 arm_load_pic_register (unsigned int scratch)
3289 #ifndef AOF_ASSEMBLER
3290 rtx l1, pic_tmp, pic_tmp2, pic_rtx;
3291 rtx global_offset_table;
3293 if (current_function_uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
3294 return;
3296 gcc_assert (flag_pic);
3298 l1 = gen_label_rtx ();
3300 global_offset_table = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
3301 /* On the ARM the PC register contains 'dot + 8' at the time of the
3302 addition, on the Thumb it is 'dot + 4'. */
3303 pic_tmp = plus_constant (gen_rtx_LABEL_REF (Pmode, l1), TARGET_ARM ? 8 : 4);
3304 if (GOT_PCREL)
3305 pic_tmp2 = gen_rtx_CONST (VOIDmode,
3306 gen_rtx_PLUS (Pmode, global_offset_table, pc_rtx));
3307 else
3308 pic_tmp2 = gen_rtx_CONST (VOIDmode, global_offset_table);
3310 pic_rtx = gen_rtx_CONST (Pmode, gen_rtx_MINUS (Pmode, pic_tmp2, pic_tmp));
3312 if (TARGET_ARM)
3314 emit_insn (gen_pic_load_addr_arm (pic_offset_table_rtx, pic_rtx));
3315 emit_insn (gen_pic_add_dot_plus_eight (pic_offset_table_rtx, l1));
3317 else
3319 if (REGNO (pic_offset_table_rtx) > LAST_LO_REGNUM)
3321 /* We will have pushed the pic register, so should always be
3322 able to find a work register. */
3323 pic_tmp = gen_rtx_REG (SImode, scratch);
3324 emit_insn (gen_pic_load_addr_thumb (pic_tmp, pic_rtx));
3325 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
3327 else
3328 emit_insn (gen_pic_load_addr_thumb (pic_offset_table_rtx, pic_rtx));
3329 emit_insn (gen_pic_add_dot_plus_four (pic_offset_table_rtx, l1));
3332 /* Need to emit this whether or not we obey regdecls,
3333 since setjmp/longjmp can cause life info to screw up. */
3334 emit_insn (gen_rtx_USE (VOIDmode, pic_offset_table_rtx));
3335 #endif /* AOF_ASSEMBLER */
3339 /* Return nonzero if X is valid as an ARM state addressing register. */
3340 static int
3341 arm_address_register_rtx_p (rtx x, int strict_p)
3343 int regno;
3345 if (GET_CODE (x) != REG)
3346 return 0;
3348 regno = REGNO (x);
3350 if (strict_p)
3351 return ARM_REGNO_OK_FOR_BASE_P (regno);
3353 return (regno <= LAST_ARM_REGNUM
3354 || regno >= FIRST_PSEUDO_REGISTER
3355 || regno == FRAME_POINTER_REGNUM
3356 || regno == ARG_POINTER_REGNUM);
3359 /* Return nonzero if X is a valid ARM state address operand. */
3361 arm_legitimate_address_p (enum machine_mode mode, rtx x, RTX_CODE outer,
3362 int strict_p)
3364 bool use_ldrd;
3365 enum rtx_code code = GET_CODE (x);
3367 if (arm_address_register_rtx_p (x, strict_p))
3368 return 1;
3370 use_ldrd = (TARGET_LDRD
3371 && (mode == DImode
3372 || (mode == DFmode && (TARGET_SOFT_FLOAT || TARGET_VFP))));
3374 if (code == POST_INC || code == PRE_DEC
3375 || ((code == PRE_INC || code == POST_DEC)
3376 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
3377 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
3379 else if ((code == POST_MODIFY || code == PRE_MODIFY)
3380 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
3381 && GET_CODE (XEXP (x, 1)) == PLUS
3382 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
3384 rtx addend = XEXP (XEXP (x, 1), 1);
3386 /* Don't allow ldrd post increment by register because it's hard
3387 to fix up invalid register choices. */
3388 if (use_ldrd
3389 && GET_CODE (x) == POST_MODIFY
3390 && GET_CODE (addend) == REG)
3391 return 0;
3393 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
3394 && arm_legitimate_index_p (mode, addend, outer, strict_p));
3397 /* After reload constants split into minipools will have addresses
3398 from a LABEL_REF. */
3399 else if (reload_completed
3400 && (code == LABEL_REF
3401 || (code == CONST
3402 && GET_CODE (XEXP (x, 0)) == PLUS
3403 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
3404 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
3405 return 1;
3407 else if (mode == TImode)
3408 return 0;
3410 else if (code == PLUS)
3412 rtx xop0 = XEXP (x, 0);
3413 rtx xop1 = XEXP (x, 1);
3415 return ((arm_address_register_rtx_p (xop0, strict_p)
3416 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
3417 || (arm_address_register_rtx_p (xop1, strict_p)
3418 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
3421 #if 0
3422 /* Reload currently can't handle MINUS, so disable this for now. */
3423 else if (GET_CODE (x) == MINUS)
3425 rtx xop0 = XEXP (x, 0);
3426 rtx xop1 = XEXP (x, 1);
3428 return (arm_address_register_rtx_p (xop0, strict_p)
3429 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
3431 #endif
3433 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
3434 && code == SYMBOL_REF
3435 && CONSTANT_POOL_ADDRESS_P (x)
3436 && ! (flag_pic
3437 && symbol_mentioned_p (get_pool_constant (x))))
3438 return 1;
3440 return 0;
3443 /* Return nonzero if INDEX is valid for an address index operand in
3444 ARM state. */
3445 static int
3446 arm_legitimate_index_p (enum machine_mode mode, rtx index, RTX_CODE outer,
3447 int strict_p)
3449 HOST_WIDE_INT range;
3450 enum rtx_code code = GET_CODE (index);
3452 /* Standard coprocessor addressing modes. */
3453 if (TARGET_HARD_FLOAT
3454 && (TARGET_FPA || TARGET_MAVERICK)
3455 && (GET_MODE_CLASS (mode) == MODE_FLOAT
3456 || (TARGET_MAVERICK && mode == DImode)))
3457 return (code == CONST_INT && INTVAL (index) < 1024
3458 && INTVAL (index) > -1024
3459 && (INTVAL (index) & 3) == 0);
3461 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
3462 return (code == CONST_INT
3463 && INTVAL (index) < 1024
3464 && INTVAL (index) > -1024
3465 && (INTVAL (index) & 3) == 0);
3467 if (arm_address_register_rtx_p (index, strict_p)
3468 && (GET_MODE_SIZE (mode) <= 4))
3469 return 1;
3471 if (mode == DImode || mode == DFmode)
3473 if (code == CONST_INT)
3475 HOST_WIDE_INT val = INTVAL (index);
3477 if (TARGET_LDRD)
3478 return val > -256 && val < 256;
3479 else
3480 return val > -4096 && val < 4092;
3483 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
3486 if (GET_MODE_SIZE (mode) <= 4
3487 && ! (arm_arch4
3488 && (mode == HImode
3489 || (mode == QImode && outer == SIGN_EXTEND))))
3491 if (code == MULT)
3493 rtx xiop0 = XEXP (index, 0);
3494 rtx xiop1 = XEXP (index, 1);
3496 return ((arm_address_register_rtx_p (xiop0, strict_p)
3497 && power_of_two_operand (xiop1, SImode))
3498 || (arm_address_register_rtx_p (xiop1, strict_p)
3499 && power_of_two_operand (xiop0, SImode)));
3501 else if (code == LSHIFTRT || code == ASHIFTRT
3502 || code == ASHIFT || code == ROTATERT)
3504 rtx op = XEXP (index, 1);
3506 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
3507 && GET_CODE (op) == CONST_INT
3508 && INTVAL (op) > 0
3509 && INTVAL (op) <= 31);
3513 /* For ARM v4 we may be doing a sign-extend operation during the
3514 load. */
3515 if (arm_arch4)
3517 if (mode == HImode || (outer == SIGN_EXTEND && mode == QImode))
3518 range = 256;
3519 else
3520 range = 4096;
3522 else
3523 range = (mode == HImode) ? 4095 : 4096;
3525 return (code == CONST_INT
3526 && INTVAL (index) < range
3527 && INTVAL (index) > -range);
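/* Illustrative examples: [r0, r1, lsl #2] is accepted as an SImode
   address (register index scaled by a power of two), whereas an
   ARMv4 halfword access such as ldrh with offset #300 is rejected,
   since HImode offsets are then limited to -255..255.  */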
3530 /* Return nonzero if X is valid as a Thumb state base register. */
3531 static int
3532 thumb_base_register_rtx_p (rtx x, enum machine_mode mode, int strict_p)
3534 int regno;
3536 if (GET_CODE (x) != REG)
3537 return 0;
3539 regno = REGNO (x);
3541 if (strict_p)
3542 return THUMB_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
3544 return (regno <= LAST_LO_REGNUM
3545 || regno > LAST_VIRTUAL_REGISTER
3546 || regno == FRAME_POINTER_REGNUM
3547 || (GET_MODE_SIZE (mode) >= 4
3548 && (regno == STACK_POINTER_REGNUM
3549 || regno >= FIRST_PSEUDO_REGISTER
3550 || x == hard_frame_pointer_rtx
3551 || x == arg_pointer_rtx)));
3554 /* Return nonzero if x is a legitimate index register. This is the case
3555 for any base register that can access a QImode object. */
3556 inline static int
3557 thumb_index_register_rtx_p (rtx x, int strict_p)
3559 return thumb_base_register_rtx_p (x, QImode, strict_p);
3562 /* Return nonzero if x is a legitimate Thumb-state address.
3564 The AP may be eliminated to either the SP or the FP, so we use the
3565 least common denominator, e.g. SImode, and offsets from 0 to 64.
3567 ??? Verify whether the above is the right approach.
3569 ??? Also, the FP may be eliminated to the SP, so perhaps that
3570 needs special handling also.
3572 ??? Look at how the mips16 port solves this problem. It probably uses
3573 better ways to solve some of these problems.
3575 Although it is not incorrect, we don't accept QImode and HImode
3576 addresses based on the frame pointer or arg pointer until the
3577 reload pass starts. This is so that eliminating such addresses
3578 into stack based ones won't produce impossible code. */
3580 thumb_legitimate_address_p (enum machine_mode mode, rtx x, int strict_p)
3582 /* ??? Not clear if this is right. Experiment. */
3583 if (GET_MODE_SIZE (mode) < 4
3584 && !(reload_in_progress || reload_completed)
3585 && (reg_mentioned_p (frame_pointer_rtx, x)
3586 || reg_mentioned_p (arg_pointer_rtx, x)
3587 || reg_mentioned_p (virtual_incoming_args_rtx, x)
3588 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
3589 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
3590 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
3591 return 0;
3593 /* Accept any base register. SP only in SImode or larger. */
3594 else if (thumb_base_register_rtx_p (x, mode, strict_p))
3595 return 1;
3597 /* This is PC relative data before arm_reorg runs. */
3598 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
3599 && GET_CODE (x) == SYMBOL_REF
3600 && CONSTANT_POOL_ADDRESS_P (x) && ! flag_pic)
3601 return 1;
3603 /* This is PC relative data after arm_reorg runs. */
3604 else if (GET_MODE_SIZE (mode) >= 4 && reload_completed
3605 && (GET_CODE (x) == LABEL_REF
3606 || (GET_CODE (x) == CONST
3607 && GET_CODE (XEXP (x, 0)) == PLUS
3608 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
3609 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)))
3610 return 1;
3612 /* Post-inc indexing only supported for SImode and larger. */
3613 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
3614 && thumb_index_register_rtx_p (XEXP (x, 0), strict_p))
3615 return 1;
3617 else if (GET_CODE (x) == PLUS)
3619 /* REG+REG address can be any two index registers. */
3620 /* We disallow FRAME+REG addressing since we know that FRAME
3621 will be replaced with STACK, and SP relative addressing only
3622 permits SP+OFFSET. */
3623 if (GET_MODE_SIZE (mode) <= 4
3624 && XEXP (x, 0) != frame_pointer_rtx
3625 && XEXP (x, 1) != frame_pointer_rtx
3626 && thumb_index_register_rtx_p (XEXP (x, 0), strict_p)
3627 && thumb_index_register_rtx_p (XEXP (x, 1), strict_p))
3628 return 1;
3630 /* REG+const has a 5- to 7-bit offset (a scaled 5-bit field) for non-SP registers. */
3631 else if ((thumb_index_register_rtx_p (XEXP (x, 0), strict_p)
3632 || XEXP (x, 0) == arg_pointer_rtx)
3633 && GET_CODE (XEXP (x, 1)) == CONST_INT
3634 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
3635 return 1;
3637 /* REG+const has a 10-bit offset for SP, but only SImode and
3638 larger are supported. */
3639 /* ??? Should probably check for DI/DFmode overflow here
3640 just like GO_IF_LEGITIMATE_OFFSET does. */
3641 else if (GET_CODE (XEXP (x, 0)) == REG
3642 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
3643 && GET_MODE_SIZE (mode) >= 4
3644 && GET_CODE (XEXP (x, 1)) == CONST_INT
3645 && INTVAL (XEXP (x, 1)) >= 0
3646 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
3647 && (INTVAL (XEXP (x, 1)) & 3) == 0)
3648 return 1;
3650 else if (GET_CODE (XEXP (x, 0)) == REG
3651 && REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
3652 && GET_MODE_SIZE (mode) >= 4
3653 && GET_CODE (XEXP (x, 1)) == CONST_INT
3654 && (INTVAL (XEXP (x, 1)) & 3) == 0)
3655 return 1;
3658 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
3659 && GET_MODE_SIZE (mode) == 4
3660 && GET_CODE (x) == SYMBOL_REF
3661 && CONSTANT_POOL_ADDRESS_P (x)
3662 && !(flag_pic
3663 && symbol_mentioned_p (get_pool_constant (x))))
3664 return 1;
3666 return 0;
3669 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
3670 instruction of mode MODE. */
3672 thumb_legitimate_offset_p (enum machine_mode mode, HOST_WIDE_INT val)
3674 switch (GET_MODE_SIZE (mode))
3676 case 1:
3677 return val >= 0 && val < 32;
3679 case 2:
3680 return val >= 0 && val < 64 && (val & 1) == 0;
3682 default:
3683 return (val >= 0
3684 && (val + GET_MODE_SIZE (mode)) <= 128
3685 && (val & 3) == 0);
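/* In concrete terms (illustrative): byte accesses allow offsets
   0..31, halfword accesses 0..62 (even), and word accesses 0..124
   (word-aligned) -- the Thumb 5-bit offset field scaled by the
   access size.  */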
3689 /* Try machine-dependent ways of modifying an illegitimate address
3690 to be legitimate. If we find one, return the new, valid address. */
3692 arm_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
3694 if (GET_CODE (x) == PLUS)
3696 rtx xop0 = XEXP (x, 0);
3697 rtx xop1 = XEXP (x, 1);
3699 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
3700 xop0 = force_reg (SImode, xop0);
3702 if (CONSTANT_P (xop1) && !symbol_mentioned_p (xop1))
3703 xop1 = force_reg (SImode, xop1);
3705 if (ARM_BASE_REGISTER_RTX_P (xop0)
3706 && GET_CODE (xop1) == CONST_INT)
3708 HOST_WIDE_INT n, low_n;
3709 rtx base_reg, val;
3710 n = INTVAL (xop1);
3712 /* VFP addressing modes actually allow greater offsets, but for
3713 now we just stick with the lowest common denominator. */
3714 if (mode == DImode
3715 || ((TARGET_SOFT_FLOAT || TARGET_VFP) && mode == DFmode))
3717 low_n = n & 0x0f;
3718 n &= ~0x0f;
3719 if (low_n > 4)
3721 n += 16;
3722 low_n -= 16;
3725 else
3727 low_n = ((mode) == TImode ? 0
3728 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
3729 n -= low_n;
3732 base_reg = gen_reg_rtx (SImode);
3733 val = force_operand (gen_rtx_PLUS (SImode, xop0,
3734 GEN_INT (n)), NULL_RTX);
3735 emit_move_insn (base_reg, val);
3736 x = (low_n == 0 ? base_reg
3737 : gen_rtx_PLUS (SImode, base_reg, GEN_INT (low_n)));
3739 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
3740 x = gen_rtx_PLUS (SImode, xop0, xop1);
3743 /* XXX We don't allow MINUS any more -- see comment in
3744 arm_legitimate_address_p (). */
3745 else if (GET_CODE (x) == MINUS)
3747 rtx xop0 = XEXP (x, 0);
3748 rtx xop1 = XEXP (x, 1);
3750 if (CONSTANT_P (xop0))
3751 xop0 = force_reg (SImode, xop0);
3753 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
3754 xop1 = force_reg (SImode, xop1);
3756 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
3757 x = gen_rtx_MINUS (SImode, xop0, xop1);
3760 if (flag_pic)
3762 /* We need to find and carefully transform any SYMBOL and LABEL
3763 references; so go back to the original address expression. */
3764 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
3766 if (new_x != orig_x)
3767 x = new_x;
3770 return x;
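/* A worked example of the SImode case above: legitimizing
   (plus (reg r3) (const_int 0x1234)) splits the offset into
   n = 0x1000 and low_n = 0x234, emitting roughly

	add	rT, r3, #4096	@ rT: a fresh pseudo, shown schematically

   and returning (plus (reg rT) (const_int 0x234)), which is a valid
   12-bit offset address.  */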
3774 /* Try machine-dependent ways of modifying an illegitimate Thumb address
3775 to be legitimate. If we find one, return the new, valid address. */
3777 thumb_legitimize_address (rtx x, rtx orig_x, enum machine_mode mode)
3779 if (GET_CODE (x) == PLUS
3780 && GET_CODE (XEXP (x, 1)) == CONST_INT
3781 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
3782 || INTVAL (XEXP (x, 1)) < 0))
3784 rtx xop0 = XEXP (x, 0);
3785 rtx xop1 = XEXP (x, 1);
3786 HOST_WIDE_INT offset = INTVAL (xop1);
3788 /* Try to fold the offset into a biasing of the base register and
3789 then offset that. Only do this when optimizing for space, since
3790 it can otherwise cause too many CSEs. */
3791 if (optimize_size && offset >= 0
3792 && offset < 256 + 31 * GET_MODE_SIZE (mode))
3794 HOST_WIDE_INT delta;
3796 if (offset >= 256)
3797 delta = offset - (256 - GET_MODE_SIZE (mode));
3798 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
3799 delta = 31 * GET_MODE_SIZE (mode);
3800 else
3801 delta = offset & (~31 * GET_MODE_SIZE (mode));
3803 xop0 = force_operand (plus_constant (xop0, offset - delta),
3804 NULL_RTX);
3805 x = plus_constant (xop0, delta);
3807 else if (offset < 0 && offset > -256)
3808 /* Small negative offsets are best done with a subtract before the
3809 dereference, since forcing these into a register normally takes two
3810 instructions. */
3811 x = force_operand (x, NULL_RTX);
3812 else
3814 /* For the remaining cases, force the constant into a register. */
3815 xop1 = force_reg (SImode, xop1);
3816 x = gen_rtx_PLUS (SImode, xop0, xop1);
3819 else if (GET_CODE (x) == PLUS
3820 && s_register_operand (XEXP (x, 1), SImode)
3821 && !s_register_operand (XEXP (x, 0), SImode))
3823 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
3825 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
3828 if (flag_pic)
3830 /* We need to find and carefully transform any SYMBOL and LABEL
3831 references; so go back to the original address expression. */
3832 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
3834 if (new_x != orig_x)
3835 x = new_x;
3838 return x;
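/* A sketch of the rebasing above for SImode with optimize_size:
   given (plus (reg rB) (const_int 260)), offset >= 256 gives
   delta = 260 - (256 - 4) = 8, so we emit roughly

	add	rT, rB, #252	@ rB, rT: schematic names

   and return (plus (reg rT) (const_int 8)), now an in-range 5-bit
   scaled offset.  */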
3843 #define REG_OR_SUBREG_REG(X) \
3844 (GET_CODE (X) == REG \
3845 || (GET_CODE (X) == SUBREG && GET_CODE (SUBREG_REG (X)) == REG))
3847 #define REG_OR_SUBREG_RTX(X) \
3848 (GET_CODE (X) == REG ? (X) : SUBREG_REG (X))
3850 #ifndef COSTS_N_INSNS
3851 #define COSTS_N_INSNS(N) ((N) * 4 - 2)
3852 #endif
3853 static inline int
3854 thumb_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
3856 enum machine_mode mode = GET_MODE (x);
3858 switch (code)
3860 case ASHIFT:
3861 case ASHIFTRT:
3862 case LSHIFTRT:
3863 case ROTATERT:
3864 case PLUS:
3865 case MINUS:
3866 case COMPARE:
3867 case NEG:
3868 case NOT:
3869 return COSTS_N_INSNS (1);
3871 case MULT:
3872 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
3874 int cycles = 0;
3875 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
3877 while (i)
3879 i >>= 2;
3880 cycles++;
3882 return COSTS_N_INSNS (2) + cycles;
3884 return COSTS_N_INSNS (1) + 16;
3886 case SET:
3887 return (COSTS_N_INSNS (1)
3888 + 4 * ((GET_CODE (SET_SRC (x)) == MEM)
3889 + (GET_CODE (SET_DEST (x)) == MEM)));
3891 case CONST_INT:
3892 if (outer == SET)
3894 if ((unsigned HOST_WIDE_INT) INTVAL (x) < 256)
3895 return 0;
3896 if (thumb_shiftable_const (INTVAL (x)))
3897 return COSTS_N_INSNS (2);
3898 return COSTS_N_INSNS (3);
3900 else if ((outer == PLUS || outer == COMPARE)
3901 && INTVAL (x) < 256 && INTVAL (x) > -256)
3902 return 0;
3903 else if (outer == AND
3904 && INTVAL (x) < 256 && INTVAL (x) >= -256)
3905 return COSTS_N_INSNS (1);
3906 else if (outer == ASHIFT || outer == ASHIFTRT
3907 || outer == LSHIFTRT)
3908 return 0;
3909 return COSTS_N_INSNS (2);
3911 case CONST:
3912 case CONST_DOUBLE:
3913 case LABEL_REF:
3914 case SYMBOL_REF:
3915 return COSTS_N_INSNS (3);
3917 case UDIV:
3918 case UMOD:
3919 case DIV:
3920 case MOD:
3921 return 100;
3923 case TRUNCATE:
3924 return 99;
3926 case AND:
3927 case XOR:
3928 case IOR:
3929 /* XXX guess. */
3930 return 8;
3932 case MEM:
3933 /* XXX another guess. */
3934 /* Memory costs quite a lot for the first word, but subsequent words
3935 load at the equivalent of a single insn each. */
3936 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
3937 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
3938 ? 4 : 0));
3940 case IF_THEN_ELSE:
3941 /* XXX a guess. */
3942 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
3943 return 14;
3944 return 2;
3946 case ZERO_EXTEND:
3947 /* XXX still guessing. */
3948 switch (GET_MODE (XEXP (x, 0)))
3950 case QImode:
3951 return (1 + (mode == DImode ? 4 : 0)
3952 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
3954 case HImode:
3955 return (4 + (mode == DImode ? 4 : 0)
3956 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
3958 case SImode:
3959 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
3961 default:
3962 return 99;
3965 default:
3966 return 99;
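/* To illustrate the MULT pricing above: for (mult (reg) (const_int 100)),
   100 is 0b1100100, so the loop shifts it away two bits at a time in
   four steps, giving COSTS_N_INSNS (2) + 4.  The numbers are heuristic,
   roughly modelling a shift-and-add synthesis of the multiply.  */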
3971 /* Worker routine for arm_rtx_costs. */
3972 static inline int
3973 arm_rtx_costs_1 (rtx x, enum rtx_code code, enum rtx_code outer)
3975 enum machine_mode mode = GET_MODE (x);
3976 enum rtx_code subcode;
3977 int extra_cost;
3979 switch (code)
3981 case MEM:
3982 /* Memory costs quite a lot for the first word, but subsequent words
3983 load at the equivalent of a single insn each. */
3984 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
3985 + (GET_CODE (x) == SYMBOL_REF
3986 && CONSTANT_POOL_ADDRESS_P (x) ? 4 : 0));
3988 case DIV:
3989 case MOD:
3990 case UDIV:
3991 case UMOD:
3992 return optimize_size ? COSTS_N_INSNS (2) : 100;
3994 case ROTATE:
3995 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
3996 return 4;
3997 /* Fall through */
3998 case ROTATERT:
3999 if (mode != SImode)
4000 return 8;
4001 /* Fall through */
4002 case ASHIFT: case LSHIFTRT: case ASHIFTRT:
4003 if (mode == DImode)
4004 return (8 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : 8)
4005 + ((GET_CODE (XEXP (x, 0)) == REG
4006 || (GET_CODE (XEXP (x, 0)) == SUBREG
4007 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
4008 ? 0 : 8));
4009 return (1 + ((GET_CODE (XEXP (x, 0)) == REG
4010 || (GET_CODE (XEXP (x, 0)) == SUBREG
4011 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == REG))
4012 ? 0 : 4)
4013 + ((GET_CODE (XEXP (x, 1)) == REG
4014 || (GET_CODE (XEXP (x, 1)) == SUBREG
4015 && GET_CODE (SUBREG_REG (XEXP (x, 1))) == REG)
4016 || (GET_CODE (XEXP (x, 1)) == CONST_INT))
4017 ? 0 : 4));
4019 case MINUS:
4020 if (mode == DImode)
4021 return (4 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 8)
4022 + ((REG_OR_SUBREG_REG (XEXP (x, 0))
4023 || (GET_CODE (XEXP (x, 0)) == CONST_INT
4024 && const_ok_for_arm (INTVAL (XEXP (x, 0)))))
4025 ? 0 : 8));
4027 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4028 return (2 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4029 || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
4030 && arm_const_double_rtx (XEXP (x, 1))))
4031 ? 0 : 8)
4032 + ((REG_OR_SUBREG_REG (XEXP (x, 0))
4033 || (GET_CODE (XEXP (x, 0)) == CONST_DOUBLE
4034 && arm_const_double_rtx (XEXP (x, 0))))
4035 ? 0 : 8));
4037 if (((GET_CODE (XEXP (x, 0)) == CONST_INT
4038 && const_ok_for_arm (INTVAL (XEXP (x, 0)))
4039 && REG_OR_SUBREG_REG (XEXP (x, 1))))
4040 || (((subcode = GET_CODE (XEXP (x, 1))) == ASHIFT
4041 || subcode == ASHIFTRT || subcode == LSHIFTRT
4042 || subcode == ROTATE || subcode == ROTATERT
4043 || (subcode == MULT
4044 && GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT
4045 && ((INTVAL (XEXP (XEXP (x, 1), 1)) &
4046 (INTVAL (XEXP (XEXP (x, 1), 1)) - 1)) == 0)))
4047 && REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 0))
4048 && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 1), 1))
4049 || GET_CODE (XEXP (XEXP (x, 1), 1)) == CONST_INT)
4050 && REG_OR_SUBREG_REG (XEXP (x, 0))))
4051 return 1;
4052 /* Fall through */
4054 case PLUS:
4055 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4056 return (2 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
4057 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4058 || (GET_CODE (XEXP (x, 1)) == CONST_DOUBLE
4059 && arm_const_double_rtx (XEXP (x, 1))))
4060 ? 0 : 8));
4062 /* Fall through */
4063 case AND: case XOR: case IOR:
4064 extra_cost = 0;
4066 /* Normally the frame registers will be split into reg+const during
4067 reload, so it is a bad idea to combine them with other instructions,
4068 since then they might not be moved outside of loops. As a compromise
4069 we allow integration with ops that have a constant as their second
4070 operand. */
4071 if ((REG_OR_SUBREG_REG (XEXP (x, 0))
4072 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 0)))
4073 && GET_CODE (XEXP (x, 1)) != CONST_INT)
4074 || (REG_OR_SUBREG_REG (XEXP (x, 1))
4075 && ARM_FRAME_RTX (REG_OR_SUBREG_RTX (XEXP (x, 1)))))
4076 extra_cost = 4;
4078 if (mode == DImode)
4079 return (4 + extra_cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 8)
4080 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4081 || (GET_CODE (XEXP (x, 1)) == CONST_INT
4082 && const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
4083 ? 0 : 8));
4085 if (REG_OR_SUBREG_REG (XEXP (x, 0)))
4086 return (1 + (GET_CODE (XEXP (x, 1)) == CONST_INT ? 0 : extra_cost)
4087 + ((REG_OR_SUBREG_REG (XEXP (x, 1))
4088 || (GET_CODE (XEXP (x, 1)) == CONST_INT
4089 && const_ok_for_op (INTVAL (XEXP (x, 1)), code)))
4090 ? 0 : 4));
4092 else if (REG_OR_SUBREG_REG (XEXP (x, 1)))
4093 return (1 + extra_cost
4094 + ((((subcode = GET_CODE (XEXP (x, 0))) == ASHIFT
4095 || subcode == LSHIFTRT || subcode == ASHIFTRT
4096 || subcode == ROTATE || subcode == ROTATERT
4097 || (subcode == MULT
4098 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4099 && ((INTVAL (XEXP (XEXP (x, 0), 1)) &
4100 (INTVAL (XEXP (XEXP (x, 0), 1)) - 1)) == 0)))
4101 && (REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 0)))
4102 && ((REG_OR_SUBREG_REG (XEXP (XEXP (x, 0), 1)))
4103 || GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT))
4104 ? 0 : 4));
4106 return 8;
4108 case MULT:
4109 /* This should have been handled by the CPU specific routines. */
4110 gcc_unreachable ();
4112 case TRUNCATE:
4113 if (arm_arch3m && mode == SImode
4114 && GET_CODE (XEXP (x, 0)) == LSHIFTRT
4115 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
4116 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0))
4117 == GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)))
4118 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
4119 || GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND))
4120 return 8;
4121 return 99;
4123 case NEG:
4124 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4125 return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 6);
4126 /* Fall through */
4127 case NOT:
4128 if (mode == DImode)
4129 return 4 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
4131 return 1 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4);
4133 case IF_THEN_ELSE:
4134 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
4135 return 14;
4136 return 2;
4138 case COMPARE:
4139 return 1;
4141 case ABS:
4142 return 4 + (mode == DImode ? 4 : 0);
4144 case SIGN_EXTEND:
4145 if (GET_MODE (XEXP (x, 0)) == QImode)
4146 return (4 + (mode == DImode ? 4 : 0)
4147 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4148 /* Fall through */
4149 case ZERO_EXTEND:
4150 switch (GET_MODE (XEXP (x, 0)))
4152 case QImode:
4153 return (1 + (mode == DImode ? 4 : 0)
4154 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4156 case HImode:
4157 return (4 + (mode == DImode ? 4 : 0)
4158 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4160 case SImode:
4161 return (1 + (GET_CODE (XEXP (x, 0)) == MEM ? 10 : 0));
4163 case V8QImode:
4164 case V4HImode:
4165 case V2SImode:
4166 case V4QImode:
4167 case V2HImode:
4168 return 1;
4170 default:
4171 gcc_unreachable ();
4173 gcc_unreachable ();
4175 case CONST_INT:
4176 if (const_ok_for_arm (INTVAL (x)))
4177 return outer == SET ? 2 : -1;
4178 else if (outer == AND
4179 && const_ok_for_arm (~INTVAL (x)))
4180 return -1;
4181 else if ((outer == COMPARE
4182 || outer == PLUS || outer == MINUS)
4183 && const_ok_for_arm (-INTVAL (x)))
4184 return -1;
4185 else
4186 return 5;
4188 case CONST:
4189 case LABEL_REF:
4190 case SYMBOL_REF:
4191 return 6;
4193 case CONST_DOUBLE:
4194 if (arm_const_double_rtx (x))
4195 return outer == SET ? 2 : -1;
4196 else if ((outer == COMPARE || outer == PLUS)
4197 && neg_const_double_rtx_ok_for_fpa (x))
4198 return -1;
4199 return 7;
4201 default:
4202 return 99;
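/* The CONST_INT case above follows the ARM immediate encoding: a value
   is cheap if it is an 8-bit constant rotated right by an even amount.
   So, e.g., 0xFF000000 and 0x104 are encodable, while 0x101 spans nine
   bits, cannot be encoded directly, and costs 5.  */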
4206 /* RTX costs when optimizing for size. */
4207 static bool
4208 arm_size_rtx_costs (rtx x, int code, int outer_code, int *total)
4210 enum machine_mode mode = GET_MODE (x);
4212 if (TARGET_THUMB)
4214 /* XXX TBD. For now, use the standard costs. */
4215 *total = thumb_rtx_costs (x, code, outer_code);
4216 return true;
4219 switch (code)
4221 case MEM:
4222 /* A memory access costs 1 insn if the mode is small or the address is
4223 a single register; otherwise it costs one insn per word. */
4224 if (REG_P (XEXP (x, 0)))
4225 *total = COSTS_N_INSNS (1);
4226 else
4227 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
4228 return true;
4230 case DIV:
4231 case MOD:
4232 case UDIV:
4233 case UMOD:
4234 /* Needs a libcall, so it costs about this. */
4235 *total = COSTS_N_INSNS (2);
4236 return false;
4238 case ROTATE:
4239 if (mode == SImode && GET_CODE (XEXP (x, 1)) == REG)
4241 *total = COSTS_N_INSNS (2) + rtx_cost (XEXP (x, 0), code);
4242 return true;
4244 /* Fall through */
4245 case ROTATERT:
4246 case ASHIFT:
4247 case LSHIFTRT:
4248 case ASHIFTRT:
4249 if (mode == DImode && GET_CODE (XEXP (x, 1)) == CONST_INT)
4251 *total = COSTS_N_INSNS (3) + rtx_cost (XEXP (x, 0), code);
4252 return true;
4254 else if (mode == SImode)
4256 *total = COSTS_N_INSNS (1) + rtx_cost (XEXP (x, 0), code);
4257 /* Slightly disparage register shifts, but not by much. */
4258 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
4259 *total += 1 + rtx_cost (XEXP (x, 1), code);
4260 return true;
4263 /* Needs a libcall. */
4264 *total = COSTS_N_INSNS (2);
4265 return false;
4267 case MINUS:
4268 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
4270 *total = COSTS_N_INSNS (1);
4271 return false;
4274 if (mode == SImode)
4276 enum rtx_code subcode0 = GET_CODE (XEXP (x, 0));
4277 enum rtx_code subcode1 = GET_CODE (XEXP (x, 1));
4279 if (subcode0 == ROTATE || subcode0 == ROTATERT || subcode0 == ASHIFT
4280 || subcode0 == LSHIFTRT || subcode0 == ASHIFTRT
4281 || subcode1 == ROTATE || subcode1 == ROTATERT
4282 || subcode1 == ASHIFT || subcode1 == LSHIFTRT
4283 || subcode1 == ASHIFTRT)
4285 /* It's just the cost of the two operands. */
4286 *total = 0;
4287 return false;
4290 *total = COSTS_N_INSNS (1);
4291 return false;
4294 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
4295 return false;
4297 case PLUS:
4298 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
4300 *total = COSTS_N_INSNS (1);
4301 return false;
4304 /* Fall through */
4305 case AND: case XOR: case IOR:
4306 if (mode == SImode)
4308 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
4310 if (subcode == ROTATE || subcode == ROTATERT || subcode == ASHIFT
4311 || subcode == LSHIFTRT || subcode == ASHIFTRT
4312 || (code == AND && subcode == NOT))
4314 /* It's just the cost of the two operands. */
4315 *total = 0;
4316 return false;
4320 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
4321 return false;
4323 case MULT:
4324 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
4325 return false;
4327 case NEG:
4328 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
4329 *total = COSTS_N_INSNS (1);
4330 /* Fall through */
4331 case NOT:
4332 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
4334 return false;
4336 case IF_THEN_ELSE:
4337 *total = 0;
4338 return false;
4340 case COMPARE:
4341 if (cc_register (XEXP (x, 0), VOIDmode))
4342 *total = 0;
4343 else
4344 *total = COSTS_N_INSNS (1);
4345 return false;
4347 case ABS:
4348 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT)
4349 *total = COSTS_N_INSNS (1);
4350 else
4351 *total = COSTS_N_INSNS (1 + ARM_NUM_REGS (mode));
4352 return false;
4354 case SIGN_EXTEND:
4355 *total = 0;
4356 if (GET_MODE_SIZE (GET_MODE (XEXP (x, 0))) < 4)
4358 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
4359 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
4361 if (mode == DImode)
4362 *total += COSTS_N_INSNS (1);
4363 return false;
4365 case ZERO_EXTEND:
4366 *total = 0;
4367 if (!(arm_arch4 && MEM_P (XEXP (x, 0))))
4369 switch (GET_MODE (XEXP (x, 0)))
4371 case QImode:
4372 *total += COSTS_N_INSNS (1);
4373 break;
4375 case HImode:
4376 *total += COSTS_N_INSNS (arm_arch6 ? 1 : 2);
4377 break;
4378 case SImode:
4379 break;
4381 default:
4382 *total += COSTS_N_INSNS (2);
4386 if (mode == DImode)
4387 *total += COSTS_N_INSNS (1);
4389 return false;
4391 case CONST_INT:
4392 if (const_ok_for_arm (INTVAL (x)))
4393 *total = COSTS_N_INSNS (outer_code == SET ? 1 : 0);
4394 else if (const_ok_for_arm (~INTVAL (x)))
4395 *total = COSTS_N_INSNS (outer_code == AND ? 0 : 1);
4396 else if (const_ok_for_arm (-INTVAL (x)))
4398 if (outer_code == COMPARE || outer_code == PLUS
4399 || outer_code == MINUS)
4400 *total = 0;
4401 else
4402 *total = COSTS_N_INSNS (1);
4404 else
4405 *total = COSTS_N_INSNS (2);
4406 return true;
4408 case CONST:
4409 case LABEL_REF:
4410 case SYMBOL_REF:
4411 *total = COSTS_N_INSNS (2);
4412 return true;
4414 case CONST_DOUBLE:
4415 *total = COSTS_N_INSNS (4);
4416 return true;
4418 default:
4419 if (mode != VOIDmode)
4420 *total = COSTS_N_INSNS (ARM_NUM_REGS (mode));
4421 else
4422 *total = COSTS_N_INSNS (4); /* Who knows? */
4423 return false;
4427 /* RTX costs for cores with a slow MUL implementation. */
4429 static bool
4430 arm_slowmul_rtx_costs (rtx x, int code, int outer_code, int *total)
4432 enum machine_mode mode = GET_MODE (x);
4434 if (TARGET_THUMB)
4436 *total = thumb_rtx_costs (x, code, outer_code);
4437 return true;
4440 switch (code)
4442 case MULT:
4443 if (GET_MODE_CLASS (mode) == MODE_FLOAT
4444 || mode == DImode)
4446 *total = 30;
4447 return true;
4450 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4452 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
4453 & (unsigned HOST_WIDE_INT) 0xffffffff);
4454 int cost, const_ok = const_ok_for_arm (i);
4455 int j, booth_unit_size;
4457 /* Tune as appropriate. */
4458 cost = const_ok ? 4 : 8;
4459 booth_unit_size = 2;
4460 for (j = 0; i && j < 32; j += booth_unit_size)
4462 i >>= booth_unit_size;
4463 cost += 2;
4466 *total = cost;
4467 return true;
4470 *total = 30 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
4471 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
4472 return true;
4474 default:
4475 *total = arm_rtx_costs_1 (x, code, outer_code);
4476 return true;
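/* A worked example of the Booth loop above: for
   (mult (reg) (const_int 0x55)), 0x55 is a valid ARM immediate so the
   cost starts at 4, and consuming the eight significant bits two at a
   time takes four steps, giving *total = 4 + 8 = 12.  */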
4481 /* RTX cost for cores with a fast multiply unit (M variants). */
4483 static bool
4484 arm_fastmul_rtx_costs (rtx x, int code, int outer_code, int *total)
4486 enum machine_mode mode = GET_MODE (x);
4488 if (TARGET_THUMB)
4490 *total = thumb_rtx_costs (x, code, outer_code);
4491 return true;
4494 switch (code)
4496 case MULT:
4497 /* There is no point basing this on the tuning, since it is always the
4498 fast variant if it exists at all. */
4499 if (mode == DImode
4500 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
4501 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4502 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
4504 *total = 8;
4505 return true;
4509 if (GET_MODE_CLASS (mode) == MODE_FLOAT
4510 || mode == DImode)
4512 *total = 30;
4513 return true;
4516 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4518 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
4519 & (unsigned HOST_WIDE_INT) 0xffffffff);
4520 int cost, const_ok = const_ok_for_arm (i);
4521 int j, booth_unit_size;
4523 /* Tune as appropriate. */
4524 cost = const_ok ? 4 : 8;
4525 booth_unit_size = 8;
4526 for (j = 0; i && j < 32; j += booth_unit_size)
4528 i >>= booth_unit_size;
4529 cost += 2;
4532 *total = cost;
4533 return true;
4536 *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
4537 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
4538 return true;
4540 default:
4541 *total = arm_rtx_costs_1 (x, code, outer_code);
4542 return true;
4547 /* RTX cost for XScale CPUs. */
4549 static bool
4550 arm_xscale_rtx_costs (rtx x, int code, int outer_code, int *total)
4552 enum machine_mode mode = GET_MODE (x);
4554 if (TARGET_THUMB)
4556 *total = thumb_rtx_costs (x, code, outer_code);
4557 return true;
4560 switch (code)
4562 case MULT:
4563 /* There is no point basing this on the tuning, since it is always the
4564 fast variant if it exists at all. */
4565 if (mode == DImode
4566 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
4567 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4568 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
4570 *total = 8;
4571 return true;
4575 if (GET_MODE_CLASS (mode) == MODE_FLOAT
4576 || mode == DImode)
4578 *total = 30;
4579 return true;
4582 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4584 unsigned HOST_WIDE_INT i = (INTVAL (XEXP (x, 1))
4585 & (unsigned HOST_WIDE_INT) 0xffffffff);
4586 int cost, const_ok = const_ok_for_arm (i);
4587 unsigned HOST_WIDE_INT masked_const;
4589 /* The cost will be related to two insns.
4590 First a load of the constant (MOV or LDR), then a multiply. */
4591 cost = 2;
4592 if (! const_ok)
4593 cost += 1; /* LDR is probably more expensive because
4594 of longer result latency. */
4595 masked_const = i & 0xffff8000;
4596 if (masked_const != 0 && masked_const != 0xffff8000)
4598 masked_const = i & 0xf8000000;
4599 if (masked_const == 0 || masked_const == 0xf8000000)
4600 cost += 1;
4601 else
4602 cost += 2;
4604 *total = cost;
4605 return true;
4608 *total = 8 + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : 4)
4609 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : 4);
4610 return true;
4612 case COMPARE:
4613 /* A COMPARE of a MULT is slow on XScale; the muls instruction
4614 will stall until the multiplication is complete. */
4615 if (GET_CODE (XEXP (x, 0)) == MULT)
4616 *total = 4 + rtx_cost (XEXP (x, 0), code);
4617 else
4618 *total = arm_rtx_costs_1 (x, code, outer_code);
4619 return true;
4621 default:
4622 *total = arm_rtx_costs_1 (x, code, outer_code);
4623 return true;
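/* Tracing the constant case above with i = 0x12345678: the constant is
   not a valid ARM immediate (+1 for the LDR), i & 0xffff8000 is
   0x12340000 and i & 0xf8000000 is 0x10000000, so neither narrow
   multiplicand test passes (+2), giving *total = 2 + 1 + 2 = 5.  */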
4628 /* RTX costs for 9e (and later) cores. */
4630 static bool
4631 arm_9e_rtx_costs (rtx x, int code, int outer_code, int *total)
4633 enum machine_mode mode = GET_MODE (x);
4634 int nonreg_cost;
4635 int cost;
4637 if (TARGET_THUMB)
4639 switch (code)
4641 case MULT:
4642 *total = COSTS_N_INSNS (3);
4643 return true;
4645 default:
4646 *total = thumb_rtx_costs (x, code, outer_code);
4647 return true;
4651 switch (code)
4653 case MULT:
4654 /* There is no point basing this on the tuning, since it is always the
4655 fast variant if it exists at all. */
4656 if (mode == DImode
4657 && (GET_CODE (XEXP (x, 0)) == GET_CODE (XEXP (x, 1)))
4658 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
4659 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
4661 *total = 3;
4662 return true;
4666 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
4668 *total = 30;
4669 return true;
4671 if (mode == DImode)
4673 cost = 7;
4674 nonreg_cost = 8;
4676 else
4678 cost = 2;
4679 nonreg_cost = 4;
4683 *total = cost + (REG_OR_SUBREG_REG (XEXP (x, 0)) ? 0 : nonreg_cost)
4684 + (REG_OR_SUBREG_REG (XEXP (x, 1)) ? 0 : nonreg_cost);
4685 return true;
4687 default:
4688 *total = arm_rtx_costs_1 (x, code, outer_code);
4689 return true;
4692 /* All address computations that can be done are free, but rtx cost returns
4693 the same for practically all of them. So we weight the different types
4694 of address here in the order (most preferred first):
4695 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
4696 static inline int
4697 arm_arm_address_cost (rtx x)
4699 enum rtx_code c = GET_CODE (x);
4701 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
4702 return 0;
4703 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
4704 return 10;
4706 if (c == PLUS || c == MINUS)
4708 if (GET_CODE (XEXP (x, 0)) == CONST_INT)
4709 return 2;
4711 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
4712 return 3;
4714 return 4;
4717 return 6;
4720 static inline int
4721 arm_thumb_address_cost (rtx x)
4723 enum rtx_code c = GET_CODE (x);
4725 if (c == REG)
4726 return 1;
4727 if (c == PLUS
4728 && GET_CODE (XEXP (x, 0)) == REG
4729 && GET_CODE (XEXP (x, 1)) == CONST_INT)
4730 return 1;
4732 return 2;
4735 static int
4736 arm_address_cost (rtx x)
4738 return TARGET_ARM ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
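/* Putting the ARM-mode weights together, e.g.:

	(post_inc (reg r4))			cost 0
	(plus (reg r4) (mult (reg r5) ...))	cost 3
	(plus (reg r4) (const_int 8))		cost 4
	(reg r4)				cost 6
	(symbol_ref "x")			cost 10  */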
4741 static int
4742 arm_adjust_cost (rtx insn, rtx link, rtx dep, int cost)
4744 rtx i_pat, d_pat;
4746 /* Some true dependencies can have a higher cost depending
4747 on precisely how certain input operands are used. */
4748 if (arm_tune_xscale
4749 && REG_NOTE_KIND (link) == 0
4750 && recog_memoized (insn) >= 0
4751 && recog_memoized (dep) >= 0)
4753 int shift_opnum = get_attr_shift (insn);
4754 enum attr_type attr_type = get_attr_type (dep);
4756 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
4757 operand for INSN. If we have a shifted input operand and the
4758 instruction we depend on is another ALU instruction, then we may
4759 have to account for an additional stall. */
4760 if (shift_opnum != 0
4761 && (attr_type == TYPE_ALU_SHIFT || attr_type == TYPE_ALU_SHIFT_REG))
4763 rtx shifted_operand;
4764 int opno;
4766 /* Get the shifted operand. */
4767 extract_insn (insn);
4768 shifted_operand = recog_data.operand[shift_opnum];
4770 /* Iterate over all the operands in DEP. If we write an operand
4771 that overlaps with SHIFTED_OPERAND, then we have to increase the
4772 cost of this dependency. */
4773 extract_insn (dep);
4774 preprocess_constraints ();
4775 for (opno = 0; opno < recog_data.n_operands; opno++)
4777 /* We can ignore strict inputs. */
4778 if (recog_data.operand_type[opno] == OP_IN)
4779 continue;
4781 if (reg_overlap_mentioned_p (recog_data.operand[opno],
4782 shifted_operand))
4783 return 2;
4788 /* XXX This is not strictly true for the FPA. */
4789 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
4790 || REG_NOTE_KIND (link) == REG_DEP_OUTPUT)
4791 return 0;
4793 /* Call insns don't incur a stall, even if they follow a load. */
4794 if (REG_NOTE_KIND (link) == 0
4795 && GET_CODE (insn) == CALL_INSN)
4796 return 1;
4798 if ((i_pat = single_set (insn)) != NULL
4799 && GET_CODE (SET_SRC (i_pat)) == MEM
4800 && (d_pat = single_set (dep)) != NULL
4801 && GET_CODE (SET_DEST (d_pat)) == MEM)
4803 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
4804 /* This is a load after a store; there is no conflict if the load reads
4805 from a cached area. Assume that loads from the stack, and from the
4806 constant pool are cached, and that others will miss. This is a
4807 hack. */
4809 if ((GET_CODE (src_mem) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (src_mem))
4810 || reg_mentioned_p (stack_pointer_rtx, src_mem)
4811 || reg_mentioned_p (frame_pointer_rtx, src_mem)
4812 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
4813 return 1;
4816 return cost;
4819 static int fp_consts_inited = 0;
4821 /* Only zero is valid for VFP. Other values are also valid for FPA. */
4822 static const char * const strings_fp[8] =
4824 "0", "1", "2", "3",
4825 "4", "5", "0.5", "10"
4828 static REAL_VALUE_TYPE values_fp[8];
4830 static void
4831 init_fp_table (void)
4833 int i;
4834 REAL_VALUE_TYPE r;
4836 if (TARGET_VFP)
4837 fp_consts_inited = 1;
4838 else
4839 fp_consts_inited = 8;
4841 for (i = 0; i < fp_consts_inited; i++)
4843 r = REAL_VALUE_ATOF (strings_fp[i], DFmode);
4844 values_fp[i] = r;
4848 /* Return TRUE if rtx X is a valid immediate FP constant. */
4850 arm_const_double_rtx (rtx x)
4852 REAL_VALUE_TYPE r;
4853 int i;
4855 if (!fp_consts_inited)
4856 init_fp_table ();
4858 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4859 if (REAL_VALUE_MINUS_ZERO (r))
4860 return 0;
4862 for (i = 0; i < fp_consts_inited; i++)
4863 if (REAL_VALUES_EQUAL (r, values_fp[i]))
4864 return 1;
4866 return 0;
4869 /* Return TRUE if rtx X is a valid immediate FPA constant. */
4871 neg_const_double_rtx_ok_for_fpa (rtx x)
4873 REAL_VALUE_TYPE r;
4874 int i;
4876 if (!fp_consts_inited)
4877 init_fp_table ();
4879 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4880 r = REAL_VALUE_NEGATE (r);
4881 if (REAL_VALUE_MINUS_ZERO (r))
4882 return 0;
4884 for (i = 0; i < 8; i++)
4885 if (REAL_VALUES_EQUAL (r, values_fp[i]))
4886 return 1;
4888 return 0;
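/* In other words, the FPA immediate set is 0, 1, 2, 3, 4, 5, 0.5 and
   10: (const_double 3.0) is directly valid, and -3.0 is usable only
   through the negated test above (i.e. by selecting the reversed form
   of the instruction).  VFP accepts only 0.  */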
4891 /* Predicates for `match_operand' and `match_operator'. */
4893 /* Return nonzero if OP is a valid Cirrus memory address pattern. */
4895 cirrus_memory_offset (rtx op)
4897 /* Reject eliminable registers. */
4898 if (! (reload_in_progress || reload_completed)
4899 && ( reg_mentioned_p (frame_pointer_rtx, op)
4900 || reg_mentioned_p (arg_pointer_rtx, op)
4901 || reg_mentioned_p (virtual_incoming_args_rtx, op)
4902 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
4903 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
4904 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
4905 return 0;
4907 if (GET_CODE (op) == MEM)
4909 rtx ind;
4911 ind = XEXP (op, 0);
4913 /* Match: (mem (reg)). */
4914 if (GET_CODE (ind) == REG)
4915 return 1;
4917 /* Match:
4918 (mem (plus (reg)
4919 (const))). */
4920 if (GET_CODE (ind) == PLUS
4921 && GET_CODE (XEXP (ind, 0)) == REG
4922 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
4923 && GET_CODE (XEXP (ind, 1)) == CONST_INT)
4924 return 1;
4927 return 0;
4930 /* Return TRUE if OP is a valid VFP memory address pattern.
4931 If WB is true, writeback address modes are allowed. */
4934 arm_coproc_mem_operand (rtx op, bool wb)
4936 rtx ind;
4938 /* Reject eliminable registers. */
4939 if (! (reload_in_progress || reload_completed)
4940 && ( reg_mentioned_p (frame_pointer_rtx, op)
4941 || reg_mentioned_p (arg_pointer_rtx, op)
4942 || reg_mentioned_p (virtual_incoming_args_rtx, op)
4943 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
4944 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
4945 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
4946 return FALSE;
4948 /* Constants are converted into offsets from labels. */
4949 if (GET_CODE (op) != MEM)
4950 return FALSE;
4952 ind = XEXP (op, 0);
4954 if (reload_completed
4955 && (GET_CODE (ind) == LABEL_REF
4956 || (GET_CODE (ind) == CONST
4957 && GET_CODE (XEXP (ind, 0)) == PLUS
4958 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
4959 && GET_CODE (XEXP (XEXP (ind, 0), 1)) == CONST_INT)))
4960 return TRUE;
4962 /* Match: (mem (reg)). */
4963 if (GET_CODE (ind) == REG)
4964 return arm_address_register_rtx_p (ind, 0);
4966 /* Auto-increment addressing modes. */
4967 if (wb
4968 && (GET_CODE (ind) == PRE_INC
4969 || GET_CODE (ind) == POST_INC
4970 || GET_CODE (ind) == PRE_DEC
4971 || GET_CODE (ind) == POST_DEC))
4972 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
4974 if (wb
4975 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
4976 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
4977 && GET_CODE (XEXP (ind, 1)) == PLUS
4978 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
4979 ind = XEXP (ind, 1);
4981 /* Match:
4982 (plus (reg)
4983 (const)). */
4984 if (GET_CODE (ind) == PLUS
4985 && GET_CODE (XEXP (ind, 0)) == REG
4986 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
4987 && GET_CODE (XEXP (ind, 1)) == CONST_INT
4988 && INTVAL (XEXP (ind, 1)) > -1024
4989 && INTVAL (XEXP (ind, 1)) < 1024
4990 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
4991 return TRUE;
4993 return FALSE;
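/* For example, with WB false, (mem:DF (plus (reg r4) (const_int 8)))
   is accepted -- a word-aligned offset inside (-1024, 1024) -- while
   offsets of 1024 or a misaligned 6 are rejected, matching the VFP
   load/store offset encoding (an 8-bit immediate scaled by 4).  */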
4996 /* Return true if X is a register that will be eliminated later on. */
4998 arm_eliminable_register (rtx x)
5000 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
5001 || REGNO (x) == ARG_POINTER_REGNUM
5002 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
5003 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
5006 /* Return GENERAL_REGS if a scratch register required to reload x to/from
5007 VFP registers. Otherwise return NO_REGS. */
5009 enum reg_class
5010 vfp_secondary_reload_class (enum machine_mode mode, rtx x)
5012 if (arm_coproc_mem_operand (x, FALSE) || s_register_operand (x, mode))
5013 return NO_REGS;
5015 return GENERAL_REGS;
5018 /* Values which must be returned in the most-significant end of the return
5019 register. */
5021 static bool
5022 arm_return_in_msb (tree valtype)
5024 return (TARGET_AAPCS_BASED
5025 && BYTES_BIG_ENDIAN
5026 && (AGGREGATE_TYPE_P (valtype)
5027 || TREE_CODE (valtype) == COMPLEX_TYPE));
5030 /* Returns TRUE if INSN is an "LDR REG, ADDR" instruction.
5031 Used by the Cirrus Maverick code, which has to work around
5032 a hardware bug triggered by such instructions. */
5033 static bool
5034 arm_memory_load_p (rtx insn)
5036 rtx body, lhs, rhs;
5038 if (insn == NULL_RTX || GET_CODE (insn) != INSN)
5039 return false;
5041 body = PATTERN (insn);
5043 if (GET_CODE (body) != SET)
5044 return false;
5046 lhs = XEXP (body, 0);
5047 rhs = XEXP (body, 1);
5049 lhs = REG_OR_SUBREG_RTX (lhs);
5051 /* If the destination is not a general purpose
5052 register we do not have to worry. */
5053 if (GET_CODE (lhs) != REG
5054 || REGNO_REG_CLASS (REGNO (lhs)) != GENERAL_REGS)
5055 return false;
5057 /* As well as loads from memory we also have to react
5058 to loads of invalid constants which will be turned
5059 into loads from the minipool. */
5060 return (GET_CODE (rhs) == MEM
5061 || GET_CODE (rhs) == SYMBOL_REF
5062 || note_invalid_constants (insn, -1, false));
5065 /* Return TRUE if INSN is a Cirrus instruction. */
5066 static bool
5067 arm_cirrus_insn_p (rtx insn)
5069 enum attr_cirrus attr;
5071 /* get_attr cannot accept USE or CLOBBER. */
5072 if (!insn
5073 || GET_CODE (insn) != INSN
5074 || GET_CODE (PATTERN (insn)) == USE
5075 || GET_CODE (PATTERN (insn)) == CLOBBER)
5076 return 0;
5078 attr = get_attr_cirrus (insn);
5080 return attr != CIRRUS_NOT;
5083 /* Cirrus reorg for invalid instruction combinations. */
5084 static void
5085 cirrus_reorg (rtx first)
5087 enum attr_cirrus attr;
5088 rtx body = PATTERN (first);
5089 rtx t;
5090 int nops;
5092 /* Any branch must be followed by 2 non-Cirrus instructions. */
5093 if (GET_CODE (first) == JUMP_INSN && GET_CODE (body) != RETURN)
5095 nops = 0;
5096 t = next_nonnote_insn (first);
5098 if (arm_cirrus_insn_p (t))
5099 ++ nops;
5101 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
5102 ++ nops;
5104 while (nops --)
5105 emit_insn_after (gen_nop (), first);
5107 return;
5110 /* (float (blah)) is in parallel with a clobber. */
5111 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
5112 body = XVECEXP (body, 0, 0);
5114 if (GET_CODE (body) == SET)
5116 rtx lhs = XEXP (body, 0), rhs = XEXP (body, 1);
5118 /* cfldrd, cfldr64, cfstrd, cfstr64 must
5119 be followed by a non-Cirrus insn. */
5120 if (get_attr_cirrus (first) == CIRRUS_DOUBLE)
5122 if (arm_cirrus_insn_p (next_nonnote_insn (first)))
5123 emit_insn_after (gen_nop (), first);
5125 return;
5127 else if (arm_memory_load_p (first))
5129 unsigned int arm_regno;
5131 /* Any ldr/cfmvdlr, ldr/cfmvdhr, ldr/cfmvsr, ldr/cfmv64lr,
5132 ldr/cfmv64hr combination where the Rd field is the same
5133 in both instructions must be split with a non-Cirrus
5134 insn. Example:
5136 ldr r0, blah
5138 cfmvsr mvf0, r0. */
5140 /* Get Arm register number for ldr insn. */
5141 if (GET_CODE (lhs) == REG)
5142 arm_regno = REGNO (lhs);
5143 else
5145 gcc_assert (GET_CODE (rhs) == REG);
5146 arm_regno = REGNO (rhs);
5149 /* Next insn. */
5150 first = next_nonnote_insn (first);
5152 if (! arm_cirrus_insn_p (first))
5153 return;
5155 body = PATTERN (first);
5157 /* (float (blah)) is in parallel with a clobber. */
5158 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0))
5159 body = XVECEXP (body, 0, 0);
5161 if (GET_CODE (body) == FLOAT)
5162 body = XEXP (body, 0);
5164 if (get_attr_cirrus (first) == CIRRUS_MOVE
5165 && GET_CODE (XEXP (body, 1)) == REG
5166 && arm_regno == REGNO (XEXP (body, 1)))
5167 emit_insn_after (gen_nop (), first);
5169 return;
5173 /* get_attr cannot accept USE or CLOBBER. */
5174 if (!first
5175 || GET_CODE (first) != INSN
5176 || GET_CODE (PATTERN (first)) == USE
5177 || GET_CODE (PATTERN (first)) == CLOBBER)
5178 return;
5180 attr = get_attr_cirrus (first);
5182 /* Any coprocessor compare instruction (cfcmps, cfcmpd, ...)
5183 must be followed by a non-coprocessor instruction. */
5184 if (attr == CIRRUS_COMPARE)
5186 nops = 0;
5188 t = next_nonnote_insn (first);
5190 if (arm_cirrus_insn_p (t))
5191 ++ nops;
5193 if (arm_cirrus_insn_p (next_nonnote_insn (t)))
5194 ++ nops;
5196 while (nops --)
5197 emit_insn_after (gen_nop (), first);
5199 return;
5203 /* Return TRUE if X references a SYMBOL_REF. */
5205 symbol_mentioned_p (rtx x)
5207 const char * fmt;
5208 int i;
5210 if (GET_CODE (x) == SYMBOL_REF)
5211 return 1;
5213 fmt = GET_RTX_FORMAT (GET_CODE (x));
5215 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5217 if (fmt[i] == 'E')
5219 int j;
5221 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5222 if (symbol_mentioned_p (XVECEXP (x, i, j)))
5223 return 1;
5225 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
5226 return 1;
5229 return 0;
5232 /* Return TRUE if X references a LABEL_REF. */
5234 label_mentioned_p (rtx x)
5236 const char * fmt;
5237 int i;
5239 if (GET_CODE (x) == LABEL_REF)
5240 return 1;
5242 fmt = GET_RTX_FORMAT (GET_CODE (x));
5243 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
5245 if (fmt[i] == 'E')
5247 int j;
5249 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
5250 if (label_mentioned_p (XVECEXP (x, i, j)))
5251 return 1;
5253 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
5254 return 1;
5257 return 0;
5260 enum rtx_code
5261 minmax_code (rtx x)
5263 enum rtx_code code = GET_CODE (x);
5265 switch (code)
5267 case SMAX:
5268 return GE;
5269 case SMIN:
5270 return LE;
5271 case UMIN:
5272 return LEU;
5273 case UMAX:
5274 return GEU;
5275 default:
5276 gcc_unreachable ();
5280 /* Return 1 if memory locations are adjacent. */
5282 adjacent_mem_locations (rtx a, rtx b)
5284 /* We don't guarantee to preserve the order of these memory refs. */
5285 if (volatile_refs_p (a) || volatile_refs_p (b))
5286 return 0;
5288 if ((GET_CODE (XEXP (a, 0)) == REG
5289 || (GET_CODE (XEXP (a, 0)) == PLUS
5290 && GET_CODE (XEXP (XEXP (a, 0), 1)) == CONST_INT))
5291 && (GET_CODE (XEXP (b, 0)) == REG
5292 || (GET_CODE (XEXP (b, 0)) == PLUS
5293 && GET_CODE (XEXP (XEXP (b, 0), 1)) == CONST_INT)))
5295 HOST_WIDE_INT val0 = 0, val1 = 0;
5296 rtx reg0, reg1;
5297 int val_diff;
5299 if (GET_CODE (XEXP (a, 0)) == PLUS)
5301 reg0 = XEXP (XEXP (a, 0), 0);
5302 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
5304 else
5305 reg0 = XEXP (a, 0);
5307 if (GET_CODE (XEXP (b, 0)) == PLUS)
5309 reg1 = XEXP (XEXP (b, 0), 0);
5310 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
5312 else
5313 reg1 = XEXP (b, 0);
5315 /* Don't accept any offset that will require multiple
5316 instructions to handle, since this would cause the
5317 arith_adjacentmem pattern to output an overlong sequence. */
5318 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
5319 return 0;
5321 /* Don't allow an eliminable register: register elimination can make
5322 the offset too large. */
5323 if (arm_eliminable_register (reg0))
5324 return 0;
5326 val_diff = val1 - val0;
5328 if (arm_ld_sched)
5330 /* If the target has load delay slots, then there's no benefit
5331 to using an ldm instruction unless the offset is zero and
5332 we are optimizing for size. */
5333 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
5334 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
5335 && (val_diff == 4 || val_diff == -4));
5338 return ((REGNO (reg0) == REGNO (reg1))
5339 && (val_diff == 4 || val_diff == -4));
5342 return 0;
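/* For example, (mem (plus (reg r5) (const_int 8))) and
   (mem (plus (reg r5) (const_int 12))) count as adjacent on cores
   without load scheduling; with ARM_LD_SCHED we additionally require
   optimize_size and that the pair start at offset 0 or 4.  */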
5346 load_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
5347 HOST_WIDE_INT *load_offset)
5349 int unsorted_regs[4];
5350 HOST_WIDE_INT unsorted_offsets[4];
5351 int order[4];
5352 int base_reg = -1;
5353 int i;
5355 /* Can only handle 2, 3, or 4 insns at present,
5356 though could be easily extended if required. */
5357 gcc_assert (nops >= 2 && nops <= 4);
5359 /* Loop over the operands and check that the memory references are
5360 suitable (i.e. immediate offsets from the same base register). At
5361 the same time, extract the target register, and the memory
5362 offsets. */
5363 for (i = 0; i < nops; i++)
5365 rtx reg;
5366 rtx offset;
5368 /* Convert a subreg of a mem into the mem itself. */
5369 if (GET_CODE (operands[nops + i]) == SUBREG)
5370 operands[nops + i] = alter_subreg (operands + (nops + i));
5372 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
5374 /* Don't reorder volatile memory references; it doesn't seem worth
5375 looking for the case where the order is ok anyway. */
5376 if (MEM_VOLATILE_P (operands[nops + i]))
5377 return 0;
5379 offset = const0_rtx;
5381 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
5382 || (GET_CODE (reg) == SUBREG
5383 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
5384 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
5385 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
5386 == REG)
5387 || (GET_CODE (reg) == SUBREG
5388 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
5389 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
5390 == CONST_INT)))
5392 if (i == 0)
5394 base_reg = REGNO (reg);
5395 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
5396 ? REGNO (operands[i])
5397 : REGNO (SUBREG_REG (operands[i])));
5398 order[0] = 0;
5400 else
5402 if (base_reg != (int) REGNO (reg))
5403 /* Not addressed from the same base register. */
5404 return 0;
5406 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
5407 ? REGNO (operands[i])
5408 : REGNO (SUBREG_REG (operands[i])));
5409 if (unsorted_regs[i] < unsorted_regs[order[0]])
5410 order[0] = i;
5413 /* If it isn't an integer register, or if it overwrites the
5414 base register but isn't the last insn in the list, then
5415 we can't do this. */
5416 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14
5417 || (i != nops - 1 && unsorted_regs[i] == base_reg))
5418 return 0;
5420 unsorted_offsets[i] = INTVAL (offset);
5422 else
5423 /* Not a suitable memory address. */
5424 return 0;
5427 /* All the useful information has now been extracted from the
5428 operands into unsorted_regs and unsorted_offsets; additionally,
5429 order[0] has been set to the lowest numbered register in the
5430 list. Sort the registers into order, and check that the memory
5431 offsets are ascending and adjacent. */
5433 for (i = 1; i < nops; i++)
5435 int j;
5437 order[i] = order[i - 1];
5438 for (j = 0; j < nops; j++)
5439 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
5440 && (order[i] == order[i - 1]
5441 || unsorted_regs[j] < unsorted_regs[order[i]]))
5442 order[i] = j;
5444 /* Have we found a suitable register? If not, one must be used more
5445 than once. */
5446 if (order[i] == order[i - 1])
5447 return 0;
5449 /* Is the memory address adjacent and ascending? */
5450 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
5451 return 0;
5454 if (base)
5456 *base = base_reg;
5458 for (i = 0; i < nops; i++)
5459 regs[i] = unsorted_regs[order[i]];
5461 *load_offset = unsorted_offsets[order[0]];
5464 if (unsorted_offsets[order[0]] == 0)
5465 return 1; /* ldmia */
5467 if (unsorted_offsets[order[0]] == 4)
5468 return 2; /* ldmib */
5470 if (unsorted_offsets[order[nops - 1]] == 0)
5471 return 3; /* ldmda */
5473 if (unsorted_offsets[order[nops - 1]] == -4)
5474 return 4; /* ldmdb */
5476 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
5477 if the offset isn't small enough. The reason 2 ldrs are faster
5478 is because these ARMs are able to do more than one cache access
5479 in a single cycle. The ARM9 and StrongARM have Harvard caches,
5480 whilst the ARM8 has a double bandwidth cache. This means that
5481 these cores can do both an instruction fetch and a data fetch in
5482 a single cycle, so the trick of calculating the address into a
5483 scratch register (one of the result regs) and then doing a load
5484 multiple actually becomes slower (and no smaller in code size).
5485 That is the transformation
5487 ldr rd1, [rbase + offset]
5488 ldr rd2, [rbase + offset + 4]
5492 add rd1, rbase, offset
5493 ldmia rd1, {rd1, rd2}
5495 produces worse code -- '3 cycles + any stalls on rd2' instead of
5496 '2 cycles + any stalls on rd2'. On ARMs with only one cache
5497 access per cycle, the first sequence could never complete in less
5498 than 6 cycles, whereas the ldm sequence would only take 5 and
5499 would make better use of sequential accesses if not hitting the
5500 cache.
5502 We cheat here and test 'arm_ld_sched' which we currently know to
5503 only be true for the ARM8, ARM9 and StrongARM. If this ever
5504 changes, then the test below needs to be reworked. */
5505 if (nops == 2 && arm_ld_sched)
5506 return 0;
5508 /* Can't do it without setting up the offset, only do this if it takes
5509 no more than one insn. */
5510 return (const_ok_for_arm (unsorted_offsets[order[0]])
5511 || const_ok_for_arm (-unsorted_offsets[order[0]])) ? 5 : 0;
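/* Summarizing the return codes: loads of r0..r3 from [r4] through
   [r4, #12] give 1 (ldmia); offsets starting at 4 give 2 (ldmib);
   offsets ending at 0 or -4 give 3 or 4 (ldmda/ldmdb); any other run
   reachable with a single add/sub gives 5; and 0 means "emit separate
   ldrs instead".  */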
5514 const char *
5515 emit_ldm_seq (rtx *operands, int nops)
5517 int regs[4];
5518 int base_reg;
5519 HOST_WIDE_INT offset;
5520 char buf[100];
5521 int i;
5523 switch (load_multiple_sequence (operands, nops, regs, &base_reg, &offset))
5525 case 1:
5526 strcpy (buf, "ldm%?ia\t");
5527 break;
5529 case 2:
5530 strcpy (buf, "ldm%?ib\t");
5531 break;
5533 case 3:
5534 strcpy (buf, "ldm%?da\t");
5535 break;
5537 case 4:
5538 strcpy (buf, "ldm%?db\t");
5539 break;
5541 case 5:
5542 if (offset >= 0)
5543 sprintf (buf, "add%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
5544 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
5545 (long) offset);
5546 else
5547 sprintf (buf, "sub%%?\t%s%s, %s%s, #%ld", REGISTER_PREFIX,
5548 reg_names[regs[0]], REGISTER_PREFIX, reg_names[base_reg],
5549 (long) -offset);
5550 output_asm_insn (buf, operands);
5551 base_reg = regs[0];
5552 strcpy (buf, "ldm%?ia\t");
5553 break;
5555 default:
5556 gcc_unreachable ();
5559 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
5560 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
5562 for (i = 1; i < nops; i++)
5563 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
5564 reg_names[regs[i]]);
5566 strcat (buf, "}\t%@ phole ldm");
5568 output_asm_insn (buf, operands);
5569 return "";
5573 store_multiple_sequence (rtx *operands, int nops, int *regs, int *base,
5574 HOST_WIDE_INT * load_offset)
5576 int unsorted_regs[4];
5577 HOST_WIDE_INT unsorted_offsets[4];
5578 int order[4];
5579 int base_reg = -1;
5580 int i;
5582 /* Can only handle 2, 3, or 4 insns at present, though could be easily
5583 extended if required. */
5584 gcc_assert (nops >= 2 && nops <= 4);
5586 /* Loop over the operands and check that the memory references are
5587 suitable (i.e. immediate offsets from the same base register). At
5588 the same time, extract the target register, and the memory
5589 offsets. */
5590 for (i = 0; i < nops; i++)
5592 rtx reg;
5593 rtx offset;
5595 /* Convert a subreg of a mem into the mem itself. */
5596 if (GET_CODE (operands[nops + i]) == SUBREG)
5597 operands[nops + i] = alter_subreg (operands + (nops + i));
5599 gcc_assert (GET_CODE (operands[nops + i]) == MEM);
5601 /* Don't reorder volatile memory references; it doesn't seem worth
5602 looking for the case where the order is ok anyway. */
5603 if (MEM_VOLATILE_P (operands[nops + i]))
5604 return 0;
5606 offset = const0_rtx;
5608 if ((GET_CODE (reg = XEXP (operands[nops + i], 0)) == REG
5609 || (GET_CODE (reg) == SUBREG
5610 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
5611 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
5612 && ((GET_CODE (reg = XEXP (XEXP (operands[nops + i], 0), 0))
5613 == REG)
5614 || (GET_CODE (reg) == SUBREG
5615 && GET_CODE (reg = SUBREG_REG (reg)) == REG))
5616 && (GET_CODE (offset = XEXP (XEXP (operands[nops + i], 0), 1))
5617 == CONST_INT)))
5619 if (i == 0)
5621 base_reg = REGNO (reg);
5622 unsorted_regs[0] = (GET_CODE (operands[i]) == REG
5623 ? REGNO (operands[i])
5624 : REGNO (SUBREG_REG (operands[i])));
5625 order[0] = 0;
5627 else
5629 if (base_reg != (int) REGNO (reg))
5630 /* Not addressed from the same base register. */
5631 return 0;
5633 unsorted_regs[i] = (GET_CODE (operands[i]) == REG
5634 ? REGNO (operands[i])
5635 : REGNO (SUBREG_REG (operands[i])));
5636 if (unsorted_regs[i] < unsorted_regs[order[0]])
5637 order[0] = i;
5640 /* If it isn't an integer register, then we can't do this. */
5641 if (unsorted_regs[i] < 0 || unsorted_regs[i] > 14)
5642 return 0;
5644 unsorted_offsets[i] = INTVAL (offset);
5646 else
5647 /* Not a suitable memory address. */
5648 return 0;
5651 /* All the useful information has now been extracted from the
5652 operands into unsorted_regs and unsorted_offsets; additionally,
5653 order[0] has been set to the lowest numbered register in the
5654 list. Sort the registers into order, and check that the memory
5655 offsets are ascending and adjacent. */
5657 for (i = 1; i < nops; i++)
5659 int j;
5661 order[i] = order[i - 1];
5662 for (j = 0; j < nops; j++)
5663 if (unsorted_regs[j] > unsorted_regs[order[i - 1]]
5664 && (order[i] == order[i - 1]
5665 || unsorted_regs[j] < unsorted_regs[order[i]]))
5666 order[i] = j;
5668 /* Have we found a suitable register? If not, one must be used more
5669 than once. */
5670 if (order[i] == order[i - 1])
5671 return 0;
5673 /* Is the memory address adjacent and ascending? */
5674 if (unsorted_offsets[order[i]] != unsorted_offsets[order[i - 1]] + 4)
5675 return 0;
5678 if (base)
5680 *base = base_reg;
5682 for (i = 0; i < nops; i++)
5683 regs[i] = unsorted_regs[order[i]];
5685 *load_offset = unsorted_offsets[order[0]];
5688 if (unsorted_offsets[order[0]] == 0)
5689 return 1; /* stmia */
5691 if (unsorted_offsets[order[0]] == 4)
5692 return 2; /* stmib */
5694 if (unsorted_offsets[order[nops - 1]] == 0)
5695 return 3; /* stmda */
5697 if (unsorted_offsets[order[nops - 1]] == -4)
5698 return 4; /* stmdb */
5700 return 0;
5703 const char *
5704 emit_stm_seq (rtx *operands, int nops)
5706 int regs[4];
5707 int base_reg;
5708 HOST_WIDE_INT offset;
5709 char buf[100];
5710 int i;
5712 switch (store_multiple_sequence (operands, nops, regs, &base_reg, &offset))
5714 case 1:
5715 strcpy (buf, "stm%?ia\t");
5716 break;
5718 case 2:
5719 strcpy (buf, "stm%?ib\t");
5720 break;
5722 case 3:
5723 strcpy (buf, "stm%?da\t");
5724 break;
5726 case 4:
5727 strcpy (buf, "stm%?db\t");
5728 break;
5730 default:
5731 gcc_unreachable ();
5734 sprintf (buf + strlen (buf), "%s%s, {%s%s", REGISTER_PREFIX,
5735 reg_names[base_reg], REGISTER_PREFIX, reg_names[regs[0]]);
5737 for (i = 1; i < nops; i++)
5738 sprintf (buf + strlen (buf), ", %s%s", REGISTER_PREFIX,
5739 reg_names[regs[i]]);
5741 strcat (buf, "}\t%@ phole stm");
5743 output_asm_insn (buf, operands);
5744 return "";
5748 /* Routines for use in generating RTL. */
5751 arm_gen_load_multiple (int base_regno, int count, rtx from, int up,
5752 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
5754 HOST_WIDE_INT offset = *offsetp;
5755 int i = 0, j;
5756 rtx result;
5757 int sign = up ? 1 : -1;
5758 rtx mem, addr;
5760 /* XScale has load-store double instructions, but they have stricter
5761 alignment requirements than load-store multiple, so we cannot
5762 use them.
5764 For XScale ldm requires 2 + NREGS cycles to complete and blocks
5765 the pipeline until completion.
5767 NREGS CYCLES
5768 1 3
5769 2 4
5770 3 5
5771 4 6
5773 An ldr instruction takes 1-3 cycles, but does not block the
5774 pipeline.
5776 NREGS CYCLES
5777 1 1-3
5778 2 2-6
5779 3 3-9
5780 4 4-12
5782 Best case ldr will always win. However, the more ldr instructions
5783 we issue, the less likely we are to be able to schedule them well.
5784 Using ldr instructions also increases code size.
5786 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
5787 for counts of 3 or 4 regs. */
5788 if (arm_tune_xscale && count <= 2 && ! optimize_size)
5790 rtx seq;
5792 start_sequence ();
5794 for (i = 0; i < count; i++)
5796 addr = plus_constant (from, i * 4 * sign);
5797 mem = adjust_automodify_address (basemem, SImode, addr, offset);
5798 emit_move_insn (gen_rtx_REG (SImode, base_regno + i), mem);
5799 offset += 4 * sign;
5802 if (write_back)
5804 emit_move_insn (from, plus_constant (from, count * 4 * sign));
5805 *offsetp = offset;
5808 seq = get_insns ();
5809 end_sequence ();
5811 return seq;
5814 result = gen_rtx_PARALLEL (VOIDmode,
5815 rtvec_alloc (count + (write_back ? 1 : 0)));
5816 if (write_back)
5818 XVECEXP (result, 0, 0)
5819 = gen_rtx_SET (GET_MODE (from), from,
5820 plus_constant (from, count * 4 * sign));
5821 i = 1;
5822 count++;
5825 for (j = 0; i < count; i++, j++)
5827 addr = plus_constant (from, j * 4 * sign);
5828 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
5829 XVECEXP (result, 0, i)
5830 = gen_rtx_SET (VOIDmode, gen_rtx_REG (SImode, base_regno + j), mem);
5831 offset += 4 * sign;
5834 if (write_back)
5835 *offsetp = offset;
5837 return result;
5841 arm_gen_store_multiple (int base_regno, int count, rtx to, int up,
5842 int write_back, rtx basemem, HOST_WIDE_INT *offsetp)
5844 HOST_WIDE_INT offset = *offsetp;
5845 int i = 0, j;
5846 rtx result;
5847 int sign = up ? 1 : -1;
5848 rtx mem, addr;
5850 /* See arm_gen_load_multiple for discussion of
5851 the pros/cons of ldm/stm usage for XScale. */
5852 if (arm_tune_xscale && count <= 2 && ! optimize_size)
5854 rtx seq;
5856 start_sequence ();
5858 for (i = 0; i < count; i++)
5860 addr = plus_constant (to, i * 4 * sign);
5861 mem = adjust_automodify_address (basemem, SImode, addr, offset);
5862 emit_move_insn (mem, gen_rtx_REG (SImode, base_regno + i));
5863 offset += 4 * sign;
5866 if (write_back)
5868 emit_move_insn (to, plus_constant (to, count * 4 * sign));
5869 *offsetp = offset;
5872 seq = get_insns ();
5873 end_sequence ();
5875 return seq;
5878 result = gen_rtx_PARALLEL (VOIDmode,
5879 rtvec_alloc (count + (write_back ? 1 : 0)));
5880 if (write_back)
5882 XVECEXP (result, 0, 0)
5883 = gen_rtx_SET (GET_MODE (to), to,
5884 plus_constant (to, count * 4 * sign));
5885 i = 1;
5886 count++;
5889 for (j = 0; i < count; i++, j++)
5891 addr = plus_constant (to, j * 4 * sign);
5892 mem = adjust_automodify_address_nv (basemem, SImode, addr, offset);
5893 XVECEXP (result, 0, i)
5894 = gen_rtx_SET (VOIDmode, mem, gen_rtx_REG (SImode, base_regno + j));
5895 offset += 4 * sign;
5898 if (write_back)
5899 *offsetp = offset;
5901 return result;
5905 arm_gen_movmemqi (rtx *operands)
5907 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
5908 HOST_WIDE_INT srcoffset, dstoffset;
5909 int i;
5910 rtx src, dst, srcbase, dstbase;
5911 rtx part_bytes_reg = NULL;
5912 rtx mem;
5914 if (GET_CODE (operands[2]) != CONST_INT
5915 || GET_CODE (operands[3]) != CONST_INT
5916 || INTVAL (operands[2]) > 64
5917 || INTVAL (operands[3]) & 3)
5918 return 0;
5920 dstbase = operands[0];
5921 srcbase = operands[1];
5923 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
5924 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
5926 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
5927 out_words_to_go = INTVAL (operands[2]) / 4;
5928 last_bytes = INTVAL (operands[2]) & 3;
5929 dstoffset = srcoffset = 0;
5931 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
5932 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
5934 for (i = 0; in_words_to_go >= 2; i+=4)
5936 if (in_words_to_go > 4)
5937 emit_insn (arm_gen_load_multiple (0, 4, src, TRUE, TRUE,
5938 srcbase, &srcoffset));
5939 else
5940 emit_insn (arm_gen_load_multiple (0, in_words_to_go, src, TRUE,
5941 FALSE, srcbase, &srcoffset));
5943 if (out_words_to_go)
5945 if (out_words_to_go > 4)
5946 emit_insn (arm_gen_store_multiple (0, 4, dst, TRUE, TRUE,
5947 dstbase, &dstoffset));
5948 else if (out_words_to_go != 1)
5949 emit_insn (arm_gen_store_multiple (0, out_words_to_go,
5950 dst, TRUE,
5951 (last_bytes == 0
5952 ? FALSE : TRUE),
5953 dstbase, &dstoffset));
5954 else
5956 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
5957 emit_move_insn (mem, gen_rtx_REG (SImode, 0));
5958 if (last_bytes != 0)
5960 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
5961 dstoffset += 4;
5966 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
5967 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
5970 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
5971 if (out_words_to_go)
5973 rtx sreg;
5975 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
5976 sreg = copy_to_reg (mem);
5978 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
5979 emit_move_insn (mem, sreg);
5980 in_words_to_go--;
5982 gcc_assert (!in_words_to_go); /* Sanity check */
5985 if (in_words_to_go)
5987 gcc_assert (in_words_to_go > 0);
5989 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
5990 part_bytes_reg = copy_to_mode_reg (SImode, mem);
5993 gcc_assert (!last_bytes || part_bytes_reg);
5995 if (BYTES_BIG_ENDIAN && last_bytes)
5997 rtx tmp = gen_reg_rtx (SImode);
5999 /* The bytes we want are in the top end of the word. */
6000 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
6001 GEN_INT (8 * (4 - last_bytes))));
6002 part_bytes_reg = tmp;
6004 while (last_bytes)
6006 mem = adjust_automodify_address (dstbase, QImode,
6007 plus_constant (dst, last_bytes - 1),
6008 dstoffset + last_bytes - 1);
6009 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
6011 if (--last_bytes)
6013 tmp = gen_reg_rtx (SImode);
6014 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
6015 part_bytes_reg = tmp;
6020 else
6022 if (last_bytes > 1)
6024 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
6025 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
6026 last_bytes -= 2;
6027 if (last_bytes)
6029 rtx tmp = gen_reg_rtx (SImode);
6030 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
6031 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
6032 part_bytes_reg = tmp;
6033 dstoffset += 2;
6037 if (last_bytes)
6039 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
6040 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
6044 return 1;
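/* As an illustration of the expansion above (assuming a little-endian
   target): a 14-byte copy becomes one four-word load, a three-word
   store, and a final halfword store taken from the fourth loaded
   word.  */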
6047 /* Generate a memory reference for a half word, such that it will be loaded
6048 into the top 16 bits of the word. We can assume that the address is
6049 known to be alignable and of the form reg, or plus (reg, const). */
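/* For instance, on a little-endian target the halfword at byte offset 2
   already sits in the top half of the word at offset 0, so the SImode
   MEM is returned unrotated; when the halfword sits in the bottom half,
   the loaded word is wrapped in a ROTATE by 16 to bring it to the top.  */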
rtx
6052 arm_gen_rotated_half_load (rtx memref)
6054 HOST_WIDE_INT offset = 0;
6055 rtx base = XEXP (memref, 0);
6057 if (GET_CODE (base) == PLUS)
6059 offset = INTVAL (XEXP (base, 1));
6060 base = XEXP (base, 0);
6063 /* If we aren't allowed to generate unaligned addresses, then fail. */
6064 if ((BYTES_BIG_ENDIAN ? 1 : 0) ^ ((offset & 2) == 0))
6065 return NULL;
6067 base = gen_rtx_MEM (SImode, plus_constant (base, offset & ~2));
6069 if ((BYTES_BIG_ENDIAN ? 1 : 0) ^ ((offset & 2) == 2))
6070 return base;
6072 return gen_rtx_ROTATE (SImode, base, GEN_INT (16));
6075 /* Select a dominance comparison mode if possible for a test of the general
6076 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
6077 COND_OR == DOM_CC_X_AND_Y => (X && Y)
6078 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
6079 COND_OR == DOM_CC_X_OR_Y => (X || Y)
6080 In all cases OP will be either EQ or NE, but we don't need to know which
6081 here. If we are unable to support a dominance comparison we return
6082 CCmode. This will then fail to match for the RTL expressions that
6083 generate this call. */
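/* For example, (x < y) || (x <= y) with COND_OR == DOM_CC_X_OR_Y can use
   CC_DLEmode: LT dominates LE, so the combined test degenerates to the
   weaker LE comparison.  */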
6084 enum machine_mode
6085 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
6087 enum rtx_code cond1, cond2;
6088 int swapped = 0;
6090 /* Currently we will probably get the wrong result if the individual
6091 comparisons are not simple. This also ensures that it is safe to
6092 reverse a comparison if necessary. */
6093 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
6094 != CCmode)
6095 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
6096 != CCmode))
6097 return CCmode;
6099 /* The if_then_else variant of this tests the second condition if the
6100 first passes, but is true if the first fails. Reverse the first
6101 condition to get a true "inclusive-or" expression. */
6102 if (cond_or == DOM_CC_NX_OR_Y)
6103 cond1 = reverse_condition (cond1);
6105 /* If the comparisons are not equal, and one doesn't dominate the other,
6106 then we can't do this. */
6107 if (cond1 != cond2
6108 && !comparison_dominates_p (cond1, cond2)
6109 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
6110 return CCmode;
6112 if (swapped)
6114 enum rtx_code temp = cond1;
6115 cond1 = cond2;
6116 cond2 = temp;
6119 switch (cond1)
6121 case EQ:
6122 if (cond_or == DOM_CC_X_AND_Y)
6123 return CC_DEQmode;
6125 switch (cond2)
6127 case EQ: return CC_DEQmode;
6128 case LE: return CC_DLEmode;
6129 case LEU: return CC_DLEUmode;
6130 case GE: return CC_DGEmode;
6131 case GEU: return CC_DGEUmode;
6132 default: gcc_unreachable ();
6135 case LT:
6136 if (cond_or == DOM_CC_X_AND_Y)
6137 return CC_DLTmode;
6139 switch (cond2)
6141 case LT:
6142 return CC_DLTmode;
6143 case LE:
6144 return CC_DLEmode;
6145 case NE:
6146 return CC_DNEmode;
6147 default:
6148 gcc_unreachable ();
6151 case GT:
6152 if (cond_or == DOM_CC_X_AND_Y)
6153 return CC_DGTmode;
6155 switch (cond2)
6157 case GT:
6158 return CC_DGTmode;
6159 case GE:
6160 return CC_DGEmode;
6161 case NE:
6162 return CC_DNEmode;
6163 default:
6164 gcc_unreachable ();
6167 case LTU:
6168 if (cond_or == DOM_CC_X_AND_Y)
6169 return CC_DLTUmode;
6171 switch (cond2)
6173 case LTU:
6174 return CC_DLTUmode;
6175 case LEU:
6176 return CC_DLEUmode;
6177 case NE:
6178 return CC_DNEmode;
6179 default:
6180 gcc_unreachable ();
6183 case GTU:
6184 if (cond_or == DOM_CC_X_AND_Y)
6185 return CC_DGTUmode;
6187 switch (cond2)
6189 case GTU:
6190 return CC_DGTUmode;
6191 case GEU:
6192 return CC_DGEUmode;
6193 case NE:
6194 return CC_DNEmode;
6195 default:
6196 gcc_unreachable ();
6199 /* The remaining cases only occur when both comparisons are the
6200 same. */
6201 case NE:
6202 gcc_assert (cond1 == cond2);
6203 return CC_DNEmode;
6205 case LE:
6206 gcc_assert (cond1 == cond2);
6207 return CC_DLEmode;
6209 case GE:
6210 gcc_assert (cond1 == cond2);
6211 return CC_DGEmode;
6213 case LEU:
6214 gcc_assert (cond1 == cond2);
6215 return CC_DLEUmode;
6217 case GEU:
6218 gcc_assert (cond1 == cond2);
6219 return CC_DGEUmode;
6221 default:
6222 gcc_unreachable ();
6226 enum machine_mode
6227 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
6229 /* All floating point compares return CCFPmode, except the ordering
6230 comparisons LT, LE, GT and GE, which return CCFPEmode. */
6231 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
6233 switch (op)
6235 case EQ:
6236 case NE:
6237 case UNORDERED:
6238 case ORDERED:
6239 case UNLT:
6240 case UNLE:
6241 case UNGT:
6242 case UNGE:
6243 case UNEQ:
6244 case LTGT:
6245 return CCFPmode;
6247 case LT:
6248 case LE:
6249 case GT:
6250 case GE:
6251 if (TARGET_HARD_FLOAT && TARGET_MAVERICK)
6252 return CCFPmode;
6253 return CCFPEmode;
6255 default:
6256 gcc_unreachable ();
6260 /* A compare with a shifted operand. Because of canonicalization, the
6261 comparison will have to be swapped when we emit the assembler. */
6262 if (GET_MODE (y) == SImode && GET_CODE (y) == REG
6263 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
6264 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
6265 || GET_CODE (x) == ROTATERT))
6266 return CC_SWPmode;
6268 /* This operation is performed swapped, but since we only rely on the Z
6269 flag we don't need an additional mode. */
6270 if (GET_MODE (y) == SImode && REG_P (y)
6271 && GET_CODE (x) == NEG
6272 && (op == EQ || op == NE))
6273 return CC_Zmode;
6275 /* This is a special case that is used by combine to allow a
6276 comparison of a shifted byte load to be split into a zero-extend
6277 followed by a comparison of the shifted integer (only valid for
6278 equalities and unsigned inequalities). */
6279 if (GET_MODE (x) == SImode
6280 && GET_CODE (x) == ASHIFT
6281 && GET_CODE (XEXP (x, 1)) == CONST_INT && INTVAL (XEXP (x, 1)) == 24
6282 && GET_CODE (XEXP (x, 0)) == SUBREG
6283 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == MEM
6284 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
6285 && (op == EQ || op == NE
6286 || op == GEU || op == GTU || op == LTU || op == LEU)
6287 && GET_CODE (y) == CONST_INT)
6288 return CC_Zmode;
6290 /* A construct for a conditional compare: if the false arm contains
6291 0, then both conditions must be true; otherwise either condition
6292 must be true. Not all conditions are possible, so CCmode is
6293 returned if it can't be done. */
6294 if (GET_CODE (x) == IF_THEN_ELSE
6295 && (XEXP (x, 2) == const0_rtx
6296 || XEXP (x, 2) == const1_rtx)
6297 && COMPARISON_P (XEXP (x, 0))
6298 && COMPARISON_P (XEXP (x, 1)))
6299 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
6300 INTVAL (XEXP (x, 2)));
6302 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
6303 if (GET_CODE (x) == AND
6304 && COMPARISON_P (XEXP (x, 0))
6305 && COMPARISON_P (XEXP (x, 1)))
6306 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
6307 DOM_CC_X_AND_Y);
6309 if (GET_CODE (x) == IOR
6310 && COMPARISON_P (XEXP (x, 0))
6311 && COMPARISON_P (XEXP (x, 1)))
6312 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
6313 DOM_CC_X_OR_Y);
6315 /* An operation (on Thumb) where we want to test for a single bit.
6316 This is done by shifting that bit up into the top bit of a
6317 scratch register; we can then branch on the sign bit. */
6318 if (TARGET_THUMB
6319 && GET_MODE (x) == SImode
6320 && (op == EQ || op == NE)
6321 && (GET_CODE (x) == ZERO_EXTRACT))
6322 return CC_Nmode;
6324 /* For an operation that sets the condition codes as a side-effect, the
6325 V flag is not set correctly, so we can only use comparisons where
6326 this doesn't matter. (For LT and GE we can use "mi" and "pl"
6327 instead.) */
6328 if (GET_MODE (x) == SImode
6329 && y == const0_rtx
6330 && (op == EQ || op == NE || op == LT || op == GE)
6331 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
6332 || GET_CODE (x) == AND || GET_CODE (x) == IOR
6333 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
6334 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
6335 || GET_CODE (x) == LSHIFTRT
6336 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
6337 || GET_CODE (x) == ROTATERT
6338 || (TARGET_ARM && GET_CODE (x) == ZERO_EXTRACT)))
6339 return CC_NOOVmode;
6341 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
6342 return CC_Zmode;
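/* The test below catches the unsigned overflow idiom (a + b) < a (LTU,
   or GEU for its negation), which needs only the carry flag.  */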
6344 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
6345 && GET_CODE (x) == PLUS
6346 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
6347 return CC_Cmode;
6349 return CCmode;
6352 /* X and Y are two things to compare using CODE. Emit the compare insn and
6353 return the rtx for register 0 in the proper mode. FP means this is a
6354 floating-point compare; it does not appear to be needed on the ARM. */
rtx
6356 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y)
6358 enum machine_mode mode = SELECT_CC_MODE (code, x, y);
6359 rtx cc_reg = gen_rtx_REG (mode, CC_REGNUM);
6361 emit_insn (gen_rtx_SET (VOIDmode, cc_reg,
6362 gen_rtx_COMPARE (mode, x, y)));
6364 return cc_reg;
6367 /* Generate a sequence of insns that will generate the correct return
6368 address mask depending on the physical architecture that the program
6369 is running on. */
rtx
6371 arm_gen_return_addr_mask (void)
6373 rtx reg = gen_reg_rtx (Pmode);
6375 emit_insn (gen_return_addr_mask (reg));
6376 return reg;
6379 void
6380 arm_reload_in_hi (rtx *operands)
6382 rtx ref = operands[1];
6383 rtx base, scratch;
6384 HOST_WIDE_INT offset = 0;
6386 if (GET_CODE (ref) == SUBREG)
6388 offset = SUBREG_BYTE (ref);
6389 ref = SUBREG_REG (ref);
6392 if (GET_CODE (ref) == REG)
6394 /* We have a pseudo which has been spilt onto the stack; there
6395 are two cases here: the first where there is a simple
6396 stack-slot replacement and a second where the stack-slot is
6397 out of range, or is used as a subreg. */
6398 if (reg_equiv_mem[REGNO (ref)])
6400 ref = reg_equiv_mem[REGNO (ref)];
6401 base = find_replacement (&XEXP (ref, 0));
6403 else
6404 /* The slot is out of range, or was dressed up in a SUBREG. */
6405 base = reg_equiv_address[REGNO (ref)];
6407 else
6408 base = find_replacement (&XEXP (ref, 0));
6410 /* Handle the case where the address is too complex to be offset by 1. */
6411 if (GET_CODE (base) == MINUS
6412 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
6414 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
6416 emit_insn (gen_rtx_SET (VOIDmode, base_plus, base));
6417 base = base_plus;
6419 else if (GET_CODE (base) == PLUS)
6421 /* The addend must be CONST_INT, or we would have dealt with it above. */
6422 HOST_WIDE_INT hi, lo;
6424 offset += INTVAL (XEXP (base, 1));
6425 base = XEXP (base, 0);
6427 /* Rework the address into a legal sequence of insns. */
6428 /* Valid range for lo is -4095 -> 4095 */
6429 lo = (offset >= 0
6430 ? (offset & 0xfff)
6431 : -((-offset) & 0xfff));
6433 /* Corner case: if lo is the max offset then we would be out of range
6434 once we have added the additional 1 below, so bump the msb into the
6435 pre-loading insn(s). */
6436 if (lo == 4095)
6437 lo &= 0x7ff;
6439 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
6440 ^ (HOST_WIDE_INT) 0x80000000)
6441 - (HOST_WIDE_INT) 0x80000000);
6443 gcc_assert (hi + lo == offset);
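/* For example, offset == 4097 splits as lo = 1, hi = 4096, while the
   corner case above splits offset == 4095 as lo = 2047, hi = 2048 so
   that lo + 1 also stays within the +/-4095 offset range.  */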
6445 if (hi != 0)
6447 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
6449 /* Get the base address; addsi3 knows how to handle constants
6450 that require more than one insn. */
6451 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
6452 base = base_plus;
6453 offset = lo;
6457 /* Operands[2] may overlap operands[0] (though it won't overlap
6458 operands[1]), that's why we asked for a DImode reg -- so we can
6459 use the bit that does not overlap. */
6460 if (REGNO (operands[2]) == REGNO (operands[0]))
6461 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
6462 else
6463 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
6465 emit_insn (gen_zero_extendqisi2 (scratch,
6466 gen_rtx_MEM (QImode,
6467 plus_constant (base,
6468 offset))));
6469 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
6470 gen_rtx_MEM (QImode,
6471 plus_constant (base,
6472 offset + 1))));
6473 if (!BYTES_BIG_ENDIAN)
6474 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_SUBREG (SImode, operands[0], 0),
6475 gen_rtx_IOR (SImode,
6476 gen_rtx_ASHIFT
6477 (SImode,
6478 gen_rtx_SUBREG (SImode, operands[0], 0),
6479 GEN_INT (8)),
6480 scratch)));
6481 else
6482 emit_insn (gen_rtx_SET (VOIDmode, gen_rtx_SUBREG (SImode, operands[0], 0),
6483 gen_rtx_IOR (SImode,
6484 gen_rtx_ASHIFT (SImode, scratch,
6485 GEN_INT (8)),
6486 gen_rtx_SUBREG (SImode, operands[0],
6487 0))));
6490 /* Handle storing a half-word to memory during reload by synthesizing as two
6491 byte stores. Take care not to clobber the input values until after we
6492 have moved them somewhere safe. This code assumes that if the DImode
6493 scratch in operands[2] overlaps either the input value or output address
6494 in some way, then that value must die in this insn (we absolutely need
6495 two scratch registers for some corner cases). */
6496 void
6497 arm_reload_out_hi (rtx *operands)
6499 rtx ref = operands[0];
6500 rtx outval = operands[1];
6501 rtx base, scratch;
6502 HOST_WIDE_INT offset = 0;
6504 if (GET_CODE (ref) == SUBREG)
6506 offset = SUBREG_BYTE (ref);
6507 ref = SUBREG_REG (ref);
6510 if (GET_CODE (ref) == REG)
6512 /* We have a pseudo which has been spilt onto the stack; there
6513 are two cases here: the first where there is a simple
6514 stack-slot replacement and a second where the stack-slot is
6515 out of range, or is used as a subreg. */
6516 if (reg_equiv_mem[REGNO (ref)])
6518 ref = reg_equiv_mem[REGNO (ref)];
6519 base = find_replacement (&XEXP (ref, 0));
6521 else
6522 /* The slot is out of range, or was dressed up in a SUBREG. */
6523 base = reg_equiv_address[REGNO (ref)];
6525 else
6526 base = find_replacement (&XEXP (ref, 0));
6528 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
6530 /* Handle the case where the address is too complex to be offset by 1. */
6531 if (GET_CODE (base) == MINUS
6532 || (GET_CODE (base) == PLUS && GET_CODE (XEXP (base, 1)) != CONST_INT))
6534 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
6536 /* Be careful not to destroy OUTVAL. */
6537 if (reg_overlap_mentioned_p (base_plus, outval))
6539 /* Updating base_plus might destroy outval; see if we can
6540 swap the scratch and base_plus. */
6541 if (!reg_overlap_mentioned_p (scratch, outval))
6543 rtx tmp = scratch;
6544 scratch = base_plus;
6545 base_plus = tmp;
6547 else
6549 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
6551 /* Be conservative and copy OUTVAL into the scratch now;
6552 this should only be necessary if outval is a subreg
6553 of something larger than a word. */
6554 /* XXX Might this clobber base? I can't see how it can,
6555 since scratch is known to overlap with OUTVAL, and
6556 must be wider than a word. */
6557 emit_insn (gen_movhi (scratch_hi, outval));
6558 outval = scratch_hi;
6562 emit_insn (gen_rtx_SET (VOIDmode, base_plus, base));
6563 base = base_plus;
6565 else if (GET_CODE (base) == PLUS)
6567 /* The addend must be CONST_INT, or we would have dealt with it above. */
6568 HOST_WIDE_INT hi, lo;
6570 offset += INTVAL (XEXP (base, 1));
6571 base = XEXP (base, 0);
6573 /* Rework the address into a legal sequence of insns. */
6574 /* Valid range for lo is -4095 -> 4095 */
6575 lo = (offset >= 0
6576 ? (offset & 0xfff)
6577 : -((-offset) & 0xfff));
6579 /* Corner case: if lo is the max offset then we would be out of range
6580 once we have added the additional 1 below, so bump the msb into the
6581 pre-loading insn(s). */
6582 if (lo == 4095)
6583 lo &= 0x7ff;
6585 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
6586 ^ (HOST_WIDE_INT) 0x80000000)
6587 - (HOST_WIDE_INT) 0x80000000);
6589 gcc_assert (hi + lo == offset);
6591 if (hi != 0)
6593 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
6595 /* Be careful not to destroy OUTVAL. */
6596 if (reg_overlap_mentioned_p (base_plus, outval))
6598 /* Updating base_plus might destroy outval; see if we
6599 can swap the scratch and base_plus. */
6600 if (!reg_overlap_mentioned_p (scratch, outval))
6602 rtx tmp = scratch;
6603 scratch = base_plus;
6604 base_plus = tmp;
6606 else
6608 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
6610 /* Be conservative and copy outval into scratch now;
6611 this should only be necessary if outval is a
6612 subreg of something larger than a word. */
6613 /* XXX Might this clobber base? I can't see how it
6614 can, since scratch is known to overlap with
6615 outval. */
6616 emit_insn (gen_movhi (scratch_hi, outval));
6617 outval = scratch_hi;
6621 /* Get the base address; addsi3 knows how to handle constants
6622 that require more than one insn. */
6623 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
6624 base = base_plus;
6625 offset = lo;
6629 if (BYTES_BIG_ENDIAN)
6631 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
6632 plus_constant (base, offset + 1)),
6633 gen_lowpart (QImode, outval)));
6634 emit_insn (gen_lshrsi3 (scratch,
6635 gen_rtx_SUBREG (SImode, outval, 0),
6636 GEN_INT (8)));
6637 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
6638 gen_lowpart (QImode, scratch)));
6640 else
6642 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (base, offset)),
6643 gen_lowpart (QImode, outval)));
6644 emit_insn (gen_lshrsi3 (scratch,
6645 gen_rtx_SUBREG (SImode, outval, 0),
6646 GEN_INT (8)));
6647 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
6648 plus_constant (base, offset + 1)),
6649 gen_lowpart (QImode, scratch)));
6653 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
6654 (padded to the size of a word) should be passed in a register. */
6656 static bool
6657 arm_must_pass_in_stack (enum machine_mode mode, tree type)
6659 if (TARGET_AAPCS_BASED)
6660 return must_pass_in_stack_var_size (mode, type);
6661 else
6662 return must_pass_in_stack_var_size_or_pad (mode, type);
6666 /* For use by FUNCTION_ARG_PADDING (MODE, TYPE).
6667 Return true if an argument passed on the stack should be padded upwards,
6668 i.e. if the least-significant byte has useful data. */
6670 bool
6671 arm_pad_arg_upward (enum machine_mode mode, tree type)
6673 if (!TARGET_AAPCS_BASED)
6674 return DEFAULT_FUNCTION_ARG_PADDING(mode, type) == upward;
6676 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
6677 return false;
6679 return true;
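/* For example, under big-endian AAPCS a three-byte structure is still
   padded upward (its bytes stay at the lower addresses), whereas an
   integral type such as a short hits the return-false case above and
   is padded downward.  */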
6683 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
6684 For non-AAPCS, return !BYTES_BIG_ENDIAN if the least significant
6685 byte of the register has useful data, and return the opposite if the
6686 most significant byte does.
6687 For AAPCS, small aggregates and small complex types are always padded
6688 upwards. */
6690 bool
6691 arm_pad_reg_upward (enum machine_mode mode ATTRIBUTE_UNUSED,
6692 tree type, int first ATTRIBUTE_UNUSED)
6694 if (TARGET_AAPCS_BASED
6695 && BYTES_BIG_ENDIAN
6696 && (AGGREGATE_TYPE_P (type) || TREE_CODE (type) == COMPLEX_TYPE)
6697 && int_size_in_bytes (type) <= 4)
6698 return true;
6700 /* Otherwise, use default padding. */
6701 return !BYTES_BIG_ENDIAN;
6706 /* Print a symbolic form of X to the debug file, F. */
6707 static void
6708 arm_print_value (FILE *f, rtx x)
6710 switch (GET_CODE (x))
6712 case CONST_INT:
6713 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
6714 return;
6716 case CONST_DOUBLE:
6717 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
6718 return;
6720 case CONST_VECTOR:
6722 int i;
6724 fprintf (f, "<");
6725 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
6727 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
6728 if (i < (CONST_VECTOR_NUNITS (x) - 1))
6729 fputc (',', f);
6731 fprintf (f, ">");
6733 return;
6735 case CONST_STRING:
6736 fprintf (f, "\"%s\"", XSTR (x, 0));
6737 return;
6739 case SYMBOL_REF:
6740 fprintf (f, "`%s'", XSTR (x, 0));
6741 return;
6743 case LABEL_REF:
6744 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
6745 return;
6747 case CONST:
6748 arm_print_value (f, XEXP (x, 0));
6749 return;
6751 case PLUS:
6752 arm_print_value (f, XEXP (x, 0));
6753 fprintf (f, "+");
6754 arm_print_value (f, XEXP (x, 1));
6755 return;
6757 case PC:
6758 fprintf (f, "pc");
6759 return;
6761 default:
6762 fprintf (f, "????");
6763 return;
6767 /* Routines for manipulation of the constant pool. */
6769 /* Arm instructions cannot load a large constant directly into a
6770 register; they have to come from a pc relative load. The constant
6771 must therefore be placed in the addressable range of the pc
6772 relative load. Depending on the precise pc relative load
6773 instruction the range is somewhere between 256 bytes and 4k. This
6774 means that we often have to dump a constant inside a function, and
6775 generate code to branch around it.
6777 It is important to minimize this, since the branches will slow
6778 things down and make the code larger.
6780 Normally we can hide the table after an existing unconditional
6781 branch so that there is no interruption of the flow, but in the
6782 worst case the code looks like this:
6784 ldr rn, L1
6786 b L2
6787 align
6788 L1: .long value
6792 ldr rn, L3
6794 b L4
6795 align
6796 L3: .long value
6800 We fix this by performing a scan after scheduling, which notices
6801 which instructions need to have their operands fetched from the
6802 constant table and builds the table.
6804 The algorithm starts by building a table of all the constants that
6805 need fixing up and all the natural barriers in the function (places
6806 where a constant table can be dropped without breaking the flow).
6807 For each fixup we note how far the pc-relative replacement will be
6808 able to reach and the offset of the instruction into the function.
6810 Having built the table we then group the fixes together to form
6811 tables that are as large as possible (subject to addressing
6812 constraints) and emit each table of constants after the last
6813 barrier that is within range of all the instructions in the group.
6814 If a group does not contain a barrier, then we forcibly create one
6815 by inserting a jump instruction into the flow. Once the table has
6816 been inserted, the insns are then modified to reference the
6817 relevant entry in the pool.
6819 Possible enhancements to the algorithm (not implemented) are:
6821 1) For some processors and object formats, there may be benefit in
6822 aligning the pools to the start of cache lines; this alignment
6823 would need to be taken into account when calculating addressability
6824 of a pool. */
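/* As an illustration, when the grouping succeeds the worst-case
   sequence above collapses into a single pool shared by both loads
   (a sketch of the intended effect, not literal compiler output):

   ldr rn, L1
   ...
   ldr rm, L1+4
   ...
   b L2
   align
   L1: .long value
   .long value2
   L2:  */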
6826 /* These typedefs are located at the start of this file, so that
6827 they can be used in the prototypes there. This comment is to
6828 remind readers of that fact so that the following structures
6829 can be understood more easily.
6831 typedef struct minipool_node Mnode;
6832 typedef struct minipool_fixup Mfix; */
6834 struct minipool_node
6836 /* Doubly linked chain of entries. */
6837 Mnode * next;
6838 Mnode * prev;
6839 /* The maximum offset into the code at which this entry can be placed. While
6840 pushing fixes for forward references, all entries are sorted in order
6841 of increasing max_address. */
6842 HOST_WIDE_INT max_address;
6843 /* Similarly for an entry inserted for a backwards ref. */
6844 HOST_WIDE_INT min_address;
6845 /* The number of fixes referencing this entry. This can become zero
6846 if we "unpush" an entry. In this case we ignore the entry when we
6847 come to emit the code. */
6848 int refcount;
6849 /* The offset from the start of the minipool. */
6850 HOST_WIDE_INT offset;
6851 /* The value in the table. */
6852 rtx value;
6853 /* The mode of value. */
6854 enum machine_mode mode;
6855 /* The size of the value. With iWMMXt enabled
6856 sizes > 4 also imply an alignment of 8 bytes. */
6857 int fix_size;
6860 struct minipool_fixup
6862 Mfix * next;
6863 rtx insn;
6864 HOST_WIDE_INT address;
6865 rtx * loc;
6866 enum machine_mode mode;
6867 int fix_size;
6868 rtx value;
6869 Mnode * minipool;
6870 HOST_WIDE_INT forwards;
6871 HOST_WIDE_INT backwards;
6874 /* Fixes less than a word need padding out to a word boundary. */
6875 #define MINIPOOL_FIX_SIZE(mode) \
6876 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
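/* Thus MINIPOOL_FIX_SIZE (HImode) is 4 (padded up from 2), while
   MINIPOOL_FIX_SIZE (DImode) is 8.  */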
6878 static Mnode * minipool_vector_head;
6879 static Mnode * minipool_vector_tail;
6880 static rtx minipool_vector_label;
6882 /* The linked list of all minipool fixes required for this function. */
6883 Mfix * minipool_fix_head;
6884 Mfix * minipool_fix_tail;
6885 /* The fix entry for the current minipool, once it has been placed. */
6886 Mfix * minipool_barrier;
6888 /* Determines if INSN is the start of a jump table. Returns the end
6889 of the TABLE or NULL_RTX. */
6890 static rtx
6891 is_jump_table (rtx insn)
6893 rtx table;
6895 if (GET_CODE (insn) == JUMP_INSN
6896 && JUMP_LABEL (insn) != NULL
6897 && ((table = next_real_insn (JUMP_LABEL (insn)))
6898 == next_real_insn (insn))
6899 && table != NULL
6900 && GET_CODE (table) == JUMP_INSN
6901 && (GET_CODE (PATTERN (table)) == ADDR_VEC
6902 || GET_CODE (PATTERN (table)) == ADDR_DIFF_VEC))
6903 return table;
6905 return NULL_RTX;
6908 #ifndef JUMP_TABLES_IN_TEXT_SECTION
6909 #define JUMP_TABLES_IN_TEXT_SECTION 0
6910 #endif
6912 static HOST_WIDE_INT
6913 get_jump_table_size (rtx insn)
6915 /* ADDR_VECs only take room if read-only data goes into the text
6916 section. */
6917 if (JUMP_TABLES_IN_TEXT_SECTION
6918 #if !defined(READONLY_DATA_SECTION) && !defined(READONLY_DATA_SECTION_ASM_OP)
6919 || 1
6920 #endif
6923 rtx body = PATTERN (insn);
6924 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
6926 return GET_MODE_SIZE (GET_MODE (body)) * XVECLEN (body, elt);
6929 return 0;
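/* For example, an ADDR_DIFF_VEC in HImode with ten entries accounts for
   GET_MODE_SIZE (HImode) * 10 = 20 bytes of code-stream space when jump
   tables live in the text section.  */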
6932 /* Move a minipool fix MP from its current location to before MAX_MP.
6933 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
6934 constraints may need updating. */
6935 static Mnode *
6936 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
6937 HOST_WIDE_INT max_address)
6939 /* The code below assumes these are different. */
6940 gcc_assert (mp != max_mp);
6942 if (max_mp == NULL)
6944 if (max_address < mp->max_address)
6945 mp->max_address = max_address;
6947 else
6949 if (max_address > max_mp->max_address - mp->fix_size)
6950 mp->max_address = max_mp->max_address - mp->fix_size;
6951 else
6952 mp->max_address = max_address;
6954 /* Unlink MP from its current position. Since max_mp is non-null,
6955 mp->prev must be non-null. */
6956 mp->prev->next = mp->next;
6957 if (mp->next != NULL)
6958 mp->next->prev = mp->prev;
6959 else
6960 minipool_vector_tail = mp->prev;
6962 /* Re-insert it before MAX_MP. */
6963 mp->next = max_mp;
6964 mp->prev = max_mp->prev;
6965 max_mp->prev = mp;
6967 if (mp->prev != NULL)
6968 mp->prev->next = mp;
6969 else
6970 minipool_vector_head = mp;
6973 /* Save the new entry. */
6974 max_mp = mp;
6976 /* Scan over the preceding entries and adjust their addresses as
6977 required. */
6978 while (mp->prev != NULL
6979 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
6981 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
6982 mp = mp->prev;
6985 return max_mp;
6988 /* Add a constant to the minipool for a forward reference. Returns the
6989 node added or NULL if the constant will not fit in this pool. */
6990 static Mnode *
6991 add_minipool_forward_ref (Mfix *fix)
6993 /* If set, max_mp is the first pool_entry that has a lower
6994 constraint than the one we are trying to add. */
6995 Mnode * max_mp = NULL;
6996 HOST_WIDE_INT max_address = fix->address + fix->forwards;
6997 Mnode * mp;
6999 /* If this fix's address is greater than the address of the first
7000 entry, then we can't put the fix in this pool. We subtract the
7001 size of the current fix to ensure that if the table is fully
7002 packed we still have enough room to insert this value by shuffling
7003 the other fixes forwards. */
7004 if (minipool_vector_head &&
7005 fix->address >= minipool_vector_head->max_address - fix->fix_size)
7006 return NULL;
7008 /* Scan the pool to see if a constant with the same value has
7009 already been added. While we are doing this, also note the
7010 location where we must insert the constant if it doesn't already
7011 exist. */
7012 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
7014 if (GET_CODE (fix->value) == GET_CODE (mp->value)
7015 && fix->mode == mp->mode
7016 && (GET_CODE (fix->value) != CODE_LABEL
7017 || (CODE_LABEL_NUMBER (fix->value)
7018 == CODE_LABEL_NUMBER (mp->value)))
7019 && rtx_equal_p (fix->value, mp->value))
7021 /* More than one fix references this entry. */
7022 mp->refcount++;
7023 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
7026 /* Note the insertion point if necessary. */
7027 if (max_mp == NULL
7028 && mp->max_address > max_address)
7029 max_mp = mp;
7031 /* If we are inserting an 8-byte aligned quantity and
7032 we have not already found an insertion point, then
7033 make sure that all such 8-byte aligned quantities are
7034 placed at the start of the pool. */
7035 if (ARM_DOUBLEWORD_ALIGN
7036 && max_mp == NULL
7037 && fix->fix_size == 8
7038 && mp->fix_size != 8)
7040 max_mp = mp;
7041 max_address = mp->max_address;
7045 /* The value is not currently in the minipool, so we need to create
7046 a new entry for it. If MAX_MP is NULL, the entry will be put on
7047 the end of the list since the placement is less constrained than
7048 any existing entry. Otherwise, we insert the new fix before
7049 MAX_MP and, if necessary, adjust the constraints on the other
7050 entries. */
7051 mp = xmalloc (sizeof (* mp));
7052 mp->fix_size = fix->fix_size;
7053 mp->mode = fix->mode;
7054 mp->value = fix->value;
7055 mp->refcount = 1;
7056 /* Not yet required for a backwards ref. */
7057 mp->min_address = -65536;
7059 if (max_mp == NULL)
7061 mp->max_address = max_address;
7062 mp->next = NULL;
7063 mp->prev = minipool_vector_tail;
7065 if (mp->prev == NULL)
7067 minipool_vector_head = mp;
7068 minipool_vector_label = gen_label_rtx ();
7070 else
7071 mp->prev->next = mp;
7073 minipool_vector_tail = mp;
7075 else
7077 if (max_address > max_mp->max_address - mp->fix_size)
7078 mp->max_address = max_mp->max_address - mp->fix_size;
7079 else
7080 mp->max_address = max_address;
7082 mp->next = max_mp;
7083 mp->prev = max_mp->prev;
7084 max_mp->prev = mp;
7085 if (mp->prev != NULL)
7086 mp->prev->next = mp;
7087 else
7088 minipool_vector_head = mp;
7091 /* Save the new entry. */
7092 max_mp = mp;
7094 /* Scan over the preceding entries and adjust their addresses as
7095 required. */
7096 while (mp->prev != NULL
7097 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
7099 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
7100 mp = mp->prev;
7103 return max_mp;
7106 static Mnode *
7107 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
7108 HOST_WIDE_INT min_address)
7110 HOST_WIDE_INT offset;
7112 /* The code below assumes these are different. */
7113 gcc_assert (mp != min_mp);
7115 if (min_mp == NULL)
7117 if (min_address > mp->min_address)
7118 mp->min_address = min_address;
7120 else
7122 /* We will adjust this below if it is too loose. */
7123 mp->min_address = min_address;
7125 /* Unlink MP from its current position. Since min_mp is non-null,
7126 mp->next must be non-null. */
7127 mp->next->prev = mp->prev;
7128 if (mp->prev != NULL)
7129 mp->prev->next = mp->next;
7130 else
7131 minipool_vector_head = mp->next;
7133 /* Reinsert it after MIN_MP. */
7134 mp->prev = min_mp;
7135 mp->next = min_mp->next;
7136 min_mp->next = mp;
7137 if (mp->next != NULL)
7138 mp->next->prev = mp;
7139 else
7140 minipool_vector_tail = mp;
7143 min_mp = mp;
7145 offset = 0;
7146 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
7148 mp->offset = offset;
7149 if (mp->refcount > 0)
7150 offset += mp->fix_size;
7152 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
7153 mp->next->min_address = mp->min_address + mp->fix_size;
7156 return min_mp;
7159 /* Add a constant to the minipool for a backward reference. Returns the
7160 node added or NULL if the constant will not fit in this pool.
7162 Note that the code for insertion for a backwards reference can be
7163 somewhat confusing because the calculated offsets for each fix do
7164 not take into account the size of the pool (which is still under
7165 construction). */
7166 static Mnode *
7167 add_minipool_backward_ref (Mfix *fix)
7169 /* If set, min_mp is the last pool_entry that has a lower constraint
7170 than the one we are trying to add. */
7171 Mnode *min_mp = NULL;
7172 /* This can be negative, since it is only a constraint. */
7173 HOST_WIDE_INT min_address = fix->address - fix->backwards;
7174 Mnode *mp;
7176 /* If we can't reach the current pool from this insn, or if we can't
7177 insert this entry at the end of the pool without pushing other
7178 fixes out of range, then we don't try. This ensures that we
7179 can't fail later on. */
7180 if (min_address >= minipool_barrier->address
7181 || (minipool_vector_tail->min_address + fix->fix_size
7182 >= minipool_barrier->address))
7183 return NULL;
7185 /* Scan the pool to see if a constant with the same value has
7186 already been added. While we are doing this, also note the
7187 location where we must insert the constant if it doesn't already
7188 exist. */
7189 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
7191 if (GET_CODE (fix->value) == GET_CODE (mp->value)
7192 && fix->mode == mp->mode
7193 && (GET_CODE (fix->value) != CODE_LABEL
7194 || (CODE_LABEL_NUMBER (fix->value)
7195 == CODE_LABEL_NUMBER (mp->value)))
7196 && rtx_equal_p (fix->value, mp->value)
7197 /* Check that there is enough slack to move this entry to the
7198 end of the table (this is conservative). */
7199 && (mp->max_address
7200 > (minipool_barrier->address
7201 + minipool_vector_tail->offset
7202 + minipool_vector_tail->fix_size)))
7204 mp->refcount++;
7205 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
7208 if (min_mp != NULL)
7209 mp->min_address += fix->fix_size;
7210 else
7212 /* Note the insertion point if necessary. */
7213 if (mp->min_address < min_address)
7215 /* For now, we do not allow the insertion of nodes requiring 8-byte
7216 alignment anywhere but at the start of the pool. */
7217 if (ARM_DOUBLEWORD_ALIGN
7218 && fix->fix_size == 8 && mp->fix_size != 8)
7219 return NULL;
7220 else
7221 min_mp = mp;
7223 else if (mp->max_address
7224 < minipool_barrier->address + mp->offset + fix->fix_size)
7226 /* Inserting before this entry would push the fix beyond
7227 its maximum address (which can happen if we have
7228 re-located a forwards fix); force the new fix to come
7229 after it. */
7230 min_mp = mp;
7231 min_address = mp->min_address + fix->fix_size;
7233 /* If we are inserting an 8-byte aligned quantity and
7234 we have not already found an insertion point, then
7235 make sure that all such 8-byte aligned quantities are
7236 placed at the start of the pool. */
7237 else if (ARM_DOUBLEWORD_ALIGN
7238 && min_mp == NULL
7239 && fix->fix_size == 8
7240 && mp->fix_size < 8)
7242 min_mp = mp;
7243 min_address = mp->min_address + fix->fix_size;
7248 /* We need to create a new entry. */
7249 mp = xmalloc (sizeof (* mp));
7250 mp->fix_size = fix->fix_size;
7251 mp->mode = fix->mode;
7252 mp->value = fix->value;
7253 mp->refcount = 1;
7254 mp->max_address = minipool_barrier->address + 65536;
7256 mp->min_address = min_address;
7258 if (min_mp == NULL)
7260 mp->prev = NULL;
7261 mp->next = minipool_vector_head;
7263 if (mp->next == NULL)
7265 minipool_vector_tail = mp;
7266 minipool_vector_label = gen_label_rtx ();
7268 else
7269 mp->next->prev = mp;
7271 minipool_vector_head = mp;
7273 else
7275 mp->next = min_mp->next;
7276 mp->prev = min_mp;
7277 min_mp->next = mp;
7279 if (mp->next != NULL)
7280 mp->next->prev = mp;
7281 else
7282 minipool_vector_tail = mp;
7285 /* Save the new entry. */
7286 min_mp = mp;
7288 if (mp->prev)
7289 mp = mp->prev;
7290 else
7291 mp->offset = 0;
7293 /* Scan over the following entries and adjust their offsets. */
7294 while (mp->next != NULL)
7296 if (mp->next->min_address < mp->min_address + mp->fix_size)
7297 mp->next->min_address = mp->min_address + mp->fix_size;
7299 if (mp->refcount)
7300 mp->next->offset = mp->offset + mp->fix_size;
7301 else
7302 mp->next->offset = mp->offset;
7304 mp = mp->next;
7307 return min_mp;
7310 static void
7311 assign_minipool_offsets (Mfix *barrier)
7313 HOST_WIDE_INT offset = 0;
7314 Mnode *mp;
7316 minipool_barrier = barrier;
7318 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
7320 mp->offset = offset;
7322 if (mp->refcount > 0)
7323 offset += mp->fix_size;
7327 /* Output the literal table. */
7328 static void
7329 dump_minipool (rtx scan)
7331 Mnode * mp;
7332 Mnode * nmp;
7333 int align64 = 0;
7335 if (ARM_DOUBLEWORD_ALIGN)
7336 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
7337 if (mp->refcount > 0 && mp->fix_size == 8)
7339 align64 = 1;
7340 break;
7343 if (dump_file)
7344 fprintf (dump_file,
7345 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
7346 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
7348 scan = emit_label_after (gen_label_rtx (), scan);
7349 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
7350 scan = emit_label_after (minipool_vector_label, scan);
7352 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
7354 if (mp->refcount > 0)
7356 if (dump_file)
7358 fprintf (dump_file,
7359 ";; Offset %u, min %ld, max %ld ",
7360 (unsigned) mp->offset, (unsigned long) mp->min_address,
7361 (unsigned long) mp->max_address);
7362 arm_print_value (dump_file, mp->value);
7363 fputc ('\n', dump_file);
7366 switch (mp->fix_size)
7368 #ifdef HAVE_consttable_1
7369 case 1:
7370 scan = emit_insn_after (gen_consttable_1 (mp->value), scan);
7371 break;
7373 #endif
7374 #ifdef HAVE_consttable_2
7375 case 2:
7376 scan = emit_insn_after (gen_consttable_2 (mp->value), scan);
7377 break;
7379 #endif
7380 #ifdef HAVE_consttable_4
7381 case 4:
7382 scan = emit_insn_after (gen_consttable_4 (mp->value), scan);
7383 break;
7385 #endif
7386 #ifdef HAVE_consttable_8
7387 case 8:
7388 scan = emit_insn_after (gen_consttable_8 (mp->value), scan);
7389 break;
7391 #endif
7392 default:
7393 gcc_unreachable ();
7397 nmp = mp->next;
7398 free (mp);
7401 minipool_vector_head = minipool_vector_tail = NULL;
7402 scan = emit_insn_after (gen_consttable_end (), scan);
7403 scan = emit_barrier_after (scan);
7406 /* Return the cost of forcibly inserting a barrier after INSN. */
7407 static int
7408 arm_barrier_cost (rtx insn)
7410 /* Basing the location of the pool on the loop depth is preferable,
7411 but at the moment, the basic block information seems to be
7412 corrupt by this stage of the compilation. */
7413 int base_cost = 50;
7414 rtx next = next_nonnote_insn (insn);
7416 if (next != NULL && GET_CODE (next) == CODE_LABEL)
7417 base_cost -= 20;
7419 switch (GET_CODE (insn))
7421 case CODE_LABEL:
7422 /* It will always be better to place the table before the label, rather
7423 than after it. */
7424 return 50;
7426 case INSN:
7427 case CALL_INSN:
7428 return base_cost;
7430 case JUMP_INSN:
7431 return base_cost - 10;
7433 default:
7434 return base_cost + 10;
7438 /* Find the best place in the insn stream in the range
7439 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
7440 Create the barrier by inserting a jump and add a new fix entry for
7441 it. */
7442 static Mfix *
7443 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
7445 HOST_WIDE_INT count = 0;
7446 rtx barrier;
7447 rtx from = fix->insn;
7448 rtx selected = from;
7449 int selected_cost;
7450 HOST_WIDE_INT selected_address;
7451 Mfix * new_fix;
7452 HOST_WIDE_INT max_count = max_address - fix->address;
7453 rtx label = gen_label_rtx ();
7455 selected_cost = arm_barrier_cost (from);
7456 selected_address = fix->address;
7458 while (from && count < max_count)
7460 rtx tmp;
7461 int new_cost;
7463 /* This code shouldn't have been called if there was a natural barrier
7464 within range. */
7465 gcc_assert (GET_CODE (from) != BARRIER);
7467 /* Count the length of this insn. */
7468 count += get_attr_length (from);
7470 /* If there is a jump table, add its length. */
7471 tmp = is_jump_table (from);
7472 if (tmp != NULL)
7474 count += get_jump_table_size (tmp);
7476 /* Jump tables aren't in a basic block, so base the cost on
7477 the dispatch insn. If we select this location, we will
7478 still put the pool after the table. */
7479 new_cost = arm_barrier_cost (from);
7481 if (count < max_count && new_cost <= selected_cost)
7483 selected = tmp;
7484 selected_cost = new_cost;
7485 selected_address = fix->address + count;
7488 /* Continue after the dispatch table. */
7489 from = NEXT_INSN (tmp);
7490 continue;
7493 new_cost = arm_barrier_cost (from);
7495 if (count < max_count && new_cost <= selected_cost)
7497 selected = from;
7498 selected_cost = new_cost;
7499 selected_address = fix->address + count;
7502 from = NEXT_INSN (from);
7505 /* Create a new JUMP_INSN that branches around a barrier. */
7506 from = emit_jump_insn_after (gen_jump (label), selected);
7507 JUMP_LABEL (from) = label;
7508 barrier = emit_barrier_after (from);
7509 emit_label_after (label, barrier);
7511 /* Create a minipool barrier entry for the new barrier. */
7512 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
7513 new_fix->insn = barrier;
7514 new_fix->address = selected_address;
7515 new_fix->next = fix->next;
7516 fix->next = new_fix;
7518 return new_fix;
7521 /* Record that there is a natural barrier in the insn stream at
7522 ADDRESS. */
7523 static void
7524 push_minipool_barrier (rtx insn, HOST_WIDE_INT address)
7526 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
7528 fix->insn = insn;
7529 fix->address = address;
7531 fix->next = NULL;
7532 if (minipool_fix_head != NULL)
7533 minipool_fix_tail->next = fix;
7534 else
7535 minipool_fix_head = fix;
7537 minipool_fix_tail = fix;
7540 /* Record INSN, which will need fixing up to load a value from the
7541 minipool. ADDRESS is the offset of the insn from the start of the
7542 function; LOC is a pointer to the part of the insn which requires
7543 fixing; VALUE is the constant that must be loaded, which is of type
7544 MODE. */
7545 static void
7546 push_minipool_fix (rtx insn, HOST_WIDE_INT address, rtx *loc,
7547 enum machine_mode mode, rtx value)
7549 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
7551 #ifdef AOF_ASSEMBLER
7552 /* PIC symbol references need to be converted into offsets into the
7553 based area. */
7554 /* XXX This shouldn't be done here. */
7555 if (flag_pic && GET_CODE (value) == SYMBOL_REF)
7556 value = aof_pic_entry (value);
7557 #endif /* AOF_ASSEMBLER */
7559 fix->insn = insn;
7560 fix->address = address;
7561 fix->loc = loc;
7562 fix->mode = mode;
7563 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
7564 fix->value = value;
7565 fix->forwards = get_attr_pool_range (insn);
7566 fix->backwards = get_attr_neg_pool_range (insn);
7567 fix->minipool = NULL;
7569 /* If an insn doesn't have a range defined for it, then it isn't
7570 expecting to be reworked by this code. Better to stop now than
7571 to generate duff assembly code. */
7572 gcc_assert (fix->forwards || fix->backwards);
7574 /* With AAPCS/iWMMXt enabled, the pool is aligned to an 8-byte boundary.
7575 So there might be an empty word before the start of the pool.
7576 Hence we reduce the forward range by 4 to allow for this
7577 possibility. */
7578 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size == 8)
7579 fix->forwards -= 4;
7581 if (dump_file)
7583 fprintf (dump_file,
7584 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
7585 GET_MODE_NAME (mode),
7586 INSN_UID (insn), (unsigned long) address,
7587 -1 * (long)fix->backwards, (long)fix->forwards);
7588 arm_print_value (dump_file, fix->value);
7589 fprintf (dump_file, "\n");
7592 /* Add it to the chain of fixes. */
7593 fix->next = NULL;
7595 if (minipool_fix_head != NULL)
7596 minipool_fix_tail->next = fix;
7597 else
7598 minipool_fix_head = fix;
7600 minipool_fix_tail = fix;
7603 /* Return the cost of synthesizing a 64-bit constant VAL inline.
7604 Returns the number of insns needed, or 99 if we don't know how to
7605 do it. */
int
7607 arm_const_double_inline_cost (rtx val)
7609 rtx lowpart, highpart;
7610 enum machine_mode mode;
7612 mode = GET_MODE (val);
7614 if (mode == VOIDmode)
7615 mode = DImode;
7617 gcc_assert (GET_MODE_SIZE (mode) == 8);
7619 lowpart = gen_lowpart (SImode, val);
7620 highpart = gen_highpart_mode (SImode, mode, val);
7622 gcc_assert (GET_CODE (lowpart) == CONST_INT);
7623 gcc_assert (GET_CODE (highpart) == CONST_INT);
7625 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
7626 NULL_RTX, NULL_RTX, 0, 0)
7627 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
7628 NULL_RTX, NULL_RTX, 0, 0));
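/* As an illustrative case, a DImode constant such as 0x100000001, whose
   low and high SImode parts are both the valid immediate 1, costs
   1 + 1 = 2 insns; the exact counts come from arm_gen_constant.  */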
7631 /* Return true if it is worthwhile to split a 64-bit constant into two
7632 32-bit operations. This is the case if optimizing for size, or
7633 if we have load delay slots, or if one 32-bit part can be done with
7634 a single data operation. */
7635 bool
7636 arm_const_double_by_parts (rtx val)
7638 enum machine_mode mode = GET_MODE (val);
7639 rtx part;
7641 if (optimize_size || arm_ld_sched)
7642 return true;
7644 if (mode == VOIDmode)
7645 mode = DImode;
7647 part = gen_highpart_mode (SImode, mode, val);
7649 gcc_assert (GET_CODE (part) == CONST_INT);
7651 if (const_ok_for_arm (INTVAL (part))
7652 || const_ok_for_arm (~INTVAL (part)))
7653 return true;
7655 part = gen_lowpart (SImode, val);
7657 gcc_assert (GET_CODE (part) == CONST_INT);
7659 if (const_ok_for_arm (INTVAL (part))
7660 || const_ok_for_arm (~INTVAL (part)))
7661 return true;
7663 return false;
7666 /* Scan INSN and note any of its operands that need fixing.
7667 If DO_PUSHES is false we do not actually push any of the fixups
7668 needed. The function returns TRUE if any fixups were needed/pushed.
7669 This is used by arm_memory_load_p(), which needs to know about loads
7670 of constants that will be converted into minipool loads. */
7671 static bool
7672 note_invalid_constants (rtx insn, HOST_WIDE_INT address, int do_pushes)
7674 bool result = false;
7675 int opno;
7677 extract_insn (insn);
7679 if (!constrain_operands (1))
7680 fatal_insn_not_found (insn);
7682 if (recog_data.n_alternatives == 0)
7683 return false;
7685 /* Fill in recog_op_alt with information about the constraints of
7686 this insn. */
7687 preprocess_constraints ();
7689 for (opno = 0; opno < recog_data.n_operands; opno++)
7691 /* Things we need to fix can only occur in inputs. */
7692 if (recog_data.operand_type[opno] != OP_IN)
7693 continue;
7695 /* If this alternative is a memory reference, then any mention
7696 of constants in this alternative is really to fool reload
7697 into allowing us to accept one there. We need to fix them up
7698 now so that we output the right code. */
7699 if (recog_op_alt[opno][which_alternative].memory_ok)
7701 rtx op = recog_data.operand[opno];
7703 if (CONSTANT_P (op))
7705 if (do_pushes)
7706 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
7707 recog_data.operand_mode[opno], op);
7708 result = true;
7710 else if (GET_CODE (op) == MEM
7711 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
7712 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
7714 if (do_pushes)
7716 rtx cop = avoid_constant_pool_reference (op);
7718 /* Casting the address of something to a mode narrower
7719 than a word can cause avoid_constant_pool_reference()
7720 to return the pool reference itself. That's no good to
7721 us here. Let's just hope that we can use the
7722 constant pool value directly. */
7723 if (op == cop)
7724 cop = get_pool_constant (XEXP (op, 0));
7726 push_minipool_fix (insn, address,
7727 recog_data.operand_loc[opno],
7728 recog_data.operand_mode[opno], cop);
7731 result = true;
7736 return result;
7739 /* GCC puts the pool in the wrong place for ARM, since we can only
7740 load addresses a limited distance around the pc. We do some
7741 special munging to move the constant pool values to the correct
7742 point in the code. */
7743 static void
7744 arm_reorg (void)
7746 rtx insn;
7747 HOST_WIDE_INT address = 0;
7748 Mfix * fix;
7750 minipool_fix_head = minipool_fix_tail = NULL;
7752 /* The first insn must always be a note, or the code below won't
7753 scan it properly. */
7754 insn = get_insns ();
7755 gcc_assert (GET_CODE (insn) == NOTE);
7757 /* Scan all the insns and record the operands that will need fixing. */
7758 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
7760 if (TARGET_CIRRUS_FIX_INVALID_INSNS
7761 && (arm_cirrus_insn_p (insn)
7762 || GET_CODE (insn) == JUMP_INSN
7763 || arm_memory_load_p (insn)))
7764 cirrus_reorg (insn);
7766 if (GET_CODE (insn) == BARRIER)
7767 push_minipool_barrier (insn, address);
7768 else if (INSN_P (insn))
7770 rtx table;
7772 note_invalid_constants (insn, address, true);
7773 address += get_attr_length (insn);
7775 /* If the insn is a vector jump, add the size of the table
7776 and skip the table. */
7777 if ((table = is_jump_table (insn)) != NULL)
7779 address += get_jump_table_size (table);
7780 insn = table;
7785 fix = minipool_fix_head;
7787 /* Now scan the fixups and perform the required changes. */
7788 while (fix)
7790 Mfix * ftmp;
7791 Mfix * fdel;
7792 Mfix * last_added_fix;
7793 Mfix * last_barrier = NULL;
7794 Mfix * this_fix;
7796 /* Skip any further barriers before the next fix. */
7797 while (fix && GET_CODE (fix->insn) == BARRIER)
7798 fix = fix->next;
7800 /* No more fixes. */
7801 if (fix == NULL)
7802 break;
7804 last_added_fix = NULL;
7806 for (ftmp = fix; ftmp; ftmp = ftmp->next)
7808 if (GET_CODE (ftmp->insn) == BARRIER)
7810 if (ftmp->address >= minipool_vector_head->max_address)
7811 break;
7813 last_barrier = ftmp;
7815 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
7816 break;
7818 last_added_fix = ftmp; /* Keep track of the last fix added. */
7821 /* If we found a barrier, drop back to that; any fixes that we
7822 could have reached but come after the barrier will now go in
7823 the next mini-pool. */
7824 if (last_barrier != NULL)
7826 /* Reduce the refcount for those fixes that won't go into this
7827 pool after all. */
7828 for (fdel = last_barrier->next;
7829 fdel && fdel != ftmp;
7830 fdel = fdel->next)
7832 fdel->minipool->refcount--;
7833 fdel->minipool = NULL;
7836 ftmp = last_barrier;
7838 else
7840 /* ftmp is the first fix that we can't fit into this pool and
7841 there are no natural barriers that we could use. Insert a
7842 new barrier in the code somewhere between the previous
7843 fix and this one, and arrange to jump around it. */
7844 HOST_WIDE_INT max_address;
7846 /* The last item on the list of fixes must be a barrier, so
7847 we can never run off the end of the list of fixes without
7848 last_barrier being set. */
7849 gcc_assert (ftmp);
7851 max_address = minipool_vector_head->max_address;
7852 /* Check that there isn't another fix that is in range that
7853 we couldn't fit into this pool because the pool was
7854 already too large: we need to put the pool before such an
7855 instruction. */
7856 if (ftmp->address < max_address)
7857 max_address = ftmp->address;
7859 last_barrier = create_fix_barrier (last_added_fix, max_address);
7862 assign_minipool_offsets (last_barrier);
7864 while (ftmp)
7866 if (GET_CODE (ftmp->insn) != BARRIER
7867 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
7868 == NULL))
7869 break;
7871 ftmp = ftmp->next;
7874 /* Scan over the fixes we have identified for this pool, fixing them
7875 up and adding the constants to the pool itself. */
7876 for (this_fix = fix; this_fix && ftmp != this_fix;
7877 this_fix = this_fix->next)
7878 if (GET_CODE (this_fix->insn) != BARRIER)
7880 rtx addr
7881 = plus_constant (gen_rtx_LABEL_REF (VOIDmode,
7882 minipool_vector_label),
7883 this_fix->minipool->offset);
7884 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
7887 dump_minipool (last_barrier->insn);
7888 fix = ftmp;
7891 /* From now on we must synthesize any constants that we can't handle
7892 directly. This can happen if the RTL gets split during final
7893 instruction generation. */
7894 after_arm_reorg = 1;
7896 /* Free the minipool memory. */
7897 obstack_free (&minipool_obstack, minipool_startobj);
7900 /* Routines to output assembly language. */
7902 /* If the rtx is one of the valid FPA immediate constants then return the
7903 string of its value. In this way we can ensure that valid double
7904 constants are generated even when cross-compiling. */
7905 const char *
7906 fp_immediate_constant (rtx x)
7908 REAL_VALUE_TYPE r;
7909 int i;
7911 if (!fp_consts_inited)
7912 init_fp_table ();
7914 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
7915 for (i = 0; i < 8; i++)
7916 if (REAL_VALUES_EQUAL (r, values_fp[i]))
7917 return strings_fp[i];
7919 gcc_unreachable ();
7922 /* As for fp_immediate_constant, but value is passed directly, not in rtx. */
7923 static const char *
7924 fp_const_from_val (REAL_VALUE_TYPE *r)
7926 int i;
7928 if (!fp_consts_inited)
7929 init_fp_table ();
7931 for (i = 0; i < 8; i++)
7932 if (REAL_VALUES_EQUAL (*r, values_fp[i]))
7933 return strings_fp[i];
7935 gcc_unreachable ();
7938 /* Output the operands of a LDM/STM instruction to STREAM.
7939 MASK is the ARM register set mask of which only bits 0-15 are important.
7940 REG is the base register, either the frame pointer or the stack pointer.
7941 INSTR is the possibly suffixed load or store instruction. */
7943 static void
7944 print_multi_reg (FILE *stream, const char *instr, unsigned reg,
7945 unsigned long mask)
7947 unsigned i;
7948 bool not_first = FALSE;
7950 fputc ('\t', stream);
7951 asm_fprintf (stream, instr, reg);
7952 fputs (", {", stream);
7954 for (i = 0; i <= LAST_ARM_REGNUM; i++)
7955 if (mask & (1 << i))
7957 if (not_first)
7958 fprintf (stream, ", ");
7960 asm_fprintf (stream, "%r", i);
7961 not_first = TRUE;
7964 fprintf (stream, "}\n");
7968 /* Output a FLDMX instruction to STREAM.
7969 BASE is the register containing the address.
7970 REG and COUNT specify the register range.
7971 Extra registers may be added to avoid hardware bugs. */
7973 static void
7974 arm_output_fldmx (FILE * stream, unsigned int base, int reg, int count)
7976 int i;
7978 /* Work around the ARM10 VFPr1 bug. */
7979 if (count == 2 && !arm_arch6)
7981 if (reg == 15)
7982 reg--;
7983 count++;
7986 fputc ('\t', stream);
7987 asm_fprintf (stream, "fldmfdx\t%r!, {", base);
7989 for (i = reg; i < reg + count; i++)
7991 if (i > reg)
7992 fputs (", ", stream);
7993 asm_fprintf (stream, "d%d", i);
7995 fputs ("}\n", stream);
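/* For instance, with BASE naming the stack pointer, REG == 8 and
   COUNT == 3, this emits something like "fldmfdx sp!, {d8, d9, d10}"
   (an illustration of the output format; the register name is produced
   by asm_fprintf's %r).  */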
8000 /* Output the assembly for a store multiple. */
8002 const char *
8003 vfp_output_fstmx (rtx * operands)
8005 char pattern[100];
8006 int p;
8007 int base;
8008 int i;
8010 strcpy (pattern, "fstmfdx\t%m0!, {%P1");
8011 p = strlen (pattern);
8013 gcc_assert (GET_CODE (operands[1]) == REG);
8015 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
8016 for (i = 1; i < XVECLEN (operands[2], 0); i++)
8018 p += sprintf (&pattern[p], ", d%d", base + i);
8020 strcpy (&pattern[p], "}");
8022 output_asm_insn (pattern, operands);
8023 return "";
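/* E.g. (hypothetical operands) with d7 in operands[1] and a
   three-element vector in operands[2], and assuming %m0 resolves to
   the stack pointer, the pattern built above is

	fstmfdx	sp!, {d7, d8, d9}  */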
8027 /* Emit RTL to save a block of VFP register pairs to the stack. Returns the
8028 number of bytes pushed. */
8030 static int
8031 vfp_emit_fstmx (int base_reg, int count)
8033 rtx par;
8034 rtx dwarf;
8035 rtx tmp, reg;
8036 int i;
8038 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
8039 register pairs are stored by a store multiple insn. We avoid this
8040 by pushing an extra pair. */
8041 if (count == 2 && !arm_arch6)
8043 if (base_reg == LAST_VFP_REGNUM - 3)
8044 base_reg -= 2;
8045 count++;
8048 /* ??? The frame layout is implementation defined. We describe
8049 standard format 1 (equivalent to a FSTMD insn and unused pad word).
8050 We really need some way of representing the whole block so that the
8051 unwinder can figure it out at runtime. */
8052 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
8053 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
8055 reg = gen_rtx_REG (DFmode, base_reg);
8056 base_reg += 2;
8058 XVECEXP (par, 0, 0)
8059 = gen_rtx_SET (VOIDmode,
8060 gen_rtx_MEM (BLKmode,
8061 gen_rtx_PRE_DEC (BLKmode, stack_pointer_rtx)),
8062 gen_rtx_UNSPEC (BLKmode,
8063 gen_rtvec (1, reg),
8064 UNSPEC_PUSH_MULT));
8066 tmp = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
8067 gen_rtx_PLUS (SImode, stack_pointer_rtx,
8068 GEN_INT (-(count * 8 + 4))));
8069 RTX_FRAME_RELATED_P (tmp) = 1;
8070 XVECEXP (dwarf, 0, 0) = tmp;
8072 tmp = gen_rtx_SET (VOIDmode,
8073 gen_rtx_MEM (DFmode, stack_pointer_rtx),
8074 reg);
8075 RTX_FRAME_RELATED_P (tmp) = 1;
8076 XVECEXP (dwarf, 0, 1) = tmp;
8078 for (i = 1; i < count; i++)
8080 reg = gen_rtx_REG (DFmode, base_reg);
8081 base_reg += 2;
8082 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
8084 tmp = gen_rtx_SET (VOIDmode,
8085 gen_rtx_MEM (DFmode,
8086 gen_rtx_PLUS (SImode,
8087 stack_pointer_rtx,
8088 GEN_INT (i * 8))),
8089 reg);
8090 RTX_FRAME_RELATED_P (tmp) = 1;
8091 XVECEXP (dwarf, 0, i + 1) = tmp;
8094 par = emit_insn (par);
8095 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
8096 REG_NOTES (par));
8097 RTX_FRAME_RELATED_P (par) = 1;
8099 return count * 8 + 4;
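/* E.g. (a hypothetical call) vfp_emit_fstmx (FIRST_VFP_REGNUM, 2) on
   a pre-ARMv6 core bumps COUNT to 3 above, pushes d0-d2 and returns
   3 * 8 + 4 = 28: three doubleword registers plus the FSTMX format
   pad word.  */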
8103 /* Output a 'call' insn. */
8104 const char *
8105 output_call (rtx *operands)
8107 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
8109 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
8110 if (REGNO (operands[0]) == LR_REGNUM)
8112 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
8113 output_asm_insn ("mov%?\t%0, %|lr", operands);
8116 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
8118 if (TARGET_INTERWORK || arm_arch4t)
8119 output_asm_insn ("bx%?\t%0", operands);
8120 else
8121 output_asm_insn ("mov%?\t%|pc, %0", operands);
8123 return "";
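/* For instance (a hypothetical operand): a call through r2 on a
   pre-v4t, non-interworking target comes out as

	mov	lr, pc
	mov	pc, r2  */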
8126 /* Output a 'call' insn where the target address is held in memory. */
8127 const char *
8128 output_call_mem (rtx *operands)
8130 if (TARGET_INTERWORK && !arm_arch5)
8132 output_asm_insn ("ldr%?\t%|ip, %0", operands);
8133 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
8134 output_asm_insn ("bx%?\t%|ip", operands);
8136 else if (regno_use_in (LR_REGNUM, operands[0]))
8138 /* LR is used in the memory address. We load the address in the
8139 first instruction. It's safe to use IP as the target of the
8140 load since the call will kill it anyway. */
8141 output_asm_insn ("ldr%?\t%|ip, %0", operands);
8142 if (arm_arch5)
8143 output_asm_insn ("blx%?\t%|ip", operands);
8144 else
8146 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
8147 if (arm_arch4t)
8148 output_asm_insn ("bx%?\t%|ip", operands);
8149 else
8150 output_asm_insn ("mov%?\t%|pc, %|ip", operands);
8153 else
8155 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
8156 output_asm_insn ("ldr%?\t%|pc, %0", operands);
8159 return "";
8163 /* Output a move from arm registers to an fpa register.
8164 OPERANDS[0] is an fpa register.
8165 OPERANDS[1] is the first register of an arm register pair. */
8166 const char *
8167 output_mov_long_double_fpa_from_arm (rtx *operands)
8169 int arm_reg0 = REGNO (operands[1]);
8170 rtx ops[3];
8172 gcc_assert (arm_reg0 != IP_REGNUM);
8174 ops[0] = gen_rtx_REG (SImode, arm_reg0);
8175 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
8176 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
8178 output_asm_insn ("stm%?fd\t%|sp!, {%0, %1, %2}", ops);
8179 output_asm_insn ("ldf%?e\t%0, [%|sp], #12", operands);
8181 return "";
8184 /* Output a move from an fpa register to arm registers.
8185 OPERANDS[0] is the first register of an arm register pair.
8186 OPERANDS[1] is an fpa register. */
8187 const char *
8188 output_mov_long_double_arm_from_fpa (rtx *operands)
8190 int arm_reg0 = REGNO (operands[0]);
8191 rtx ops[3];
8193 gcc_assert (arm_reg0 != IP_REGNUM);
8195 ops[0] = gen_rtx_REG (SImode, arm_reg0);
8196 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
8197 ops[2] = gen_rtx_REG (SImode, 2 + arm_reg0);
8199 output_asm_insn ("stf%?e\t%1, [%|sp, #-12]!", operands);
8200 output_asm_insn ("ldm%?fd\t%|sp!, {%0, %1, %2}", ops);
8201 return "";
8204 /* Output a move of a long double from arm registers to arm registers.
8205 OPERANDS[0] is the destination.
8206 OPERANDS[1] is the source. */
8207 const char *
8208 output_mov_long_double_arm_from_arm (rtx *operands)
8210 /* We have to be careful here because the two might overlap. */
8211 int dest_start = REGNO (operands[0]);
8212 int src_start = REGNO (operands[1]);
8213 rtx ops[2];
8214 int i;
8216 if (dest_start < src_start)
8218 for (i = 0; i < 3; i++)
8220 ops[0] = gen_rtx_REG (SImode, dest_start + i);
8221 ops[1] = gen_rtx_REG (SImode, src_start + i);
8222 output_asm_insn ("mov%?\t%0, %1", ops);
8225 else
8227 for (i = 2; i >= 0; i--)
8229 ops[0] = gen_rtx_REG (SImode, dest_start + i);
8230 ops[1] = gen_rtx_REG (SImode, src_start + i);
8231 output_asm_insn ("mov%?\t%0, %1", ops);
8235 return "";
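/* E.g. (hypothetical registers) copying {r0, r1, r2} to {r1, r2, r3}:
   the destination starts above the source, so the loop above copies
   downwards (mov r3, r2; mov r2, r1; mov r1, r0) and no source
   register is clobbered before it has been read.  */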
8239 /* Output a move from arm registers to an fpa register.
8240 OPERANDS[0] is an fpa register.
8241 OPERANDS[1] is the first register of an arm register pair. */
8242 const char *
8243 output_mov_double_fpa_from_arm (rtx *operands)
8245 int arm_reg0 = REGNO (operands[1]);
8246 rtx ops[2];
8248 gcc_assert (arm_reg0 != IP_REGNUM);
8250 ops[0] = gen_rtx_REG (SImode, arm_reg0);
8251 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
8252 output_asm_insn ("stm%?fd\t%|sp!, {%0, %1}", ops);
8253 output_asm_insn ("ldf%?d\t%0, [%|sp], #8", operands);
8254 return "";
8257 /* Output a move from an fpa register to arm registers.
8258 OPERANDS[0] is the first register of an arm register pair.
8259 OPERANDS[1] is an fpa register. */
8260 const char *
8261 output_mov_double_arm_from_fpa (rtx *operands)
8263 int arm_reg0 = REGNO (operands[0]);
8264 rtx ops[2];
8266 gcc_assert (arm_reg0 != IP_REGNUM);
8268 ops[0] = gen_rtx_REG (SImode, arm_reg0);
8269 ops[1] = gen_rtx_REG (SImode, 1 + arm_reg0);
8270 output_asm_insn ("stf%?d\t%1, [%|sp, #-8]!", operands);
8271 output_asm_insn ("ldm%?fd\t%|sp!, {%0, %1}", ops);
8272 return "";
8275 /* Output a move between double words.
8276 It must be REG<-REG, REG<-CONST_DOUBLE, REG<-CONST_INT, REG<-MEM
8277 or MEM<-REG and all MEMs must be offsettable addresses. */
8278 const char *
8279 output_move_double (rtx *operands)
8281 enum rtx_code code0 = GET_CODE (operands[0]);
8282 enum rtx_code code1 = GET_CODE (operands[1]);
8283 rtx otherops[3];
8285 if (code0 == REG)
8287 int reg0 = REGNO (operands[0]);
8289 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
8291 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
8293 switch (GET_CODE (XEXP (operands[1], 0)))
8295 case REG:
8296 output_asm_insn ("ldm%?ia\t%m1, %M0", operands);
8297 break;
8299 case PRE_INC:
8300 gcc_assert (TARGET_LDRD);
8301 output_asm_insn ("ldr%?d\t%0, [%m1, #8]!", operands);
8302 break;
8304 case PRE_DEC:
8305 output_asm_insn ("ldm%?db\t%m1!, %M0", operands);
8306 break;
8308 case POST_INC:
8309 output_asm_insn ("ldm%?ia\t%m1!, %M0", operands);
8310 break;
8312 case POST_DEC:
8313 gcc_assert (TARGET_LDRD);
8314 output_asm_insn ("ldr%?d\t%0, [%m1], #-8", operands);
8315 break;
8317 case PRE_MODIFY:
8318 case POST_MODIFY:
8319 otherops[0] = operands[0];
8320 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
8321 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
8323 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
8325 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
8327 /* Registers overlap so split out the increment. */
8328 output_asm_insn ("add%?\t%1, %1, %2", otherops);
8329 output_asm_insn ("ldr%?d\t%0, [%1] @split", otherops);
8331 else
8332 output_asm_insn ("ldr%?d\t%0, [%1, %2]!", otherops);
8334 else
8336 /* We only allow constant increments, so this is safe. */
8337 output_asm_insn ("ldr%?d\t%0, [%1], %2", otherops);
8339 break;
8341 case LABEL_REF:
8342 case CONST:
8343 output_asm_insn ("adr%?\t%0, %1", operands);
8344 output_asm_insn ("ldm%?ia\t%0, %M0", operands);
8345 break;
8347 default:
8348 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
8349 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
8351 otherops[0] = operands[0];
8352 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
8353 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
8355 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
8357 if (GET_CODE (otherops[2]) == CONST_INT)
8359 switch ((int) INTVAL (otherops[2]))
8361 case -8:
8362 output_asm_insn ("ldm%?db\t%1, %M0", otherops);
8363 return "";
8364 case -4:
8365 output_asm_insn ("ldm%?da\t%1, %M0", otherops);
8366 return "";
8367 case 4:
8368 output_asm_insn ("ldm%?ib\t%1, %M0", otherops);
8369 return "";
8372 if (TARGET_LDRD
8373 && (GET_CODE (otherops[2]) == REG
8374 || (GET_CODE (otherops[2]) == CONST_INT
8375 && INTVAL (otherops[2]) > -256
8376 && INTVAL (otherops[2]) < 256)))
8378 if (reg_overlap_mentioned_p (otherops[0],
8379 otherops[2]))
8381 /* Swap base and index registers over to
8382 avoid a conflict. */
8383 otherops[1] = XEXP (XEXP (operands[1], 0), 1);
8384 otherops[2] = XEXP (XEXP (operands[1], 0), 0);
8387 /* If both registers conflict, it will usually
8388 have been fixed by a splitter. */
8389 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
8391 output_asm_insn ("add%?\t%1, %1, %2", otherops);
8392 output_asm_insn ("ldr%?d\t%0, [%1]",
8393 otherops);
8395 else
8396 output_asm_insn ("ldr%?d\t%0, [%1, %2]", otherops);
8397 return "";
8400 if (GET_CODE (otherops[2]) == CONST_INT)
8402 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
8403 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
8404 else
8405 output_asm_insn ("add%?\t%0, %1, %2", otherops);
8407 else
8408 output_asm_insn ("add%?\t%0, %1, %2", otherops);
8410 else
8411 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
8413 return "ldm%?ia\t%0, %M0";
8415 else
8417 otherops[1] = adjust_address (operands[1], SImode, 4);
8418 /* Take care of overlapping base/data reg. */
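/* E.g. (a hypothetical case) a doubleword load into {r0, r1} from
   [r0]: the high word is fetched into r1 first, then the low word
   into r0, so the base register is still intact for the second
   load.  */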
8419 if (reg_mentioned_p (operands[0], operands[1]))
8421 output_asm_insn ("ldr%?\t%0, %1", otherops);
8422 output_asm_insn ("ldr%?\t%0, %1", operands);
8424 else
8426 output_asm_insn ("ldr%?\t%0, %1", operands);
8427 output_asm_insn ("ldr%?\t%0, %1", otherops);
8432 else
8434 /* Constraints should ensure this. */
8435 gcc_assert (code0 == MEM && code1 == REG);
8436 gcc_assert (REGNO (operands[1]) != IP_REGNUM);
8438 switch (GET_CODE (XEXP (operands[0], 0)))
8440 case REG:
8441 output_asm_insn ("stm%?ia\t%m0, %M1", operands);
8442 break;
8444 case PRE_INC:
8445 gcc_assert (TARGET_LDRD);
8446 output_asm_insn ("str%?d\t%1, [%m0, #8]!", operands);
8447 break;
8449 case PRE_DEC:
8450 output_asm_insn ("stm%?db\t%m0!, %M1", operands);
8451 break;
8453 case POST_INC:
8454 output_asm_insn ("stm%?ia\t%m0!, %M1", operands);
8455 break;
8457 case POST_DEC:
8458 gcc_assert (TARGET_LDRD);
8459 output_asm_insn ("str%?d\t%1, [%m0], #-8", operands);
8460 break;
8462 case PRE_MODIFY:
8463 case POST_MODIFY:
8464 otherops[0] = operands[1];
8465 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
8466 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
8468 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
8469 output_asm_insn ("str%?d\t%0, [%1, %2]!", otherops);
8470 else
8471 output_asm_insn ("str%?d\t%0, [%1], %2", otherops);
8472 break;
8474 case PLUS:
8475 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
8476 if (GET_CODE (otherops[2]) == CONST_INT)
8478 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
8480 case -8:
8481 output_asm_insn ("stm%?db\t%m0, %M1", operands);
8482 return "";
8484 case -4:
8485 output_asm_insn ("stm%?da\t%m0, %M1", operands);
8486 return "";
8488 case 4:
8489 output_asm_insn ("stm%?ib\t%m0, %M1", operands);
8490 return "";
8493 if (TARGET_LDRD
8494 && (GET_CODE (otherops[2]) == REG
8495 || (GET_CODE (otherops[2]) == CONST_INT
8496 && INTVAL (otherops[2]) > -256
8497 && INTVAL (otherops[2]) < 256)))
8499 otherops[0] = operands[1];
8500 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
8501 output_asm_insn ("str%?d\t%0, [%1, %2]", otherops);
8502 return "";
8504 /* Fall through */
8506 default:
8507 otherops[0] = adjust_address (operands[0], SImode, 4);
8508 otherops[1] = gen_rtx_REG (SImode, 1 + REGNO (operands[1]));
8509 output_asm_insn ("str%?\t%1, %0", operands);
8510 output_asm_insn ("str%?\t%1, %0", otherops);
8514 return "";
8517 /* Output an ADD r, s, #n where n may be too big for one instruction.
8518 If the constant is zero and the source and destination are the same register, output nothing.
8519 const char *
8520 output_add_immediate (rtx *operands)
8522 HOST_WIDE_INT n = INTVAL (operands[2]);
8524 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
8526 if (n < 0)
8527 output_multi_immediate (operands,
8528 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
8529 -n);
8530 else
8531 output_multi_immediate (operands,
8532 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
8536 return "";
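/* For example (hypothetical operands): adding #257 to r1 with r0 as
   the destination cannot be done in one instruction, because 257 is
   not an 8-bit value rotated by an even amount; output_multi_immediate
   below splits it into

	add	r0, r1, #1
	add	r0, r0, #256

   and a negative constant selects the "sub" templates instead.  */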
8539 /* Output a multiple immediate operation.
8540 OPERANDS is the vector of operands referred to in the output patterns.
8541 INSTR1 is the output pattern to use for the first constant.
8542 INSTR2 is the output pattern to use for subsequent constants.
8543 IMMED_OP is the index of the constant slot in OPERANDS.
8544 N is the constant value. */
8545 static const char *
8546 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
8547 int immed_op, HOST_WIDE_INT n)
8549 #if HOST_BITS_PER_WIDE_INT > 32
8550 n &= 0xffffffff;
8551 #endif
8553 if (n == 0)
8555 /* Quick and easy output. */
8556 operands[immed_op] = const0_rtx;
8557 output_asm_insn (instr1, operands);
8559 else
8561 int i;
8562 const char * instr = instr1;
8564 /* Note that n is never zero here (which would give no output). Stepping I two bits at a time keeps each extracted 8-bit chunk on an even bit boundary, so every constant emitted below is a valid ARM rotated immediate. */
8565 for (i = 0; i < 32; i += 2)
8567 if (n & (3 << i))
8569 operands[immed_op] = GEN_INT (n & (255 << i));
8570 output_asm_insn (instr, operands);
8571 instr = instr2;
8572 i += 6;
8577 return "";
8580 /* Return the appropriate ARM instruction for the operation code.
8581 The returned result should not be overwritten. OP is the rtx of the
8582 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
8583 was shifted. */
8584 const char *
8585 arithmetic_instr (rtx op, int shift_first_arg)
8587 switch (GET_CODE (op))
8589 case PLUS:
8590 return "add";
8592 case MINUS:
8593 return shift_first_arg ? "rsb" : "sub";
8595 case IOR:
8596 return "orr";
8598 case XOR:
8599 return "eor";
8601 case AND:
8602 return "and";
8604 default:
8605 gcc_unreachable ();
8609 /* Ensure valid constant shifts and return the appropriate shift mnemonic
8610 for the operation code. The returned result should not be overwritten.
8611 OP is the rtx code of the shift.
8612 On exit, *AMOUNTP will be -1 if the shift is by a register; otherwise
8613 it holds the constant shift amount. */
8614 static const char *
8615 shift_op (rtx op, HOST_WIDE_INT *amountp)
8617 const char * mnem;
8618 enum rtx_code code = GET_CODE (op);
8620 switch (GET_CODE (XEXP (op, 1)))
8622 case REG:
8623 case SUBREG:
8624 *amountp = -1;
8625 break;
8627 case CONST_INT:
8628 *amountp = INTVAL (XEXP (op, 1));
8629 break;
8631 default:
8632 gcc_unreachable ();
8635 switch (code)
8637 case ASHIFT:
8638 mnem = "asl";
8639 break;
8641 case ASHIFTRT:
8642 mnem = "asr";
8643 break;
8645 case LSHIFTRT:
8646 mnem = "lsr";
8647 break;
8649 case ROTATE:
8650 gcc_assert (*amountp != -1);
8651 *amountp = 32 - *amountp;
8653 /* Fall through. */
8655 case ROTATERT:
8656 mnem = "ror";
8657 break;
8659 case MULT:
8660 /* We never have to worry about the amount being other than a
8661 power of 2, since this case can never be reloaded from a reg. */
8662 gcc_assert (*amountp != -1);
8663 *amountp = int_log2 (*amountp);
8664 return "asl";
8666 default:
8667 gcc_unreachable ();
8670 if (*amountp != -1)
8672 /* This is not 100% correct, but follows from the desire to merge
8673 multiplication by a power of 2 with the recognizer for a
8674 shift. >=32 is not a valid shift for "asl", so we must try and
8675 output a shift that produces the correct arithmetical result.
8676 Using lsr #32 is identical except for the fact that the carry bit
8677 is not set correctly if we set the flags; but we never use the
8678 carry bit from such an operation, so we can ignore that. */
8679 if (code == ROTATERT)
8680 /* Rotate is just modulo 32. */
8681 *amountp &= 31;
8682 else if (*amountp != (*amountp & 31))
8684 if (code == ASHIFT)
8685 mnem = "lsr";
8686 *amountp = 32;
8689 /* Shifts of 0 are no-ops. */
8690 if (*amountp == 0)
8691 return NULL;
8694 return mnem;
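/* For illustration (values chosen arbitrarily): (mult x 8) comes back
   as "asl" with *AMOUNTP set to 3, while (ashift x 34) is downgraded
   to "lsr" with *AMOUNTP forced to 32, per the comment above.  */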
8697 /* Obtain the shift count from the POWER of two. */
8699 static HOST_WIDE_INT
8700 int_log2 (HOST_WIDE_INT power)
8702 HOST_WIDE_INT shift = 0;
8704 while ((((HOST_WIDE_INT) 1 << shift) & power) == 0)
8706 gcc_assert (shift <= 31);
8707 shift++;
8710 return shift;
8713 /* Output a .ascii pseudo-op, keeping track of lengths. This is
8714 because /bin/as is horribly restrictive. The judgement about
8715 whether or not each character is 'printable' (and can be output as
8716 is) or not (and must be printed with an octal escape) must be made
8717 with reference to the *host* character set -- the situation is
8718 similar to that discussed in the comments above pp_c_char in
8719 c-pretty-print.c. */
8721 #define MAX_ASCII_LEN 51
8723 void
8724 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
8726 int i;
8727 int len_so_far = 0;
8729 fputs ("\t.ascii\t\"", stream);
8731 for (i = 0; i < len; i++)
8733 int c = p[i];
8735 if (len_so_far >= MAX_ASCII_LEN)
8737 fputs ("\"\n\t.ascii\t\"", stream);
8738 len_so_far = 0;
8741 if (ISPRINT (c))
8743 if (c == '\\' || c == '\"')
8745 putc ('\\', stream);
8746 len_so_far++;
8748 putc (c, stream);
8749 len_so_far++;
8751 else
8753 fprintf (stream, "\\%03o", c);
8754 len_so_far += 4;
8758 fputs ("\"\n", stream);
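/* E.g. (hypothetical input) the four bytes 'a', '"', 'b', '\n' are
   emitted as

	.ascii	"a\"b\012"

   with the quote backslash-escaped and the unprintable newline
   written as a three-digit octal escape.  */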
8761 /* Compute the register save mask for registers 0 through 12
8762 inclusive. This code is used by arm_compute_save_reg_mask. */
8764 static unsigned long
8765 arm_compute_save_reg0_reg12_mask (void)
8767 unsigned long func_type = arm_current_func_type ();
8768 unsigned long save_reg_mask = 0;
8769 unsigned int reg;
8771 if (IS_INTERRUPT (func_type))
8773 unsigned int max_reg;
8774 /* Interrupt functions must not corrupt any registers,
8775 even call clobbered ones. If this is a leaf function
8776 we can just examine the registers used by the RTL, but
8777 otherwise we have to assume that whatever function is
8778 called might clobber anything, and so we have to save
8779 all the call-clobbered registers as well. */
8780 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
8781 /* FIQ handlers have registers r8 - r12 banked, so
8782 we only need to check r0 - r7. Normal ISRs only
8783 bank r14 and r15, so we must check up to r12.
8784 r13 is the stack pointer which is always preserved,
8785 so we do not need to consider it here. */
8786 max_reg = 7;
8787 else
8788 max_reg = 12;
8790 for (reg = 0; reg <= max_reg; reg++)
8791 if (regs_ever_live[reg]
8792 || (! current_function_is_leaf && call_used_regs [reg]))
8793 save_reg_mask |= (1 << reg);
8795 /* Also save the pic base register if necessary. */
8796 if (flag_pic
8797 && !TARGET_SINGLE_PIC_BASE
8798 && current_function_uses_pic_offset_table)
8799 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
8801 else
8803 /* In the normal case we only need to save those registers
8804 which are call saved and which are used by this function. */
8805 for (reg = 0; reg <= 10; reg++)
8806 if (regs_ever_live[reg] && ! call_used_regs [reg])
8807 save_reg_mask |= (1 << reg);
8809 /* Handle the frame pointer as a special case. */
8810 if (! TARGET_APCS_FRAME
8811 && ! frame_pointer_needed
8812 && regs_ever_live[HARD_FRAME_POINTER_REGNUM]
8813 && ! call_used_regs[HARD_FRAME_POINTER_REGNUM])
8814 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
8816 /* If we aren't loading the PIC register,
8817 don't stack it even though it may be live. */
8818 if (flag_pic
8819 && !TARGET_SINGLE_PIC_BASE
8820 && (regs_ever_live[PIC_OFFSET_TABLE_REGNUM]
8821 || current_function_uses_pic_offset_table))
8822 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
8825 /* Save registers so the exception handler can modify them. */
8826 if (current_function_calls_eh_return)
8828 unsigned int i;
8830 for (i = 0; ; i++)
8832 reg = EH_RETURN_DATA_REGNO (i);
8833 if (reg == INVALID_REGNUM)
8834 break;
8835 save_reg_mask |= 1 << reg;
8839 return save_reg_mask;
8842 /* Compute a bit mask of which registers need to be
8843 saved on the stack for the current function. */
8845 static unsigned long
8846 arm_compute_save_reg_mask (void)
8848 unsigned int save_reg_mask = 0;
8849 unsigned long func_type = arm_current_func_type ();
8851 if (IS_NAKED (func_type))
8852 /* This should never really happen. */
8853 return 0;
8855 /* If we are creating a stack frame, then we must save the frame pointer,
8856 IP (which will hold the old stack pointer), LR and the PC. */
8857 if (frame_pointer_needed)
8858 save_reg_mask |=
8859 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
8860 | (1 << IP_REGNUM)
8861 | (1 << LR_REGNUM)
8862 | (1 << PC_REGNUM);
8864 /* Volatile functions do not return, so there
8865 is no need to save any other registers. */
8866 if (IS_VOLATILE (func_type))
8867 return save_reg_mask;
8869 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
8871 /* Decide if we need to save the link register.
8872 Interrupt routines have their own banked link register,
8873 so they never need to save it.
8874 Otherwise if we do not use the link register we do not need to save
8875 it. If we are pushing other registers onto the stack however, we
8876 can save an instruction in the epilogue by pushing the link register
8877 now and then popping it back into the PC. This incurs extra memory
8878 accesses though, so we only do it when optimizing for size, and only
8879 if we know that we will not need a fancy return sequence. */
8880 if (regs_ever_live [LR_REGNUM]
8881 || (save_reg_mask
8882 && optimize_size
8883 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
8884 && !current_function_calls_eh_return))
8885 save_reg_mask |= 1 << LR_REGNUM;
8887 if (cfun->machine->lr_save_eliminated)
8888 save_reg_mask &= ~ (1 << LR_REGNUM);
8890 if (TARGET_REALLY_IWMMXT
8891 && ((bit_count (save_reg_mask)
8892 + ARM_NUM_INTS (current_function_pretend_args_size)) % 2) != 0)
8894 unsigned int reg;
8896 /* The total number of registers that are going to be pushed
8897 onto the stack is odd. We need to ensure that the stack
8898 is 64-bit aligned before we start to save iWMMXt registers,
8899 and also before we start to create locals. (A local variable
8900 might be a double or long long which we will load/store using
8901 an iWMMXt instruction). Therefore we need to push another
8902 ARM register, so that the stack will be 64-bit aligned. We
8903 try to avoid using the arg registers (r0 - r3) as they might be
8904 used to pass values in a tail call. */
8905 for (reg = 4; reg <= 12; reg++)
8906 if ((save_reg_mask & (1 << reg)) == 0)
8907 break;
8909 if (reg <= 12)
8910 save_reg_mask |= (1 << reg);
8911 else
8913 cfun->machine->sibcall_blocked = 1;
8914 save_reg_mask |= (1 << 3);
8918 return save_reg_mask;
8922 /* Compute a bit mask of which registers need to be
8923 saved on the stack for the current function. */
8924 static unsigned long
8925 thumb_compute_save_reg_mask (void)
8927 unsigned long mask;
8928 unsigned reg;
8930 mask = 0;
8931 for (reg = 0; reg < 12; reg ++)
8932 if (regs_ever_live[reg] && !call_used_regs[reg])
8933 mask |= 1 << reg;
8935 if (flag_pic && !TARGET_SINGLE_PIC_BASE)
8936 mask |= (1 << PIC_OFFSET_TABLE_REGNUM);
8938 if (TARGET_SINGLE_PIC_BASE)
8939 mask &= ~(1 << arm_pic_register);
8941 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
8942 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
8943 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
8945 /* LR will also be pushed if any lo regs are pushed. */
8946 if (mask & 0xff || thumb_force_lr_save ())
8947 mask |= (1 << LR_REGNUM);
8949 /* Make sure we have a low work register if we need one.
8950 We will need one if we are going to push a high register,
8951 but we are not currently intending to push a low register. */
8952 if ((mask & 0xff) == 0
8953 && ((mask & 0x0f00) || TARGET_BACKTRACE))
8955 /* Use thumb_find_work_register to choose which register
8956 we will use. If the register is live then we will
8957 have to push it. Use LAST_LO_REGNUM as our fallback
8958 choice for the register to select. */
8959 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
8961 if (! call_used_regs[reg])
8962 mask |= 1 << reg;
8965 return mask;
8969 /* Return the number of bytes required to save VFP registers. */
8970 static int
8971 arm_get_vfp_saved_size (void)
8973 unsigned int regno;
8974 int count;
8975 int saved;
8977 saved = 0;
8978 /* Space for saved VFP registers. */
8979 if (TARGET_HARD_FLOAT && TARGET_VFP)
8981 count = 0;
8982 for (regno = FIRST_VFP_REGNUM;
8983 regno < LAST_VFP_REGNUM;
8984 regno += 2)
8986 if ((!regs_ever_live[regno] || call_used_regs[regno])
8987 && (!regs_ever_live[regno + 1] || call_used_regs[regno + 1]))
8989 if (count > 0)
8991 /* Workaround ARM10 VFPr1 bug. */
8992 if (count == 2 && !arm_arch6)
8993 count++;
8994 saved += count * 8 + 4;
8996 count = 0;
8998 else
8999 count++;
9001 if (count > 0)
9003 if (count == 2 && !arm_arch6)
9004 count++;
9005 saved += count * 8 + 4;
9008 return saved;
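/* Worked example (hypothetical liveness): if d8 and d12 are the only
   call-saved VFP registers in use, they form two one-register blocks
   costing 2 * (1 * 8 + 4) = 24 bytes; if instead d8-d9 are live on a
   pre-ARMv6 core, the erratum workaround stores three registers and
   the cost is 3 * 8 + 4 = 28 bytes.  */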
9012 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
9013 everything bar the final return instruction. */
9014 const char *
9015 output_return_instruction (rtx operand, int really_return, int reverse)
9017 char conditional[10];
9018 char instr[100];
9019 unsigned reg;
9020 unsigned long live_regs_mask;
9021 unsigned long func_type;
9022 arm_stack_offsets *offsets;
9024 func_type = arm_current_func_type ();
9026 if (IS_NAKED (func_type))
9027 return "";
9029 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
9031 /* If this function was declared non-returning, and we have
9032 found a tail call, then we have to trust that the called
9033 function won't return. */
9034 if (really_return)
9036 rtx ops[2];
9038 /* Otherwise, trap an attempted return by aborting. */
9039 ops[0] = operand;
9040 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
9041 : "abort");
9042 assemble_external_libcall (ops[1]);
9043 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
9046 return "";
9049 gcc_assert (!current_function_calls_alloca || really_return);
9051 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
9053 return_used_this_function = 1;
9055 live_regs_mask = arm_compute_save_reg_mask ();
9057 if (live_regs_mask)
9059 const char * return_reg;
9061 /* If we do not have any special requirements for function exit
9062 (e.g. interworking, or ISR) then we can load the return address
9063 directly into the PC. Otherwise we must load it into LR. */
9064 if (really_return
9065 && ! TARGET_INTERWORK)
9066 return_reg = reg_names[PC_REGNUM];
9067 else
9068 return_reg = reg_names[LR_REGNUM];
9070 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
9072 /* There are three possible reasons for the IP register
9073 being saved: 1) a stack frame was created, in which case
9074 IP contains the old stack pointer; 2) an ISR routine
9075 corrupted it; or 3) it was saved to align the stack on
9076 iWMMXt. In case 1, restore IP into SP; otherwise just
9077 restore IP. */
9078 if (frame_pointer_needed)
9080 live_regs_mask &= ~ (1 << IP_REGNUM);
9081 live_regs_mask |= (1 << SP_REGNUM);
9083 else
9084 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
9087 /* On some ARM architectures it is faster to use LDR rather than
9088 LDM to load a single register. On other architectures, the
9089 cost is the same. In 26 bit mode, or for exception handlers,
9090 we have to use LDM to load the PC so that the CPSR is also
9091 restored. */
9092 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
9093 if (live_regs_mask == (1U << reg))
9094 break;
9096 if (reg <= LAST_ARM_REGNUM
9097 && (reg != LR_REGNUM
9098 || ! really_return
9099 || ! IS_INTERRUPT (func_type)))
9101 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
9102 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
9104 else
9106 char *p;
9107 int first = 1;
9109 /* Generate the load multiple instruction to restore the
9110 registers. Note we can get here, even if
9111 frame_pointer_needed is true, but only if sp already
9112 points to the base of the saved core registers. */
9113 if (live_regs_mask & (1 << SP_REGNUM))
9115 unsigned HOST_WIDE_INT stack_adjust;
9117 offsets = arm_get_frame_offsets ();
9118 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
9119 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
9121 if (stack_adjust && arm_arch5)
9122 sprintf (instr, "ldm%sib\t%%|sp, {", conditional);
9123 else
9125 /* If we can't use ldmib (SA110 bug),
9126 then try to pop r3 instead. */
9127 if (stack_adjust)
9128 live_regs_mask |= 1 << 3;
9129 sprintf (instr, "ldm%sfd\t%%|sp, {", conditional);
9132 else
9133 sprintf (instr, "ldm%sfd\t%%|sp!, {", conditional);
9135 p = instr + strlen (instr);
9137 for (reg = 0; reg <= SP_REGNUM; reg++)
9138 if (live_regs_mask & (1 << reg))
9140 int l = strlen (reg_names[reg]);
9142 if (first)
9143 first = 0;
9144 else
9146 memcpy (p, ", ", 2);
9147 p += 2;
9150 memcpy (p, "%|", 2);
9151 memcpy (p + 2, reg_names[reg], l);
9152 p += l + 2;
9155 if (live_regs_mask & (1 << LR_REGNUM))
9157 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
9158 /* If returning from an interrupt, restore the CPSR. */
9159 if (IS_INTERRUPT (func_type))
9160 strcat (p, "^");
9162 else
9163 strcpy (p, "}");
9166 output_asm_insn (instr, & operand);
9168 /* See if we need to generate an extra instruction to
9169 perform the actual function return. */
9170 if (really_return
9171 && func_type != ARM_FT_INTERWORKED
9172 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
9174 /* The return has already been handled
9175 by loading the LR into the PC. */
9176 really_return = 0;
9180 if (really_return)
9182 switch ((int) ARM_FUNC_TYPE (func_type))
9184 case ARM_FT_ISR:
9185 case ARM_FT_FIQ:
9186 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
9187 break;
9189 case ARM_FT_INTERWORKED:
9190 sprintf (instr, "bx%s\t%%|lr", conditional);
9191 break;
9193 case ARM_FT_EXCEPTION:
9194 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
9195 break;
9197 default:
9198 /* Use bx if it's available. */
9199 if (arm_arch5 || arm_arch4t)
9200 sprintf (instr, "bx%s\t%%|lr", conditional);
9201 else
9202 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
9203 break;
9206 output_asm_insn (instr, & operand);
9209 return "";
9212 /* Write the function name into the code section, directly preceding
9213 the function prologue.
9215 Code will be output similar to this:
9217 .ascii "arm_poke_function_name", 0
9218 .align
9220 .word 0xff000000 + (t1 - t0)
9221 arm_poke_function_name
9222 mov ip, sp
9223 stmfd sp!, {fp, ip, lr, pc}
9224 sub fp, ip, #4
9226 When performing a stack backtrace, code can inspect the value
9227 of 'pc' stored at 'fp' + 0. If the trace function then looks
9228 at location pc - 12 and the top 8 bits are set, then we know
9229 that there is a function name embedded immediately preceding this
9230 location, whose length is given by ((pc[-3]) & 0x00ffffff).
9232 We assume that pc is declared as a pointer to an unsigned long.
9234 It is of no benefit to output the function name if we are assembling
9235 a leaf function. These function types will not contain a stack
9236 backtrace structure, therefore it is not possible to determine the
9237 function name. */
9238 void
9239 arm_poke_function_name (FILE *stream, const char *name)
9241 unsigned long alignlength;
9242 unsigned long length;
9243 rtx x;
9245 length = strlen (name) + 1;
9246 alignlength = ROUND_UP_WORD (length);
9248 ASM_OUTPUT_ASCII (stream, name, length);
9249 ASM_OUTPUT_ALIGN (stream, 2);
9250 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
9251 assemble_aligned_integer (UNITS_PER_WORD, x);
9254 /* Place some comments into the assembler stream
9255 describing the current function. */
9256 static void
9257 arm_output_function_prologue (FILE *f, HOST_WIDE_INT frame_size)
9259 unsigned long func_type;
9261 if (!TARGET_ARM)
9263 thumb_output_function_prologue (f, frame_size);
9264 return;
9267 /* Sanity check. */
9268 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
9270 func_type = arm_current_func_type ();
9272 switch ((int) ARM_FUNC_TYPE (func_type))
9274 default:
9275 case ARM_FT_NORMAL:
9276 break;
9277 case ARM_FT_INTERWORKED:
9278 asm_fprintf (f, "\t%@ Function supports interworking.\n");
9279 break;
9280 case ARM_FT_ISR:
9281 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
9282 break;
9283 case ARM_FT_FIQ:
9284 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
9285 break;
9286 case ARM_FT_EXCEPTION:
9287 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
9288 break;
9291 if (IS_NAKED (func_type))
9292 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
9294 if (IS_VOLATILE (func_type))
9295 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
9297 if (IS_NESTED (func_type))
9298 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
9300 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
9301 current_function_args_size,
9302 current_function_pretend_args_size, frame_size);
9304 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
9305 frame_pointer_needed,
9306 cfun->machine->uses_anonymous_args);
9308 if (cfun->machine->lr_save_eliminated)
9309 asm_fprintf (f, "\t%@ link register save eliminated.\n");
9311 if (current_function_calls_eh_return)
9312 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
9314 #ifdef AOF_ASSEMBLER
9315 if (flag_pic)
9316 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, PIC_OFFSET_TABLE_REGNUM);
9317 #endif
9319 return_used_this_function = 0;
9322 const char *
9323 arm_output_epilogue (rtx sibling)
9325 int reg;
9326 unsigned long saved_regs_mask;
9327 unsigned long func_type;
9328 /* Floats_offset is the offset from the "virtual" frame. In an APCS
9329 frame that is $fp + 4 for a non-variadic function. */
9330 int floats_offset = 0;
9331 rtx operands[3];
9332 FILE * f = asm_out_file;
9333 unsigned int lrm_count = 0;
9334 int really_return = (sibling == NULL);
9335 int start_reg;
9336 arm_stack_offsets *offsets;
9338 /* If we have already generated the return instruction
9339 then it is futile to generate anything else. */
9340 if (use_return_insn (FALSE, sibling) && return_used_this_function)
9341 return "";
9343 func_type = arm_current_func_type ();
9345 if (IS_NAKED (func_type))
9346 /* Naked functions don't have epilogues. */
9347 return "";
9349 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
9351 rtx op;
9353 /* A volatile function should never return. Call abort. */
9354 op = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)" : "abort");
9355 assemble_external_libcall (op);
9356 output_asm_insn ("bl\t%a0", &op);
9358 return "";
9361 /* If we are throwing an exception, then we really must be doing a
9362 return, so we can't tail-call. */
9363 gcc_assert (!current_function_calls_eh_return || really_return);
9365 offsets = arm_get_frame_offsets ();
9366 saved_regs_mask = arm_compute_save_reg_mask ();
9368 if (TARGET_IWMMXT)
9369 lrm_count = bit_count (saved_regs_mask);
9371 floats_offset = offsets->saved_args;
9372 /* Compute how far away the floats will be. */
9373 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
9374 if (saved_regs_mask & (1 << reg))
9375 floats_offset += 4;
9377 if (frame_pointer_needed)
9379 /* This variable is for the Virtual Frame Pointer, not VFP regs. */
9380 int vfp_offset = offsets->frame;
9382 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
9384 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
9385 if (regs_ever_live[reg] && !call_used_regs[reg])
9387 floats_offset += 12;
9388 asm_fprintf (f, "\tldfe\t%r, [%r, #-%d]\n",
9389 reg, FP_REGNUM, floats_offset - vfp_offset);
9392 else
9394 start_reg = LAST_FPA_REGNUM;
9396 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
9398 if (regs_ever_live[reg] && !call_used_regs[reg])
9400 floats_offset += 12;
9402 /* We can't unstack more than four registers at once. */
9403 if (start_reg - reg == 3)
9405 asm_fprintf (f, "\tlfm\t%r, 4, [%r, #-%d]\n",
9406 reg, FP_REGNUM, floats_offset - vfp_offset);
9407 start_reg = reg - 1;
9410 else
9412 if (reg != start_reg)
9413 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
9414 reg + 1, start_reg - reg,
9415 FP_REGNUM, floats_offset - vfp_offset);
9416 start_reg = reg - 1;
9420 /* Just in case the last register checked also needs unstacking. */
9421 if (reg != start_reg)
9422 asm_fprintf (f, "\tlfm\t%r, %d, [%r, #-%d]\n",
9423 reg + 1, start_reg - reg,
9424 FP_REGNUM, floats_offset - vfp_offset);
9427 if (TARGET_HARD_FLOAT && TARGET_VFP)
9429 int saved_size;
9431 /* The fldmx insn does not have base+offset addressing modes,
9432 so we use IP to hold the address. */
9433 saved_size = arm_get_vfp_saved_size ();
9435 if (saved_size > 0)
9437 floats_offset += saved_size;
9438 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", IP_REGNUM,
9439 FP_REGNUM, floats_offset - vfp_offset);
9441 start_reg = FIRST_VFP_REGNUM;
9442 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
9444 if ((!regs_ever_live[reg] || call_used_regs[reg])
9445 && (!regs_ever_live[reg + 1] || call_used_regs[reg + 1]))
9447 if (start_reg != reg)
9448 arm_output_fldmx (f, IP_REGNUM,
9449 (start_reg - FIRST_VFP_REGNUM) / 2,
9450 (reg - start_reg) / 2);
9451 start_reg = reg + 2;
9454 if (start_reg != reg)
9455 arm_output_fldmx (f, IP_REGNUM,
9456 (start_reg - FIRST_VFP_REGNUM) / 2,
9457 (reg - start_reg) / 2);
9460 if (TARGET_IWMMXT)
9462 /* The frame pointer is guaranteed to be non-double-word aligned.
9463 This is because it is set to (old_stack_pointer - 4) and the
9464 old_stack_pointer was double word aligned. Thus the offset to
9465 the iWMMXt registers to be loaded must also be non-double-word
9466 sized, so that the resultant address *is* double-word aligned.
9467 We can ignore floats_offset since that was already included in
9468 the live_regs_mask. */
9469 lrm_count += (lrm_count % 2 ? 2 : 1);
9471 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
9472 if (regs_ever_live[reg] && !call_used_regs[reg])
9474 asm_fprintf (f, "\twldrd\t%r, [%r, #-%d]\n",
9475 reg, FP_REGNUM, lrm_count * 4);
9476 lrm_count += 2;
9480 /* saved_regs_mask should contain the IP, which at the time of stack
9481 frame generation actually contains the old stack pointer. So a
9482 quick way to unwind the stack is just to pop the IP register directly
9483 into the stack pointer. */
9484 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
9485 saved_regs_mask &= ~ (1 << IP_REGNUM);
9486 saved_regs_mask |= (1 << SP_REGNUM);
9488 /* There are two registers left in saved_regs_mask - LR and PC. We
9489 only need to restore the LR register (the return address), but to
9490 save time we can load it directly into the PC, unless we need a
9491 special function exit sequence, or we are not really returning. */
9492 if (really_return
9493 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
9494 && !current_function_calls_eh_return)
9495 /* Delete the LR from the register mask, so that the LR on
9496 the stack is loaded into the PC in the register mask. */
9497 saved_regs_mask &= ~ (1 << LR_REGNUM);
9498 else
9499 saved_regs_mask &= ~ (1 << PC_REGNUM);
9501 /* We must use SP as the base register, because SP is one of the
9502 registers being restored. If an interrupt or page fault
9503 happens in the ldm instruction, the SP might or might not
9504 have been restored. That would be bad, as then SP will no
9505 longer indicate the safe area of stack, and we can get stack
9506 corruption. Using SP as the base register means that it will
9507 be reset correctly to the original value, should an interrupt
9508 occur. If the stack pointer already points at the right
9509 place, then omit the subtraction. */
9510 if (offsets->outgoing_args != (1 + (int) bit_count (saved_regs_mask))
9511 || current_function_calls_alloca)
9512 asm_fprintf (f, "\tsub\t%r, %r, #%d\n", SP_REGNUM, FP_REGNUM,
9513 4 * bit_count (saved_regs_mask));
9514 print_multi_reg (f, "ldmfd\t%r", SP_REGNUM, saved_regs_mask);
9516 if (IS_INTERRUPT (func_type))
9517 /* Interrupt handlers will have pushed the
9518 IP onto the stack, so restore it now. */
9519 print_multi_reg (f, "ldmfd\t%r!", SP_REGNUM, 1 << IP_REGNUM);
9521 else
9523 /* Restore stack pointer if necessary. */
9524 if (offsets->outgoing_args != offsets->saved_regs)
9526 operands[0] = operands[1] = stack_pointer_rtx;
9527 operands[2] = GEN_INT (offsets->outgoing_args - offsets->saved_regs);
9528 output_add_immediate (operands);
9531 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
9533 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
9534 if (regs_ever_live[reg] && !call_used_regs[reg])
9535 asm_fprintf (f, "\tldfe\t%r, [%r], #12\n",
9536 reg, SP_REGNUM);
9538 else
9540 start_reg = FIRST_FPA_REGNUM;
9542 for (reg = FIRST_FPA_REGNUM; reg <= LAST_FPA_REGNUM; reg++)
9544 if (regs_ever_live[reg] && !call_used_regs[reg])
9546 if (reg - start_reg == 3)
9548 asm_fprintf (f, "\tlfmfd\t%r, 4, [%r]!\n",
9549 start_reg, SP_REGNUM);
9550 start_reg = reg + 1;
9553 else
9555 if (reg != start_reg)
9556 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
9557 start_reg, reg - start_reg,
9558 SP_REGNUM);
9560 start_reg = reg + 1;
9564 /* Just in case the last register checked also needs unstacking. */
9565 if (reg != start_reg)
9566 asm_fprintf (f, "\tlfmfd\t%r, %d, [%r]!\n",
9567 start_reg, reg - start_reg, SP_REGNUM);
9570 if (TARGET_HARD_FLOAT && TARGET_VFP)
9572 start_reg = FIRST_VFP_REGNUM;
9573 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
9575 if ((!regs_ever_live[reg] || call_used_regs[reg])
9576 && (!regs_ever_live[reg + 1] || call_used_regs[reg + 1]))
9578 if (start_reg != reg)
9579 arm_output_fldmx (f, SP_REGNUM,
9580 (start_reg - FIRST_VFP_REGNUM) / 2,
9581 (reg - start_reg) / 2);
9582 start_reg = reg + 2;
9585 if (start_reg != reg)
9586 arm_output_fldmx (f, SP_REGNUM,
9587 (start_reg - FIRST_VFP_REGNUM) / 2,
9588 (reg - start_reg) / 2);
9590 if (TARGET_IWMMXT)
9591 for (reg = FIRST_IWMMXT_REGNUM; reg <= LAST_IWMMXT_REGNUM; reg++)
9592 if (regs_ever_live[reg] && !call_used_regs[reg])
9593 asm_fprintf (f, "\twldrd\t%r, [%r], #8\n", reg, SP_REGNUM);
9595 /* If we can, restore the LR into the PC. */
9596 if (ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
9597 && really_return
9598 && current_function_pretend_args_size == 0
9599 && saved_regs_mask & (1 << LR_REGNUM)
9600 && !current_function_calls_eh_return)
9602 saved_regs_mask &= ~ (1 << LR_REGNUM);
9603 saved_regs_mask |= (1 << PC_REGNUM);
9606 /* Load the registers off the stack. If we only have one register
9607 to load use the LDR instruction - it is faster. */
9608 if (saved_regs_mask == (1 << LR_REGNUM))
9610 asm_fprintf (f, "\tldr\t%r, [%r], #4\n", LR_REGNUM, SP_REGNUM);
9612 else if (saved_regs_mask)
9614 if (saved_regs_mask & (1 << SP_REGNUM))
9615 /* Note - write back to the stack register is not enabled
9616 (i.e. "ldmfd sp!..."). We know that the stack pointer is
9617 in the list of registers and if we add writeback the
9618 instruction becomes UNPREDICTABLE. */
9619 print_multi_reg (f, "ldmfd\t%r", SP_REGNUM, saved_regs_mask);
9620 else
9621 print_multi_reg (f, "ldmfd\t%r!", SP_REGNUM, saved_regs_mask);
9624 if (current_function_pretend_args_size)
9626 /* Unwind the pre-pushed regs. */
9627 operands[0] = operands[1] = stack_pointer_rtx;
9628 operands[2] = GEN_INT (current_function_pretend_args_size);
9629 output_add_immediate (operands);
9633 /* We may have already restored PC directly from the stack. */
9634 if (!really_return || saved_regs_mask & (1 << PC_REGNUM))
9635 return "";
9637 /* Stack adjustment for exception handler. */
9638 if (current_function_calls_eh_return)
9639 asm_fprintf (f, "\tadd\t%r, %r, %r\n", SP_REGNUM, SP_REGNUM,
9640 ARM_EH_STACKADJ_REGNUM);
9642 /* Generate the return instruction. */
9643 switch ((int) ARM_FUNC_TYPE (func_type))
9645 case ARM_FT_ISR:
9646 case ARM_FT_FIQ:
9647 asm_fprintf (f, "\tsubs\t%r, %r, #4\n", PC_REGNUM, LR_REGNUM);
9648 break;
9650 case ARM_FT_EXCEPTION:
9651 asm_fprintf (f, "\tmovs\t%r, %r\n", PC_REGNUM, LR_REGNUM);
9652 break;
9654 case ARM_FT_INTERWORKED:
9655 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
9656 break;
9658 default:
9659 if (arm_arch5 || arm_arch4t)
9660 asm_fprintf (f, "\tbx\t%r\n", LR_REGNUM);
9661 else
9662 asm_fprintf (f, "\tmov\t%r, %r\n", PC_REGNUM, LR_REGNUM);
9663 break;
9666 return "";
9669 static void
9670 arm_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
9671 HOST_WIDE_INT frame_size ATTRIBUTE_UNUSED)
9673 arm_stack_offsets *offsets;
9675 if (TARGET_THUMB)
9677 int regno;
9679 /* Emit any call-via-reg trampolines that are needed for v4t support
9680 of call_reg and call_value_reg type insns. */
9681 for (regno = 0; regno < LR_REGNUM; regno++)
9683 rtx label = cfun->machine->call_via[regno];
9685 if (label != NULL)
9687 function_section (current_function_decl);
9688 targetm.asm_out.internal_label (asm_out_file, "L",
9689 CODE_LABEL_NUMBER (label));
9690 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
9694 /* ??? Probably not safe to set this here, since it assumes that a
9695 function will be emitted as assembly immediately after we generate
9696 RTL for it. This does not happen for inline functions. */
9697 return_used_this_function = 0;
9699 else
9701 /* We need to take into account any stack-frame rounding. */
9702 offsets = arm_get_frame_offsets ();
9704 gcc_assert (!use_return_insn (FALSE, NULL)
9705 || !return_used_this_function
9706 || offsets->saved_regs == offsets->outgoing_args
9707 || frame_pointer_needed);
9709 /* Reset the ARM-specific per-function variables. */
9710 after_arm_reorg = 0;
9714 /* Generate and emit an insn that we will recognize as a push_multi.
9715 Unfortunately, since this insn does not reflect the actual semantics
9716 of the operation very well, we need to annotate the insn for the benefit
9717 of DWARF2 frame unwind information. */
9718 static rtx
9719 emit_multi_reg_push (unsigned long mask)
9721 int num_regs = 0;
9722 int num_dwarf_regs;
9723 int i, j;
9724 rtx par;
9725 rtx dwarf;
9726 int dwarf_par_index;
9727 rtx tmp, reg;
9729 for (i = 0; i <= LAST_ARM_REGNUM; i++)
9730 if (mask & (1 << i))
9731 num_regs++;
9733 gcc_assert (num_regs && num_regs <= 16);
9735 /* We don't record the PC in the dwarf frame information. */
9736 num_dwarf_regs = num_regs;
9737 if (mask & (1 << PC_REGNUM))
9738 num_dwarf_regs--;
9740 /* For the body of the insn we are going to generate an UNSPEC in
9741 parallel with several USEs. This allows the insn to be recognized
9742 by the push_multi pattern in the arm.md file. The insn looks
9743 something like this:
9745 (parallel [
9746 (set (mem:BLK (pre_dec:BLK (reg:SI sp)))
9747 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
9748 (use (reg:SI 11 fp))
9749 (use (reg:SI 12 ip))
9750 (use (reg:SI 14 lr))
9751 (use (reg:SI 15 pc))
9754 For the frame note however, we try to be more explicit and actually
9755 show each register being stored into the stack frame, plus a (single)
9756 decrement of the stack pointer. We do it this way in order to be
9757 friendly to the stack unwinding code, which only wants to see a single
9758 stack decrement per instruction. The RTL we generate for the note looks
9759 something like this:
9761 (sequence [
9762 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
9763 (set (mem:SI (reg:SI sp)) (reg:SI r4))
9764 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI fp))
9765 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI ip))
9766 (set (mem:SI (plus:SI (reg:SI sp) (const_int 12))) (reg:SI lr))
9769 This sequence is used both by the code to support stack unwinding for
9770 exceptions handlers and the code to generate dwarf2 frame debugging. */
9772 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
9773 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
9774 dwarf_par_index = 1;
9776 for (i = 0; i <= LAST_ARM_REGNUM; i++)
9778 if (mask & (1 << i))
9780 reg = gen_rtx_REG (SImode, i);
9782 XVECEXP (par, 0, 0)
9783 = gen_rtx_SET (VOIDmode,
9784 gen_rtx_MEM (BLKmode,
9785 gen_rtx_PRE_DEC (BLKmode,
9786 stack_pointer_rtx)),
9787 gen_rtx_UNSPEC (BLKmode,
9788 gen_rtvec (1, reg),
9789 UNSPEC_PUSH_MULT));
9791 if (i != PC_REGNUM)
9793 tmp = gen_rtx_SET (VOIDmode,
9794 gen_rtx_MEM (SImode, stack_pointer_rtx),
9795 reg);
9796 RTX_FRAME_RELATED_P (tmp) = 1;
9797 XVECEXP (dwarf, 0, dwarf_par_index) = tmp;
9798 dwarf_par_index++;
9801 break;
9805 for (j = 1, i++; j < num_regs; i++)
9807 if (mask & (1 << i))
9809 reg = gen_rtx_REG (SImode, i);
9811 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
9813 if (i != PC_REGNUM)
9815 tmp = gen_rtx_SET (VOIDmode,
9816 gen_rtx_MEM (SImode,
9817 plus_constant (stack_pointer_rtx,
9818 4 * j)),
9819 reg);
9820 RTX_FRAME_RELATED_P (tmp) = 1;
9821 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
9824 j++;
9828 par = emit_insn (par);
9830 tmp = gen_rtx_SET (SImode,
9831 stack_pointer_rtx,
9832 gen_rtx_PLUS (SImode,
9833 stack_pointer_rtx,
9834 GEN_INT (-4 * num_regs)));
9835 RTX_FRAME_RELATED_P (tmp) = 1;
9836 XVECEXP (dwarf, 0, 0) = tmp;
9838 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
9839 REG_NOTES (par));
9840 return par;
9843 static rtx
9844 emit_sfm (int base_reg, int count)
9846 rtx par;
9847 rtx dwarf;
9848 rtx tmp, reg;
9849 int i;
9851 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
9852 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
9854 reg = gen_rtx_REG (XFmode, base_reg++);
9856 XVECEXP (par, 0, 0)
9857 = gen_rtx_SET (VOIDmode,
9858 gen_rtx_MEM (BLKmode,
9859 gen_rtx_PRE_DEC (BLKmode, stack_pointer_rtx)),
9860 gen_rtx_UNSPEC (BLKmode,
9861 gen_rtvec (1, reg),
9862 UNSPEC_PUSH_MULT));
9863 tmp = gen_rtx_SET (VOIDmode,
9864 gen_rtx_MEM (XFmode, stack_pointer_rtx), reg);
9865 RTX_FRAME_RELATED_P (tmp) = 1;
9866 XVECEXP (dwarf, 0, 1) = tmp;
9868 for (i = 1; i < count; i++)
9870 reg = gen_rtx_REG (XFmode, base_reg++);
9871 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
9873 tmp = gen_rtx_SET (VOIDmode,
9874 gen_rtx_MEM (XFmode,
9875 plus_constant (stack_pointer_rtx,
9876 i * 12)),
9877 reg);
9878 RTX_FRAME_RELATED_P (tmp) = 1;
9879 XVECEXP (dwarf, 0, i + 1) = tmp;
9882 tmp = gen_rtx_SET (VOIDmode,
9883 stack_pointer_rtx,
9884 gen_rtx_PLUS (SImode,
9885 stack_pointer_rtx,
9886 GEN_INT (-12 * count)));
9887 RTX_FRAME_RELATED_P (tmp) = 1;
9888 XVECEXP (dwarf, 0, 0) = tmp;
9890 par = emit_insn (par);
9891 REG_NOTES (par) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
9892 REG_NOTES (par));
9893 return par;
9897 /* Return true if the current function needs to save/restore LR. */
9899 static bool
9900 thumb_force_lr_save (void)
9902 return !cfun->machine->lr_save_eliminated
9903 && (!leaf_function_p ()
9904 || thumb_far_jump_used_p ()
9905 || regs_ever_live [LR_REGNUM]);
9909 /* Compute the distance from register FROM to register TO.
9910 These can be the arg pointer (26), the soft frame pointer (25),
9911 the stack pointer (13) or the hard frame pointer (11).
9912 In thumb mode r7 is used as the soft frame pointer, if needed.
9913 Typical stack layout looks like this:
9915 old stack pointer -> | |
9916 ----
9917 | | \
9918 | | saved arguments for
9919 | | vararg functions
9920 | | /
9922 hard FP & arg pointer -> | | \
9923 | | stack
9924 | | frame
9925 | | /
9927 | | \
9928 | | call saved
9929 | | registers
9930 soft frame pointer -> | | /
9932 | | \
9933 | | local
9934 | | variables
9935 | | /
9937 | | \
9938 | | outgoing
9939 | | arguments
9940 current stack pointer -> | | /
9943 For a given function some or all of these stack components
9944 may not be needed, giving rise to the possibility of
9945 eliminating some of the registers.
9947 The values returned by this function must reflect the behavior
9948 of arm_expand_prologue() and arm_compute_save_reg_mask().
9950 The sign of the number returned reflects the direction of stack
9951 growth, so the values are positive for all eliminations except
9952 from the soft frame pointer to the hard frame pointer.
9954 SFP may point just inside the local variables block to ensure correct
9955 alignment. */
9958 /* Calculate stack offsets. These are used to calculate register elimination
9959 offsets and in prologue/epilogue code. */
9961 static arm_stack_offsets *
9962 arm_get_frame_offsets (void)
9964 struct arm_stack_offsets *offsets;
9965 unsigned long func_type;
9966 int leaf;
9967 int saved;
9968 HOST_WIDE_INT frame_size;
9970 offsets = &cfun->machine->stack_offsets;
9972 /* We need to know if we are a leaf function. Unfortunately, it
9973 is possible to be called after start_sequence has been called,
9974 which causes get_insns to return the insns for the sequence,
9975 not the function, which will cause leaf_function_p to return
9976 the incorrect result.
9978 We only need to know about leaf functions once reload has completed, and the
9979 frame size cannot be changed after that time, so we can safely
9980 use the cached value. */
9982 if (reload_completed)
9983 return offsets;
9985 /* Initially this is the size of the local variables. It will be translated
9986 into an offset once we have determined the size of preceding data. */
9987 frame_size = ROUND_UP_WORD (get_frame_size ());
9989 leaf = leaf_function_p ();
9991 /* Space for variadic functions. */
9992 offsets->saved_args = current_function_pretend_args_size;
9994 offsets->frame = offsets->saved_args + (frame_pointer_needed ? 4 : 0);
9996 if (TARGET_ARM)
9998 unsigned int regno;
10000 saved = bit_count (arm_compute_save_reg_mask ()) * 4;
10002 /* We know that SP will be doubleword aligned on entry, and we must
10003 preserve that condition at any subroutine call. We also require the
10004 soft frame pointer to be doubleword aligned. */
10006 if (TARGET_REALLY_IWMMXT)
10008 /* Check for the call-saved iWMMXt registers. */
10009 for (regno = FIRST_IWMMXT_REGNUM;
10010 regno <= LAST_IWMMXT_REGNUM;
10011 regno++)
10012 if (regs_ever_live [regno] && ! call_used_regs [regno])
10013 saved += 8;
10016 func_type = arm_current_func_type ();
10017 if (! IS_VOLATILE (func_type))
10019 /* Space for saved FPA registers. */
10020 for (regno = FIRST_FPA_REGNUM; regno <= LAST_FPA_REGNUM; regno++)
10021 if (regs_ever_live[regno] && ! call_used_regs[regno])
10022 saved += 12;
10024 /* Space for saved VFP registers. */
10025 if (TARGET_HARD_FLOAT && TARGET_VFP)
10026 saved += arm_get_vfp_saved_size ();
10029 else /* TARGET_THUMB */
10031 saved = bit_count (thumb_compute_save_reg_mask ()) * 4;
10032 if (TARGET_BACKTRACE)
10033 saved += 16;
10036 /* Saved registers include the stack frame. */
10037 offsets->saved_regs = offsets->saved_args + saved;
10038 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
10039 /* A leaf function does not need any stack alignment if it has nothing
10040 on the stack. */
10041 if (leaf && frame_size == 0)
10043 offsets->outgoing_args = offsets->soft_frame;
10044 return offsets;
10047 /* Ensure SFP has the correct alignment. */
10048 if (ARM_DOUBLEWORD_ALIGN
10049 && (offsets->soft_frame & 7))
10050 offsets->soft_frame += 4;
10052 offsets->outgoing_args = offsets->soft_frame + frame_size
10053 + current_function_outgoing_args_size;
10055 if (ARM_DOUBLEWORD_ALIGN)
10057 /* Ensure SP remains doubleword aligned. */
10058 if (offsets->outgoing_args & 7)
10059 offsets->outgoing_args += 4;
10060 gcc_assert (!(offsets->outgoing_args & 7));
10063 return offsets;
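/* Worked example (a hypothetical function): a non-leaf ARM function
   with 8 bytes of locals, no pretend args, no frame pointer, no
   interworking slot, no outgoing argument space and {r4, r5, r6, lr}
   to save gets saved_args = 0, saved_regs = 16, soft_frame = 16
   (already doubleword aligned) and outgoing_args = 16 + 8 = 24,
   which needs no further padding.  */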
10067 /* Calculate the relative offsets for the different stack pointers. Positive
10068 offsets are in the direction of stack growth. */
10070 HOST_WIDE_INT
10071 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
10073 arm_stack_offsets *offsets;
10075 offsets = arm_get_frame_offsets ();
10077 /* OK, now we have enough information to compute the distances.
10078 There must be an entry in these switch tables for each pair
10079 of registers in ELIMINABLE_REGS, even if some of the entries
10080 seem to be redundant or useless. */
10081 switch (from)
10083 case ARG_POINTER_REGNUM:
10084 switch (to)
10086 case THUMB_HARD_FRAME_POINTER_REGNUM:
10087 return 0;
10089 case FRAME_POINTER_REGNUM:
10090 /* This is the reverse of the soft frame pointer
10091 to hard frame pointer elimination below. */
10092 return offsets->soft_frame - offsets->saved_args;
10094 case ARM_HARD_FRAME_POINTER_REGNUM:
10095 /* If there is no stack frame then the hard
10096 frame pointer and the arg pointer coincide. */
10097 if (offsets->frame == offsets->saved_regs)
10098 return 0;
10099 /* FIXME: Not sure about this. Maybe we should always return 0 ? */
10100 return (frame_pointer_needed
10101 && cfun->static_chain_decl != NULL
10102 && ! cfun->machine->uses_anonymous_args) ? 4 : 0;
10104 case STACK_POINTER_REGNUM:
10105 /* If nothing has been pushed on the stack at all
10106 then this will return -4. This *is* correct! */
10107 return offsets->outgoing_args - (offsets->saved_args + 4);
10109 default:
10110 gcc_unreachable ();
10112 gcc_unreachable ();
10114 case FRAME_POINTER_REGNUM:
10115 switch (to)
10117 case THUMB_HARD_FRAME_POINTER_REGNUM:
10118 return 0;
10120 case ARM_HARD_FRAME_POINTER_REGNUM:
10121 /* The hard frame pointer points to the top entry in the
10122 stack frame. The soft frame pointer points to the bottom entry
10123 in the stack frame. If there is no stack frame at all,
10124 then they are identical. */
10126 return offsets->frame - offsets->soft_frame;
10128 case STACK_POINTER_REGNUM:
10129 return offsets->outgoing_args - offsets->soft_frame;
10131 default:
10132 gcc_unreachable ();
10134 gcc_unreachable ();
10136 default:
10137 /* You cannot eliminate from the stack pointer.
10138 In theory you could eliminate from the hard frame
10139 pointer to the stack pointer, but this will never
10140 happen, since if a stack frame is not needed the
10141 hard frame pointer will never be used. */
10142 gcc_unreachable ();
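/* Editorial example, reusing the sample offsets sketched above
   (saved_args = 0, outgoing_args = 40): eliminating ARG_POINTER_REGNUM
   to STACK_POINTER_REGNUM returns 40 - (0 + 4) = 36.  */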
10147 /* Generate the prologue instructions for entry into an ARM function. */
10148 void
10149 arm_expand_prologue (void)
10151 int reg;
10152 rtx amount;
10153 rtx insn;
10154 rtx ip_rtx;
10155 unsigned long live_regs_mask;
10156 unsigned long func_type;
10157 int fp_offset = 0;
10158 int saved_pretend_args = 0;
10159 int saved_regs = 0;
10160 unsigned HOST_WIDE_INT args_to_push;
10161 arm_stack_offsets *offsets;
10163 func_type = arm_current_func_type ();
10165 /* Naked functions don't have prologues. */
10166 if (IS_NAKED (func_type))
10167 return;
10169 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
10170 args_to_push = current_function_pretend_args_size;
10172 /* Compute which registers we will have to save onto the stack. */
10173 live_regs_mask = arm_compute_save_reg_mask ();
10175 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
10177 if (frame_pointer_needed)
10179 if (IS_INTERRUPT (func_type))
10181 /* Interrupt functions must not corrupt any registers.
10182 Creating a frame pointer, however, corrupts the IP
10183 register, so we must push it first. */
10184 insn = emit_multi_reg_push (1 << IP_REGNUM);
10186 /* Do not set RTX_FRAME_RELATED_P on this insn.
10187 The dwarf stack unwinding code only wants to see one
10188 stack decrement per function, and this is not it. If
10189 this instruction is labeled as being part of the frame
10190 creation sequence then dwarf2out_frame_debug_expr will
10191 die when it encounters the assignment of IP to FP
10192 later on, since the use of SP here establishes SP as
10193 the CFA register and not IP.
10195 Anyway this instruction is not really part of the stack
10196 frame creation although it is part of the prologue. */
10198 else if (IS_NESTED (func_type))
10200 /* The static chain register is the same as the IP register, which is
10201 used as a scratch register during stack frame creation.
10202 To get around this we need to find somewhere to store IP
10203 whilst the frame is being created. We try the following
10204 places in order:
10206 1. The last argument register.
10207 2. A slot on the stack above the frame. (This only
10208 works if the function is not a varargs function).
10209 3. Register r3, after pushing the argument registers
10210 onto the stack.
10212 Note - we only need to tell the dwarf2 backend about the SP
10213 adjustment in the second variant; the static chain register
10214 doesn't need to be unwound, as it doesn't contain a value
10215 inherited from the caller. */
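/* Editorial sketch of the first two variants as ARM assembly:
       mov r3, ip          @ variant 1: stash IP in r3
       str ip, [sp, #-4]!  @ variant 2: stash IP above the frame
   Only the second touches SP, which is why it alone needs the dwarf
   SP-adjustment note mentioned above.  */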
10217 if (regs_ever_live[3] == 0)
10219 insn = gen_rtx_REG (SImode, 3);
10220 insn = gen_rtx_SET (SImode, insn, ip_rtx);
10221 insn = emit_insn (insn);
10223 else if (args_to_push == 0)
10225 rtx dwarf;
10226 insn = gen_rtx_PRE_DEC (SImode, stack_pointer_rtx);
10227 insn = gen_rtx_MEM (SImode, insn);
10228 insn = gen_rtx_SET (VOIDmode, insn, ip_rtx);
10229 insn = emit_insn (insn);
10231 fp_offset = 4;
10233 /* Just tell the dwarf backend that we adjusted SP. */
10234 dwarf = gen_rtx_SET (VOIDmode, stack_pointer_rtx,
10235 gen_rtx_PLUS (SImode, stack_pointer_rtx,
10236 GEN_INT (-fp_offset)));
10237 RTX_FRAME_RELATED_P (insn) = 1;
10238 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
10239 dwarf, REG_NOTES (insn));
10241 else
10243 /* Store the args on the stack. */
10244 if (cfun->machine->uses_anonymous_args)
10245 insn = emit_multi_reg_push
10246 ((0xf0 >> (args_to_push / 4)) & 0xf);
10247 else
10248 insn = emit_insn
10249 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10250 GEN_INT (- args_to_push)));
10252 RTX_FRAME_RELATED_P (insn) = 1;
10254 saved_pretend_args = 1;
10255 fp_offset = args_to_push;
10256 args_to_push = 0;
10258 /* Now reuse r3 to preserve IP. */
10259 insn = gen_rtx_REG (SImode, 3);
10260 insn = gen_rtx_SET (SImode, insn, ip_rtx);
10261 (void) emit_insn (insn);
10265 if (fp_offset)
10267 insn = gen_rtx_PLUS (SImode, stack_pointer_rtx, GEN_INT (fp_offset));
10268 insn = gen_rtx_SET (SImode, ip_rtx, insn);
10270 else
10271 insn = gen_movsi (ip_rtx, stack_pointer_rtx);
10273 insn = emit_insn (insn);
10274 RTX_FRAME_RELATED_P (insn) = 1;
10277 if (args_to_push)
10279 /* Push the argument registers, or reserve space for them. */
10280 if (cfun->machine->uses_anonymous_args)
10281 insn = emit_multi_reg_push
10282 ((0xf0 >> (args_to_push / 4)) & 0xf);
10283 else
10284 insn = emit_insn
10285 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10286 GEN_INT (- args_to_push)));
10287 RTX_FRAME_RELATED_P (insn) = 1;
10290 /* If this is an interrupt service routine, and the link register
10291 is going to be pushed, and we are not creating a stack frame,
10292 (which would involve an extra push of IP and a pop in the epilogue)
10293 subtracting four from LR now will mean that the function return
10294 can be done with a single instruction. */
10295 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
10296 && (live_regs_mask & (1 << LR_REGNUM)) != 0
10297 && ! frame_pointer_needed)
10298 emit_insn (gen_rtx_SET (SImode,
10299 gen_rtx_REG (SImode, LR_REGNUM),
10300 gen_rtx_PLUS (SImode,
10301 gen_rtx_REG (SImode, LR_REGNUM),
10302 GEN_INT (-4))));
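/* Editorial note: with LR pre-adjusted, the epilogue can return with a
   single instruction such as
       ldmfd sp!, {r0-r3, pc}^
   instead of popping LR and then executing "subs pc, lr, #4".  */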
10304 if (live_regs_mask)
10306 insn = emit_multi_reg_push (live_regs_mask);
10307 saved_regs += bit_count (live_regs_mask) * 4;
10308 RTX_FRAME_RELATED_P (insn) = 1;
10311 if (TARGET_IWMMXT)
10312 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
10313 if (regs_ever_live[reg] && ! call_used_regs [reg])
10315 insn = gen_rtx_PRE_DEC (V2SImode, stack_pointer_rtx);
10316 insn = gen_rtx_MEM (V2SImode, insn);
10317 insn = emit_insn (gen_rtx_SET (VOIDmode, insn,
10318 gen_rtx_REG (V2SImode, reg)));
10319 RTX_FRAME_RELATED_P (insn) = 1;
10320 saved_regs += 8;
10323 if (! IS_VOLATILE (func_type))
10325 int start_reg;
10327 /* Save any floating point call-saved registers used by this
10328 function. */
10329 if (arm_fpu_arch == FPUTYPE_FPA_EMU2)
10331 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
10332 if (regs_ever_live[reg] && !call_used_regs[reg])
10334 insn = gen_rtx_PRE_DEC (XFmode, stack_pointer_rtx);
10335 insn = gen_rtx_MEM (XFmode, insn);
10336 insn = emit_insn (gen_rtx_SET (VOIDmode, insn,
10337 gen_rtx_REG (XFmode, reg)));
10338 RTX_FRAME_RELATED_P (insn) = 1;
10339 saved_regs += 12;
10342 else
10344 start_reg = LAST_FPA_REGNUM;
10346 for (reg = LAST_FPA_REGNUM; reg >= FIRST_FPA_REGNUM; reg--)
10348 if (regs_ever_live[reg] && !call_used_regs[reg])
10350 if (start_reg - reg == 3)
10352 insn = emit_sfm (reg, 4);
10353 RTX_FRAME_RELATED_P (insn) = 1;
10354 saved_regs += 48;
10355 start_reg = reg - 1;
10358 else
10360 if (start_reg != reg)
10362 insn = emit_sfm (reg + 1, start_reg - reg);
10363 RTX_FRAME_RELATED_P (insn) = 1;
10364 saved_regs += (start_reg - reg) * 12;
10366 start_reg = reg - 1;
10370 if (start_reg != reg)
10372 insn = emit_sfm (reg + 1, start_reg - reg);
10373 saved_regs += (start_reg - reg) * 12;
10374 RTX_FRAME_RELATED_P (insn) = 1;
10377 if (TARGET_HARD_FLOAT && TARGET_VFP)
10379 start_reg = FIRST_VFP_REGNUM;
10381 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
10383 if ((!regs_ever_live[reg] || call_used_regs[reg])
10384 && (!regs_ever_live[reg + 1] || call_used_regs[reg + 1]))
10386 if (start_reg != reg)
10387 saved_regs += vfp_emit_fstmx (start_reg,
10388 (reg - start_reg) / 2);
10389 start_reg = reg + 2;
10392 if (start_reg != reg)
10393 saved_regs += vfp_emit_fstmx (start_reg,
10394 (reg - start_reg) / 2);
10398 if (frame_pointer_needed)
10400 /* Create the new frame pointer. */
10401 insn = GEN_INT (-(4 + args_to_push + fp_offset));
10402 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
10403 RTX_FRAME_RELATED_P (insn) = 1;
10405 if (IS_NESTED (func_type))
10407 /* Recover the static chain register. */
10408 if (regs_ever_live [3] == 0
10409 || saved_pretend_args)
10410 insn = gen_rtx_REG (SImode, 3);
10411 else /* if (current_function_pretend_args_size == 0) */
10413 insn = gen_rtx_PLUS (SImode, hard_frame_pointer_rtx,
10414 GEN_INT (4));
10415 insn = gen_rtx_MEM (SImode, insn);
10418 emit_insn (gen_rtx_SET (SImode, ip_rtx, insn));
10419 /* Add a USE to stop propagate_one_insn() from barfing. */
10420 emit_insn (gen_prologue_use (ip_rtx));
10424 offsets = arm_get_frame_offsets ();
10425 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
10427 /* This add can produce multiple insns for a large constant, so we
10428 need to get tricky. */
10429 rtx last = get_last_insn ();
10431 amount = GEN_INT (offsets->saved_args + saved_regs
10432 - offsets->outgoing_args);
10434 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
10435 amount));
10438 last = last ? NEXT_INSN (last) : get_insns ();
10439 RTX_FRAME_RELATED_P (last) = 1;
10441 while (last != insn);
10443 /* If the frame pointer is needed, emit a special barrier that
10444 will prevent the scheduler from moving stores to the frame
10445 before the stack adjustment. */
10446 if (frame_pointer_needed)
10447 insn = emit_insn (gen_stack_tie (stack_pointer_rtx,
10448 hard_frame_pointer_rtx));
10452 if (flag_pic)
10453 arm_load_pic_register (INVALID_REGNUM);
10455 /* If we are profiling, make sure no instructions are scheduled before
10456 the call to mcount. Similarly if the user has requested no
10457 scheduling in the prolog. */
10458 if (current_function_profile || !TARGET_SCHED_PROLOG)
10459 emit_insn (gen_blockage ());
10461 /* If the link register is being kept alive, with the return address in it,
10462 then make sure that it does not get reused by the ce2 pass. */
10463 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
10465 emit_insn (gen_prologue_use (gen_rtx_REG (SImode, LR_REGNUM)));
10466 cfun->machine->lr_save_eliminated = 1;
10470 /* If CODE is 'd', then the X is a condition operand and the instruction
10471 should only be executed if the condition is true.
10472 if CODE is 'D', then the X is a condition operand and the instruction
10473 should only be executed if the condition is false: however, if the mode
10474 of the comparison is CCFPEmode, then always execute the instruction -- we
10475 do this because in these circumstances !GE does not necessarily imply LT;
10476 in these cases the instruction pattern will take care to make sure that
10477 an instruction containing %d will follow, thereby undoing the effects of
10478 doing this instruction unconditionally.
10479 If CODE is 'N' then X is a floating point operand that must be negated
10480 before output.
10481 If CODE is 'B' then output a bitwise inverted value of X (a const int).
10482 If X is a REG and CODE is `M', output an ldm/stm style multi-reg. */
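/* Editorial illustrations of these codes (hypothetical templates, not
   quoted from arm.md): "%B1" applied to (const_int 5) prints -6, its
   bitwise inverse; "%M0" applied to a DImode value in r4 prints
   "{r4-r5}"; "add%d3 %0, %1, %2" emits "addeq ..." when operand 3 is
   an EQ comparison.  */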
10483 void
10484 arm_print_operand (FILE *stream, rtx x, int code)
10486 switch (code)
10488 case '@':
10489 fputs (ASM_COMMENT_START, stream);
10490 return;
10492 case '_':
10493 fputs (user_label_prefix, stream);
10494 return;
10496 case '|':
10497 fputs (REGISTER_PREFIX, stream);
10498 return;
10500 case '?':
10501 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
10503 if (TARGET_THUMB)
10505 output_operand_lossage ("predicated Thumb instruction");
10506 break;
10508 if (current_insn_predicate != NULL)
10510 output_operand_lossage
10511 ("predicated instruction in conditional sequence");
10512 break;
10515 fputs (arm_condition_codes[arm_current_cc], stream);
10517 else if (current_insn_predicate)
10519 enum arm_cond_code code;
10521 if (TARGET_THUMB)
10523 output_operand_lossage ("predicated Thumb instruction");
10524 break;
10527 code = get_arm_condition_code (current_insn_predicate);
10528 fputs (arm_condition_codes[code], stream);
10530 return;
10532 case 'N':
10534 REAL_VALUE_TYPE r;
10535 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
10536 r = REAL_VALUE_NEGATE (r);
10537 fprintf (stream, "%s", fp_const_from_val (&r));
10539 return;
10541 case 'B':
10542 if (GET_CODE (x) == CONST_INT)
10544 HOST_WIDE_INT val;
10545 val = ARM_SIGN_EXTEND (~INTVAL (x));
10546 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
10548 else
10550 putc ('~', stream);
10551 output_addr_const (stream, x);
10553 return;
10555 case 'i':
10556 fprintf (stream, "%s", arithmetic_instr (x, 1));
10557 return;
10559 /* Truncate Cirrus shift counts. */
10560 case 's':
10561 if (GET_CODE (x) == CONST_INT)
10563 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0x3f);
10564 return;
10566 arm_print_operand (stream, x, 0);
10567 return;
10569 case 'I':
10570 fprintf (stream, "%s", arithmetic_instr (x, 0));
10571 return;
10573 case 'S':
10575 HOST_WIDE_INT val;
10576 const char * shift = shift_op (x, &val);
10578 if (shift)
10580 fprintf (stream, ", %s ", shift);
10581 if (val == -1)
10582 arm_print_operand (stream, XEXP (x, 1), 0);
10583 else
10584 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
10587 return;
10589 /* An explanation of the 'Q', 'R' and 'H' register operands:
10591 In a pair of registers containing a DI or DF value the 'Q'
10592 operand returns the register number of the register containing
10593 the least significant part of the value. The 'R' operand returns
10594 the register number of the register containing the most
10595 significant part of the value.
10597 The 'H' operand returns the higher of the two register numbers.
10598 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
10599 same as the 'Q' operand, since the most significant part of the
10600 value is held in the lower number register. The reverse is true
10601 on systems where WORDS_BIG_ENDIAN is false.
10603 The purpose of these operands is to distinguish between cases
10604 where the endian-ness of the values is important (for example
10605 when they are added together), and cases where the endian-ness
10606 is irrelevant, but the order of register operations is important.
10607 For example when loading a value from memory into a register
10608 pair, the endian-ness does not matter. Provided that the value
10609 from the lower memory address is put into the lower numbered
10610 register, and the value from the higher address is put into the
10611 higher numbered register, the load will work regardless of whether
10612 the value being loaded is big-wordian or little-wordian. The
10613 order of the two register loads can matter however, if the address
10614 of the memory location is actually held in one of the registers
10615 being overwritten by the load. */
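/* Concretely (editorial example): for a DImode value held in {r0, r1}
   on a little-endian target, %Q prints r0 (least significant word),
   %R prints r1 (most significant word) and %H prints r1.  With
   WORDS_BIG_ENDIAN, %Q and %H both print r1 while %R prints r0.  */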
10616 case 'Q':
10617 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
10619 output_operand_lossage ("invalid operand for code '%c'", code);
10620 return;
10623 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
10624 return;
10626 case 'R':
10627 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
10629 output_operand_lossage ("invalid operand for code '%c'", code);
10630 return;
10633 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
10634 return;
10636 case 'H':
10637 if (GET_CODE (x) != REG || REGNO (x) > LAST_ARM_REGNUM)
10639 output_operand_lossage ("invalid operand for code '%c'", code);
10640 return;
10643 asm_fprintf (stream, "%r", REGNO (x) + 1);
10644 return;
10646 case 'm':
10647 asm_fprintf (stream, "%r",
10648 GET_CODE (XEXP (x, 0)) == REG
10649 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
10650 return;
10652 case 'M':
10653 asm_fprintf (stream, "{%r-%r}",
10654 REGNO (x),
10655 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
10656 return;
10658 case 'd':
10659 /* CONST_TRUE_RTX means always -- that's the default. */
10660 if (x == const_true_rtx)
10661 return;
10663 if (!COMPARISON_P (x))
10665 output_operand_lossage ("invalid operand for code '%c'", code);
10666 return;
10669 fputs (arm_condition_codes[get_arm_condition_code (x)],
10670 stream);
10671 return;
10673 case 'D':
10674 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
10675 want to do that. */
10676 if (x == const_true_rtx)
10678 output_operand_lossage ("instruction never executed");
10679 return;
10681 if (!COMPARISON_P (x))
10683 output_operand_lossage ("invalid operand for code '%c'", code);
10684 return;
10687 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
10688 (get_arm_condition_code (x))],
10689 stream);
10690 return;
10692 /* Cirrus registers can be accessed in a variety of ways:
10693 single floating point (f)
10694 double floating point (d)
10695 32bit integer (fx)
10696 64bit integer (dx). */
10697 case 'W': /* Cirrus register in F mode. */
10698 case 'X': /* Cirrus register in D mode. */
10699 case 'Y': /* Cirrus register in FX mode. */
10700 case 'Z': /* Cirrus register in DX mode. */
10701 gcc_assert (GET_CODE (x) == REG
10702 && REGNO_REG_CLASS (REGNO (x)) == CIRRUS_REGS);
10704 fprintf (stream, "mv%s%s",
10705 code == 'W' ? "f"
10706 : code == 'X' ? "d"
10707 : code == 'Y' ? "fx" : "dx", reg_names[REGNO (x)] + 2);
10709 return;
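/* Editorial example, assuming the Cirrus entries in REGISTER_NAMES are
   "mv0" ... "mv15": 'X' applied to register mv5 prints "mvd5", while
   'Y' prints "mvfx5".  */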
10711 /* Print cirrus register in the mode specified by the register's mode. */
10712 case 'V':
10714 int mode = GET_MODE (x);
10716 if (GET_CODE (x) != REG || REGNO_REG_CLASS (REGNO (x)) != CIRRUS_REGS)
10718 output_operand_lossage ("invalid operand for code '%c'", code);
10719 return;
10722 fprintf (stream, "mv%s%s",
10723 mode == DFmode ? "d"
10724 : mode == SImode ? "fx"
10725 : mode == DImode ? "dx"
10726 : "f", reg_names[REGNO (x)] + 2);
10728 return;
10731 case 'U':
10732 if (GET_CODE (x) != REG
10733 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
10734 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
10735 /* Bad value for wCG register number. */
10737 output_operand_lossage ("invalid operand for code '%c'", code);
10738 return;
10741 else
10742 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
10743 return;
10745 /* Print an iWMMXt control register name. */
10746 case 'w':
10747 if (GET_CODE (x) != CONST_INT
10748 || INTVAL (x) < 0
10749 || INTVAL (x) >= 16)
10750 /* Bad value for wC register number. */
10752 output_operand_lossage ("invalid operand for code '%c'", code);
10753 return;
10756 else
10758 static const char * wc_reg_names [16] =
10760 "wCID", "wCon", "wCSSF", "wCASF",
10761 "wC4", "wC5", "wC6", "wC7",
10762 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
10763 "wC12", "wC13", "wC14", "wC15"
10766 fprintf (stream, "%s", wc_reg_names [INTVAL (x)]);
10768 return;
10770 /* Print a VFP double precision register name. */
10771 case 'P':
10773 int mode = GET_MODE (x);
10774 int num;
10776 if (mode != DImode && mode != DFmode)
10778 output_operand_lossage ("invalid operand for code '%c'", code);
10779 return;
10782 if (GET_CODE (x) != REG
10783 || !IS_VFP_REGNUM (REGNO (x)))
10785 output_operand_lossage ("invalid operand for code '%c'", code);
10786 return;
10789 num = REGNO(x) - FIRST_VFP_REGNUM;
10790 if (num & 1)
10792 output_operand_lossage ("invalid operand for code '%c'", code);
10793 return;
10796 fprintf (stream, "d%d", num >> 1);
10798 return;
10800 default:
10801 if (x == 0)
10803 output_operand_lossage ("missing operand");
10804 return;
10807 switch (GET_CODE (x))
10809 case REG:
10810 asm_fprintf (stream, "%r", REGNO (x));
10811 break;
10813 case MEM:
10814 output_memory_reference_mode = GET_MODE (x);
10815 output_address (XEXP (x, 0));
10816 break;
10818 case CONST_DOUBLE:
10819 fprintf (stream, "#%s", fp_immediate_constant (x));
10820 break;
10822 default:
10823 gcc_assert (GET_CODE (x) != NEG);
10824 fputc ('#', stream);
10825 output_addr_const (stream, x);
10826 break;
10831 #ifndef AOF_ASSEMBLER
10832 /* Target hook for assembling integer objects. The ARM version needs to
10833 handle word-sized values specially. */
10834 static bool
10835 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
10837 if (size == UNITS_PER_WORD && aligned_p)
10839 fputs ("\t.word\t", asm_out_file);
10840 output_addr_const (asm_out_file, x);
10842 /* Mark symbols as position independent. We only do this in the
10843 .text segment, not in the .data segment. */
10844 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
10845 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
10847 if (GET_CODE (x) == SYMBOL_REF
10848 && (CONSTANT_POOL_ADDRESS_P (x)
10849 || SYMBOL_REF_LOCAL_P (x)))
10850 fputs ("(GOTOFF)", asm_out_file);
10851 else if (GET_CODE (x) == LABEL_REF)
10852 fputs ("(GOTOFF)", asm_out_file);
10853 else
10854 fputs ("(GOT)", asm_out_file);
10856 fputc ('\n', asm_out_file);
10857 return true;
10860 if (arm_vector_mode_supported_p (GET_MODE (x)))
10862 int i, units;
10864 gcc_assert (GET_CODE (x) == CONST_VECTOR);
10866 units = CONST_VECTOR_NUNITS (x);
10868 switch (GET_MODE (x))
10870 case V2SImode: size = 4; break;
10871 case V4HImode: size = 2; break;
10872 case V8QImode: size = 1; break;
10873 default:
10874 gcc_unreachable ();
10877 for (i = 0; i < units; i++)
10879 rtx elt;
10881 elt = CONST_VECTOR_ELT (x, i);
10882 assemble_integer
10883 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
10886 return true;
10889 return default_assemble_integer (x, size, aligned_p);
10893 /* Add a function to the list of static constructors. */
10895 static void
10896 arm_elf_asm_constructor (rtx symbol, int priority ATTRIBUTE_UNUSED)
10898 if (!TARGET_AAPCS_BASED)
10900 default_named_section_asm_out_constructor (symbol, priority);
10901 return;
10904 /* Put these in the .init_array section, using a special relocation. */
10905 ctors_section ();
10906 assemble_align (POINTER_SIZE);
10907 fputs ("\t.word\t", asm_out_file);
10908 output_addr_const (asm_out_file, symbol);
10909 fputs ("(target1)\n", asm_out_file);
10911 #endif
10913 /* A finite state machine takes care of noticing whether or not instructions
10914 can be conditionally executed, and thus decrease execution time and code
10915 size by deleting branch instructions. The fsm is controlled by
10916 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
10918 /* The states of the fsm controlling condition codes are:
10919 0: normal, do nothing special
10920 1: make ASM_OUTPUT_OPCODE not output this instruction
10921 2: make ASM_OUTPUT_OPCODE not output this instruction
10922 3: make instructions conditional
10923 4: make instructions conditional
10925 State transitions (state->state by whom under condition):
10926 0 -> 1 final_prescan_insn if the `target' is a label
10927 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
10928 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
10929 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
10930 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
10931 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
10932 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
10933 (the target insn is arm_target_insn).
10935 If the jump clobbers the conditions then we use states 2 and 4.
10937 A similar thing can be done with conditional return insns.
10939 XXX In case the `target' is an unconditional branch, this conditionalising
10940 of the instructions always reduces code size, but not always execution
10941 time. But then, I want to reduce the code size to somewhere near what
10942 /bin/cc produces. */
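/* Editorial illustration of the fsm's effect: a sequence such as
       cmp r0, #0
       beq .L1
       add r1, r1, #1
   .L1:
   is output instead as
       cmp r0, #0
       addne r1, r1, #1
   with the branch and the (single-use) label deleted.  */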
10944 /* Returns the index of the ARM condition code string in
10945 `arm_condition_codes'. COMPARISON should be an rtx like
10946 `(eq (...) (...))'. */
10947 static enum arm_cond_code
10948 get_arm_condition_code (rtx comparison)
10950 enum machine_mode mode = GET_MODE (XEXP (comparison, 0));
10951 int code;
10952 enum rtx_code comp_code = GET_CODE (comparison);
10954 if (GET_MODE_CLASS (mode) != MODE_CC)
10955 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
10956 XEXP (comparison, 1));
10958 switch (mode)
10960 case CC_DNEmode: code = ARM_NE; goto dominance;
10961 case CC_DEQmode: code = ARM_EQ; goto dominance;
10962 case CC_DGEmode: code = ARM_GE; goto dominance;
10963 case CC_DGTmode: code = ARM_GT; goto dominance;
10964 case CC_DLEmode: code = ARM_LE; goto dominance;
10965 case CC_DLTmode: code = ARM_LT; goto dominance;
10966 case CC_DGEUmode: code = ARM_CS; goto dominance;
10967 case CC_DGTUmode: code = ARM_HI; goto dominance;
10968 case CC_DLEUmode: code = ARM_LS; goto dominance;
10969 case CC_DLTUmode: code = ARM_CC;
10971 dominance:
10972 gcc_assert (comp_code == EQ || comp_code == NE);
10974 if (comp_code == EQ)
10975 return ARM_INVERSE_CONDITION_CODE (code);
10976 return code;
10978 case CC_NOOVmode:
10979 switch (comp_code)
10981 case NE: return ARM_NE;
10982 case EQ: return ARM_EQ;
10983 case GE: return ARM_PL;
10984 case LT: return ARM_MI;
10985 default: gcc_unreachable ();
10988 case CC_Zmode:
10989 switch (comp_code)
10991 case NE: return ARM_NE;
10992 case EQ: return ARM_EQ;
10993 default: gcc_unreachable ();
10996 case CC_Nmode:
10997 switch (comp_code)
10999 case NE: return ARM_MI;
11000 case EQ: return ARM_PL;
11001 default: gcc_unreachable ();
11004 case CCFPEmode:
11005 case CCFPmode:
11006 /* These encodings assume that AC=1 in the FPA system control
11007 byte. This allows us to handle all cases except UNEQ and
11008 LTGT. */
11009 switch (comp_code)
11011 case GE: return ARM_GE;
11012 case GT: return ARM_GT;
11013 case LE: return ARM_LS;
11014 case LT: return ARM_MI;
11015 case NE: return ARM_NE;
11016 case EQ: return ARM_EQ;
11017 case ORDERED: return ARM_VC;
11018 case UNORDERED: return ARM_VS;
11019 case UNLT: return ARM_LT;
11020 case UNLE: return ARM_LE;
11021 case UNGT: return ARM_HI;
11022 case UNGE: return ARM_PL;
11023 /* UNEQ and LTGT do not have a representation. */
11024 case UNEQ: /* Fall through. */
11025 case LTGT: /* Fall through. */
11026 default: gcc_unreachable ();
11029 case CC_SWPmode:
11030 switch (comp_code)
11032 case NE: return ARM_NE;
11033 case EQ: return ARM_EQ;
11034 case GE: return ARM_LE;
11035 case GT: return ARM_LT;
11036 case LE: return ARM_GE;
11037 case LT: return ARM_GT;
11038 case GEU: return ARM_LS;
11039 case GTU: return ARM_CC;
11040 case LEU: return ARM_CS;
11041 case LTU: return ARM_HI;
11042 default: gcc_unreachable ();
11045 case CC_Cmode:
11046 switch (comp_code)
11048 case LTU: return ARM_CS;
11049 case GEU: return ARM_CC;
11050 default: gcc_unreachable ();
11053 case CCmode:
11054 switch (comp_code)
11056 case NE: return ARM_NE;
11057 case EQ: return ARM_EQ;
11058 case GE: return ARM_GE;
11059 case GT: return ARM_GT;
11060 case LE: return ARM_LE;
11061 case LT: return ARM_LT;
11062 case GEU: return ARM_CS;
11063 case GTU: return ARM_HI;
11064 case LEU: return ARM_LS;
11065 case LTU: return ARM_CC;
11066 default: gcc_unreachable ();
11069 default: gcc_unreachable ();
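/* Editorial example: a comparison whose operands were swapped when the
   flags were computed carries CC_SWPmode, so a GT request must be
   output as "lt", exactly as the table above encodes.  */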
11073 void
11074 arm_final_prescan_insn (rtx insn)
11076 /* BODY will hold the body of INSN. */
11077 rtx body = PATTERN (insn);
11079 /* This will be 1 if trying to repeat the trick, and things need to be
11080 reversed if it appears to fail. */
11081 int reverse = 0;
11083 /* A nonzero JUMP_CLOBBERS implies that the condition codes are clobbered
11084 if the branch is taken, even if the rtl suggests otherwise. It also
11085 means that we have to grub around within the jump expression to find
11086 out what the conditions are when the jump isn't taken. */
11087 int jump_clobbers = 0;
11089 /* If we start with a return insn, we only succeed if we find another one. */
11090 int seeking_return = 0;
11092 /* START_INSN will hold the insn from where we start looking. This is the
11093 first insn after the following code_label if REVERSE is true. */
11094 rtx start_insn = insn;
11096 /* If in state 4, check if the target branch is reached, in order to
11097 change back to state 0. */
11098 if (arm_ccfsm_state == 4)
11100 if (insn == arm_target_insn)
11102 arm_target_insn = NULL;
11103 arm_ccfsm_state = 0;
11105 return;
11108 /* If in state 3, it is possible to repeat the trick, if this insn is an
11109 unconditional branch to a label, and immediately following this branch
11110 is the previous target label which is only used once, and the label this
11111 branch jumps to is not too far off. */
11112 if (arm_ccfsm_state == 3)
11114 if (simplejump_p (insn))
11116 start_insn = next_nonnote_insn (start_insn);
11117 if (GET_CODE (start_insn) == BARRIER)
11119 /* XXX Isn't this always a barrier? */
11120 start_insn = next_nonnote_insn (start_insn);
11122 if (GET_CODE (start_insn) == CODE_LABEL
11123 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
11124 && LABEL_NUSES (start_insn) == 1)
11125 reverse = TRUE;
11126 else
11127 return;
11129 else if (GET_CODE (body) == RETURN)
11131 start_insn = next_nonnote_insn (start_insn);
11132 if (GET_CODE (start_insn) == BARRIER)
11133 start_insn = next_nonnote_insn (start_insn);
11134 if (GET_CODE (start_insn) == CODE_LABEL
11135 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
11136 && LABEL_NUSES (start_insn) == 1)
11138 reverse = TRUE;
11139 seeking_return = 1;
11141 else
11142 return;
11144 else
11145 return;
11148 gcc_assert (!arm_ccfsm_state || reverse);
11149 if (GET_CODE (insn) != JUMP_INSN)
11150 return;
11152 /* This jump might be paralleled with a clobber of the condition codes;
11153 the jump should always come first */
11154 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
11155 body = XVECEXP (body, 0, 0);
11157 if (reverse
11158 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
11159 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
11161 int insns_skipped;
11162 int fail = FALSE, succeed = FALSE;
11163 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
11164 int then_not_else = TRUE;
11165 rtx this_insn = start_insn, label = 0;
11167 /* If the jump cannot be done with one instruction, we cannot
11168 conditionally execute the instruction in the inverse case. */
11169 if (get_attr_conds (insn) == CONDS_JUMP_CLOB)
11171 jump_clobbers = 1;
11172 return;
11175 /* Register the insn jumped to. */
11176 if (reverse)
11178 if (!seeking_return)
11179 label = XEXP (SET_SRC (body), 0);
11181 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
11182 label = XEXP (XEXP (SET_SRC (body), 1), 0);
11183 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
11185 label = XEXP (XEXP (SET_SRC (body), 2), 0);
11186 then_not_else = FALSE;
11188 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == RETURN)
11189 seeking_return = 1;
11190 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == RETURN)
11192 seeking_return = 1;
11193 then_not_else = FALSE;
11195 else
11196 gcc_unreachable ();
11198 /* See how many insns this branch skips, and what kind of insns. If all
11199 insns are okay, and the label or unconditional branch to the same
11200 label is not too far away, succeed. */
11201 for (insns_skipped = 0;
11202 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
11204 rtx scanbody;
11206 this_insn = next_nonnote_insn (this_insn);
11207 if (!this_insn)
11208 break;
11210 switch (GET_CODE (this_insn))
11212 case CODE_LABEL:
11213 /* Succeed if it is the target label, otherwise fail since
11214 control falls in from somewhere else. */
11215 if (this_insn == label)
11217 if (jump_clobbers)
11219 arm_ccfsm_state = 2;
11220 this_insn = next_nonnote_insn (this_insn);
11222 else
11223 arm_ccfsm_state = 1;
11224 succeed = TRUE;
11226 else
11227 fail = TRUE;
11228 break;
11230 case BARRIER:
11231 /* Succeed if the following insn is the target label.
11232 Otherwise fail.
11233 If return insns are used then the last insn in a function
11234 will be a barrier. */
11235 this_insn = next_nonnote_insn (this_insn);
11236 if (this_insn && this_insn == label)
11238 if (jump_clobbers)
11240 arm_ccfsm_state = 2;
11241 this_insn = next_nonnote_insn (this_insn);
11243 else
11244 arm_ccfsm_state = 1;
11245 succeed = TRUE;
11247 else
11248 fail = TRUE;
11249 break;
11251 case CALL_INSN:
11252 /* The AAPCS says that conditional calls should not be
11253 used since they make interworking inefficient (the
11254 linker can't transform BL<cond> into BLX). That's
11255 only a problem if the machine has BLX. */
11256 if (arm_arch5)
11258 fail = TRUE;
11259 break;
11262 /* Succeed if the following insn is the target label, or
11263 if the following two insns are a barrier and the
11264 target label. */
11265 this_insn = next_nonnote_insn (this_insn);
11266 if (this_insn && GET_CODE (this_insn) == BARRIER)
11267 this_insn = next_nonnote_insn (this_insn);
11269 if (this_insn && this_insn == label
11270 && insns_skipped < max_insns_skipped)
11272 if (jump_clobbers)
11274 arm_ccfsm_state = 2;
11275 this_insn = next_nonnote_insn (this_insn);
11277 else
11278 arm_ccfsm_state = 1;
11279 succeed = TRUE;
11281 else
11282 fail = TRUE;
11283 break;
11285 case JUMP_INSN:
11286 /* If this is an unconditional branch to the same label, succeed.
11287 If it is to another label, do nothing. If it is conditional,
11288 fail. */
11289 /* XXX Probably, the tests for SET and the PC are
11290 unnecessary. */
11292 scanbody = PATTERN (this_insn);
11293 if (GET_CODE (scanbody) == SET
11294 && GET_CODE (SET_DEST (scanbody)) == PC)
11296 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
11297 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
11299 arm_ccfsm_state = 2;
11300 succeed = TRUE;
11302 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
11303 fail = TRUE;
11305 /* Fail if a conditional return is undesirable (e.g. on a
11306 StrongARM), but still allow this if optimizing for size. */
11307 else if (GET_CODE (scanbody) == RETURN
11308 && !use_return_insn (TRUE, NULL)
11309 && !optimize_size)
11310 fail = TRUE;
11311 else if (GET_CODE (scanbody) == RETURN
11312 && seeking_return)
11314 arm_ccfsm_state = 2;
11315 succeed = TRUE;
11317 else if (GET_CODE (scanbody) == PARALLEL)
11319 switch (get_attr_conds (this_insn))
11321 case CONDS_NOCOND:
11322 break;
11323 default:
11324 fail = TRUE;
11325 break;
11328 else
11329 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
11331 break;
11333 case INSN:
11334 /* Instructions using or affecting the condition codes make it
11335 fail. */
11336 scanbody = PATTERN (this_insn);
11337 if (!(GET_CODE (scanbody) == SET
11338 || GET_CODE (scanbody) == PARALLEL)
11339 || get_attr_conds (this_insn) != CONDS_NOCOND)
11340 fail = TRUE;
11342 /* A conditional Cirrus instruction must be followed by
11343 a non-Cirrus instruction. However, since we
11344 conditionalize instructions in this function, and since by
11345 the time we get here we cannot add instructions
11346 (nops) because shorten_branches() has already been
11347 called, we simply disable the conditionalizing of Cirrus
11348 instructions, to be safe.
11349 if (GET_CODE (scanbody) != USE
11350 && GET_CODE (scanbody) != CLOBBER
11351 && get_attr_cirrus (this_insn) != CIRRUS_NOT)
11352 fail = TRUE;
11353 break;
11355 default:
11356 break;
11359 if (succeed)
11361 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
11362 arm_target_label = CODE_LABEL_NUMBER (label);
11363 else
11365 gcc_assert (seeking_return || arm_ccfsm_state == 2);
11367 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
11369 this_insn = next_nonnote_insn (this_insn);
11370 gcc_assert (!this_insn
11371 || (GET_CODE (this_insn) != BARRIER
11372 && GET_CODE (this_insn) != CODE_LABEL));
11374 if (!this_insn)
11376 /* Oh, dear! We ran off the end... give up. */
11377 recog (PATTERN (insn), insn, NULL);
11378 arm_ccfsm_state = 0;
11379 arm_target_insn = NULL;
11380 return;
11382 arm_target_insn = this_insn;
11384 if (jump_clobbers)
11386 gcc_assert (!reverse);
11387 arm_current_cc =
11388 get_arm_condition_code (XEXP (XEXP (XEXP (SET_SRC (body),
11389 0), 0), 1));
11390 if (GET_CODE (XEXP (XEXP (SET_SRC (body), 0), 0)) == AND)
11391 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
11392 if (GET_CODE (XEXP (SET_SRC (body), 0)) == NE)
11393 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
11395 else
11397 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
11398 what it was. */
11399 if (!reverse)
11400 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body),
11401 0));
11404 if (reverse || then_not_else)
11405 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
11408 /* Restore recog_data (getting the attributes of other insns can
11409 destroy this array, but final.c assumes that it remains intact
11410 across this call; since the insn has been recognized already we
11411 call recog direct). */
11412 recog (PATTERN (insn), insn, NULL);
11416 /* Returns true if REGNO is a valid register
11417 for holding a quantity of type MODE. */
11419 arm_hard_regno_mode_ok (unsigned int regno, enum machine_mode mode)
11421 if (GET_MODE_CLASS (mode) == MODE_CC)
11422 return regno == CC_REGNUM || regno == VFPCC_REGNUM;
11424 if (TARGET_THUMB)
11425 /* For the Thumb we only allow values bigger than SImode in
11426 registers 0 - 6, so that there is always a second low
11427 register available to hold the upper part of the value.
11428 We probably ought to ensure that the register is the
11429 start of an even numbered register pair. */
11430 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
11432 if (IS_CIRRUS_REGNUM (regno))
11433 /* We have outlawed SI values in Cirrus registers because they
11434 reside in the lower 32 bits, but SF values reside in the
11435 upper 32 bits. This causes gcc all sorts of grief. We can't
11436 even split the registers into pairs because Cirrus SI values
11437 get sign extended to 64bits-- aldyh. */
11438 return (GET_MODE_CLASS (mode) == MODE_FLOAT) || (mode == DImode);
11440 if (IS_VFP_REGNUM (regno))
11442 if (mode == SFmode || mode == SImode)
11443 return TRUE;
11445 /* DFmode values are only valid in even register pairs. */
11446 if (mode == DFmode)
11447 return ((regno - FIRST_VFP_REGNUM) & 1) == 0;
11448 return FALSE;
11451 if (IS_IWMMXT_GR_REGNUM (regno))
11452 return mode == SImode;
11454 if (IS_IWMMXT_REGNUM (regno))
11455 return VALID_IWMMXT_REG_MODE (mode);
11457 /* We allow any value to be stored in the general registers.
11458 Restrict doubleword quantities to even register pairs so that we can
11459 use ldrd. */
11460 if (regno <= LAST_ARM_REGNUM)
11461 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
11463 if ( regno == FRAME_POINTER_REGNUM
11464 || regno == ARG_POINTER_REGNUM)
11465 /* We only allow integers in the fake hard registers. */
11466 return GET_MODE_CLASS (mode) == MODE_INT;
11468 /* The only registers left are the FPA registers
11469 which we only allow to hold FP values. */
11470 return GET_MODE_CLASS (mode) == MODE_FLOAT
11471 && regno >= FIRST_FPA_REGNUM
11472 && regno <= LAST_FPA_REGNUM;
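/* Editorial examples of the rules above: DFmode is accepted in a VFP
   pair starting on an even register (s0/s1, printed as d0) but
   rejected starting at s1; with TARGET_LDRD, DImode is accepted in r2
   but rejected in r1, preserving ldrd's even-register requirement.  */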
11476 arm_regno_class (int regno)
11478 if (TARGET_THUMB)
11480 if (regno == STACK_POINTER_REGNUM)
11481 return STACK_REG;
11482 if (regno == CC_REGNUM)
11483 return CC_REG;
11484 if (regno < 8)
11485 return LO_REGS;
11486 return HI_REGS;
11489 if ( regno <= LAST_ARM_REGNUM
11490 || regno == FRAME_POINTER_REGNUM
11491 || regno == ARG_POINTER_REGNUM)
11492 return GENERAL_REGS;
11494 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
11495 return NO_REGS;
11497 if (IS_CIRRUS_REGNUM (regno))
11498 return CIRRUS_REGS;
11500 if (IS_VFP_REGNUM (regno))
11501 return VFP_REGS;
11503 if (IS_IWMMXT_REGNUM (regno))
11504 return IWMMXT_REGS;
11506 if (IS_IWMMXT_GR_REGNUM (regno))
11507 return IWMMXT_GR_REGS;
11509 return FPA_REGS;
11512 /* Handle a special case when computing the offset
11513 of an argument from the frame pointer. */
11515 arm_debugger_arg_offset (int value, rtx addr)
11517 rtx insn;
11519 /* We are only interested if dbxout_parms() failed to compute the offset. */
11520 if (value != 0)
11521 return 0;
11523 /* We can only cope with the case where the address is held in a register. */
11524 if (GET_CODE (addr) != REG)
11525 return 0;
11527 /* If we are using the frame pointer to point at the argument, then
11528 an offset of 0 is correct. */
11529 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
11530 return 0;
11532 /* If we are using the stack pointer to point at the
11533 argument, then an offset of 0 is correct. */
11534 if ((TARGET_THUMB || !frame_pointer_needed)
11535 && REGNO (addr) == SP_REGNUM)
11536 return 0;
11538 /* Oh dear. The argument is pointed to by a register rather
11539 than being held in a register, or being stored at a known
11540 offset from the frame pointer. Since GDB only understands
11541 those two kinds of argument we must translate the address
11542 held in the register into an offset from the frame pointer.
11543 We do this by searching through the insns for the function
11544 looking to see where this register gets its value. If the
11545 register is initialized from the frame pointer plus an offset
11546 then we are in luck and we can continue, otherwise we give up.
11548 This code is exercised by producing debugging information
11549 for a function with arguments like this:
11551 double func (double a, double b, int c, double d) {return d;}
11553 Without this code the stab for parameter 'd' will be set to
11554 an offset of 0 from the frame pointer, rather than 8. */
11556 /* The if() statement says:
11558 If the insn is a normal instruction
11559 and if the insn is setting the value in a register
11560 and if the register being set is the register holding the address of the argument
11561 and if the address is computed by an addition
11562 that involves adding to a register
11563 which is the frame pointer
11564 a constant integer
11566 then... */
11568 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
11570 if ( GET_CODE (insn) == INSN
11571 && GET_CODE (PATTERN (insn)) == SET
11572 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
11573 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
11574 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 0)) == REG
11575 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
11576 && GET_CODE (XEXP (XEXP (PATTERN (insn), 1), 1)) == CONST_INT
11579 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
11581 break;
11585 if (value == 0)
11587 debug_rtx (addr);
11588 warning (0, "unable to compute real location of stacked parameter");
11589 value = 8; /* XXX magic hack */
11592 return value;
11595 #define def_mbuiltin(MASK, NAME, TYPE, CODE) \
11596 do \
11598 if ((MASK) & insn_flags) \
11599 lang_hooks.builtin_function ((NAME), (TYPE), (CODE), \
11600 BUILT_IN_MD, NULL, NULL_TREE); \
11602 while (0)
11604 struct builtin_description
11606 const unsigned int mask;
11607 const enum insn_code icode;
11608 const char * const name;
11609 const enum arm_builtins code;
11610 const enum rtx_code comparison;
11611 const unsigned int flag;
11614 static const struct builtin_description bdesc_2arg[] =
11616 #define IWMMXT_BUILTIN(code, string, builtin) \
11617 { FL_IWMMXT, CODE_FOR_##code, "__builtin_arm_" string, \
11618 ARM_BUILTIN_##builtin, 0, 0 },
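/* Editorial note: the first entry below therefore expands to
   { FL_IWMMXT, CODE_FOR_addv8qi3, "__builtin_arm_waddb",
     ARM_BUILTIN_WADDB, 0, 0 }.  */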
11620 IWMMXT_BUILTIN (addv8qi3, "waddb", WADDB)
11621 IWMMXT_BUILTIN (addv4hi3, "waddh", WADDH)
11622 IWMMXT_BUILTIN (addv2si3, "waddw", WADDW)
11623 IWMMXT_BUILTIN (subv8qi3, "wsubb", WSUBB)
11624 IWMMXT_BUILTIN (subv4hi3, "wsubh", WSUBH)
11625 IWMMXT_BUILTIN (subv2si3, "wsubw", WSUBW)
11626 IWMMXT_BUILTIN (ssaddv8qi3, "waddbss", WADDSSB)
11627 IWMMXT_BUILTIN (ssaddv4hi3, "waddhss", WADDSSH)
11628 IWMMXT_BUILTIN (ssaddv2si3, "waddwss", WADDSSW)
11629 IWMMXT_BUILTIN (sssubv8qi3, "wsubbss", WSUBSSB)
11630 IWMMXT_BUILTIN (sssubv4hi3, "wsubhss", WSUBSSH)
11631 IWMMXT_BUILTIN (sssubv2si3, "wsubwss", WSUBSSW)
11632 IWMMXT_BUILTIN (usaddv8qi3, "waddbus", WADDUSB)
11633 IWMMXT_BUILTIN (usaddv4hi3, "waddhus", WADDUSH)
11634 IWMMXT_BUILTIN (usaddv2si3, "waddwus", WADDUSW)
11635 IWMMXT_BUILTIN (ussubv8qi3, "wsubbus", WSUBUSB)
11636 IWMMXT_BUILTIN (ussubv4hi3, "wsubhus", WSUBUSH)
11637 IWMMXT_BUILTIN (ussubv2si3, "wsubwus", WSUBUSW)
11638 IWMMXT_BUILTIN (mulv4hi3, "wmulul", WMULUL)
11639 IWMMXT_BUILTIN (smulv4hi3_highpart, "wmulsm", WMULSM)
11640 IWMMXT_BUILTIN (umulv4hi3_highpart, "wmulum", WMULUM)
11641 IWMMXT_BUILTIN (eqv8qi3, "wcmpeqb", WCMPEQB)
11642 IWMMXT_BUILTIN (eqv4hi3, "wcmpeqh", WCMPEQH)
11643 IWMMXT_BUILTIN (eqv2si3, "wcmpeqw", WCMPEQW)
11644 IWMMXT_BUILTIN (gtuv8qi3, "wcmpgtub", WCMPGTUB)
11645 IWMMXT_BUILTIN (gtuv4hi3, "wcmpgtuh", WCMPGTUH)
11646 IWMMXT_BUILTIN (gtuv2si3, "wcmpgtuw", WCMPGTUW)
11647 IWMMXT_BUILTIN (gtv8qi3, "wcmpgtsb", WCMPGTSB)
11648 IWMMXT_BUILTIN (gtv4hi3, "wcmpgtsh", WCMPGTSH)
11649 IWMMXT_BUILTIN (gtv2si3, "wcmpgtsw", WCMPGTSW)
11650 IWMMXT_BUILTIN (umaxv8qi3, "wmaxub", WMAXUB)
11651 IWMMXT_BUILTIN (smaxv8qi3, "wmaxsb", WMAXSB)
11652 IWMMXT_BUILTIN (umaxv4hi3, "wmaxuh", WMAXUH)
11653 IWMMXT_BUILTIN (smaxv4hi3, "wmaxsh", WMAXSH)
11654 IWMMXT_BUILTIN (umaxv2si3, "wmaxuw", WMAXUW)
11655 IWMMXT_BUILTIN (smaxv2si3, "wmaxsw", WMAXSW)
11656 IWMMXT_BUILTIN (uminv8qi3, "wminub", WMINUB)
11657 IWMMXT_BUILTIN (sminv8qi3, "wminsb", WMINSB)
11658 IWMMXT_BUILTIN (uminv4hi3, "wminuh", WMINUH)
11659 IWMMXT_BUILTIN (sminv4hi3, "wminsh", WMINSH)
11660 IWMMXT_BUILTIN (uminv2si3, "wminuw", WMINUW)
11661 IWMMXT_BUILTIN (sminv2si3, "wminsw", WMINSW)
11662 IWMMXT_BUILTIN (iwmmxt_anddi3, "wand", WAND)
11663 IWMMXT_BUILTIN (iwmmxt_nanddi3, "wandn", WANDN)
11664 IWMMXT_BUILTIN (iwmmxt_iordi3, "wor", WOR)
11665 IWMMXT_BUILTIN (iwmmxt_xordi3, "wxor", WXOR)
11666 IWMMXT_BUILTIN (iwmmxt_uavgv8qi3, "wavg2b", WAVG2B)
11667 IWMMXT_BUILTIN (iwmmxt_uavgv4hi3, "wavg2h", WAVG2H)
11668 IWMMXT_BUILTIN (iwmmxt_uavgrndv8qi3, "wavg2br", WAVG2BR)
11669 IWMMXT_BUILTIN (iwmmxt_uavgrndv4hi3, "wavg2hr", WAVG2HR)
11670 IWMMXT_BUILTIN (iwmmxt_wunpckilb, "wunpckilb", WUNPCKILB)
11671 IWMMXT_BUILTIN (iwmmxt_wunpckilh, "wunpckilh", WUNPCKILH)
11672 IWMMXT_BUILTIN (iwmmxt_wunpckilw, "wunpckilw", WUNPCKILW)
11673 IWMMXT_BUILTIN (iwmmxt_wunpckihb, "wunpckihb", WUNPCKIHB)
11674 IWMMXT_BUILTIN (iwmmxt_wunpckihh, "wunpckihh", WUNPCKIHH)
11675 IWMMXT_BUILTIN (iwmmxt_wunpckihw, "wunpckihw", WUNPCKIHW)
11676 IWMMXT_BUILTIN (iwmmxt_wmadds, "wmadds", WMADDS)
11677 IWMMXT_BUILTIN (iwmmxt_wmaddu, "wmaddu", WMADDU)
11679 #define IWMMXT_BUILTIN2(code, builtin) \
11680 { FL_IWMMXT, CODE_FOR_##code, NULL, ARM_BUILTIN_##builtin, 0, 0 },
11682 IWMMXT_BUILTIN2 (iwmmxt_wpackhss, WPACKHSS)
11683 IWMMXT_BUILTIN2 (iwmmxt_wpackwss, WPACKWSS)
11684 IWMMXT_BUILTIN2 (iwmmxt_wpackdss, WPACKDSS)
11685 IWMMXT_BUILTIN2 (iwmmxt_wpackhus, WPACKHUS)
11686 IWMMXT_BUILTIN2 (iwmmxt_wpackwus, WPACKWUS)
11687 IWMMXT_BUILTIN2 (iwmmxt_wpackdus, WPACKDUS)
11688 IWMMXT_BUILTIN2 (ashlv4hi3_di, WSLLH)
11689 IWMMXT_BUILTIN2 (ashlv4hi3, WSLLHI)
11690 IWMMXT_BUILTIN2 (ashlv2si3_di, WSLLW)
11691 IWMMXT_BUILTIN2 (ashlv2si3, WSLLWI)
11692 IWMMXT_BUILTIN2 (ashldi3_di, WSLLD)
11693 IWMMXT_BUILTIN2 (ashldi3_iwmmxt, WSLLDI)
11694 IWMMXT_BUILTIN2 (lshrv4hi3_di, WSRLH)
11695 IWMMXT_BUILTIN2 (lshrv4hi3, WSRLHI)
11696 IWMMXT_BUILTIN2 (lshrv2si3_di, WSRLW)
11697 IWMMXT_BUILTIN2 (lshrv2si3, WSRLWI)
11698 IWMMXT_BUILTIN2 (lshrdi3_di, WSRLD)
11699 IWMMXT_BUILTIN2 (lshrdi3_iwmmxt, WSRLDI)
11700 IWMMXT_BUILTIN2 (ashrv4hi3_di, WSRAH)
11701 IWMMXT_BUILTIN2 (ashrv4hi3, WSRAHI)
11702 IWMMXT_BUILTIN2 (ashrv2si3_di, WSRAW)
11703 IWMMXT_BUILTIN2 (ashrv2si3, WSRAWI)
11704 IWMMXT_BUILTIN2 (ashrdi3_di, WSRAD)
11705 IWMMXT_BUILTIN2 (ashrdi3_iwmmxt, WSRADI)
11706 IWMMXT_BUILTIN2 (rorv4hi3_di, WRORH)
11707 IWMMXT_BUILTIN2 (rorv4hi3, WRORHI)
11708 IWMMXT_BUILTIN2 (rorv2si3_di, WRORW)
11709 IWMMXT_BUILTIN2 (rorv2si3, WRORWI)
11710 IWMMXT_BUILTIN2 (rordi3_di, WRORD)
11711 IWMMXT_BUILTIN2 (rordi3, WRORDI)
11712 IWMMXT_BUILTIN2 (iwmmxt_wmacuz, WMACUZ)
11713 IWMMXT_BUILTIN2 (iwmmxt_wmacsz, WMACSZ)
11716 static const struct builtin_description bdesc_1arg[] =
11718 IWMMXT_BUILTIN (iwmmxt_tmovmskb, "tmovmskb", TMOVMSKB)
11719 IWMMXT_BUILTIN (iwmmxt_tmovmskh, "tmovmskh", TMOVMSKH)
11720 IWMMXT_BUILTIN (iwmmxt_tmovmskw, "tmovmskw", TMOVMSKW)
11721 IWMMXT_BUILTIN (iwmmxt_waccb, "waccb", WACCB)
11722 IWMMXT_BUILTIN (iwmmxt_wacch, "wacch", WACCH)
11723 IWMMXT_BUILTIN (iwmmxt_waccw, "waccw", WACCW)
11724 IWMMXT_BUILTIN (iwmmxt_wunpckehub, "wunpckehub", WUNPCKEHUB)
11725 IWMMXT_BUILTIN (iwmmxt_wunpckehuh, "wunpckehuh", WUNPCKEHUH)
11726 IWMMXT_BUILTIN (iwmmxt_wunpckehuw, "wunpckehuw", WUNPCKEHUW)
11727 IWMMXT_BUILTIN (iwmmxt_wunpckehsb, "wunpckehsb", WUNPCKEHSB)
11728 IWMMXT_BUILTIN (iwmmxt_wunpckehsh, "wunpckehsh", WUNPCKEHSH)
11729 IWMMXT_BUILTIN (iwmmxt_wunpckehsw, "wunpckehsw", WUNPCKEHSW)
11730 IWMMXT_BUILTIN (iwmmxt_wunpckelub, "wunpckelub", WUNPCKELUB)
11731 IWMMXT_BUILTIN (iwmmxt_wunpckeluh, "wunpckeluh", WUNPCKELUH)
11732 IWMMXT_BUILTIN (iwmmxt_wunpckeluw, "wunpckeluw", WUNPCKELUW)
11733 IWMMXT_BUILTIN (iwmmxt_wunpckelsb, "wunpckelsb", WUNPCKELSB)
11734 IWMMXT_BUILTIN (iwmmxt_wunpckelsh, "wunpckelsh", WUNPCKELSH)
11735 IWMMXT_BUILTIN (iwmmxt_wunpckelsw, "wunpckelsw", WUNPCKELSW)
11738 /* Set up all the iWMMXt builtins. This is
11739 not called if TARGET_IWMMXT is zero. */
11741 static void
11742 arm_init_iwmmxt_builtins (void)
11744 const struct builtin_description * d;
11745 size_t i;
11746 tree endlink = void_list_node;
11748 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
11749 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
11750 tree V8QI_type_node = build_vector_type_for_mode (intQI_type_node, V8QImode);
11752 tree int_ftype_int
11753 = build_function_type (integer_type_node,
11754 tree_cons (NULL_TREE, integer_type_node, endlink));
11755 tree v8qi_ftype_v8qi_v8qi_int
11756 = build_function_type (V8QI_type_node,
11757 tree_cons (NULL_TREE, V8QI_type_node,
11758 tree_cons (NULL_TREE, V8QI_type_node,
11759 tree_cons (NULL_TREE,
11760 integer_type_node,
11761 endlink))));
11762 tree v4hi_ftype_v4hi_int
11763 = build_function_type (V4HI_type_node,
11764 tree_cons (NULL_TREE, V4HI_type_node,
11765 tree_cons (NULL_TREE, integer_type_node,
11766 endlink)));
11767 tree v2si_ftype_v2si_int
11768 = build_function_type (V2SI_type_node,
11769 tree_cons (NULL_TREE, V2SI_type_node,
11770 tree_cons (NULL_TREE, integer_type_node,
11771 endlink)));
11772 tree v2si_ftype_di_di
11773 = build_function_type (V2SI_type_node,
11774 tree_cons (NULL_TREE, long_long_integer_type_node,
11775 tree_cons (NULL_TREE, long_long_integer_type_node,
11776 endlink)));
11777 tree di_ftype_di_int
11778 = build_function_type (long_long_integer_type_node,
11779 tree_cons (NULL_TREE, long_long_integer_type_node,
11780 tree_cons (NULL_TREE, integer_type_node,
11781 endlink)));
11782 tree di_ftype_di_int_int
11783 = build_function_type (long_long_integer_type_node,
11784 tree_cons (NULL_TREE, long_long_integer_type_node,
11785 tree_cons (NULL_TREE, integer_type_node,
11786 tree_cons (NULL_TREE,
11787 integer_type_node,
11788 endlink))));
11789 tree int_ftype_v8qi
11790 = build_function_type (integer_type_node,
11791 tree_cons (NULL_TREE, V8QI_type_node,
11792 endlink));
11793 tree int_ftype_v4hi
11794 = build_function_type (integer_type_node,
11795 tree_cons (NULL_TREE, V4HI_type_node,
11796 endlink));
11797 tree int_ftype_v2si
11798 = build_function_type (integer_type_node,
11799 tree_cons (NULL_TREE, V2SI_type_node,
11800 endlink));
11801 tree int_ftype_v8qi_int
11802 = build_function_type (integer_type_node,
11803 tree_cons (NULL_TREE, V8QI_type_node,
11804 tree_cons (NULL_TREE, integer_type_node,
11805 endlink)));
11806 tree int_ftype_v4hi_int
11807 = build_function_type (integer_type_node,
11808 tree_cons (NULL_TREE, V4HI_type_node,
11809 tree_cons (NULL_TREE, integer_type_node,
11810 endlink)));
11811 tree int_ftype_v2si_int
11812 = build_function_type (integer_type_node,
11813 tree_cons (NULL_TREE, V2SI_type_node,
11814 tree_cons (NULL_TREE, integer_type_node,
11815 endlink)));
11816 tree v8qi_ftype_v8qi_int_int
11817 = build_function_type (V8QI_type_node,
11818 tree_cons (NULL_TREE, V8QI_type_node,
11819 tree_cons (NULL_TREE, integer_type_node,
11820 tree_cons (NULL_TREE,
11821 integer_type_node,
11822 endlink))));
11823 tree v4hi_ftype_v4hi_int_int
11824 = build_function_type (V4HI_type_node,
11825 tree_cons (NULL_TREE, V4HI_type_node,
11826 tree_cons (NULL_TREE, integer_type_node,
11827 tree_cons (NULL_TREE,
11828 integer_type_node,
11829 endlink))));
11830 tree v2si_ftype_v2si_int_int
11831 = build_function_type (V2SI_type_node,
11832 tree_cons (NULL_TREE, V2SI_type_node,
11833 tree_cons (NULL_TREE, integer_type_node,
11834 tree_cons (NULL_TREE,
11835 integer_type_node,
11836 endlink))));
11837 /* Miscellaneous. */
11838 tree v8qi_ftype_v4hi_v4hi
11839 = build_function_type (V8QI_type_node,
11840 tree_cons (NULL_TREE, V4HI_type_node,
11841 tree_cons (NULL_TREE, V4HI_type_node,
11842 endlink)));
11843 tree v4hi_ftype_v2si_v2si
11844 = build_function_type (V4HI_type_node,
11845 tree_cons (NULL_TREE, V2SI_type_node,
11846 tree_cons (NULL_TREE, V2SI_type_node,
11847 endlink)));
11848 tree v2si_ftype_v4hi_v4hi
11849 = build_function_type (V2SI_type_node,
11850 tree_cons (NULL_TREE, V4HI_type_node,
11851 tree_cons (NULL_TREE, V4HI_type_node,
11852 endlink)));
11853 tree v2si_ftype_v8qi_v8qi
11854 = build_function_type (V2SI_type_node,
11855 tree_cons (NULL_TREE, V8QI_type_node,
11856 tree_cons (NULL_TREE, V8QI_type_node,
11857 endlink)));
11858 tree v4hi_ftype_v4hi_di
11859 = build_function_type (V4HI_type_node,
11860 tree_cons (NULL_TREE, V4HI_type_node,
11861 tree_cons (NULL_TREE,
11862 long_long_integer_type_node,
11863 endlink)));
11864 tree v2si_ftype_v2si_di
11865 = build_function_type (V2SI_type_node,
11866 tree_cons (NULL_TREE, V2SI_type_node,
11867 tree_cons (NULL_TREE,
11868 long_long_integer_type_node,
11869 endlink)));
11870 tree void_ftype_int_int
11871 = build_function_type (void_type_node,
11872 tree_cons (NULL_TREE, integer_type_node,
11873 tree_cons (NULL_TREE, integer_type_node,
11874 endlink)));
11875 tree di_ftype_void
11876 = build_function_type (long_long_unsigned_type_node, endlink);
11877 tree di_ftype_v8qi
11878 = build_function_type (long_long_integer_type_node,
11879 tree_cons (NULL_TREE, V8QI_type_node,
11880 endlink));
11881 tree di_ftype_v4hi
11882 = build_function_type (long_long_integer_type_node,
11883 tree_cons (NULL_TREE, V4HI_type_node,
11884 endlink));
11885 tree di_ftype_v2si
11886 = build_function_type (long_long_integer_type_node,
11887 tree_cons (NULL_TREE, V2SI_type_node,
11888 endlink));
11889 tree v2si_ftype_v4hi
11890 = build_function_type (V2SI_type_node,
11891 tree_cons (NULL_TREE, V4HI_type_node,
11892 endlink));
11893 tree v4hi_ftype_v8qi
11894 = build_function_type (V4HI_type_node,
11895 tree_cons (NULL_TREE, V8QI_type_node,
11896 endlink));
11898 tree di_ftype_di_v4hi_v4hi
11899 = build_function_type (long_long_unsigned_type_node,
11900 tree_cons (NULL_TREE,
11901 long_long_unsigned_type_node,
11902 tree_cons (NULL_TREE, V4HI_type_node,
11903 tree_cons (NULL_TREE,
11904 V4HI_type_node,
11905 endlink))));
11907 tree di_ftype_v4hi_v4hi
11908 = build_function_type (long_long_unsigned_type_node,
11909 tree_cons (NULL_TREE, V4HI_type_node,
11910 tree_cons (NULL_TREE, V4HI_type_node,
11911 endlink)));
11913 /* Normal vector binops. */
11914 tree v8qi_ftype_v8qi_v8qi
11915 = build_function_type (V8QI_type_node,
11916 tree_cons (NULL_TREE, V8QI_type_node,
11917 tree_cons (NULL_TREE, V8QI_type_node,
11918 endlink)));
11919 tree v4hi_ftype_v4hi_v4hi
11920 = build_function_type (V4HI_type_node,
11921 tree_cons (NULL_TREE, V4HI_type_node,
11922 tree_cons (NULL_TREE, V4HI_type_node,
11923 endlink)));
11924 tree v2si_ftype_v2si_v2si
11925 = build_function_type (V2SI_type_node,
11926 tree_cons (NULL_TREE, V2SI_type_node,
11927 tree_cons (NULL_TREE, V2SI_type_node,
11928 endlink)));
11929 tree di_ftype_di_di
11930 = build_function_type (long_long_unsigned_type_node,
11931 tree_cons (NULL_TREE, long_long_unsigned_type_node,
11932 tree_cons (NULL_TREE,
11933 long_long_unsigned_type_node,
11934 endlink)));
11936 /* Add all builtins that are more or less simple operations on two
11937 operands. */
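/* For illustration (the exact field layout is given where bdesc_2arg is
   declared): an entry pairing the V8QImode add pattern with the name
   "__builtin_arm_waddb" and code ARM_BUILTIN_WADDB would be picked up by
   the loop below, matched on operand 1 of its insn (V8QImode), and
   registered with the v8qi_ftype_v8qi_v8qi signature built above. */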
11938 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
11940 /* Use one of the operands; the target can have a different mode for
11941 mask-generating compares. */
11942 enum machine_mode mode;
11943 tree type;
11945 if (d->name == 0)
11946 continue;
11948 mode = insn_data[d->icode].operand[1].mode;
11950 switch (mode)
11952 case V8QImode:
11953 type = v8qi_ftype_v8qi_v8qi;
11954 break;
11955 case V4HImode:
11956 type = v4hi_ftype_v4hi_v4hi;
11957 break;
11958 case V2SImode:
11959 type = v2si_ftype_v2si_v2si;
11960 break;
11961 case DImode:
11962 type = di_ftype_di_di;
11963 break;
11965 default:
11966 gcc_unreachable ();
11969 def_mbuiltin (d->mask, d->name, type, d->code);
11972 /* Add the remaining MMX insns with somewhat more complicated types. */
11973 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wzero", di_ftype_void, ARM_BUILTIN_WZERO);
11974 def_mbuiltin (FL_IWMMXT, "__builtin_arm_setwcx", void_ftype_int_int, ARM_BUILTIN_SETWCX);
11975 def_mbuiltin (FL_IWMMXT, "__builtin_arm_getwcx", int_ftype_int, ARM_BUILTIN_GETWCX);
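/* Illustrative use from user code (the first argument selects an iWMMXt
   control register wCx; getwcx expands to TMRC and setwcx to TMCR, as in
   arm_expand_builtin below):

     int flags = __builtin_arm_getwcx (3);
     __builtin_arm_setwcx (3, flags | 1);
*/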
11977 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSLLH);
11978 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllw", v2si_ftype_v2si_di, ARM_BUILTIN_WSLLW);
11979 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslld", di_ftype_di_di, ARM_BUILTIN_WSLLD);
11980 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSLLHI);
11981 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsllwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSLLWI);
11982 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wslldi", di_ftype_di_int, ARM_BUILTIN_WSLLDI);
11984 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRLH);
11985 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRLW);
11986 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrld", di_ftype_di_di, ARM_BUILTIN_WSRLD);
11987 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRLHI);
11988 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrlwi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRLWI);
11989 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrldi", di_ftype_di_int, ARM_BUILTIN_WSRLDI);
11991 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrah", v4hi_ftype_v4hi_di, ARM_BUILTIN_WSRAH);
11992 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsraw", v2si_ftype_v2si_di, ARM_BUILTIN_WSRAW);
11993 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrad", di_ftype_di_di, ARM_BUILTIN_WSRAD);
11994 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrahi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSRAHI);
11995 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsrawi", v2si_ftype_v2si_int, ARM_BUILTIN_WSRAWI);
11996 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsradi", di_ftype_di_int, ARM_BUILTIN_WSRADI);
11998 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorh", v4hi_ftype_v4hi_di, ARM_BUILTIN_WRORH);
11999 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorw", v2si_ftype_v2si_di, ARM_BUILTIN_WRORW);
12000 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrord", di_ftype_di_di, ARM_BUILTIN_WRORD);
12001 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorhi", v4hi_ftype_v4hi_int, ARM_BUILTIN_WRORHI);
12002 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrorwi", v2si_ftype_v2si_int, ARM_BUILTIN_WRORWI);
12003 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wrordi", di_ftype_di_int, ARM_BUILTIN_WRORDI);
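/* Note the pattern above: every shift and rotate comes in two flavours,
   one taking the count in a DImode register (e.g. __builtin_arm_wsllh)
   and one taking an immediate count (e.g. __builtin_arm_wsllhi (v, 3),
   which shifts each halfword of V left by three bits). */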
12005 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wshufh", v4hi_ftype_v4hi_int, ARM_BUILTIN_WSHUFH);
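/* The wshufh selector is eight bits, two per destination halfword; e.g.
   __builtin_arm_wshufh (v, 0x1B) (selector 0b00011011) reverses the four
   halfwords of V. */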
12007 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadb", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADB);
12008 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadh", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADH);
12009 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadbz", v2si_ftype_v8qi_v8qi, ARM_BUILTIN_WSADBZ);
12010 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wsadhz", v2si_ftype_v4hi_v4hi, ARM_BUILTIN_WSADHZ);
12012 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsb", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMSB);
12013 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMSH);
12014 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmsw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMSW);
12015 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmub", int_ftype_v8qi_int, ARM_BUILTIN_TEXTRMUB);
12016 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuh", int_ftype_v4hi_int, ARM_BUILTIN_TEXTRMUH);
12017 def_mbuiltin (FL_IWMMXT, "__builtin_arm_textrmuw", int_ftype_v2si_int, ARM_BUILTIN_TEXTRMUW);
12018 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrb", v8qi_ftype_v8qi_int_int, ARM_BUILTIN_TINSRB);
12019 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrh", v4hi_ftype_v4hi_int_int, ARM_BUILTIN_TINSRH);
12020 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tinsrw", v2si_ftype_v2si_int_int, ARM_BUILTIN_TINSRW);
12022 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccb", di_ftype_v8qi, ARM_BUILTIN_WACCB);
12023 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wacch", di_ftype_v4hi, ARM_BUILTIN_WACCH);
12024 def_mbuiltin (FL_IWMMXT, "__builtin_arm_waccw", di_ftype_v2si, ARM_BUILTIN_WACCW);
12026 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskb", int_ftype_v8qi, ARM_BUILTIN_TMOVMSKB);
12027 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskh", int_ftype_v4hi, ARM_BUILTIN_TMOVMSKH);
12028 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmovmskw", int_ftype_v2si, ARM_BUILTIN_TMOVMSKW);
12030 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhss", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHSS);
12031 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackhus", v8qi_ftype_v4hi_v4hi, ARM_BUILTIN_WPACKHUS);
12032 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwus", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWUS);
12033 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackwss", v4hi_ftype_v2si_v2si, ARM_BUILTIN_WPACKWSS);
12034 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdus", v2si_ftype_di_di, ARM_BUILTIN_WPACKDUS);
12035 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wpackdss", v2si_ftype_di_di, ARM_BUILTIN_WPACKDSS);
12037 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHUB);
12038 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHUH);
12039 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehuw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHUW);
12040 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKEHSB);
12041 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKEHSH);
12042 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckehsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKEHSW);
12043 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelub", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELUB);
12044 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELUH);
12045 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckeluw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELUW);
12046 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsb", v4hi_ftype_v8qi, ARM_BUILTIN_WUNPCKELSB);
12047 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsh", v2si_ftype_v4hi, ARM_BUILTIN_WUNPCKELSH);
12048 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wunpckelsw", di_ftype_v2si, ARM_BUILTIN_WUNPCKELSW);
12050 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacs", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACS);
12051 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacsz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACSZ);
12052 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacu", di_ftype_di_v4hi_v4hi, ARM_BUILTIN_WMACU);
12053 def_mbuiltin (FL_IWMMXT, "__builtin_arm_wmacuz", di_ftype_v4hi_v4hi, ARM_BUILTIN_WMACUZ);
12055 def_mbuiltin (FL_IWMMXT, "__builtin_arm_walign", v8qi_ftype_v8qi_v8qi_int, ARM_BUILTIN_WALIGN);
12056 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmia", di_ftype_di_int_int, ARM_BUILTIN_TMIA);
12057 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiaph", di_ftype_di_int_int, ARM_BUILTIN_TMIAPH);
12058 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabb", di_ftype_di_int_int, ARM_BUILTIN_TMIABB);
12059 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiabt", di_ftype_di_int_int, ARM_BUILTIN_TMIABT);
12060 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatb", di_ftype_di_int_int, ARM_BUILTIN_TMIATB);
12061 def_mbuiltin (FL_IWMMXT, "__builtin_arm_tmiatt", di_ftype_di_int_int, ARM_BUILTIN_TMIATT);
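/* The tmia family multiplies and accumulates into a 64-bit value;
   roughly, __builtin_arm_tmia (acc, a, b) computes
   acc + (long long) a * b, while the bb/bt/tb/tt variants multiply the
   chosen 16-bit halves of their operands. */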
12064 static void
12065 arm_init_builtins (void)
12067 if (TARGET_REALLY_IWMMXT)
12068 arm_init_iwmmxt_builtins ();
12071 /* Errors in the source file can cause expand_expr to return const0_rtx
12072 where we expect a vector. To avoid crashing, use one of the vector
12073 clear instructions. */
12075 static rtx
12076 safe_vector_operand (rtx x, enum machine_mode mode)
12078 if (x != const0_rtx)
12079 return x;
12080 x = gen_reg_rtx (mode);
12082 emit_insn (gen_iwmmxt_clrdi (mode == DImode ? x
12083 : gen_rtx_SUBREG (DImode, x, 0)));
12084 return x;
12087 /* Subroutine of arm_expand_builtin to take care of binop insns. */
12089 static rtx
12090 arm_expand_binop_builtin (enum insn_code icode,
12091 tree arglist, rtx target)
12093 rtx pat;
12094 tree arg0 = TREE_VALUE (arglist);
12095 tree arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12096 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12097 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12098 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12099 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12100 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
12102 if (VECTOR_MODE_P (mode0))
12103 op0 = safe_vector_operand (op0, mode0);
12104 if (VECTOR_MODE_P (mode1))
12105 op1 = safe_vector_operand (op1, mode1);
12107 if (! target
12108 || GET_MODE (target) != tmode
12109 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12110 target = gen_reg_rtx (tmode);
12112 gcc_assert (GET_MODE (op0) == mode0 && GET_MODE (op1) == mode1);
12114 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12115 op0 = copy_to_mode_reg (mode0, op0);
12116 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12117 op1 = copy_to_mode_reg (mode1, op1);
12119 pat = GEN_FCN (icode) (target, op0, op1);
12120 if (! pat)
12121 return 0;
12122 emit_insn (pat);
12123 return target;
12126 /* Subroutine of arm_expand_builtin to take care of unop insns. */
12128 static rtx
12129 arm_expand_unop_builtin (enum insn_code icode,
12130 tree arglist, rtx target, int do_load)
12132 rtx pat;
12133 tree arg0 = TREE_VALUE (arglist);
12134 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12135 enum machine_mode tmode = insn_data[icode].operand[0].mode;
12136 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
12138 if (! target
12139 || GET_MODE (target) != tmode
12140 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12141 target = gen_reg_rtx (tmode);
12142 if (do_load)
12143 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
12144 else
12146 if (VECTOR_MODE_P (mode0))
12147 op0 = safe_vector_operand (op0, mode0);
12149 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12150 op0 = copy_to_mode_reg (mode0, op0);
12153 pat = GEN_FCN (icode) (target, op0);
12154 if (! pat)
12155 return 0;
12156 emit_insn (pat);
12157 return target;
12160 /* Expand an expression EXP that calls a built-in function,
12161 with result going to TARGET if that's convenient
12162 (and in mode MODE if that's convenient).
12163 SUBTARGET may be used as the target for computing one of EXP's operands.
12164 IGNORE is nonzero if the value is to be ignored. */
12166 static rtx
12167 arm_expand_builtin (tree exp,
12168 rtx target,
12169 rtx subtarget ATTRIBUTE_UNUSED,
12170 enum machine_mode mode ATTRIBUTE_UNUSED,
12171 int ignore ATTRIBUTE_UNUSED)
12173 const struct builtin_description * d;
12174 enum insn_code icode;
12175 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
12176 tree arglist = TREE_OPERAND (exp, 1);
12177 tree arg0;
12178 tree arg1;
12179 tree arg2;
12180 rtx op0;
12181 rtx op1;
12182 rtx op2;
12183 rtx pat;
12184 int fcode = DECL_FUNCTION_CODE (fndecl);
12185 size_t i;
12186 enum machine_mode tmode;
12187 enum machine_mode mode0;
12188 enum machine_mode mode1;
12189 enum machine_mode mode2;
12191 switch (fcode)
12193 case ARM_BUILTIN_TEXTRMSB:
12194 case ARM_BUILTIN_TEXTRMUB:
12195 case ARM_BUILTIN_TEXTRMSH:
12196 case ARM_BUILTIN_TEXTRMUH:
12197 case ARM_BUILTIN_TEXTRMSW:
12198 case ARM_BUILTIN_TEXTRMUW:
12199 icode = (fcode == ARM_BUILTIN_TEXTRMSB ? CODE_FOR_iwmmxt_textrmsb
12200 : fcode == ARM_BUILTIN_TEXTRMUB ? CODE_FOR_iwmmxt_textrmub
12201 : fcode == ARM_BUILTIN_TEXTRMSH ? CODE_FOR_iwmmxt_textrmsh
12202 : fcode == ARM_BUILTIN_TEXTRMUH ? CODE_FOR_iwmmxt_textrmuh
12203 : CODE_FOR_iwmmxt_textrmw);
12205 arg0 = TREE_VALUE (arglist);
12206 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12207 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12208 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12209 tmode = insn_data[icode].operand[0].mode;
12210 mode0 = insn_data[icode].operand[1].mode;
12211 mode1 = insn_data[icode].operand[2].mode;
12213 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12214 op0 = copy_to_mode_reg (mode0, op0);
12215 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12217 /* @@@ better error message */
12218 error ("selector must be an immediate");
12219 return gen_reg_rtx (tmode);
12221 if (target == 0
12222 || GET_MODE (target) != tmode
12223 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12224 target = gen_reg_rtx (tmode);
12225 pat = GEN_FCN (icode) (target, op0, op1);
12226 if (! pat)
12227 return 0;
12228 emit_insn (pat);
12229 return target;
12231 case ARM_BUILTIN_TINSRB:
12232 case ARM_BUILTIN_TINSRH:
12233 case ARM_BUILTIN_TINSRW:
12234 icode = (fcode == ARM_BUILTIN_TINSRB ? CODE_FOR_iwmmxt_tinsrb
12235 : fcode == ARM_BUILTIN_TINSRH ? CODE_FOR_iwmmxt_tinsrh
12236 : CODE_FOR_iwmmxt_tinsrw);
12237 arg0 = TREE_VALUE (arglist);
12238 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12239 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12240 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12241 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12242 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12243 tmode = insn_data[icode].operand[0].mode;
12244 mode0 = insn_data[icode].operand[1].mode;
12245 mode1 = insn_data[icode].operand[2].mode;
12246 mode2 = insn_data[icode].operand[3].mode;
12248 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12249 op0 = copy_to_mode_reg (mode0, op0);
12250 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12251 op1 = copy_to_mode_reg (mode1, op1);
12252 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12254 /* @@@ better error message */
12255 error ("selector must be an immediate");
12256 return const0_rtx;
12258 if (target == 0
12259 || GET_MODE (target) != tmode
12260 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12261 target = gen_reg_rtx (tmode);
12262 pat = GEN_FCN (icode) (target, op0, op1, op2);
12263 if (! pat)
12264 return 0;
12265 emit_insn (pat);
12266 return target;
12268 case ARM_BUILTIN_SETWCX:
12269 arg0 = TREE_VALUE (arglist);
12270 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12271 op0 = force_reg (SImode, expand_expr (arg0, NULL_RTX, VOIDmode, 0));
12272 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12273 emit_insn (gen_iwmmxt_tmcr (op1, op0));
12274 return 0;
12276 case ARM_BUILTIN_GETWCX:
12277 arg0 = TREE_VALUE (arglist);
12278 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12279 target = gen_reg_rtx (SImode);
12280 emit_insn (gen_iwmmxt_tmrc (target, op0));
12281 return target;
12283 case ARM_BUILTIN_WSHUFH:
12284 icode = CODE_FOR_iwmmxt_wshufh;
12285 arg0 = TREE_VALUE (arglist);
12286 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12287 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12288 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12289 tmode = insn_data[icode].operand[0].mode;
12290 mode1 = insn_data[icode].operand[1].mode;
12291 mode2 = insn_data[icode].operand[2].mode;
12293 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
12294 op0 = copy_to_mode_reg (mode1, op0);
12295 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
12297 /* @@@ better error message */
12298 error ("mask must be an immediate");
12299 return const0_rtx;
12301 if (target == 0
12302 || GET_MODE (target) != tmode
12303 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12304 target = gen_reg_rtx (tmode);
12305 pat = GEN_FCN (icode) (target, op0, op1);
12306 if (! pat)
12307 return 0;
12308 emit_insn (pat);
12309 return target;
12311 case ARM_BUILTIN_WSADB:
12312 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadb, arglist, target);
12313 case ARM_BUILTIN_WSADH:
12314 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadh, arglist, target);
12315 case ARM_BUILTIN_WSADBZ:
12316 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadbz, arglist, target);
12317 case ARM_BUILTIN_WSADHZ:
12318 return arm_expand_binop_builtin (CODE_FOR_iwmmxt_wsadhz, arglist, target);
12320 /* Several three-argument builtins. */
12321 case ARM_BUILTIN_WMACS:
12322 case ARM_BUILTIN_WMACU:
12323 case ARM_BUILTIN_WALIGN:
12324 case ARM_BUILTIN_TMIA:
12325 case ARM_BUILTIN_TMIAPH:
12326 case ARM_BUILTIN_TMIATT:
12327 case ARM_BUILTIN_TMIATB:
12328 case ARM_BUILTIN_TMIABT:
12329 case ARM_BUILTIN_TMIABB:
12330 icode = (fcode == ARM_BUILTIN_WMACS ? CODE_FOR_iwmmxt_wmacs
12331 : fcode == ARM_BUILTIN_WMACU ? CODE_FOR_iwmmxt_wmacu
12332 : fcode == ARM_BUILTIN_TMIA ? CODE_FOR_iwmmxt_tmia
12333 : fcode == ARM_BUILTIN_TMIAPH ? CODE_FOR_iwmmxt_tmiaph
12334 : fcode == ARM_BUILTIN_TMIABB ? CODE_FOR_iwmmxt_tmiabb
12335 : fcode == ARM_BUILTIN_TMIABT ? CODE_FOR_iwmmxt_tmiabt
12336 : fcode == ARM_BUILTIN_TMIATB ? CODE_FOR_iwmmxt_tmiatb
12337 : fcode == ARM_BUILTIN_TMIATT ? CODE_FOR_iwmmxt_tmiatt
12338 : CODE_FOR_iwmmxt_walign);
12339 arg0 = TREE_VALUE (arglist);
12340 arg1 = TREE_VALUE (TREE_CHAIN (arglist));
12341 arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist)));
12342 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
12343 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
12344 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
12345 tmode = insn_data[icode].operand[0].mode;
12346 mode0 = insn_data[icode].operand[1].mode;
12347 mode1 = insn_data[icode].operand[2].mode;
12348 mode2 = insn_data[icode].operand[3].mode;
12350 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
12351 op0 = copy_to_mode_reg (mode0, op0);
12352 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
12353 op1 = copy_to_mode_reg (mode1, op1);
12354 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
12355 op2 = copy_to_mode_reg (mode2, op2);
12356 if (target == 0
12357 || GET_MODE (target) != tmode
12358 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
12359 target = gen_reg_rtx (tmode);
12360 pat = GEN_FCN (icode) (target, op0, op1, op2);
12361 if (! pat)
12362 return 0;
12363 emit_insn (pat);
12364 return target;
12366 case ARM_BUILTIN_WZERO:
12367 target = gen_reg_rtx (DImode);
12368 emit_insn (gen_iwmmxt_clrdi (target));
12369 return target;
12371 default:
12372 break;
12375 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
12376 if (d->code == (const enum arm_builtins) fcode)
12377 return arm_expand_binop_builtin (d->icode, arglist, target);
12379 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
12380 if (d->code == (const enum arm_builtins) fcode)
12381 return arm_expand_unop_builtin (d->icode, arglist, target, 0);
12383 /* @@@ Should really do something sensible here. */
12384 return NULL_RTX;
12387 /* Return the number (counting from 0) of
12388 the least significant set bit in MASK. */
12390 inline static int
12391 number_of_first_bit_set (unsigned mask)
12393 int bit;
12395 for (bit = 0;
12396 (mask & (1 << bit)) == 0;
12397 ++bit)
12398 continue;
12400 return bit;
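/* For example, number_of_first_bit_set (0x28) is 3, since 0x28 is binary
   101000. MASK must be nonzero, otherwise the loop above does not
   terminate within the word. */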
12403 /* Emit code to push or pop registers to or from the stack. F is the
12404 assembly file. MASK is the registers to push or pop. PUSH is
12405 nonzero if we should push, and zero if we should pop. For debugging
12406 output, if pushing, adjust CFA_OFFSET by the amount of space added
12407 to the stack. REAL_REGS should have the same number of bits set as
12408 MASK, and will be used instead (in the same order) to describe which
12409 registers were saved - this is used to mark the save slots when we
12410 push high registers after moving them to low registers. */
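/* For example, with PUSH nonzero and MASK covering r4, r5 and LR this
   emits:

   push {r4, r5, lr}

   and, when debug info is being generated, CFA_OFFSET grows by 12 (three
   words). */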
12411 static void
12412 thumb_pushpop (FILE *f, unsigned long mask, int push, int *cfa_offset,
12413 unsigned long real_regs)
12415 int regno;
12416 int lo_mask = mask & 0xFF;
12417 int pushed_words = 0;
12419 gcc_assert (mask);
12421 if (lo_mask == 0 && !push && (mask & (1 << PC_REGNUM)))
12423 /* Special case. Do not generate a POP PC statement here; do it in
12424 thumb_exit (). */
12425 thumb_exit (f, -1);
12426 return;
12429 fprintf (f, "\t%s\t{", push ? "push" : "pop");
12431 /* Look at the low registers first. */
12432 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
12434 if (lo_mask & 1)
12436 asm_fprintf (f, "%r", regno);
12438 if ((lo_mask & ~1) != 0)
12439 fprintf (f, ", ");
12441 pushed_words++;
12445 if (push && (mask & (1 << LR_REGNUM)))
12447 /* Catch pushing the LR. */
12448 if (mask & 0xFF)
12449 fprintf (f, ", ");
12451 asm_fprintf (f, "%r", LR_REGNUM);
12453 pushed_words++;
12455 else if (!push && (mask & (1 << PC_REGNUM)))
12457 /* Catch popping the PC. */
12458 if (TARGET_INTERWORK || TARGET_BACKTRACE
12459 || current_function_calls_eh_return)
12461 /* The PC is never popped directly; instead
12462 it is popped into r3 and then BX is used. */
12463 fprintf (f, "}\n");
12465 thumb_exit (f, -1);
12467 return;
12469 else
12471 if (mask & 0xFF)
12472 fprintf (f, ", ");
12474 asm_fprintf (f, "%r", PC_REGNUM);
12478 fprintf (f, "}\n");
12480 if (push && pushed_words && dwarf2out_do_frame ())
12482 char *l = dwarf2out_cfi_label ();
12483 int pushed_mask = real_regs;
12485 *cfa_offset += pushed_words * 4;
12486 dwarf2out_def_cfa (l, SP_REGNUM, *cfa_offset);
12488 pushed_words = 0;
12489 pushed_mask = real_regs;
12490 for (regno = 0; regno <= 14; regno++, pushed_mask >>= 1)
12492 if (pushed_mask & 1)
12493 dwarf2out_reg_save (l, regno, 4 * pushed_words++ - *cfa_offset);
12498 /* Generate code to return from a thumb function.
12499 If 'reg_containing_return_addr' is -1, then the return address is
12500 actually on the stack, at the stack pointer. */
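/* For instance, with interworking enabled and the return address still
   on the stack, a void function typically returns via something like:

   pop {r0}
   bx r0

   r0 being the lowest argument register that is free for corruption at
   that point. */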
12501 static void
12502 thumb_exit (FILE *f, int reg_containing_return_addr)
12504 unsigned regs_available_for_popping;
12505 unsigned regs_to_pop;
12506 int pops_needed;
12507 unsigned available;
12508 unsigned required;
12509 int mode;
12510 int size;
12511 int restore_a4 = FALSE;
12513 /* Compute the registers we need to pop. */
12514 regs_to_pop = 0;
12515 pops_needed = 0;
12517 if (reg_containing_return_addr == -1)
12519 regs_to_pop |= 1 << LR_REGNUM;
12520 ++pops_needed;
12523 if (TARGET_BACKTRACE)
12525 /* Restore the (ARM) frame pointer and stack pointer. */
12526 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
12527 pops_needed += 2;
12530 /* If there is nothing to pop then just emit the BX instruction and
12531 return. */
12532 if (pops_needed == 0)
12534 if (current_function_calls_eh_return)
12535 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
12537 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
12538 return;
12540 /* Otherwise if we are not supporting interworking and we have not created
12541 a backtrace structure and the function was not entered in ARM mode then
12542 just pop the return address straight into the PC. */
12543 else if (!TARGET_INTERWORK
12544 && !TARGET_BACKTRACE
12545 && !is_called_in_ARM_mode (current_function_decl)
12546 && !current_function_calls_eh_return)
12548 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
12549 return;
12552 /* Find out how many of the (return) argument registers we can corrupt. */
12553 regs_available_for_popping = 0;
12555 /* If returning via __builtin_eh_return, the bottom three registers
12556 all contain information needed for the return. */
12557 if (current_function_calls_eh_return)
12558 size = 12;
12559 else
12561 /* We can deduce the registers used from the function's
12562 return value. This is more reliable than examining
12563 regs_ever_live[] because that will be set if the register is
12564 ever used in the function, not just if the register is used
12565 to hold a return value. */
12567 if (current_function_return_rtx != 0)
12568 mode = GET_MODE (current_function_return_rtx);
12569 else
12570 mode = DECL_MODE (DECL_RESULT (current_function_decl));
12572 size = GET_MODE_SIZE (mode);
12574 if (size == 0)
12576 /* In a void function we can use any argument register.
12577 In a function that returns a structure on the stack
12578 we can use the second and third argument registers. */
12579 if (mode == VOIDmode)
12580 regs_available_for_popping =
12581 (1 << ARG_REGISTER (1))
12582 | (1 << ARG_REGISTER (2))
12583 | (1 << ARG_REGISTER (3));
12584 else
12585 regs_available_for_popping =
12586 (1 << ARG_REGISTER (2))
12587 | (1 << ARG_REGISTER (3));
12589 else if (size <= 4)
12590 regs_available_for_popping =
12591 (1 << ARG_REGISTER (2))
12592 | (1 << ARG_REGISTER (3));
12593 else if (size <= 8)
12594 regs_available_for_popping =
12595 (1 << ARG_REGISTER (3));
12598 /* Match registers to be popped with registers into which we pop them. */
12599 for (available = regs_available_for_popping,
12600 required = regs_to_pop;
12601 required != 0 && available != 0;
12602 available &= ~(available & - available),
12603 required &= ~(required & - required))
12604 -- pops_needed;
12606 /* If we have any popping registers left over, remove them. */
12607 if (available > 0)
12608 regs_available_for_popping &= ~available;
12610 /* Otherwise if we need another popping register we can use
12611 the fourth argument register. */
12612 else if (pops_needed)
12614 /* If we have not found any free argument registers and
12615 reg a4 contains the return address, we must move it. */
12616 if (regs_available_for_popping == 0
12617 && reg_containing_return_addr == LAST_ARG_REGNUM)
12619 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
12620 reg_containing_return_addr = LR_REGNUM;
12622 else if (size > 12)
12624 /* Register a4 is being used to hold part of the return value,
12625 but we have dire need of a free, low register. */
12626 restore_a4 = TRUE;
12628 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
12631 if (reg_containing_return_addr != LAST_ARG_REGNUM)
12633 /* The fourth argument register is available. */
12634 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
12636 --pops_needed;
12640 /* Pop as many registers as we can. */
12641 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
12642 regs_available_for_popping);
12644 /* Process the registers we popped. */
12645 if (reg_containing_return_addr == -1)
12647 /* The return address was popped into the lowest numbered register. */
12648 regs_to_pop &= ~(1 << LR_REGNUM);
12650 reg_containing_return_addr =
12651 number_of_first_bit_set (regs_available_for_popping);
12653 /* Remove this register from the mask of available registers, so that
12654 the return address will not be corrupted by further pops. */
12655 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
12658 /* If we popped other registers then handle them here. */
12659 if (regs_available_for_popping)
12661 int frame_pointer;
12663 /* Work out which register currently contains the frame pointer. */
12664 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
12666 /* Move it into the correct place. */
12667 asm_fprintf (f, "\tmov\t%r, %r\n",
12668 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
12670 /* (Temporarily) remove it from the mask of popped registers. */
12671 regs_available_for_popping &= ~(1 << frame_pointer);
12672 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
12674 if (regs_available_for_popping)
12676 int stack_pointer;
12678 /* We popped the stack pointer as well;
12679 find the register that contains it. */
12680 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
12682 /* Move it into the stack register. */
12683 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
12685 /* At this point we have popped all necessary registers, so
12686 do not worry about restoring regs_available_for_popping
12687 to its correct value:
12689 assert (pops_needed == 0)
12690 assert (regs_available_for_popping == (1 << frame_pointer))
12691 assert (regs_to_pop == (1 << STACK_POINTER)) */
12693 else
12695 /* Since we have just moved the popped value into the frame
12696 pointer, the popping register is available for reuse, and
12697 we know that we still have the stack pointer left to pop. */
12698 regs_available_for_popping |= (1 << frame_pointer);
12702 /* If we still have registers left on the stack, but we no longer have
12703 any registers into which we can pop them, then we must move the return
12704 address into the link register and make available the register that
12705 contained it. */
12706 if (regs_available_for_popping == 0 && pops_needed > 0)
12708 regs_available_for_popping |= 1 << reg_containing_return_addr;
12710 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
12711 reg_containing_return_addr);
12713 reg_containing_return_addr = LR_REGNUM;
12716 /* If we have registers left on the stack then pop some more.
12717 We know that at most we will want to pop FP and SP. */
12718 if (pops_needed > 0)
12720 int popped_into;
12721 int move_to;
12723 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
12724 regs_available_for_popping);
12726 /* We have popped either FP or SP.
12727 Move whichever one it is into the correct register. */
12728 popped_into = number_of_first_bit_set (regs_available_for_popping);
12729 move_to = number_of_first_bit_set (regs_to_pop);
12731 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
12733 regs_to_pop &= ~(1 << move_to);
12735 --pops_needed;
12738 /* If we still have not popped everything then we must have only
12739 had one register available to us and we are now popping the SP. */
12740 if (pops_needed > 0)
12742 int popped_into;
12744 thumb_pushpop (f, regs_available_for_popping, FALSE, NULL,
12745 regs_available_for_popping);
12747 popped_into = number_of_first_bit_set (regs_available_for_popping);
12749 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
12750 /*
12751 assert (regs_to_pop == (1 << STACK_POINTER))
12752 assert (pops_needed == 1)
12753 */
12756 /* If necessary restore the a4 register. */
12757 if (restore_a4)
12759 if (reg_containing_return_addr != LR_REGNUM)
12761 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
12762 reg_containing_return_addr = LR_REGNUM;
12765 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
12768 if (current_function_calls_eh_return)
12769 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
12771 /* Return to caller. */
12772 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
12776 void
12777 thumb_final_prescan_insn (rtx insn)
12779 if (flag_print_asm_name)
12780 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
12781 INSN_ADDRESSES (INSN_UID (insn)));
12784 int
12785 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
12787 unsigned HOST_WIDE_INT mask = 0xff;
12788 int i;
12790 if (val == 0) /* XXX */
12791 return 0;
12793 for (i = 0; i < 25; i++)
12794 if ((val & (mask << i)) == val)
12795 return 1;
12797 return 0;
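/* E.g. 0x1FE (0xFF << 1) is shiftable, while 0x101 is not: its set bits
   span nine positions and so cannot fit inside one 8-bit window. */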
12800 /* Returns nonzero if the current function contains,
12801 or might contain, a far jump. */
12802 static int
12803 thumb_far_jump_used_p (void)
12805 rtx insn;
12807 /* This test is only important for leaf functions. */
12808 /* assert (!leaf_function_p ()); */
12810 /* If we have already decided that far jumps may be used,
12811 do not bother checking again, and always return true even if
12812 it turns out that they are not being used. Once we have made
12813 the decision that far jumps are present (and that hence the link
12814 register will be pushed onto the stack) we cannot go back on it. */
12815 if (cfun->machine->far_jump_used)
12816 return 1;
12818 /* If this function is not being called from the prologue/epilogue
12819 generation code then it must be being called from the
12820 INITIAL_ELIMINATION_OFFSET macro. */
12821 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
12823 /* In this case we know that we are being asked about the elimination
12824 of the arg pointer register. If that register is not being used,
12825 then there are no arguments on the stack, and we do not have to
12826 worry that a far jump might force the prologue to push the link
12827 register, changing the stack offsets. In this case we can just
12828 return false, since the presence of far jumps in the function will
12829 not affect stack offsets.
12831 If the arg pointer is live (or if it was live, but has now been
12832 eliminated and so set to dead) then we do have to test to see if
12833 the function might contain a far jump. This test can lead to some
12834 false negatives, since before reload is completed the length of
12835 branch instructions is not known, so gcc defaults to returning their
12836 longest length, which in turn sets the far jump attribute to true.
12838 A false negative will not result in bad code being generated, but it
12839 will result in a needless push and pop of the link register. We
12840 hope that this does not occur too often.
12842 If we need doubleword stack alignment this could affect the other
12843 elimination offsets so we can't risk getting it wrong. */
12844 if (regs_ever_live [ARG_POINTER_REGNUM])
12845 cfun->machine->arg_pointer_live = 1;
12846 else if (!cfun->machine->arg_pointer_live)
12847 return 0;
12850 /* Check to see if the function contains a branch
12851 insn with the far jump attribute set. */
12852 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
12854 if (GET_CODE (insn) == JUMP_INSN
12855 /* Ignore tablejump patterns. */
12856 && GET_CODE (PATTERN (insn)) != ADDR_VEC
12857 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
12858 && get_attr_far_jump (insn) == FAR_JUMP_YES
12861 /* Record the fact that we have decided that
12862 the function does use far jumps. */
12863 cfun->machine->far_jump_used = 1;
12864 return 1;
12868 return 0;
12871 /* Return nonzero if FUNC must be entered in ARM mode. */
12872 int
12873 is_called_in_ARM_mode (tree func)
12875 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
12877 /* Ignore the problem about functions whose address is taken. */
12878 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
12879 return TRUE;
12881 #ifdef ARM_PE
12882 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
12883 #else
12884 return FALSE;
12885 #endif
12888 /* The bits which aren't usefully expanded as rtl. */
12889 const char *
12890 thumb_unexpanded_epilogue (void)
12892 int regno;
12893 unsigned long live_regs_mask = 0;
12894 int high_regs_pushed = 0;
12895 int had_to_push_lr;
12896 int size;
12897 int mode;
12899 if (return_used_this_function)
12900 return "";
12902 if (IS_NAKED (arm_current_func_type ()))
12903 return "";
12905 live_regs_mask = thumb_compute_save_reg_mask ();
12906 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
12908 /* We can deduce the registers used from the function's return value.
12909 This is more reliable than examining regs_ever_live[] because that
12910 will be set if the register is ever used in the function, not just if
12911 the register is used to hold a return value. */
12913 if (current_function_return_rtx != 0)
12914 mode = GET_MODE (current_function_return_rtx);
12915 else
12916 mode = DECL_MODE (DECL_RESULT (current_function_decl));
12918 size = GET_MODE_SIZE (mode);
12920 /* The prolog may have pushed some high registers to use as
12921 work registers. e.g. the testsuite file:
12922 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
12923 compiles to produce:
12924 push {r4, r5, r6, r7, lr}
12925 mov r7, r9
12926 mov r6, r8
12927 push {r6, r7}
12928 as part of the prolog. We have to undo that pushing here. */
12930 if (high_regs_pushed)
12932 unsigned long mask = live_regs_mask & 0xff;
12933 int next_hi_reg;
12935 /* The available low registers depend on the size of the value we are
12936 returning. */
12937 if (size <= 12)
12938 mask |= 1 << 3;
12939 if (size <= 8)
12940 mask |= 1 << 2;
12942 if (mask == 0)
12943 /* Oh dear! We have no low registers into which we can pop
12944 high registers! */
12945 internal_error
12946 ("no low registers available for popping high registers");
12948 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
12949 if (live_regs_mask & (1 << next_hi_reg))
12950 break;
12952 while (high_regs_pushed)
12954 /* Find lo register(s) into which the high register(s) can
12955 be popped. */
12956 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
12958 if (mask & (1 << regno))
12959 high_regs_pushed--;
12960 if (high_regs_pushed == 0)
12961 break;
12964 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
12966 /* Pop the values into the low register(s). */
12967 thumb_pushpop (asm_out_file, mask, 0, NULL, mask);
12969 /* Move the value(s) into the high registers. */
12970 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
12972 if (mask & (1 << regno))
12974 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
12975 regno);
12977 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
12978 if (live_regs_mask & (1 << next_hi_reg))
12979 break;
12983 live_regs_mask &= ~0x0f00;
12986 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
12987 live_regs_mask &= 0xff;
12989 if (current_function_pretend_args_size == 0 || TARGET_BACKTRACE)
12991 /* Pop the return address into the PC. */
12992 if (had_to_push_lr)
12993 live_regs_mask |= 1 << PC_REGNUM;
12995 /* Either no argument registers were pushed or a backtrace
12996 structure was created which includes an adjusted stack
12997 pointer, so just pop everything. */
12998 if (live_regs_mask)
12999 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
13000 live_regs_mask);
13002 /* We have either just popped the return address into the
13003 PC or it was kept in LR for the entire function. */
13004 if (!had_to_push_lr)
13005 thumb_exit (asm_out_file, LR_REGNUM);
13007 else
13009 /* Pop everything but the return address. */
13010 if (live_regs_mask)
13011 thumb_pushpop (asm_out_file, live_regs_mask, FALSE, NULL,
13012 live_regs_mask);
13014 if (had_to_push_lr)
13016 if (size > 12)
13018 /* We have no free low regs, so save one. */
13019 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
13020 LAST_ARG_REGNUM);
13023 /* Get the return address into a temporary register. */
13024 thumb_pushpop (asm_out_file, 1 << LAST_ARG_REGNUM, 0, NULL,
13025 1 << LAST_ARG_REGNUM);
13027 if (size > 12)
13029 /* Move the return address to lr. */
13030 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
13031 LAST_ARG_REGNUM);
13032 /* Restore the low register. */
13033 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
13034 IP_REGNUM);
13035 regno = LR_REGNUM;
13037 else
13038 regno = LAST_ARG_REGNUM;
13040 else
13041 regno = LR_REGNUM;
13043 /* Remove the argument registers that were pushed onto the stack. */
13044 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
13045 SP_REGNUM, SP_REGNUM,
13046 current_function_pretend_args_size);
13048 thumb_exit (asm_out_file, regno);
13051 return "";
13054 /* Functions to save and restore machine-specific function data. */
13055 static struct machine_function *
13056 arm_init_machine_status (void)
13058 struct machine_function *machine;
13059 machine = (machine_function *) ggc_alloc_cleared (sizeof (machine_function));
13061 #if ARM_FT_UNKNOWN != 0
13062 machine->func_type = ARM_FT_UNKNOWN;
13063 #endif
13064 return machine;
13067 /* Return an RTX indicating where the return address to the
13068 calling function can be found. */
13069 rtx
13070 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
13072 if (count != 0)
13073 return NULL_RTX;
13075 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
13078 /* Do anything needed before RTL is emitted for each function. */
13079 void
13080 arm_init_expanders (void)
13082 /* Arrange to initialize and mark the machine per-function status. */
13083 init_machine_status = arm_init_machine_status;
13085 /* This is to stop the combine pass optimizing away the alignment
13086 adjustment of va_arg. */
13087 /* ??? It is claimed that this should not be necessary. */
13088 if (cfun)
13089 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
13093 /* Like arm_compute_initial_elimination_offset. Simpler because
13094 THUMB_HARD_FRAME_POINTER isn't actually the ABI specified frame pointer. */
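/* A worked example with made-up offsets: if saved_args == 0,
   saved_regs == 16, soft_frame == 16 and outgoing_args == 32, then
   eliminating ARG_POINTER into STACK_POINTER yields 32 and eliminating
   FRAME_POINTER into STACK_POINTER yields 16. */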
13096 HOST_WIDE_INT
13097 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
13099 arm_stack_offsets *offsets;
13101 offsets = arm_get_frame_offsets ();
13103 switch (from)
13105 case ARG_POINTER_REGNUM:
13106 switch (to)
13108 case STACK_POINTER_REGNUM:
13109 return offsets->outgoing_args - offsets->saved_args;
13111 case FRAME_POINTER_REGNUM:
13112 return offsets->soft_frame - offsets->saved_args;
13114 case THUMB_HARD_FRAME_POINTER_REGNUM:
13115 case ARM_HARD_FRAME_POINTER_REGNUM:
13116 return offsets->saved_regs - offsets->saved_args;
13118 default:
13119 gcc_unreachable ();
13121 break;
13123 case FRAME_POINTER_REGNUM:
13124 switch (to)
13126 case STACK_POINTER_REGNUM:
13127 return offsets->outgoing_args - offsets->soft_frame;
13129 case THUMB_HARD_FRAME_POINTER_REGNUM:
13130 case ARM_HARD_FRAME_POINTER_REGNUM:
13131 return offsets->saved_regs - offsets->soft_frame;
13133 default:
13134 gcc_unreachable ();
13136 break;
13138 default:
13139 gcc_unreachable ();
13144 /* Generate the rest of a function's prologue. */
13145 void
13146 thumb_expand_prologue (void)
13148 rtx insn, dwarf;
13150 HOST_WIDE_INT amount;
13151 arm_stack_offsets *offsets;
13152 unsigned long func_type;
13153 int regno;
13154 unsigned long live_regs_mask;
13156 func_type = arm_current_func_type ();
13158 /* Naked functions don't have prologues. */
13159 if (IS_NAKED (func_type))
13160 return;
13162 if (IS_INTERRUPT (func_type))
13164 error ("interrupt Service Routines cannot be coded in Thumb mode");
13165 return;
13168 live_regs_mask = thumb_compute_save_reg_mask ();
13169 /* Load the pic register before setting the frame pointer,
13170 so we can use r7 as a temporary work register. */
13171 if (flag_pic)
13172 arm_load_pic_register (thumb_find_work_register (live_regs_mask));
13174 offsets = arm_get_frame_offsets ();
13176 if (frame_pointer_needed)
13178 insn = emit_insn (gen_movsi (hard_frame_pointer_rtx,
13179 stack_pointer_rtx));
13180 RTX_FRAME_RELATED_P (insn) = 1;
13182 else if (CALLER_INTERWORKING_SLOT_SIZE > 0)
13183 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
13184 stack_pointer_rtx);
13186 amount = offsets->outgoing_args - offsets->saved_regs;
13187 if (amount)
13189 if (amount < 512)
13191 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
13192 GEN_INT (- amount)));
13193 RTX_FRAME_RELATED_P (insn) = 1;
13195 else
13197 rtx reg;
13199 /* The stack decrement is too big for an immediate value in a single
13200 insn. In theory we could issue multiple subtracts, but after
13201 three of them it becomes more space efficient to place the full
13202 value in the constant pool and load into a register. (Also the
13203 ARM debugger really likes to see only one stack decrement per
13204 function). So instead we look for a scratch register into which
13205 we can load the decrement, and then we subtract this from the
13206 stack pointer. Unfortunately on the thumb the only available
13207 scratch registers are the argument registers, and we cannot use
13208 these as they may hold arguments to the function. Instead we
13209 attempt to locate a call preserved register which is used by this
13210 function. If we can find one, then we know that it will have
13211 been pushed at the start of the prologue and so we can corrupt
13212 it now. */
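/* For a 4096 byte decrement using a saved call-preserved register, say
   r4, the emitted sequence is roughly:

   ldr r4, .Lc @ .Lc is a constant pool entry holding -4096
   add sp, r4

   rather than a string of sub sp, #N instructions. */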
13213 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
13214 if (live_regs_mask & (1 << regno)
13215 && !(frame_pointer_needed
13216 && (regno == THUMB_HARD_FRAME_POINTER_REGNUM)))
13217 break;
13219 if (regno > LAST_LO_REGNUM) /* Very unlikely. */
13221 rtx spare = gen_rtx_REG (SImode, IP_REGNUM);
13223 /* Choose an arbitrary, non-argument low register. */
13224 reg = gen_rtx_REG (SImode, LAST_LO_REGNUM);
13226 /* Save it by copying it into a high, scratch register. */
13227 emit_insn (gen_movsi (spare, reg));
13228 /* Add a USE to stop propagate_one_insn() from barfing. */
13229 emit_insn (gen_prologue_use (spare));
13231 /* Decrement the stack. */
13232 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
13233 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
13234 stack_pointer_rtx, reg));
13235 RTX_FRAME_RELATED_P (insn) = 1;
13236 dwarf = gen_rtx_SET (SImode, stack_pointer_rtx,
13237 plus_constant (stack_pointer_rtx,
13238 -amount));
13239 RTX_FRAME_RELATED_P (dwarf) = 1;
13240 REG_NOTES (insn)
13241 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
13242 REG_NOTES (insn));
13244 /* Restore the low register's original value. */
13245 emit_insn (gen_movsi (reg, spare));
13247 /* Emit a USE of the restored scratch register, so that flow
13248 analysis will not consider the restore redundant. The
13249 register won't be used again in this function and isn't
13250 restored by the epilogue. */
13251 emit_insn (gen_prologue_use (reg));
13253 else
13255 reg = gen_rtx_REG (SImode, regno);
13257 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
13259 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
13260 stack_pointer_rtx, reg));
13261 RTX_FRAME_RELATED_P (insn) = 1;
13262 dwarf = gen_rtx_SET (SImode, stack_pointer_rtx,
13263 plus_constant (stack_pointer_rtx,
13264 -amount));
13265 RTX_FRAME_RELATED_P (dwarf) = 1;
13266 REG_NOTES (insn)
13267 = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, dwarf,
13268 REG_NOTES (insn));
13271 /* If the frame pointer is needed, emit a special barrier that
13272 will prevent the scheduler from moving stores to the frame
13273 before the stack adjustment. */
13274 if (frame_pointer_needed)
13275 emit_insn (gen_stack_tie (stack_pointer_rtx,
13276 hard_frame_pointer_rtx));
13279 if (current_function_profile || !TARGET_SCHED_PROLOG)
13280 emit_insn (gen_blockage ());
13282 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
13283 if (live_regs_mask & 0xff)
13284 cfun->machine->lr_save_eliminated = 0;
13286 /* If the link register is being kept alive, with the return address in it,
13287 then make sure that it does not get reused by the ce2 pass. */
13288 if (cfun->machine->lr_save_eliminated)
13289 emit_insn (gen_prologue_use (gen_rtx_REG (SImode, LR_REGNUM)));
13293 void
13294 thumb_expand_epilogue (void)
13296 HOST_WIDE_INT amount;
13297 arm_stack_offsets *offsets;
13298 int regno;
13300 /* Naked functions don't have epilogues. */
13301 if (IS_NAKED (arm_current_func_type ()))
13302 return;
13304 offsets = arm_get_frame_offsets ();
13305 amount = offsets->outgoing_args - offsets->saved_regs;
13307 if (frame_pointer_needed)
13308 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
13309 else if (amount)
13311 if (amount < 512)
13312 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
13313 GEN_INT (amount)));
13314 else
13316 /* r3 is always free in the epilogue. */
13317 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
13319 emit_insn (gen_movsi (reg, GEN_INT (amount)));
13320 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
13324 /* Emit a USE (stack_pointer_rtx), so that
13325 the stack adjustment will not be deleted. */
13326 emit_insn (gen_prologue_use (stack_pointer_rtx));
13328 if (current_function_profile || !TARGET_SCHED_PROLOG)
13329 emit_insn (gen_blockage ());
13331 /* Emit a clobber for each insn that will be restored in the epilogue,
13332 so that flow2 will get register lifetimes correct. */
13333 for (regno = 0; regno < 13; regno++)
13334 if (regs_ever_live[regno] && !call_used_regs[regno])
13335 emit_insn (gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, regno)));
13337 if (! regs_ever_live[LR_REGNUM])
13338 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, LR_REGNUM)));
13341 static void
13342 thumb_output_function_prologue (FILE *f, HOST_WIDE_INT size ATTRIBUTE_UNUSED)
13344 unsigned long live_regs_mask = 0;
13345 unsigned long l_mask;
13346 unsigned high_regs_pushed = 0;
13347 int cfa_offset = 0;
13348 int regno;
13350 if (IS_NAKED (arm_current_func_type ()))
13351 return;
13353 if (is_called_in_ARM_mode (current_function_decl))
13355 const char * name;
13357 gcc_assert (GET_CODE (DECL_RTL (current_function_decl)) == MEM);
13358 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
13359 == SYMBOL_REF);
13360 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
13362 /* Generate code sequence to switch us into Thumb mode. */
13363 /* The .code 32 directive has already been emitted by
13364 ASM_DECLARE_FUNCTION_NAME. */
13365 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
13366 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
13368 /* Generate a label, so that the debugger will notice the
13369 change in instruction sets. This label is also used by
13370 the assembler to bypass the ARM code when this function
13371 is called from a Thumb encoded function elsewhere in the
13372 same file. Hence the definition of STUB_NAME here must
13373 agree with the definition in gas/config/tc-arm.c. */
13375 #define STUB_NAME ".real_start_of"
13377 fprintf (f, "\t.code\t16\n");
13378 #ifdef ARM_PE
13379 if (arm_dllexport_name_p (name))
13380 name = arm_strip_name_encoding (name);
13381 #endif
13382 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
13383 fprintf (f, "\t.thumb_func\n");
13384 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
13387 if (current_function_pretend_args_size)
13389 if (cfun->machine->uses_anonymous_args)
13391 int num_pushes;
13393 fprintf (f, "\tpush\t{");
13395 num_pushes = ARM_NUM_INTS (current_function_pretend_args_size);
13397 for (regno = LAST_ARG_REGNUM + 1 - num_pushes;
13398 regno <= LAST_ARG_REGNUM;
13399 regno++)
13400 asm_fprintf (f, "%r%s", regno,
13401 regno == LAST_ARG_REGNUM ? "" : ", ");
13403 fprintf (f, "}\n");
13405 else
13406 asm_fprintf (f, "\tsub\t%r, %r, #%d\n",
13407 SP_REGNUM, SP_REGNUM,
13408 current_function_pretend_args_size);
13410 /* We don't need to record the stores for unwinding (would it
13411 help the debugger any if we did?), but record the change in
13412 the stack pointer. */
13413 if (dwarf2out_do_frame ())
13415 char *l = dwarf2out_cfi_label ();
13417 cfa_offset = cfa_offset + current_function_pretend_args_size;
13418 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
13422 /* Get the registers we are going to push. */
13423 live_regs_mask = thumb_compute_save_reg_mask ();
13424 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
13425 l_mask = live_regs_mask & 0x40ff;
13426 /* Then count how many other high registers will need to be pushed. */
13427 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
13429 if (TARGET_BACKTRACE)
13431 unsigned offset;
13432 unsigned work_register;
13434 /* We have been asked to create a stack backtrace structure.
13435 The code looks like this:
13437 0 .align 2
13438 0 func:
13439 0 sub SP, #16 Reserve space for 4 registers.
13440 2 push {R7} Push low registers.
13441 4 add R7, SP, #20 Get the stack pointer before the push.
13442 6 str R7, [SP, #8] Store the stack pointer (before reserving the space).
13443 8 mov R7, PC Get hold of the start of this code plus 12.
13444 10 str R7, [SP, #16] Store it.
13445 12 mov R7, FP Get hold of the current frame pointer.
13446 14 str R7, [SP, #4] Store it.
13447 16 mov R7, LR Get hold of the current return address.
13448 18 str R7, [SP, #12] Store it.
13449 20 add R7, SP, #16 Point at the start of the backtrace structure.
13450 22 mov FP, R7 Put this value into the frame pointer. */
13452 work_register = thumb_find_work_register (live_regs_mask);
13454 asm_fprintf
13455 (f, "\tsub\t%r, %r, #16\t%@ Create stack backtrace structure\n",
13456 SP_REGNUM, SP_REGNUM);
13458 if (dwarf2out_do_frame ())
13460 char *l = dwarf2out_cfi_label ();
13462 cfa_offset = cfa_offset + 16;
13463 dwarf2out_def_cfa (l, SP_REGNUM, cfa_offset);
13466 if (l_mask)
13468 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
13469 offset = bit_count (l_mask);
13471 else
13472 offset = 0;
13474 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
13475 offset + 16 + current_function_pretend_args_size);
13477 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
13478 offset + 4);
13480 /* Make sure that the instruction fetching the PC is in the right place
13481 to calculate "start of backtrace creation code + 12". */
13482 if (l_mask)
13484 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
13485 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
13486 offset + 12);
13487 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
13488 ARM_HARD_FRAME_POINTER_REGNUM);
13489 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
13490 offset);
13492 else
13494 asm_fprintf (f, "\tmov\t%r, %r\n", work_register,
13495 ARM_HARD_FRAME_POINTER_REGNUM);
13496 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
13497 offset);
13498 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, PC_REGNUM);
13499 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
13500 offset + 12);
13503 asm_fprintf (f, "\tmov\t%r, %r\n", work_register, LR_REGNUM);
13504 asm_fprintf (f, "\tstr\t%r, [%r, #%d]\n", work_register, SP_REGNUM,
13505 offset + 8);
13506 asm_fprintf (f, "\tadd\t%r, %r, #%d\n", work_register, SP_REGNUM,
13507 offset + 12);
13508 asm_fprintf (f, "\tmov\t%r, %r\t\t%@ Backtrace structure created\n",
13509 ARM_HARD_FRAME_POINTER_REGNUM, work_register);
13511 /* Optimisation: If we are not pushing any low registers but we are going
13512 to push some high registers then delay our first push. This will just
13513 be a push of LR and we can combine it with the push of the first high
13514 register. */
13515 else if ((l_mask & 0xff) != 0
13516 || (high_regs_pushed == 0 && l_mask))
13517 thumb_pushpop (f, l_mask, 1, &cfa_offset, l_mask);
13519 if (high_regs_pushed)
13521 unsigned pushable_regs;
13522 unsigned next_hi_reg;
13524 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
13525 if (live_regs_mask & (1 << next_hi_reg))
13526 break;
13528 pushable_regs = l_mask & 0xff;
13530 if (pushable_regs == 0)
13531 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
13533 while (high_regs_pushed > 0)
13535 unsigned long real_regs_mask = 0;
13537 for (regno = LAST_LO_REGNUM; regno >= 0; regno --)
13539 if (pushable_regs & (1 << regno))
13541 asm_fprintf (f, "\tmov\t%r, %r\n", regno, next_hi_reg);
13543 high_regs_pushed --;
13544 real_regs_mask |= (1 << next_hi_reg);
13546 if (high_regs_pushed)
13548 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
13549 next_hi_reg --)
13550 if (live_regs_mask & (1 << next_hi_reg))
13551 break;
13553 else
13555 pushable_regs &= ~((1 << regno) - 1);
13556 break;
13561 /* If we had to find a work register and we have not yet
13562 saved the LR then add it to the list of regs to push. */
13563 if (l_mask == (1 << LR_REGNUM))
13565 thumb_pushpop (f, pushable_regs | (1 << LR_REGNUM),
13566 1, &cfa_offset,
13567 real_regs_mask | (1 << LR_REGNUM));
13568 l_mask = 0;
13570 else
13571 thumb_pushpop (f, pushable_regs, 1, &cfa_offset, real_regs_mask);
13576 /* Handle the case of a double word load into a low register from
13577 a computed memory address. The computed address may involve a
13578 register which is overwritten by the load. */
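/* For example, a doubleword load whose base register is also the low
   destination register, say r0/r1 loaded from [r0], must load the high
   word first:

   ldr r1, [r0, #4]
   ldr r0, [r0]

   otherwise the base would be clobbered before the second load. */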
13579 const char *
13580 thumb_load_double_from_address (rtx *operands)
13582 rtx addr;
13583 rtx base;
13584 rtx offset;
13585 rtx arg1;
13586 rtx arg2;
13588 gcc_assert (GET_CODE (operands[0]) == REG);
13589 gcc_assert (GET_CODE (operands[1]) == MEM);
13591 /* Get the memory address. */
13592 addr = XEXP (operands[1], 0);
13594 /* Work out how the memory address is computed. */
13595 switch (GET_CODE (addr))
13597 case REG:
13598 operands[2] = gen_rtx_MEM (SImode,
13599 plus_constant (XEXP (operands[1], 0), 4));
13601 if (REGNO (operands[0]) == REGNO (addr))
13603 output_asm_insn ("ldr\t%H0, %2", operands);
13604 output_asm_insn ("ldr\t%0, %1", operands);
13606 else
13608 output_asm_insn ("ldr\t%0, %1", operands);
13609 output_asm_insn ("ldr\t%H0, %2", operands);
13611 break;
13613 case CONST:
13614 /* Compute <address> + 4 for the high order load. */
13615 operands[2] = gen_rtx_MEM (SImode,
13616 plus_constant (XEXP (operands[1], 0), 4));
13618 output_asm_insn ("ldr\t%0, %1", operands);
13619 output_asm_insn ("ldr\t%H0, %2", operands);
13620 break;
13622 case PLUS:
13623 arg1 = XEXP (addr, 0);
13624 arg2 = XEXP (addr, 1);
13626 if (CONSTANT_P (arg1))
13627 base = arg2, offset = arg1;
13628 else
13629 base = arg1, offset = arg2;
13631 gcc_assert (GET_CODE (base) == REG);
13633 /* Catch the case of <address> = <reg> + <reg> */
13634 if (GET_CODE (offset) == REG)
13636 int reg_offset = REGNO (offset);
13637 int reg_base = REGNO (base);
13638 int reg_dest = REGNO (operands[0]);
13640 /* Add the base and offset registers together into the
13641 higher destination register. */
13642 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
13643 reg_dest + 1, reg_base, reg_offset);
13645 /* Load the lower destination register from the address in
13646 the higher destination register. */
13647 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
13648 reg_dest, reg_dest + 1);
13650 /* Load the higher destination register from its own address
13651 plus 4. */
13652 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
13653 reg_dest + 1, reg_dest + 1);
13655 else
13657 /* Compute <address> + 4 for the high order load. */
13658 operands[2] = gen_rtx_MEM (SImode,
13659 plus_constant (XEXP (operands[1], 0), 4));
13661 /* If the computed address is held in the low order register
13662 then load the high order register first, otherwise always
13663 load the low order register first. */
13664 if (REGNO (operands[0]) == REGNO (base))
13666 output_asm_insn ("ldr\t%H0, %2", operands);
13667 output_asm_insn ("ldr\t%0, %1", operands);
13669 else
13671 output_asm_insn ("ldr\t%0, %1", operands);
13672 output_asm_insn ("ldr\t%H0, %2", operands);
13675 break;
13677 case LABEL_REF:
13678 /* With no registers to worry about we can just load the value
13679 directly. */
13680 operands[2] = gen_rtx_MEM (SImode,
13681 plus_constant (XEXP (operands[1], 0), 4));
13683 output_asm_insn ("ldr\t%H0, %2", operands);
13684 output_asm_insn ("ldr\t%0, %1", operands);
13685 break;
13687 default:
13688 gcc_unreachable ();
13691 return "";
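/* Output a block move of N words (N is 2 or 3) using ldmia/stmia with
   auto-increment.  The compare-and-swap sequences below sort the
   scratch registers into ascending order first: ldmia fills the
   lowest-numbered register from the lowest address, so the registers
   must be used in ascending order for the words to be stored back in
   their original order.  */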
13694 const char *
13695 thumb_output_move_mem_multiple (int n, rtx *operands)
13697 rtx tmp;
13699 switch (n)
13701 case 2:
13702 if (REGNO (operands[4]) > REGNO (operands[5]))
13704 tmp = operands[4];
13705 operands[4] = operands[5];
13706 operands[5] = tmp;
13708 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
13709 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
13710 break;
13712 case 3:
13713 if (REGNO (operands[4]) > REGNO (operands[5]))
13715 tmp = operands[4];
13716 operands[4] = operands[5];
13717 operands[5] = tmp;
13719 if (REGNO (operands[5]) > REGNO (operands[6]))
13721 tmp = operands[5];
13722 operands[5] = operands[6];
13723 operands[6] = tmp;
13725 if (REGNO (operands[4]) > REGNO (operands[5]))
13727 tmp = operands[4];
13728 operands[4] = operands[5];
13729 operands[5] = tmp;
13732 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
13733 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
13734 break;
13736 default:
13737 gcc_unreachable ();
13740 return "";
13743 /* Output a call-via instruction for thumb state. */
13744 const char *
13745 thumb_call_via_reg (rtx reg)
13747 int regno = REGNO (reg);
13748 rtx *labelp;
13750 gcc_assert (regno < LR_REGNUM);
13752 /* If we are in the normal text section we can use a single instance
13753 per compilation unit. If we are doing function sections, then we need
13754 an entry per section, since we can't rely on reachability. */
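/* What gets emitted here is just "bl <label>"; the matching stub,
   roughly
	<label>:
		bx	rN
   is emitted later -- see arm_file_end below for the text-section
   case.  */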
13755 if (in_text_section ())
13757 thumb_call_reg_needed = 1;
13759 if (thumb_call_via_label[regno] == NULL)
13760 thumb_call_via_label[regno] = gen_label_rtx ();
13761 labelp = thumb_call_via_label + regno;
13763 else
13765 if (cfun->machine->call_via[regno] == NULL)
13766 cfun->machine->call_via[regno] = gen_label_rtx ();
13767 labelp = cfun->machine->call_via + regno;
13770 output_asm_insn ("bl\t%a0", labelp);
13771 return "";
13774 /* Routines for generating rtl. */
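/* Expand a constant-length memory copy into straight-line code:
   12-byte and 8-byte ldmia/stmia blocks first, then at most one word,
   one half-word and one byte move for the tail.  A 22-byte copy, for
   example, becomes one 12-byte block, one 8-byte block and one
   half-word move.  */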
13775 void
13776 thumb_expand_movmemqi (rtx *operands)
13778 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
13779 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
13780 HOST_WIDE_INT len = INTVAL (operands[2]);
13781 HOST_WIDE_INT offset = 0;
13783 while (len >= 12)
13785 emit_insn (gen_movmem12b (out, in, out, in));
13786 len -= 12;
13789 if (len >= 8)
13791 emit_insn (gen_movmem8b (out, in, out, in));
13792 len -= 8;
13795 if (len >= 4)
13797 rtx reg = gen_reg_rtx (SImode);
13798 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
13799 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
13800 len -= 4;
13801 offset += 4;
13804 if (len >= 2)
13806 rtx reg = gen_reg_rtx (HImode);
13807 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
13808 plus_constant (in, offset))));
13809 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (out, offset)),
13810 reg));
13811 len -= 2;
13812 offset += 2;
13815 if (len)
13817 rtx reg = gen_reg_rtx (QImode);
13818 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
13819 plus_constant (in, offset))));
13820 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (out, offset)),
13821 reg));
13825 void
13826 thumb_reload_out_hi (rtx *operands)
13828 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
13831 /* Handle reading a half-word from memory during reload. */
13832 void
13833 thumb_reload_in_hi (rtx *operands ATTRIBUTE_UNUSED)
13835 gcc_unreachable ();
13838 /* Return the length of a function name prefix
13839 that starts with the character 'c'. */
13840 static int
13841 arm_get_strip_length (int c)
13843 switch (c)
13845 ARM_NAME_ENCODING_LENGTHS
13846 default: return 0;
13850 /* Return a pointer to a function's name with any
13851 and all prefix encodings stripped from it. */
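/* A small illustration (the actual prefix set comes from the
   ARM_NAME_ENCODING_LENGTHS macro, so it varies by target): with the
   default table, which strips '*', "*foo" yields "foo", and stacked
   prefixes are removed one per loop iteration.  */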
13852 const char *
13853 arm_strip_name_encoding (const char *name)
13855 int skip;
13857 while ((skip = arm_get_strip_length (* name)))
13858 name += skip;
13860 return name;
13863 /* If there is a '*' anywhere in the name's prefix, then
13864 emit the stripped name verbatim, otherwise prepend an
13865 underscore if leading underscores are being used. */
13866 void
13867 arm_asm_output_labelref (FILE *stream, const char *name)
13869 int skip;
13870 int verbatim = 0;
13872 while ((skip = arm_get_strip_length (* name)))
13874 verbatim |= (*name == '*');
13875 name += skip;
13878 if (verbatim)
13879 fputs (name, stream);
13880 else
13881 asm_fprintf (stream, "%U%s", name);
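/* At the end of compilation, emit the text-section call-via stubs
   requested by thumb_call_via_reg: one internal label per register
   used, each followed by a single "bx" instruction.  */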
13884 static void
13885 arm_file_end (void)
13887 int regno;
13889 if (! thumb_call_reg_needed)
13890 return;
13892 text_section ();
13893 asm_fprintf (asm_out_file, "\t.code 16\n");
13894 ASM_OUTPUT_ALIGN (asm_out_file, 1);
13896 for (regno = 0; regno < LR_REGNUM; regno++)
13898 rtx label = thumb_call_via_label[regno];
13900 if (label != 0)
13902 targetm.asm_out.internal_label (asm_out_file, "L",
13903 CODE_LABEL_NUMBER (label));
13904 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
13909 rtx aof_pic_label;
13911 #ifdef AOF_ASSEMBLER
13912 /* Special functions only needed when producing AOF syntax assembler. */
13914 struct pic_chain
13916 struct pic_chain * next;
13917 const char * symname;
13920 static struct pic_chain * aof_pic_chain = NULL;
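/* Return the address of X's slot in the AOF PIC adcons table,
   appending a new 4-byte entry to the chain the first time a given
   symbol is seen.  */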
13922 rtx
13923 aof_pic_entry (rtx x)
13925 struct pic_chain ** chainp;
13926 int offset;
13928 if (aof_pic_label == NULL_RTX)
13930 aof_pic_label = gen_rtx_SYMBOL_REF (Pmode, "x$adcons");
13933 for (offset = 0, chainp = &aof_pic_chain; *chainp;
13934 offset += 4, chainp = &(*chainp)->next)
13935 if ((*chainp)->symname == XSTR (x, 0))
13936 return plus_constant (aof_pic_label, offset);
13938 *chainp = (struct pic_chain *) xmalloc (sizeof (struct pic_chain));
13939 (*chainp)->next = NULL;
13940 (*chainp)->symname = XSTR (x, 0);
13941 return plus_constant (aof_pic_label, offset);
13944 void
13945 aof_dump_pic_table (FILE *f)
13947 struct pic_chain * chain;
13949 if (aof_pic_chain == NULL)
13950 return;
13952 asm_fprintf (f, "\tAREA |%r$$adcons|, BASED %r\n",
13953 PIC_OFFSET_TABLE_REGNUM,
13954 PIC_OFFSET_TABLE_REGNUM);
13955 fputs ("|x$adcons|\n", f);
13957 for (chain = aof_pic_chain; chain; chain = chain->next)
13959 fputs ("\tDCD\t", f);
13960 assemble_name (f, chain->symname);
13961 fputs ("\n", f);
13965 int arm_text_section_count = 1;
13967 char *
13968 aof_text_section (void)
13970 static char buf[100];
13971 sprintf (buf, "\tAREA |C$$code%d|, CODE, READONLY",
13972 arm_text_section_count++);
13973 if (flag_pic)
13974 strcat (buf, ", PIC, REENTRANT");
13975 return buf;
13978 static int arm_data_section_count = 1;
13980 char *
13981 aof_data_section (void)
13983 static char buf[100];
13984 sprintf (buf, "\tAREA |C$$data%d|, DATA", arm_data_section_count++);
13985 return buf;
13988 /* The AOF assembler is religiously strict about declarations of
13989 imported and exported symbols, so that it is impossible to declare
13990 a function as imported near the beginning of the file, and then to
13991 export it later on. It is, however, possible to delay the decision
13992 until all the functions in the file have been compiled. To get
13993 around this, we maintain a list of the imports and exports, and
13994 delete from it any that are subsequently defined. At the end of
13995 compilation we spit the remainder of the list out before the END
13996 directive. */
13998 struct import
14000 struct import * next;
14001 const char * name;
14004 static struct import * imports_list = NULL;
14006 void
14007 aof_add_import (const char *name)
14009 struct import * new;
14011 for (new = imports_list; new; new = new->next)
14012 if (new->name == name)
14013 return;
14015 new = (struct import *) xmalloc (sizeof (struct import));
14016 new->next = imports_list;
14017 imports_list = new;
14018 new->name = name;
14021 void
14022 aof_delete_import (const char *name)
14024 struct import ** old;
14026 for (old = &imports_list; *old; old = & (*old)->next)
14028 if ((*old)->name == name)
14030 *old = (*old)->next;
14031 return;
14036 int arm_main_function = 0;
14038 static void
14039 aof_dump_imports (FILE *f)
14041 /* The AOF assembler needs this to cause the startup code to be extracted
14042 from the library. Bringing in __main causes the whole thing to work
14043 automagically. */
14044 if (arm_main_function)
14046 text_section ();
14047 fputs ("\tIMPORT __main\n", f);
14048 fputs ("\tDCD __main\n", f);
14051 /* Now dump the remaining imports. */
14052 while (imports_list)
14054 fprintf (f, "\tIMPORT\t");
14055 assemble_name (f, imports_list->name);
14056 fputc ('\n', f);
14057 imports_list = imports_list->next;
14061 static void
14062 aof_globalize_label (FILE *stream, const char *name)
14064 default_globalize_label (stream, name);
14065 if (! strcmp (name, "main"))
14066 arm_main_function = 1;
14069 static void
14070 aof_file_start (void)
14072 fputs ("__r0\tRN\t0\n", asm_out_file);
14073 fputs ("__a1\tRN\t0\n", asm_out_file);
14074 fputs ("__a2\tRN\t1\n", asm_out_file);
14075 fputs ("__a3\tRN\t2\n", asm_out_file);
14076 fputs ("__a4\tRN\t3\n", asm_out_file);
14077 fputs ("__v1\tRN\t4\n", asm_out_file);
14078 fputs ("__v2\tRN\t5\n", asm_out_file);
14079 fputs ("__v3\tRN\t6\n", asm_out_file);
14080 fputs ("__v4\tRN\t7\n", asm_out_file);
14081 fputs ("__v5\tRN\t8\n", asm_out_file);
14082 fputs ("__v6\tRN\t9\n", asm_out_file);
14083 fputs ("__sl\tRN\t10\n", asm_out_file);
14084 fputs ("__fp\tRN\t11\n", asm_out_file);
14085 fputs ("__ip\tRN\t12\n", asm_out_file);
14086 fputs ("__sp\tRN\t13\n", asm_out_file);
14087 fputs ("__lr\tRN\t14\n", asm_out_file);
14088 fputs ("__pc\tRN\t15\n", asm_out_file);
14089 fputs ("__f0\tFN\t0\n", asm_out_file);
14090 fputs ("__f1\tFN\t1\n", asm_out_file);
14091 fputs ("__f2\tFN\t2\n", asm_out_file);
14092 fputs ("__f3\tFN\t3\n", asm_out_file);
14093 fputs ("__f4\tFN\t4\n", asm_out_file);
14094 fputs ("__f5\tFN\t5\n", asm_out_file);
14095 fputs ("__f6\tFN\t6\n", asm_out_file);
14096 fputs ("__f7\tFN\t7\n", asm_out_file);
14097 text_section ();
14100 static void
14101 aof_file_end (void)
14103 if (flag_pic)
14104 aof_dump_pic_table (asm_out_file);
14105 arm_file_end ();
14106 aof_dump_imports (asm_out_file);
14107 fputs ("\tEND\n", asm_out_file);
14109 #endif /* AOF_ASSEMBLER */
14111 #ifndef ARM_PE
14112 /* Symbols in the text segment can be accessed without indirecting via the
14113 constant pool; it may take an extra binary operation, but this is still
14114 faster than indirecting via memory. Don't do this when not optimizing,
14115 since we won't be calculating all of the offsets necessary to do this
14116 simplification. */
14118 static void
14119 arm_encode_section_info (tree decl, rtx rtl, int first)
14121 /* This doesn't work with AOF syntax, since the string table may be in
14122 a different AREA. */
14123 #ifndef AOF_ASSEMBLER
14124 if (optimize > 0 && TREE_CONSTANT (decl))
14125 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
14126 #endif
14128 /* If we are referencing a function that is weak then encode a long call
14129 flag in the function name, otherwise if the function is static or
14130 known to be defined in this file then encode a short call flag. */
14131 if (first && DECL_P (decl))
14133 if (TREE_CODE (decl) == FUNCTION_DECL && DECL_WEAK (decl))
14134 arm_encode_call_attribute (decl, LONG_CALL_FLAG_CHAR);
14135 else if (! TREE_PUBLIC (decl))
14136 arm_encode_call_attribute (decl, SHORT_CALL_FLAG_CHAR);
14139 #endif /* !ARM_PE */
14141 static void
14142 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
14144 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
14145 && !strcmp (prefix, "L"))
14147 arm_ccfsm_state = 0;
14148 arm_target_insn = NULL;
14150 default_internal_label (stream, prefix, labelno);
14153 /* Output code to add DELTA to the first argument, and then jump
14154 to FUNCTION. Used for C++ multiple inheritance. */
14155 static void
14156 arm_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
14157 HOST_WIDE_INT delta,
14158 HOST_WIDE_INT vcall_offset ATTRIBUTE_UNUSED,
14159 tree function)
14161 static int thunk_label = 0;
14162 char label[256];
14163 int mi_delta = delta;
14164 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
14165 int shift = 0;
14166 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
14167 ? 1 : 0);
14168 if (mi_delta < 0)
14169 mi_delta = - mi_delta;
14170 if (TARGET_THUMB)
14172 int labelno = thunk_label++;
14173 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
14174 fputs ("\tldr\tr12, ", file);
14175 assemble_name (file, label);
14176 fputc ('\n', file);
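	/* Decompose the delta into a series of immediates that fit the
	   "8-bit value at an even bit position" form handled by the loop
	   below; e.g. (values illustrative) a delta of 0x10004 is emitted
	   as an add of #4 followed by an add of #65536.  */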
14178 while (mi_delta != 0)
14180 if ((mi_delta & (3 << shift)) == 0)
14181 shift += 2;
14182 else
14184 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
14185 mi_op, this_regno, this_regno,
14186 mi_delta & (0xff << shift));
14187 mi_delta &= ~(0xff << shift);
14188 shift += 8;
14191 if (TARGET_THUMB)
14193 fprintf (file, "\tbx\tr12\n");
14194 ASM_OUTPUT_ALIGN (file, 2);
14195 assemble_name (file, label);
14196 fputs (":\n", file);
14197 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
14199 else
14201 fputs ("\tb\t", file);
14202 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
14203 if (NEED_PLT_RELOC)
14204 fputs ("(PLT)", file);
14205 fputc ('\n', file);
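/* Print the constant vector X as one hexadecimal literal, most
   significant element first: e.g. the V4HImode vector {1, 2, 3, 4}
   (element 0 first) is printed as 0x0004000300020001.  */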
14209 int
14210 arm_emit_vector_const (FILE *file, rtx x)
14212 int i;
14213 const char * pattern;
14215 gcc_assert (GET_CODE (x) == CONST_VECTOR);
14217 switch (GET_MODE (x))
14219 case V2SImode: pattern = "%08x"; break;
14220 case V4HImode: pattern = "%04x"; break;
14221 case V8QImode: pattern = "%02x"; break;
14222 default: gcc_unreachable ();
14225 fprintf (file, "0x");
14226 for (i = CONST_VECTOR_NUNITS (x); i--;)
14228 rtx element;
14230 element = CONST_VECTOR_ELT (x, i);
14231 fprintf (file, pattern, INTVAL (element));
14234 return 1;
14237 const char *
14238 arm_output_load_gr (rtx *operands)
14240 rtx reg;
14241 rtx offset;
14242 rtx wcgr;
14243 rtx sum;
14245 if (GET_CODE (operands [1]) != MEM
14246 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
14247 || GET_CODE (reg = XEXP (sum, 0)) != REG
14248 || GET_CODE (offset = XEXP (sum, 1)) != CONST_INT
14249 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
14250 return "wldrw%?\t%0, %1";
14252 /* Fix up an out-of-range load of a GR register. */
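  /* The expansion emitted below is roughly (with %0 the wCGR
     destination and rB the base register of the address):
	str	rB, [sp, #-4]!	@ spill the base register
	ldr	rB, <address>	@ load the value through it
	tmcr	%0, rB		@ transfer it to the wCGR register
	ldr	rB, [sp], #4	@ restore the base register  */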
14253 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
14254 wcgr = operands[0];
14255 operands[0] = reg;
14256 output_asm_insn ("ldr%?\t%0, %1", operands);
14258 operands[0] = wcgr;
14259 operands[1] = reg;
14260 output_asm_insn ("tmcr%?\t%0, %1", operands);
14261 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
14263 return "";
14266 static rtx
14267 arm_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
14268 int incoming ATTRIBUTE_UNUSED)
14270 #if 0
14271 /* FIXME: The ARM backend has special code to handle structure
14272 returns, and will reserve its own hidden first argument. So
14273 if this macro is enabled a *second* hidden argument will be
14274 reserved, which will break binary compatibility with old
14275 toolchains and also thunk handling. One day this should be
14276 fixed. */
14277 return 0;
14278 #else
14279 /* Register in which the address to store a structure value
14280 is passed to a function. */
14281 return gen_rtx_REG (Pmode, ARG_REGISTER (1));
14282 #endif
14285 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
14287 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
14288 named arg and all anonymous args onto the stack.
14289 XXX I know the prologue shouldn't be pushing registers, but it is faster
14290 that way. */
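/* For example, with two of the four argument registers consumed by
   named arguments, PRETEND_SIZE becomes 2 * UNITS_PER_WORD so that r2
   and r3 are pushed for the anonymous arguments.  */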
14292 static void
14293 arm_setup_incoming_varargs (CUMULATIVE_ARGS *cum,
14294 enum machine_mode mode ATTRIBUTE_UNUSED,
14295 tree type ATTRIBUTE_UNUSED,
14296 int *pretend_size,
14297 int second_time ATTRIBUTE_UNUSED)
14299 cfun->machine->uses_anonymous_args = 1;
14300 if (cum->nregs < NUM_ARG_REGS)
14301 *pretend_size = (NUM_ARG_REGS - cum->nregs) * UNITS_PER_WORD;
14304 /* Return nonzero if the CONSUMER instruction (a store) does not need
14305 PRODUCER's value to calculate the address. */
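/* For example (registers illustrative): if PRODUCER sets r1 and
   CONSUMER is "str r1, [r2, #4]", the result feeds only the stored
   value, not the address calculation, so this returns nonzero.  */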
14307 int
14308 arm_no_early_store_addr_dep (rtx producer, rtx consumer)
14310 rtx value = PATTERN (producer);
14311 rtx addr = PATTERN (consumer);
14313 if (GET_CODE (value) == COND_EXEC)
14314 value = COND_EXEC_CODE (value);
14315 if (GET_CODE (value) == PARALLEL)
14316 value = XVECEXP (value, 0, 0);
14317 value = XEXP (value, 0);
14318 if (GET_CODE (addr) == COND_EXEC)
14319 addr = COND_EXEC_CODE (addr);
14320 if (GET_CODE (addr) == PARALLEL)
14321 addr = XVECEXP (addr, 0, 0);
14322 addr = XEXP (addr, 0);
14324 return !reg_overlap_mentioned_p (value, addr);
14327 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
14328 have an early register shift value or amount dependency on the
14329 result of PRODUCER. */
14331 int
14332 arm_no_early_alu_shift_dep (rtx producer, rtx consumer)
14334 rtx value = PATTERN (producer);
14335 rtx op = PATTERN (consumer);
14336 rtx early_op;
14338 if (GET_CODE (value) == COND_EXEC)
14339 value = COND_EXEC_CODE (value);
14340 if (GET_CODE (value) == PARALLEL)
14341 value = XVECEXP (value, 0, 0);
14342 value = XEXP (value, 0);
14343 if (GET_CODE (op) == COND_EXEC)
14344 op = COND_EXEC_CODE (op);
14345 if (GET_CODE (op) == PARALLEL)
14346 op = XVECEXP (op, 0, 0);
14347 op = XEXP (op, 1);
14349 early_op = XEXP (op, 0);
14350 /* This is either an actual independent shift, or a shift applied to
14351 the first operand of another operation. We want the whole shift
14352 operation. */
14353 if (GET_CODE (early_op) == REG)
14354 early_op = op;
14356 return !reg_overlap_mentioned_p (value, early_op);
14359 /* Return nonzero if the CONSUMER instruction (an ALU op) does not
14360 have an early register shift value dependency on the result of
14361 PRODUCER. */
14363 int
14364 arm_no_early_alu_shift_value_dep (rtx producer, rtx consumer)
14366 rtx value = PATTERN (producer);
14367 rtx op = PATTERN (consumer);
14368 rtx early_op;
14370 if (GET_CODE (value) == COND_EXEC)
14371 value = COND_EXEC_CODE (value);
14372 if (GET_CODE (value) == PARALLEL)
14373 value = XVECEXP (value, 0, 0);
14374 value = XEXP (value, 0);
14375 if (GET_CODE (op) == COND_EXEC)
14376 op = COND_EXEC_CODE (op);
14377 if (GET_CODE (op) == PARALLEL)
14378 op = XVECEXP (op, 0, 0);
14379 op = XEXP (op, 1);
14381 early_op = XEXP (op, 0);
14383 /* This is either an actual independent shift, or a shift applied to
14384 the first operand of another operation. We want the value being
14385 shifted, in either case. */
14386 if (GET_CODE (early_op) != REG)
14387 early_op = XEXP (early_op, 0);
14389 return !reg_overlap_mentioned_p (value, early_op);
14392 /* Return nonzero if the CONSUMER (a mul or mac op) does not
14393 have an early register mult dependency on the result of
14394 PRODUCER. */
14396 int
14397 arm_no_early_mul_dep (rtx producer, rtx consumer)
14399 rtx value = PATTERN (producer);
14400 rtx op = PATTERN (consumer);
14402 if (GET_CODE (value) == COND_EXEC)
14403 value = COND_EXEC_CODE (value);
14404 if (GET_CODE (value) == PARALLEL)
14405 value = XVECEXP (value, 0, 0);
14406 value = XEXP (value, 0);
14407 if (GET_CODE (op) == COND_EXEC)
14408 op = COND_EXEC_CODE (op);
14409 if (GET_CODE (op) == PARALLEL)
14410 op = XVECEXP (op, 0, 0);
14411 op = XEXP (op, 1);
14413 return (GET_CODE (op) == PLUS
14414 && !reg_overlap_mentioned_p (value, XEXP (op, 0)));
14418 /* We can't rely on the caller doing the proper promotion when
14419 using APCS or ATPCS. */
14421 static bool
14422 arm_promote_prototypes (tree t ATTRIBUTE_UNUSED)
14424 return !TARGET_AAPCS_BASED;
14428 /* AAPCS based ABIs use short enums by default. */
14430 static bool
14431 arm_default_short_enums (void)
14433 return TARGET_AAPCS_BASED;
14437 /* AAPCS requires that anonymous bitfields affect structure alignment. */
14439 static bool
14440 arm_align_anon_bitfield (void)
14442 return TARGET_AAPCS_BASED;
14446 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
14448 static tree
14449 arm_cxx_guard_type (void)
14451 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
14455 /* The EABI says test the least significant bit of a guard variable. */
14457 static bool
14458 arm_cxx_guard_mask_bit (void)
14460 return TARGET_AAPCS_BASED;
14464 /* The EABI specifies that all array cookies are 8 bytes long. */
14466 static tree
14467 arm_get_cookie_size (tree type)
14469 tree size;
14471 if (!TARGET_AAPCS_BASED)
14472 return default_cxx_get_cookie_size (type);
14474 size = build_int_cst (sizetype, 8);
14475 return size;
14479 /* The EABI says that array cookies should also contain the element size. */
14481 static bool
14482 arm_cookie_has_size (void)
14484 return TARGET_AAPCS_BASED;
14488 /* The EABI says constructors and destructors should return a pointer to
14489 the object constructed/destroyed. */
14491 static bool
14492 arm_cxx_cdtor_returns_this (void)
14494 return TARGET_AAPCS_BASED;
14497 /* The EABI says that an inline function may never be the key
14498 method. */
14500 static bool
14501 arm_cxx_key_method_may_be_inline (void)
14503 return !TARGET_AAPCS_BASED;
14506 static void
14507 arm_cxx_determine_class_data_visibility (tree decl)
14509 if (!TARGET_AAPCS_BASED)
14510 return;
14512 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
14513 be exported. However, on systems without dynamic vague linkage,
14514 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
14515 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
14516 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
14517 else
14518 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
14519 DECL_VISIBILITY_SPECIFIED (decl) = 1;
14522 static bool
14523 arm_cxx_class_data_always_comdat (void)
14525 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
14526 vague linkage if the class has no key function. */
14527 return !TARGET_AAPCS_BASED;
14531 /* The EABI says __aeabi_atexit should be used to register static
14532 destructors. */
14534 static bool
14535 arm_cxx_use_aeabi_atexit (void)
14537 return TARGET_AAPCS_BASED;
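/* Store SOURCE into this function's saved-return-address slot (or
   straight into LR if LR was not saved), using SCRATCH when the slot
   is too far from the stack pointer to address directly.  */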
14541 void
14542 arm_set_return_address (rtx source, rtx scratch)
14544 arm_stack_offsets *offsets;
14545 HOST_WIDE_INT delta;
14546 rtx addr;
14547 unsigned long saved_regs;
14549 saved_regs = arm_compute_save_reg_mask ();
14551 if ((saved_regs & (1 << LR_REGNUM)) == 0)
14552 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
14553 else
14555 if (frame_pointer_needed)
14556 addr = plus_constant (hard_frame_pointer_rtx, -4);
14557 else
14559 /* LR will be the first saved register. */
14560 offsets = arm_get_frame_offsets ();
14561 delta = offsets->outgoing_args - (offsets->frame + 4);
14564 if (delta >= 4096)
14566 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
14567 GEN_INT (delta & ~4095)));
14568 addr = scratch;
14569 delta &= 4095;
14571 else
14572 addr = stack_pointer_rtx;
14574 addr = plus_constant (addr, delta);
14576 emit_move_insn (gen_rtx_MEM (Pmode, addr), source);
14581 void
14582 thumb_set_return_address (rtx source, rtx scratch)
14584 arm_stack_offsets *offsets;
14585 HOST_WIDE_INT delta;
14586 int reg;
14587 rtx addr;
14588 unsigned long mask;
14590 emit_insn (gen_rtx_USE (VOIDmode, source));
14592 mask = thumb_compute_save_reg_mask ();
14593 if (mask & (1 << LR_REGNUM))
14595 offsets = arm_get_frame_offsets ();
14597 /* Find the saved regs. */
14598 if (frame_pointer_needed)
14600 delta = offsets->soft_frame - offsets->saved_args;
14601 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
14603 else
14605 delta = offsets->outgoing_args - offsets->saved_args;
14606 reg = SP_REGNUM;
14608 /* Allow for the stack frame. */
14609 if (TARGET_BACKTRACE)
14610 delta -= 16;
14611 /* The link register is always the first saved register. */
14612 delta -= 4;
14614 /* Construct the address. */
14615 addr = gen_rtx_REG (SImode, reg);
14616 if ((reg != SP_REGNUM && delta >= 128)
14617 || delta >= 1024)
14619 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
14620 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
14621 addr = scratch;
14623 else
14624 addr = plus_constant (addr, delta);
14626 emit_move_insn (gen_rtx_MEM (Pmode, addr), source);
14628 else
14629 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
14632 /* Implements target hook vector_mode_supported_p. */
14633 bool
14634 arm_vector_mode_supported_p (enum machine_mode mode)
14636 if ((mode == V2SImode)
14637 || (mode == V4HImode)
14638 || (mode == V8QImode))
14639 return true;
14641 return false;
14644 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
14645 ARM insns and therefore guarantee that the shift count is modulo 256.
14646 DImode shifts (those implemented by lib1funcs.asm or by optabs.c)
14647 guarantee no particular behavior for out-of-range counts. */
14649 static unsigned HOST_WIDE_INT
14650 arm_shift_truncation_mask (enum machine_mode mode)
14652 return mode == SImode ? 255 : 0;
14656 /* Map internal gcc register numbers to DWARF2 register numbers. */
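/* Summarizing the cases below: core registers 0-15 map to 0-15, FPA
   registers to 16-23 (legacy) or 96-103 (EABI), VFP registers to
   64-95, iWMMXt GR registers to 104-107 and iWMMXt registers to
   112-127.  */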
14658 unsigned int
14659 arm_dbx_register_number (unsigned int regno)
14661 if (regno < 16)
14662 return regno;
14664 /* TODO: Legacy targets output FPA regs as registers 16-23 for backwards
14665 compatibility. The EABI defines them as registers 96-103. */
14666 if (IS_FPA_REGNUM (regno))
14667 return (TARGET_AAPCS_BASED ? 96 : 16) + regno - FIRST_FPA_REGNUM;
14669 if (IS_VFP_REGNUM (regno))
14670 return 64 + regno - FIRST_VFP_REGNUM;
14672 if (IS_IWMMXT_GR_REGNUM (regno))
14673 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
14675 if (IS_IWMMXT_REGNUM (regno))
14676 return 112 + regno - FIRST_IWMMXT_REGNUM;
14678 gcc_unreachable ();