1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
22 Boston, MA 02110-1301, USA. */
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "insn-config.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "flags.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "function.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "output.h"
38 #include "insn-attr.h"
39 #include "toplev.h"
40 #include "recog.h"
41 #include "c-pragma.h"
42 #include "integrate.h"
43 #include "dwarf2.h"
44 #include "tm_p.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "real.h"
48 #include "langhooks.h"
49 #include "basic-block.h"
50 #include "cfglayout.h"
51 #include "intl.h"
52 #include "sched-int.h"
53 #include "ggc.h"
54 #include "tree-gimple.h"
55 #include "cfgloop.h"
58 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
60 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
61 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
63 /* These are some macros to abstract register modes. */
64 #define CONST_OK_FOR_ADD(size) \
65 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
66 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
67 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
68 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
70 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
71 int current_function_interrupt;
73 /* ??? The pragma interrupt support will not work for SH3. */
74 /* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
75 output code for the next function appropriate for an interrupt handler. */
76 int pragma_interrupt;
78 /* This is set by the trap_exit attribute for functions. It specifies
79 a trap number to be used in a trapa instruction at function exit
80 (instead of an rte instruction). */
81 int trap_exit;
83 /* This is used by the sp_switch attribute for functions. It specifies
84 a variable holding the address of the stack the interrupt function
85 should switch to/from at entry/exit. */
86 rtx sp_switch;
88 /* This is set by #pragma trapa, and is similar to the above, except that
89 the compiler doesn't emit code to preserve all registers. */
90 static int pragma_trapa;
92 /* This is set by #pragma nosave_low_regs. This is useful on the SH3,
93 which has a separate set of low regs for User and Supervisor modes.
94 This should only be used for the lowest level of interrupts. Higher levels
95 of interrupts must save the registers in case they themselves are
96 interrupted. */
97 int pragma_nosave_low_regs;
99 /* This is used for communication between TARGET_SETUP_INCOMING_VARARGS and
100 sh_expand_prologue. */
101 int current_function_anonymous_args;
103 /* Global variables for machine-dependent things. */
105 /* Which cpu are we scheduling for. */
106 enum processor_type sh_cpu;
108 /* Definitions used in ready queue reordering for first scheduling pass. */
110 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
111 static short *regmode_weight[2];
113 /* Total SFmode and SImode weights of scheduled insns. */
114 static int curr_regmode_pressure[2];
116 /* If true, skip cycles for Q -> R movement. */
117 static int skip_cycles = 0;
119 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
120 and returned from sh_reorder2. */
121 static short cached_can_issue_more;
123 /* Saved operands from the last compare to use when we generate an scc
124 or bcc insn. */
126 rtx sh_compare_op0;
127 rtx sh_compare_op1;
129 /* Provides the class number of the smallest class containing
130 reg number. */
132 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
134 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
137 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
138 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
139 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
140 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
141 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
142 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
143 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
144 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
145 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
146 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
147 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
148 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
149 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
150 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
153 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
154 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
155 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
156 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
157 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
158 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
159 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
160 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
161 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
162 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
163 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
164 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
165 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
166 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
167 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
168 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
169 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
170 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
171 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
172 GENERAL_REGS, GENERAL_REGS,
175 char sh_register_names[FIRST_PSEUDO_REGISTER] \
176 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
178 char sh_additional_register_names[ADDREGNAMES_SIZE] \
179 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
180 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
182 /* Provide reg_class from a letter such as appears in the machine
183 description. *: target independently reserved letter.
184 reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */
186 enum reg_class reg_class_from_letter[] =
188 /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
189 /* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
190 /* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
191 /* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
192 /* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
193 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
194 /* y */ FPUL_REGS, /* z */ R0_REGS
197 int assembler_dialect;
199 static bool shmedia_space_reserved_for_target_registers;
201 static bool sh_handle_option (size_t, const char *, int);
202 static void split_branches (rtx);
203 static int branch_dest (rtx);
204 static void force_into (rtx, rtx);
205 static void print_slot (rtx);
206 static rtx add_constant (rtx, enum machine_mode, rtx);
207 static void dump_table (rtx, rtx);
208 static int hi_const (rtx);
209 static int broken_move (rtx);
210 static int mova_p (rtx);
211 static rtx find_barrier (int, rtx, rtx);
212 static int noncall_uses_reg (rtx, rtx, rtx *);
213 static rtx gen_block_redirect (rtx, int, int);
214 static void sh_reorg (void);
215 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
216 static rtx frame_insn (rtx);
217 static rtx push (int);
218 static void pop (int);
219 static void push_regs (HARD_REG_SET *, int);
220 static int calc_live_regs (HARD_REG_SET *);
221 static void mark_use (rtx, rtx *);
222 static HOST_WIDE_INT rounded_frame_size (int);
223 static rtx mark_constant_pool_use (rtx);
224 const struct attribute_spec sh_attribute_table[];
225 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
226 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
227 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
228 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
229 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
230 static void sh_insert_attributes (tree, tree *);
231 static const char *sh_check_pch_target_flags (int);
232 static int sh_adjust_cost (rtx, rtx, rtx, int);
233 static int sh_issue_rate (void);
234 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
235 static short find_set_regmode_weight (rtx, enum machine_mode);
236 static short find_insn_regmode_weight (rtx, enum machine_mode);
237 static void find_regmode_weight (int, enum machine_mode);
238 static void sh_md_init_global (FILE *, int, int);
239 static void sh_md_finish_global (FILE *, int);
240 static int rank_for_reorder (const void *, const void *);
241 static void swap_reorder (rtx *, int);
242 static void ready_reorder (rtx *, int);
243 static short high_pressure (enum machine_mode);
244 static int sh_reorder (FILE *, int, rtx *, int *, int);
245 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
246 static void sh_md_init (FILE *, int, int);
247 static int sh_variable_issue (FILE *, int, rtx, int);
249 static bool sh_function_ok_for_sibcall (tree, tree);
251 static bool sh_cannot_modify_jumps_p (void);
252 static int sh_target_reg_class (void);
253 static bool sh_optimize_target_register_callee_saved (bool);
254 static bool sh_ms_bitfield_layout_p (tree);
256 static void sh_init_builtins (void);
257 static void sh_media_init_builtins (void);
258 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
259 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
260 static void sh_file_start (void);
261 static int flow_dependent_p (rtx, rtx);
262 static void flow_dependent_p_1 (rtx, rtx, void *);
263 static int shiftcosts (rtx);
264 static int andcosts (rtx);
265 static int addsubcosts (rtx);
266 static int multcosts (rtx);
267 static bool unspec_caller_rtx_p (rtx);
268 static bool sh_cannot_copy_insn_p (rtx);
269 static bool sh_rtx_costs (rtx, int, int, int *);
270 static int sh_address_cost (rtx);
271 #ifdef TARGET_ADJUST_UNROLL_MAX
272 static int sh_adjust_unroll_max (struct loop *, int, int, int, int);
273 #endif
274 static int sh_pr_n_sets (void);
275 static rtx sh_allocate_initial_value (rtx);
276 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
277 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
278 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
279 static int scavenge_reg (HARD_REG_SET *s);
280 struct save_schedule_s;
281 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
282 struct save_schedule_s *, int);
284 static rtx sh_struct_value_rtx (tree, int);
285 static bool sh_return_in_memory (tree, tree);
286 static rtx sh_builtin_saveregs (void);
287 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
288 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
289 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
290 static tree sh_build_builtin_va_list (void);
291 static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
292 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
293 tree, bool);
294 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
295 tree, bool);
296 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
297 tree, bool);
298 static int sh_dwarf_calling_convention (tree);
299 static int hard_regs_intersect_p (HARD_REG_SET *, HARD_REG_SET *);
302 /* Initialize the GCC target structure. */
303 #undef TARGET_ATTRIBUTE_TABLE
304 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
306 /* The next two are used for debug info when compiling with -gdwarf. */
307 #undef TARGET_ASM_UNALIGNED_HI_OP
308 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
309 #undef TARGET_ASM_UNALIGNED_SI_OP
310 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
312 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
313 #undef TARGET_ASM_UNALIGNED_DI_OP
314 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
315 #undef TARGET_ASM_ALIGNED_DI_OP
316 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
318 #undef TARGET_ASM_FUNCTION_EPILOGUE
319 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
321 #undef TARGET_ASM_OUTPUT_MI_THUNK
322 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
324 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
325 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
327 #undef TARGET_ASM_FILE_START
328 #define TARGET_ASM_FILE_START sh_file_start
329 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
330 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
332 #undef TARGET_DEFAULT_TARGET_FLAGS
333 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
334 #undef TARGET_HANDLE_OPTION
335 #define TARGET_HANDLE_OPTION sh_handle_option
337 #undef TARGET_INSERT_ATTRIBUTES
338 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
340 #undef TARGET_SCHED_ADJUST_COST
341 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
343 #undef TARGET_SCHED_ISSUE_RATE
344 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
346 /* The next 5 hooks have been implemented for reenabling sched1. With the
347 help of these macros we are limiting the movement of insns in sched1 to
348 reduce the register pressure. The overall idea is to keep count of SImode
349 and SFmode regs required by already scheduled insns. When these counts
 350 cross some threshold values, give priority to insns that free registers.
 351 The insn that frees registers is most likely to be the insn with the lowest
 352 LUID (original insn order); but such an insn might be sitting in the stalled
 353 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
 354 up to a maximum of 8 cycles so that such insns may move from Q -> R.
 356 The description of the hooks is as follows:
358 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
359 scheduler; it is called inside the sched_init function just after
360 find_insn_reg_weights function call. It is used to calculate the SImode
 361 and SFmode weights of the insns in basic blocks, much like what
 362 find_insn_reg_weights does.
363 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
365 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
366 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
367 (Q)->(R).
369 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
 370 high, reorder the ready queue so that the insn with the lowest LUID will be
371 issued next.
373 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
374 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
376 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
377 can be returned from TARGET_SCHED_REORDER2.
379 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
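/* To sketch the mechanism (illustrative only, not code from the port):
   the reorder hooks effectively perform a check like

     if (CURR_REGMODE_PRESSURE (SImode) > <SImode threshold>
         || CURR_REGMODE_PRESSURE (SFmode) > <SFmode threshold>)
       sort the ready queue by LUID, oldest insn first;

   where CURR_REGMODE_PRESSURE is defined below and the thresholds are
   the values that high_pressure () compares against later in this
   file.  */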
381 #undef TARGET_SCHED_DFA_NEW_CYCLE
382 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
384 #undef TARGET_SCHED_INIT_GLOBAL
385 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
387 #undef TARGET_SCHED_FINISH_GLOBAL
388 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
390 #undef TARGET_SCHED_VARIABLE_ISSUE
391 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
393 #undef TARGET_SCHED_REORDER
394 #define TARGET_SCHED_REORDER sh_reorder
396 #undef TARGET_SCHED_REORDER2
397 #define TARGET_SCHED_REORDER2 sh_reorder2
399 #undef TARGET_SCHED_INIT
400 #define TARGET_SCHED_INIT sh_md_init
402 #undef TARGET_CANNOT_MODIFY_JUMPS_P
403 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
404 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
405 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
406 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
407 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
408 sh_optimize_target_register_callee_saved
410 #undef TARGET_MS_BITFIELD_LAYOUT_P
411 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
413 #undef TARGET_INIT_BUILTINS
414 #define TARGET_INIT_BUILTINS sh_init_builtins
415 #undef TARGET_EXPAND_BUILTIN
416 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
418 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
419 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
421 #undef TARGET_CANNOT_COPY_INSN_P
422 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
423 #undef TARGET_RTX_COSTS
424 #define TARGET_RTX_COSTS sh_rtx_costs
425 #undef TARGET_ADDRESS_COST
426 #define TARGET_ADDRESS_COST sh_address_cost
427 #undef TARGET_ALLOCATE_INITIAL_VALUE
428 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
430 #undef TARGET_MACHINE_DEPENDENT_REORG
431 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
433 #ifdef HAVE_AS_TLS
434 #undef TARGET_HAVE_TLS
435 #define TARGET_HAVE_TLS true
436 #endif
438 #undef TARGET_PROMOTE_PROTOTYPES
439 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
440 #undef TARGET_PROMOTE_FUNCTION_ARGS
441 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
442 #undef TARGET_PROMOTE_FUNCTION_RETURN
443 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
445 #undef TARGET_STRUCT_VALUE_RTX
446 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
447 #undef TARGET_RETURN_IN_MEMORY
448 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
450 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
451 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
452 #undef TARGET_SETUP_INCOMING_VARARGS
453 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
454 #undef TARGET_STRICT_ARGUMENT_NAMING
455 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
456 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
457 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
458 #undef TARGET_MUST_PASS_IN_STACK
459 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
460 #undef TARGET_PASS_BY_REFERENCE
461 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
462 #undef TARGET_CALLEE_COPIES
463 #define TARGET_CALLEE_COPIES sh_callee_copies
464 #undef TARGET_ARG_PARTIAL_BYTES
465 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
467 #undef TARGET_BUILD_BUILTIN_VA_LIST
468 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
469 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
470 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
472 #undef TARGET_VECTOR_MODE_SUPPORTED_P
473 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
475 #undef TARGET_CHECK_PCH_TARGET_FLAGS
476 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
478 #undef TARGET_DWARF_CALLING_CONVENTION
479 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
481 /* Return regmode weight for insn. */
482 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
484 /* Return current register pressure for regmode. */
485 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
487 #ifdef SYMBIAN
489 #undef TARGET_ENCODE_SECTION_INFO
490 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
491 #undef TARGET_STRIP_NAME_ENCODING
492 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
493 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
494 #define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class
496 #endif /* SYMBIAN */
498 #ifdef TARGET_ADJUST_UNROLL_MAX
499 #undef TARGET_ADJUST_UNROLL_MAX
500 #define TARGET_ADJUST_UNROLL_MAX sh_adjust_unroll_max
501 #endif
503 struct gcc_target targetm = TARGET_INITIALIZER;
505 /* Implement TARGET_HANDLE_OPTION. */
507 static bool
508 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
509 int value ATTRIBUTE_UNUSED)
511 switch (code)
513 case OPT_m1:
514 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
515 return true;
517 case OPT_m2:
518 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
519 return true;
521 case OPT_m2a:
522 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
523 return true;
525 case OPT_m2a_nofpu:
526 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
527 return true;
529 case OPT_m2a_single:
530 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
531 return true;
533 case OPT_m2a_single_only:
534 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
535 return true;
537 case OPT_m2e:
538 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
539 return true;
541 case OPT_m3:
542 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
543 return true;
545 case OPT_m3e:
546 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
547 return true;
549 case OPT_m4:
550 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
551 return true;
553 case OPT_m4_nofpu:
554 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
555 return true;
557 case OPT_m4_single:
558 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
559 return true;
561 case OPT_m4_single_only:
562 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
563 return true;
565 case OPT_m4a:
566 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
567 return true;
569 case OPT_m4a_nofpu:
570 case OPT_m4al:
571 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
572 return true;
574 case OPT_m4a_single:
575 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
576 return true;
578 case OPT_m4a_single_only:
579 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
580 return true;
582 case OPT_m5_32media:
583 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
584 return true;
586 case OPT_m5_32media_nofpu:
587 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
588 return true;
590 case OPT_m5_64media:
591 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
592 return true;
594 case OPT_m5_64media_nofpu:
595 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
596 return true;
598 case OPT_m5_compact:
599 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
600 return true;
602 case OPT_m5_compact_nofpu:
603 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
604 return true;
606 default:
607 return true;
611 /* Print the operand address in x to the stream. */
613 void
614 print_operand_address (FILE *stream, rtx x)
616 switch (GET_CODE (x))
618 case REG:
619 case SUBREG:
620 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
621 break;
623 case PLUS:
625 rtx base = XEXP (x, 0);
626 rtx index = XEXP (x, 1);
628 switch (GET_CODE (index))
630 case CONST_INT:
631 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
632 reg_names[true_regnum (base)]);
633 break;
635 case REG:
636 case SUBREG:
638 int base_num = true_regnum (base);
639 int index_num = true_regnum (index);
641 fprintf (stream, "@(r0,%s)",
642 reg_names[MAX (base_num, index_num)]);
643 break;
646 default:
647 gcc_unreachable ();
650 break;
652 case PRE_DEC:
653 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
654 break;
656 case POST_INC:
657 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
658 break;
660 default:
661 x = mark_constant_pool_use (x);
662 output_addr_const (stream, x);
663 break;
667 /* Print operand x (an rtx) in assembler syntax to file stream
668 according to modifier code.
670 '.' print a .s if insn needs delay slot
671 ',' print LOCAL_LABEL_PREFIX
672 '@' print trap, rte or rts depending upon pragma interruptness
673 '#' output a nop if there is nothing to put in the delay slot
674 ''' print likelihood suffix (/u for unlikely).
675 '>' print branch target if -fverbose-asm
676 'O' print a constant without the #
677 'R' print the LSW of a dp value - changes if in little endian
678 'S' print the MSW of a dp value - changes if in little endian
679 'T' print the next word of a dp value - same as 'R' in big endian mode.
680 'M' print an `x' if `m' will print `base,index'.
681 'N' print 'r63' if the operand is (const_int 0).
682 'd' print a V2SF reg as dN instead of fpN.
683 'm' print a pair `base,offset' or `base,index', for LD and ST.
684 'U' Likewise for {LD,ST}{HI,LO}.
685 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
686 'o' output an operator. */
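/* Worked example: for a DFmode value held in the general register pair
   r4/r5 on a little-endian target, LSW is 0 and MSW is 1, so '%R'
   prints "r4" (the least significant word) and '%S' prints "r5" (the
   most significant word); on a big-endian target the two swap.  '%T'
   always prints the second register of the pair, here "r5".  */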
688 void
689 print_operand (FILE *stream, rtx x, int code)
691 int regno;
692 enum machine_mode mode;
694 switch (code)
696 case '.':
697 if (final_sequence
698 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
699 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
700 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
701 break;
702 case ',':
703 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
704 break;
705 case '@':
706 if (trap_exit)
707 fprintf (stream, "trapa #%d", trap_exit);
708 else if (sh_cfun_interrupt_handler_p ())
709 fprintf (stream, "rte");
710 else
711 fprintf (stream, "rts");
712 break;
713 case '#':
714 /* Output a nop if there's nothing in the delay slot. */
715 if (dbr_sequence_length () == 0)
716 fprintf (stream, "\n\tnop");
717 break;
718 case '\'':
720 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
722 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
723 fputs ("/u", stream);
724 break;
726 case '>':
727 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
729 fputs ("\t! target: ", stream);
730 output_addr_const (stream, JUMP_LABEL (current_output_insn));
732 break;
733 case 'O':
734 x = mark_constant_pool_use (x);
735 output_addr_const (stream, x);
736 break;
737 /* N.B.: %R / %S / %T adjust memory addresses by four.
738 For SHMEDIA, that means they can be used to access the first and
739 second 32 bit part of a 64 bit (or larger) value that
740 might be held in floating point registers or memory.
741 While they can be used to access 64 bit parts of a larger value
742 held in general purpose registers, that won't work with memory -
743 neither for fp registers, since the frxx names are used. */
744 case 'R':
745 if (REG_P (x) || GET_CODE (x) == SUBREG)
747 regno = true_regnum (x);
748 regno += FP_REGISTER_P (regno) ? 1 : LSW;
749 fputs (reg_names[regno], (stream));
751 else if (MEM_P (x))
753 x = adjust_address (x, SImode, 4 * LSW);
754 print_operand_address (stream, XEXP (x, 0));
756 else
758 rtx sub = NULL_RTX;
760 mode = GET_MODE (x);
761 if (mode == VOIDmode)
762 mode = DImode;
763 if (GET_MODE_SIZE (mode) >= 8)
764 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
765 if (sub)
766 print_operand (stream, sub, 0);
767 else
768 output_operand_lossage ("invalid operand to %%R");
770 break;
771 case 'S':
772 if (REG_P (x) || GET_CODE (x) == SUBREG)
774 regno = true_regnum (x);
775 regno += FP_REGISTER_P (regno) ? 0 : MSW;
776 fputs (reg_names[regno], (stream));
778 else if (MEM_P (x))
780 x = adjust_address (x, SImode, 4 * MSW);
781 print_operand_address (stream, XEXP (x, 0));
783 else
785 rtx sub = NULL_RTX;
787 mode = GET_MODE (x);
788 if (mode == VOIDmode)
789 mode = DImode;
790 if (GET_MODE_SIZE (mode) >= 8)
791 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
792 if (sub)
793 print_operand (stream, sub, 0);
794 else
795 output_operand_lossage ("invalid operand to %%S");
797 break;
798 case 'T':
799 /* Next word of a double. */
800 switch (GET_CODE (x))
802 case REG:
803 fputs (reg_names[REGNO (x) + 1], (stream));
804 break;
805 case MEM:
806 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
807 && GET_CODE (XEXP (x, 0)) != POST_INC)
808 x = adjust_address (x, SImode, 4);
809 print_operand_address (stream, XEXP (x, 0));
810 break;
811 default:
812 break;
814 break;
815 case 'o':
816 switch (GET_CODE (x))
818 case PLUS: fputs ("add", stream); break;
819 case MINUS: fputs ("sub", stream); break;
820 case MULT: fputs ("mul", stream); break;
821 case DIV: fputs ("div", stream); break;
822 case EQ: fputs ("eq", stream); break;
823 case NE: fputs ("ne", stream); break;
824 case GT: case LT: fputs ("gt", stream); break;
825 case GE: case LE: fputs ("ge", stream); break;
826 case GTU: case LTU: fputs ("gtu", stream); break;
827 case GEU: case LEU: fputs ("geu", stream); break;
828 default:
829 break;
831 break;
832 case 'M':
833 if (GET_CODE (x) == MEM
834 && GET_CODE (XEXP (x, 0)) == PLUS
835 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
836 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
837 fputc ('x', stream);
838 break;
840 case 'm':
841 gcc_assert (GET_CODE (x) == MEM);
842 x = XEXP (x, 0);
843 /* Fall through. */
844 case 'U':
845 switch (GET_CODE (x))
847 case REG:
848 case SUBREG:
849 print_operand (stream, x, 0);
850 fputs (", 0", stream);
851 break;
853 case PLUS:
854 print_operand (stream, XEXP (x, 0), 0);
855 fputs (", ", stream);
856 print_operand (stream, XEXP (x, 1), 0);
857 break;
859 default:
860 gcc_unreachable ();
862 break;
864 case 'd':
865 gcc_assert (GET_CODE (x) == REG && GET_MODE (x) == V2SFmode);
867 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
868 break;
870 case 'N':
871 if (x == CONST0_RTX (GET_MODE (x)))
873 fprintf ((stream), "r63");
874 break;
876 goto default_output;
877 case 'u':
878 if (GET_CODE (x) == CONST_INT)
880 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
881 break;
883 /* Fall through. */
885 default_output:
886 default:
887 regno = 0;
888 mode = GET_MODE (x);
890 switch (GET_CODE (x))
892 case TRUNCATE:
894 rtx inner = XEXP (x, 0);
895 int offset = 0;
896 enum machine_mode inner_mode;
898 /* We might see SUBREGs with vector mode registers inside. */
899 if (GET_CODE (inner) == SUBREG
900 && (GET_MODE_SIZE (GET_MODE (inner))
901 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
902 && subreg_lowpart_p (inner))
903 inner = SUBREG_REG (inner);
904 if (GET_CODE (inner) == CONST_INT)
906 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
907 goto default_output;
909 inner_mode = GET_MODE (inner);
910 if (GET_CODE (inner) == SUBREG
911 && (GET_MODE_SIZE (GET_MODE (inner))
912 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
913 && GET_CODE (SUBREG_REG (inner)) == REG)
915 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
916 GET_MODE (SUBREG_REG (inner)),
917 SUBREG_BYTE (inner),
918 GET_MODE (inner));
919 inner = SUBREG_REG (inner);
921 if (GET_CODE (inner) != REG || GET_MODE_SIZE (inner_mode) > 8)
922 abort ();
923 /* Floating point register pairs are always big endian;
924 general purpose registers are 64 bit wide. */
925 regno = REGNO (inner);
926 regno = (HARD_REGNO_NREGS (regno, inner_mode)
927 - HARD_REGNO_NREGS (regno, mode))
928 + offset;
929 x = inner;
930 goto reg;
932 case SIGN_EXTEND:
933 x = XEXP (x, 0);
934 goto reg;
935 /* FIXME: We need this on SHmedia32 because reload generates
936 some sign-extended HI or QI loads into DImode registers
937 but, because Pmode is SImode, the address ends up with a
938 subreg:SI of the DImode register. Maybe reload should be
939 fixed so as to apply alter_subreg to such loads? */
940 case IF_THEN_ELSE:
941 gcc_assert (trapping_target_operand (x, VOIDmode));
942 x = XEXP (XEXP (x, 2), 0);
943 goto default_output;
944 case SUBREG:
945 gcc_assert (SUBREG_BYTE (x) == 0
946 && GET_CODE (SUBREG_REG (x)) == REG);
948 x = SUBREG_REG (x);
949 /* Fall through. */
951 reg:
952 case REG:
953 regno += REGNO (x);
954 if (FP_REGISTER_P (regno)
955 && mode == V16SFmode)
956 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
957 else if (FP_REGISTER_P (REGNO (x))
958 && mode == V4SFmode)
959 fprintf ((stream), "fv%s", reg_names[regno] + 2);
960 else if (GET_CODE (x) == REG
961 && mode == V2SFmode)
962 fprintf ((stream), "fp%s", reg_names[regno] + 2);
963 else if (FP_REGISTER_P (REGNO (x))
964 && GET_MODE_SIZE (mode) > 4)
965 fprintf ((stream), "d%s", reg_names[regno] + 1);
966 else
967 fputs (reg_names[regno], (stream));
968 break;
970 case MEM:
971 output_address (XEXP (x, 0));
972 break;
974 case CONST:
975 if (TARGET_SHMEDIA
976 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
977 && (GET_MODE (XEXP (x, 0)) == DImode
978 || GET_MODE (XEXP (x, 0)) == SImode)
979 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
980 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
982 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
984 fputc ('(', stream);
985 if (GET_CODE (val) == ASHIFTRT)
987 fputc ('(', stream);
988 if (GET_CODE (XEXP (val, 0)) == CONST)
989 fputc ('(', stream);
990 output_addr_const (stream, XEXP (val, 0));
991 if (GET_CODE (XEXP (val, 0)) == CONST)
992 fputc (')', stream);
993 fputs (" >> ", stream);
994 output_addr_const (stream, XEXP (val, 1));
995 fputc (')', stream);
997 else
999 if (GET_CODE (val) == CONST)
1000 fputc ('(', stream);
1001 output_addr_const (stream, val);
1002 if (GET_CODE (val) == CONST)
1003 fputc (')', stream);
1005 fputs (" & 65535)", stream);
1006 break;
1009 /* Fall through. */
1010 default:
1011 if (TARGET_SH1)
1012 fputc ('#', stream);
1013 output_addr_const (stream, x);
1014 break;
1016 break;
1020 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1021 static void
1022 force_into (rtx value, rtx target)
1024 value = force_operand (value, target);
1025 if (! rtx_equal_p (value, target))
1026 emit_insn (gen_move_insn (target, value));
1029 /* Emit code to perform a block move. Choose the best method.
1031 OPERANDS[0] is the destination.
1032 OPERANDS[1] is the source.
1033 OPERANDS[2] is the size.
1034 OPERANDS[3] is the alignment safe to use. */
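/* In outline: use movua.l loads on SH4A when the destination is known
   to be word-aligned but the source may not be; call the specialized
   __movmemSI12_i4 / __movmem_i4_{odd,even} helpers on hardware SH4;
   call __movmemSI<n> for other small constant word-aligned sizes; fall
   back to the generic __movmem helper unless TARGET_SMALLCODE; and
   return 0 so the expander falls back to the default block move when
   none of these apply.  */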
1037 expand_block_move (rtx *operands)
1039 int align = INTVAL (operands[3]);
1040 int constp = (GET_CODE (operands[2]) == CONST_INT);
1041 int bytes = (constp ? INTVAL (operands[2]) : 0);
1043 if (! constp)
1044 return 0;
1046 /* If we could use mov.l to move words and dest is word-aligned, we
1047 can use movua.l for loads and still generate a relatively short
1048 and efficient sequence. */
1049 if (TARGET_SH4A_ARCH && align < 4
1050 && MEM_ALIGN (operands[0]) >= 32
1051 && can_move_by_pieces (bytes, 32))
1053 rtx dest = copy_rtx (operands[0]);
1054 rtx src = copy_rtx (operands[1]);
1055 /* We could use different pseudos for each copied word, but
1056 since movua can only load into r0, it's kind of
1057 pointless. */
1058 rtx temp = gen_reg_rtx (SImode);
1059 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1060 int copied = 0;
1062 while (copied + 4 <= bytes)
1064 rtx to = adjust_address (dest, SImode, copied);
1065 rtx from = adjust_automodify_address (src, SImode, src_addr, copied);
1067 emit_insn (gen_movua (temp, from));
1068 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1069 emit_move_insn (to, temp);
1070 copied += 4;
1073 if (copied < bytes)
1074 move_by_pieces (adjust_address (dest, BLKmode, copied),
1075 adjust_automodify_address (src, BLKmode,
1076 src_addr, copied),
1077 bytes - copied, align, 0);
1079 return 1;
1082 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1083 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1084 if (align < 4 || (bytes % 4 != 0))
1085 return 0;
1087 if (TARGET_HARD_SH4)
1089 if (bytes < 12)
1090 return 0;
1091 else if (bytes == 12)
1093 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1094 rtx r4 = gen_rtx_REG (SImode, 4);
1095 rtx r5 = gen_rtx_REG (SImode, 5);
1097 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1098 force_into (XEXP (operands[0], 0), r4);
1099 force_into (XEXP (operands[1], 0), r5);
1100 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1101 return 1;
1103 else if (! TARGET_SMALLCODE)
1105 const char *entry_name;
1106 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1107 int dwords;
1108 rtx r4 = gen_rtx_REG (SImode, 4);
1109 rtx r5 = gen_rtx_REG (SImode, 5);
1110 rtx r6 = gen_rtx_REG (SImode, 6);
1112 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1113 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1114 force_into (XEXP (operands[0], 0), r4);
1115 force_into (XEXP (operands[1], 0), r5);
1117 dwords = bytes >> 3;
1118 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1119 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1120 return 1;
1122 else
1123 return 0;
1125 if (bytes < 64)
1127 char entry[30];
1128 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1129 rtx r4 = gen_rtx_REG (SImode, 4);
1130 rtx r5 = gen_rtx_REG (SImode, 5);
1132 sprintf (entry, "__movmemSI%d", bytes);
1133 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1134 force_into (XEXP (operands[0], 0), r4);
1135 force_into (XEXP (operands[1], 0), r5);
1136 emit_insn (gen_block_move_real (func_addr_rtx));
1137 return 1;
1140 /* This is the same number of bytes as a memcpy call, but to a different
1141 less common function name, so this will occasionally use more space. */
1142 if (! TARGET_SMALLCODE)
1144 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1145 int final_switch, while_loop;
1146 rtx r4 = gen_rtx_REG (SImode, 4);
1147 rtx r5 = gen_rtx_REG (SImode, 5);
1148 rtx r6 = gen_rtx_REG (SImode, 6);
1150 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1151 force_into (XEXP (operands[0], 0), r4);
1152 force_into (XEXP (operands[1], 0), r5);
 1154 /* r6 controls the size of the move. It is decremented by 16
 1155 for each 64 bytes moved. Then the negative value left over is used
1156 as an index into a list of move instructions. e.g., a 72 byte move
1157 would be set up with size(r6) = 14, for one iteration through the
1158 big while loop, and a switch of -2 for the last part. */
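/* Working the 72 byte example through the formulas below: bytes / 4 is
   18, so final_switch is 16 - (18 % 16) = 14 and while_loop is
   (18 / 16 - 1) * 16 = 0, giving r6 = 14; the library routine's big
   loop then subtracts 16 once, leaving -2 to select the tail moves for
   the remaining 8 bytes.  */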
1160 final_switch = 16 - ((bytes / 4) % 16);
1161 while_loop = ((bytes / 4) / 16 - 1) * 16;
1162 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1163 emit_insn (gen_block_lump_real (func_addr_rtx));
1164 return 1;
1167 return 0;
1170 /* Prepare operands for a move define_expand; specifically, one of the
1171 operands must be in a register. */
1174 prepare_move_operands (rtx operands[], enum machine_mode mode)
1176 if ((mode == SImode || mode == DImode)
1177 && flag_pic
1178 && ! ((mode == Pmode || mode == ptr_mode)
1179 && tls_symbolic_operand (operands[1], Pmode) != 0))
1181 rtx temp;
1182 if (SYMBOLIC_CONST_P (operands[1]))
1184 if (GET_CODE (operands[0]) == MEM)
1185 operands[1] = force_reg (Pmode, operands[1]);
1186 else if (TARGET_SHMEDIA
1187 && GET_CODE (operands[1]) == LABEL_REF
1188 && target_reg_operand (operands[0], mode))
1189 /* It's ok. */;
1190 else
1192 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
1193 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1196 else if (GET_CODE (operands[1]) == CONST
1197 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1198 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1200 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
1201 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1202 mode, temp);
1203 operands[1] = expand_binop (mode, add_optab, temp,
1204 XEXP (XEXP (operands[1], 0), 1),
1205 no_new_pseudos ? temp
1206 : gen_reg_rtx (Pmode),
1207 0, OPTAB_LIB_WIDEN);
1211 if (! reload_in_progress && ! reload_completed)
1213 /* Copy the source to a register if both operands aren't registers. */
1214 if (! register_operand (operands[0], mode)
1215 && ! sh_register_operand (operands[1], mode))
1216 operands[1] = copy_to_mode_reg (mode, operands[1]);
1218 if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
1220 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
1221 except that we can't use that function because it is static. */
1222 rtx new = change_address (operands[0], mode, 0);
1223 MEM_COPY_ATTRIBUTES (new, operands[0]);
1224 operands[0] = new;
1227 /* This case can happen while generating code to move the result
1228 of a library call to the target. Reject `st r0,@(rX,rY)' because
1229 reload will fail to find a spill register for rX, since r0 is already
1230 being used for the source. */
1231 else if (TARGET_SH1
1232 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1233 && GET_CODE (operands[0]) == MEM
1234 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1235 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
1236 operands[1] = copy_to_mode_reg (mode, operands[1]);
1239 if (mode == Pmode || mode == ptr_mode)
1241 rtx op0, op1, opc;
1242 enum tls_model tls_kind;
1244 op0 = operands[0];
1245 op1 = operands[1];
1246 if (GET_CODE (op1) == CONST
1247 && GET_CODE (XEXP (op1, 0)) == PLUS
1248 && tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode))
1250 opc = XEXP (XEXP (op1, 0), 1);
1251 op1 = XEXP (XEXP (op1, 0), 0);
1253 else
1254 opc = NULL_RTX;
1256 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
1258 rtx tga_op1, tga_ret, tmp, tmp2;
1260 switch (tls_kind)
1262 case TLS_MODEL_GLOBAL_DYNAMIC:
1263 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1264 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1265 op1 = tga_ret;
1266 break;
1268 case TLS_MODEL_LOCAL_DYNAMIC:
1269 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1270 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1272 tmp = gen_reg_rtx (Pmode);
1273 emit_move_insn (tmp, tga_ret);
1275 if (register_operand (op0, Pmode))
1276 tmp2 = op0;
1277 else
1278 tmp2 = gen_reg_rtx (Pmode);
1280 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1281 op1 = tmp2;
1282 break;
1284 case TLS_MODEL_INITIAL_EXEC:
1285 if (! flag_pic)
1287 /* Don't schedule insns for getting GOT address when
1288 the first scheduling is enabled, to avoid spill
1289 failures for R0. */
1290 if (flag_schedule_insns)
1291 emit_insn (gen_blockage ());
1292 emit_insn (gen_GOTaddr2picreg ());
1293 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode,
1294 PIC_REG)));
1295 if (flag_schedule_insns)
1296 emit_insn (gen_blockage ());
1298 tga_op1 = no_new_pseudos ? op0 : gen_reg_rtx (Pmode);
1299 tmp = gen_sym2GOTTPOFF (op1);
1300 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1301 op1 = tga_op1;
1302 break;
1304 case TLS_MODEL_LOCAL_EXEC:
1305 tmp2 = gen_reg_rtx (Pmode);
1306 emit_insn (gen_load_gbr (tmp2));
1307 tmp = gen_reg_rtx (Pmode);
1308 emit_insn (gen_symTPOFF2reg (tmp, op1));
1310 if (register_operand (op0, Pmode))
1311 op1 = op0;
1312 else
1313 op1 = gen_reg_rtx (Pmode);
1315 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1316 break;
1318 default:
1319 gcc_unreachable ();
1321 if (opc)
1322 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1323 operands[1] = op1;
1327 return 0;
1330 /* Prepare the operands for an scc instruction; make sure that the
1331 compare has been done. */
1333 prepare_scc_operands (enum rtx_code code)
1335 rtx t_reg = gen_rtx_REG (SImode, T_REG);
1336 enum rtx_code oldcode = code;
1337 enum machine_mode mode;
1339 /* First need a compare insn. */
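/* The SH compare instructions only test eq, gt, ge, hi (gtu) and hs
   (geu), so LT, LE, LTU and LEU are obtained below by switching to the
   reversed condition and swapping the operands.  */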
1340 switch (code)
1342 case NE:
1343 /* It isn't possible to handle this case. */
1344 gcc_unreachable ();
1345 case LT:
1346 code = GT;
1347 break;
1348 case LE:
1349 code = GE;
1350 break;
1351 case LTU:
1352 code = GTU;
1353 break;
1354 case LEU:
1355 code = GEU;
1356 break;
1357 default:
1358 break;
1360 if (code != oldcode)
1362 rtx tmp = sh_compare_op0;
1363 sh_compare_op0 = sh_compare_op1;
1364 sh_compare_op1 = tmp;
1367 mode = GET_MODE (sh_compare_op0);
1368 if (mode == VOIDmode)
1369 mode = GET_MODE (sh_compare_op1);
1371 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1372 if ((code != EQ && code != NE
1373 && (sh_compare_op1 != const0_rtx
1374 || code == GTU || code == GEU || code == LTU || code == LEU))
1375 || (mode == DImode && sh_compare_op1 != const0_rtx)
1376 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1377 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1379 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1380 (mode == SFmode ? emit_sf_insn : emit_df_insn)
1381 (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
1382 gen_rtx_SET (VOIDmode, t_reg,
1383 gen_rtx_fmt_ee (code, SImode,
1384 sh_compare_op0, sh_compare_op1)),
1385 gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
1386 else
1387 emit_insn (gen_rtx_SET (VOIDmode, t_reg,
1388 gen_rtx_fmt_ee (code, SImode,
1389 sh_compare_op0, sh_compare_op1)));
1391 return t_reg;
1394 /* Called from the md file, set up the operands of a compare instruction. */
1396 void
1397 from_compare (rtx *operands, int code)
1399 enum machine_mode mode = GET_MODE (sh_compare_op0);
1400 rtx insn;
1401 if (mode == VOIDmode)
1402 mode = GET_MODE (sh_compare_op1);
1403 if (code != EQ
1404 || mode == DImode
1405 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1407 /* Force args into regs, since we can't use constants here. */
1408 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1409 if (sh_compare_op1 != const0_rtx
1410 || code == GTU || code == GEU
1411 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1412 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1414 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
1416 from_compare (operands, GT);
1417 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
1419 else
1420 insn = gen_rtx_SET (VOIDmode,
1421 gen_rtx_REG (SImode, T_REG),
1422 gen_rtx_fmt_ee (code, SImode,
1423 sh_compare_op0, sh_compare_op1));
1424 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1426 insn = gen_rtx_PARALLEL (VOIDmode,
1427 gen_rtvec (2, insn,
1428 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
1429 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
1431 else
1432 emit_insn (insn);
1435 /* Functions to output assembly code. */
1437 /* Return a sequence of instructions to perform DI or DF move.
1439 Since the SH cannot move a DI or DF in one instruction, we have
1440 to take care when we see overlapping source and dest registers. */
1442 const char *
1443 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
1444 enum machine_mode mode)
1446 rtx dst = operands[0];
1447 rtx src = operands[1];
1449 if (GET_CODE (dst) == MEM
1450 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
1451 return "mov.l %T1,%0\n\tmov.l %1,%0";
1453 if (register_operand (dst, mode)
1454 && register_operand (src, mode))
1456 if (REGNO (src) == MACH_REG)
1457 return "sts mach,%S0\n\tsts macl,%R0";
1459 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1460 when mov.d r1,r0 do r1->r0 then r2->r1. */
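/* Example: for mov.d r1,r2 the destination pair is r2/r3 and the
   source pair is r1/r2; copying the low words first would clobber r2
   (the source's second word) before it is read, so the %T copy must be
   emitted first in that case.  */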
1462 if (REGNO (src) + 1 == REGNO (dst))
1463 return "mov %T1,%T0\n\tmov %1,%0";
1464 else
1465 return "mov %1,%0\n\tmov %T1,%T0";
1467 else if (GET_CODE (src) == CONST_INT)
1469 if (INTVAL (src) < 0)
1470 output_asm_insn ("mov #-1,%S0", operands);
1471 else
1472 output_asm_insn ("mov #0,%S0", operands);
1474 return "mov %1,%R0";
1476 else if (GET_CODE (src) == MEM)
1478 int ptrreg = -1;
1479 int dreg = REGNO (dst);
1480 rtx inside = XEXP (src, 0);
1482 switch (GET_CODE (inside))
1484 case REG:
1485 ptrreg = REGNO (inside);
1486 break;
1488 case SUBREG:
1489 ptrreg = subreg_regno (inside);
1490 break;
1492 case PLUS:
1493 ptrreg = REGNO (XEXP (inside, 0));
 1494 /* ??? An r0+REG address shouldn't be possible here, because it isn't
1495 an offsettable address. Unfortunately, offsettable addresses use
1496 QImode to check the offset, and a QImode offsettable address
1497 requires r0 for the other operand, which is not currently
1498 supported, so we can't use the 'o' constraint.
1499 Thus we must check for and handle r0+REG addresses here.
1500 We punt for now, since this is likely very rare. */
1501 gcc_assert (GET_CODE (XEXP (inside, 1)) != REG);
1502 break;
1504 case LABEL_REF:
1505 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1506 case POST_INC:
1507 return "mov.l %1,%0\n\tmov.l %1,%T0";
1508 default:
1509 gcc_unreachable ();
1512 /* Work out the safe way to copy. Copy into the second half first. */
1513 if (dreg == ptrreg)
1514 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1517 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1520 /* Print an instruction which would have gone into a delay slot after
1521 another instruction, but couldn't because the other instruction expanded
1522 into a sequence where putting the slot insn at the end wouldn't work. */
1524 static void
1525 print_slot (rtx insn)
1527 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
1529 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
1532 const char *
1533 output_far_jump (rtx insn, rtx op)
1535 struct { rtx lab, reg, op; } this;
1536 rtx braf_base_lab = NULL_RTX;
1537 const char *jump;
1538 int far;
1539 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1540 rtx prev;
1542 this.lab = gen_label_rtx ();
1544 if (TARGET_SH2
1545 && offset >= -32764
1546 && offset - get_attr_length (insn) <= 32766)
1548 far = 0;
1549 jump = "mov.w %O0,%1; braf %1";
1551 else
1553 far = 1;
1554 if (flag_pic)
1556 if (TARGET_SH2)
1557 jump = "mov.l %O0,%1; braf %1";
1558 else
1559 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1561 else
1562 jump = "mov.l %O0,%1; jmp @%1";
1564 /* If we have a scratch register available, use it. */
1565 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1566 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1568 this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1569 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1570 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1571 output_asm_insn (jump, &this.lab);
1572 if (dbr_sequence_length ())
1573 print_slot (final_sequence);
1574 else
1575 output_asm_insn ("nop", 0);
1577 else
1579 /* Output the delay slot insn first if any. */
1580 if (dbr_sequence_length ())
1581 print_slot (final_sequence);
1583 this.reg = gen_rtx_REG (SImode, 13);
1584 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1585 Fortunately, MACL is fixed and call-clobbered, and we never
 1586 need its value across jumps, so save r13 in it instead of on
 1587 the stack. */
1588 if (TARGET_SH5)
1589 output_asm_insn ("lds r13, macl", 0);
1590 else
1591 output_asm_insn ("mov.l r13,@-r15", 0);
1592 output_asm_insn (jump, &this.lab);
1593 if (TARGET_SH5)
1594 output_asm_insn ("sts macl, r13", 0);
1595 else
1596 output_asm_insn ("mov.l @r15+,r13", 0);
1598 if (far && flag_pic && TARGET_SH2)
1600 braf_base_lab = gen_label_rtx ();
1601 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1602 CODE_LABEL_NUMBER (braf_base_lab));
1604 if (far)
1605 output_asm_insn (".align 2", 0);
1606 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1607 this.op = op;
1608 if (far && flag_pic)
1610 if (TARGET_SH2)
1611 this.lab = braf_base_lab;
1612 output_asm_insn (".long %O2-%O0", &this.lab);
1614 else
1615 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
1616 return "";
1619 /* Local label counter, used for constants in the pool and inside
1620 pattern branches. */
1622 static int lf = 100;
1624 /* Output code for ordinary branches. */
1626 const char *
1627 output_branch (int logic, rtx insn, rtx *operands)
1629 switch (get_attr_length (insn))
1631 case 6:
1632 /* This can happen if filling the delay slot has caused a forward
1633 branch to exceed its range (we could reverse it, but only
1634 when we know we won't overextend other branches; this should
1635 best be handled by relaxation).
 1636 It can also happen when other condbranches hoist delay slot insns
1637 from their destination, thus leading to code size increase.
1638 But the branch will still be in the range -4092..+4098 bytes. */
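/* In that case the sense of the branch is reversed and it is made to
   skip over an unconditional bra to the real target, i.e. roughly

       bf    .LFnn      ! or bt, possibly with a filled delay slot
       bra   target
       nop
   .LFnn:

   as emitted by the code below.  */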
1640 if (! TARGET_RELAX)
1642 int label = lf++;
1643 /* The call to print_slot will clobber the operands. */
1644 rtx op0 = operands[0];
1646 /* If the instruction in the delay slot is annulled (true), then
1647 there is no delay slot where we can put it now. The only safe
1648 place for it is after the label. final will do that by default. */
1650 if (final_sequence
1651 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1652 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1654 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
1655 ASSEMBLER_DIALECT ? "/" : ".", label);
1656 print_slot (final_sequence);
1658 else
1659 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
1661 output_asm_insn ("bra\t%l0", &op0);
1662 fprintf (asm_out_file, "\tnop\n");
1663 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1665 return "";
1667 /* When relaxing, handle this like a short branch. The linker
1668 will fix it up if it still doesn't fit after relaxation. */
1669 case 2:
1670 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
1672 /* These are for SH2e, in which we have to account for the
1673 extra nop because of the hardware bug in annulled branches. */
1674 case 8:
1675 if (! TARGET_RELAX)
1677 int label = lf++;
1679 gcc_assert (!final_sequence
1680 || !(INSN_ANNULLED_BRANCH_P
1681 (XVECEXP (final_sequence, 0, 0))));
1682 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
1683 logic ? "f" : "t",
1684 ASSEMBLER_DIALECT ? "/" : ".", label);
1685 fprintf (asm_out_file, "\tnop\n");
1686 output_asm_insn ("bra\t%l0", operands);
1687 fprintf (asm_out_file, "\tnop\n");
1688 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1690 return "";
1692 /* When relaxing, fall through. */
1693 case 4:
1695 char buffer[10];
1697 sprintf (buffer, "b%s%ss\t%%l0",
1698 logic ? "t" : "f",
1699 ASSEMBLER_DIALECT ? "/" : ".");
1700 output_asm_insn (buffer, &operands[0]);
1701 return "nop";
1704 default:
1705 /* There should be no longer branches now - that would
1706 indicate that something has destroyed the branches set
1707 up in machine_dependent_reorg. */
1708 gcc_unreachable ();
1712 const char *
1713 output_branchy_insn (enum rtx_code code, const char *template,
1714 rtx insn, rtx *operands)
1716 rtx next_insn = NEXT_INSN (insn);
1718 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
1720 rtx src = SET_SRC (PATTERN (next_insn));
1721 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
1723 /* Following branch not taken */
1724 operands[9] = gen_label_rtx ();
1725 emit_label_after (operands[9], next_insn);
1726 INSN_ADDRESSES_NEW (operands[9],
1727 INSN_ADDRESSES (INSN_UID (next_insn))
1728 + get_attr_length (next_insn));
1729 return template;
1731 else
1733 int offset = (branch_dest (next_insn)
1734 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
1735 if (offset >= -252 && offset <= 258)
1737 if (GET_CODE (src) == IF_THEN_ELSE)
1738 /* branch_true */
1739 src = XEXP (src, 1);
1740 operands[9] = src;
1741 return template;
1745 operands[9] = gen_label_rtx ();
1746 emit_label_after (operands[9], insn);
1747 INSN_ADDRESSES_NEW (operands[9],
1748 INSN_ADDRESSES (INSN_UID (insn))
1749 + get_attr_length (insn));
1750 return template;
1753 const char *
1754 output_ieee_ccmpeq (rtx insn, rtx *operands)
1756 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
1757 insn, operands);
1760 /* Output the start of the assembler file. */
1762 static void
1763 sh_file_start (void)
1765 default_file_start ();
1767 #ifdef SYMBIAN
1768 /* Declare the .directive section before it is used. */
1769 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
1770 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
1771 #endif
1773 if (TARGET_ELF)
1774 /* We need to show the text section with the proper
1775 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
1776 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
1777 will complain. We can teach GAS specifically about the
1778 default attributes for our choice of text section, but
1779 then we would have to change GAS again if/when we change
1780 the text section name. */
1781 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
1782 else
1783 /* Switch to the data section so that the coffsem symbol
1784 isn't in the text section. */
1785 data_section ();
1787 if (TARGET_LITTLE_ENDIAN)
1788 fputs ("\t.little\n", asm_out_file);
1790 if (!TARGET_ELF)
1792 if (TARGET_SHCOMPACT)
1793 fputs ("\t.mode\tSHcompact\n", asm_out_file);
1794 else if (TARGET_SHMEDIA)
1795 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1796 TARGET_SHMEDIA64 ? 64 : 32);
1800 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
1802 static bool
1803 unspec_caller_rtx_p (rtx pat)
1805 switch (GET_CODE (pat))
1807 case CONST:
1808 return unspec_caller_rtx_p (XEXP (pat, 0));
1809 case PLUS:
1810 case MINUS:
1811 if (unspec_caller_rtx_p (XEXP (pat, 0)))
1812 return true;
1813 return unspec_caller_rtx_p (XEXP (pat, 1));
1814 case UNSPEC:
1815 if (XINT (pat, 1) == UNSPEC_CALLER)
1816 return true;
1817 default:
1818 break;
1821 return false;
1824 /* Indicate that INSN cannot be duplicated. This is true for an insn
1825 that generates a unique label. */
1827 static bool
1828 sh_cannot_copy_insn_p (rtx insn)
1830 rtx pat;
1832 if (!reload_completed || !flag_pic)
1833 return false;
1835 if (GET_CODE (insn) != INSN)
1836 return false;
1837 if (asm_noperands (insn) >= 0)
1838 return false;
1840 pat = PATTERN (insn);
1841 if (GET_CODE (pat) != SET)
1842 return false;
1843 pat = SET_SRC (pat);
1845 if (unspec_caller_rtx_p (pat))
1846 return true;
1848 return false;
1851 /* Actual number of instructions used to make a shift by N. */
1852 static const char ashiftrt_insns[] =
1853 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
1855 /* Left shift and logical right shift are the same. */
1856 static const char shift_insns[] =
1857 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1859 /* Individual shift amounts needed to get the above length sequences.
1860 One bit right shifts clobber the T bit, so when possible, put one bit
1861 shifts in the middle of the sequence, so the ends are eligible for
1862 branch delay slots. */
1863 static const short shift_amounts[32][5] = {
1864 {0}, {1}, {2}, {2, 1},
1865 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
1866 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1867 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
1868 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1869 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1870 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1871 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
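/* For instance, a constant shift by 7 takes shift_insns[7] == 4 insns,
   using the amounts shift_amounts[7] == {2, 2, 1, 2}; a negative entry,
   as in shift_amounts[14] == {8, -2, 8}, means a shift in the opposite
   direction, so each row still sums to the requested shift count.  */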
1873 /* Likewise, but for shift amounts < 16, where up to three of the highmost bits
1874 might be clobbered. This is typically used when combined with some
1875 kind of sign or zero extension. */
1877 static const char ext_shift_insns[] =
1878 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1880 static const short ext_shift_amounts[32][4] = {
1881 {0}, {1}, {2}, {2, 1},
1882 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
1883 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1884 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
1885 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1886 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1887 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1888 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1890 /* Assuming we have a value that has been sign-extended by at least one bit,
1891 can we use the ext_shift_amounts sequence, with the last shift turned into an arithmetic shift,
1892 to shift it by N without data loss, and quicker than by other means? */
1893 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
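/* For the shift counts of interest here (0..31) this condition holds exactly
   for N == 7 and N == 15, the two rows above that end in a single one-bit
   right shift ({8, -1} and {16, -1}); that final shift can then be done
   arithmetically (shar) instead of logically without losing data.  */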
1895 /* This is used in length attributes in sh.md to help compute the length
1896 of arbitrary constant shift instructions. */
1899 shift_insns_rtx (rtx insn)
1901 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1902 int shift_count = INTVAL (XEXP (set_src, 1));
1903 enum rtx_code shift_code = GET_CODE (set_src);
1905 switch (shift_code)
1907 case ASHIFTRT:
1908 return ashiftrt_insns[shift_count];
1909 case LSHIFTRT:
1910 case ASHIFT:
1911 return shift_insns[shift_count];
1912 default:
1913 gcc_unreachable ();
1917 /* Return the cost of a shift. */
1919 static inline int
1920 shiftcosts (rtx x)
1922 int value;
1924 if (TARGET_SHMEDIA)
1925 return 1;
1927 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
1929 if (GET_MODE (x) == DImode
1930 && GET_CODE (XEXP (x, 1)) == CONST_INT
1931 && INTVAL (XEXP (x, 1)) == 1)
1932 return 2;
1934 /* Everything else is invalid, because there is no pattern for it. */
1935 return 10000;
1937 /* If the shift is by a non-constant amount, then this will be expensive. */
1938 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1939 return SH_DYNAMIC_SHIFT_COST;
1941 value = INTVAL (XEXP (x, 1));
1943 /* Otherwise, return the true cost in instructions. */
1944 if (GET_CODE (x) == ASHIFTRT)
1946 int cost = ashiftrt_insns[value];
1947 /* If SH3, then we put the constant in a reg and use shad. */
1948 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1949 cost = 1 + SH_DYNAMIC_SHIFT_COST;
1950 return cost;
1952 else
1953 return shift_insns[value];
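/* For example, an arithmetic right shift by 6 would cost ashiftrt_insns[6],
   i.e. 8 insns, if done with single-bit shifts; the code above caps that at
   1 + SH_DYNAMIC_SHIFT_COST, on the assumption that the negated count can be
   put in a register and a dynamic shift (shad) used instead.  */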
1956 /* Return the cost of an AND operation. */
1958 static inline int
1959 andcosts (rtx x)
1961 int i;
1963 /* ANDing with a register is a single one-cycle instruction. */
1964 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1965 return 1;
1967 i = INTVAL (XEXP (x, 1));
1969 if (TARGET_SHMEDIA)
1971 if ((GET_CODE (XEXP (x, 1)) == CONST_INT
1972 && CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1973 || EXTRA_CONSTRAINT_C16 (XEXP (x, 1)))
1974 return 1;
1975 else
1976 return 2;
1979 /* These constants are single cycle extu.[bw] instructions. */
1980 if (i == 0xff || i == 0xffff)
1981 return 1;
1982 /* These constants can be used in an and-immediate instruction in a single
1983 cycle, but that requires r0, so make them a little more expensive. */
1984 if (CONST_OK_FOR_K08 (i))
1985 return 2;
1986 /* Constants that can be loaded with a mov immediate and an and.
1987 This case is probably unnecessary. */
1988 if (CONST_OK_FOR_I08 (i))
1989 return 2;
1990 /* Any other constant requires a 2 cycle pc-relative load plus an and.
1991 This case is probably unnecessary. */
1992 return 3;
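/* To illustrate the cases above: a mask of 0xff or 0xffff costs 1 (it maps
   to extu.b / extu.w); a mask accepted by CONST_OK_FOR_K08 costs 2, since
   the and #imm,r0 form ties up r0; anything else costs 3 to account for the
   pc-relative load of the constant.  */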
1995 /* Return the cost of an addition or a subtraction. */
1997 static inline int
1998 addsubcosts (rtx x)
2000 /* Adding a register is a single cycle insn. */
2001 if (GET_CODE (XEXP (x, 1)) == REG
2002 || GET_CODE (XEXP (x, 1)) == SUBREG)
2003 return 1;
2005 /* Likewise for small constants. */
2006 if (GET_CODE (XEXP (x, 1)) == CONST_INT
2007 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2008 return 1;
2010 if (TARGET_SHMEDIA)
2011 switch (GET_CODE (XEXP (x, 1)))
2013 case CONST:
2014 case LABEL_REF:
2015 case SYMBOL_REF:
2016 return TARGET_SHMEDIA64 ? 5 : 3;
2018 case CONST_INT:
2019 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2020 return 2;
2021 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2022 return 3;
2023 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2024 return 4;
2026 /* Fall through. */
2027 default:
2028 return 5;
2031 /* Any other constant requires a 2 cycle pc-relative load plus an
2032 addition. */
2033 return 3;
2036 /* Return the cost of a multiply. */
2037 static inline int
2038 multcosts (rtx x ATTRIBUTE_UNUSED)
2040 if (sh_multcost >= 0)
2041 return sh_multcost;
2042 if (TARGET_SHMEDIA)
2043 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2044 accept constants. Ideally, we would use a cost of one or two and
2045 add the cost of the operand, but disregard the latter when inside loops
2046 and loop invariant code motion is still to follow.
2047 Using a multiply first and splitting it later if it's a loss
2048 doesn't work because of different sign / zero extension semantics
2049 of multiplies vs. shifts. */
2050 return TARGET_SMALLCODE ? 2 : 3;
2052 if (TARGET_SH2)
2054 /* We have a mul insn, so we can never take more than the mul and the
2055 read of the mac reg, but count more because of the latency and extra
2056 reg usage. */
2057 if (TARGET_SMALLCODE)
2058 return 2;
2059 return 3;
2062 /* If we're aiming at small code, then just count the number of
2063 insns in a multiply call sequence. */
2064 if (TARGET_SMALLCODE)
2065 return 5;
2067 /* Otherwise count all the insns in the routine we'd be calling too. */
2068 return 20;
2071 /* Compute a (partial) cost for rtx X. Return true if the complete
2072 cost has been computed, and false if subexpressions should be
2073 scanned. In either case, *TOTAL contains the cost result. */
2075 static bool
2076 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
2078 switch (code)
2080 case CONST_INT:
2081 if (TARGET_SHMEDIA)
2083 if (INTVAL (x) == 0)
2084 *total = 0;
2085 else if (outer_code == AND && and_operand ((x), DImode))
2086 *total = 0;
2087 else if ((outer_code == IOR || outer_code == XOR
2088 || outer_code == PLUS)
2089 && CONST_OK_FOR_I10 (INTVAL (x)))
2090 *total = 0;
2091 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2092 *total = COSTS_N_INSNS (outer_code != SET);
2093 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2094 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2095 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2096 *total = COSTS_N_INSNS (3);
2097 else
2098 *total = COSTS_N_INSNS (4);
2099 return true;
2101 if (CONST_OK_FOR_I08 (INTVAL (x)))
2102 *total = 0;
2103 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2104 && CONST_OK_FOR_K08 (INTVAL (x)))
2105 *total = 1;
2106 else
2107 *total = 8;
2108 return true;
2110 case CONST:
2111 case LABEL_REF:
2112 case SYMBOL_REF:
2113 if (TARGET_SHMEDIA64)
2114 *total = COSTS_N_INSNS (4);
2115 else if (TARGET_SHMEDIA32)
2116 *total = COSTS_N_INSNS (2);
2117 else
2118 *total = 5;
2119 return true;
2121 case CONST_DOUBLE:
2122 if (TARGET_SHMEDIA)
2123 *total = COSTS_N_INSNS (4);
2124 else
2125 *total = 10;
2126 return true;
2127 case CONST_VECTOR:
2128 if (x == CONST0_RTX (GET_MODE (x)))
2129 *total = 0;
2130 else if (sh_1el_vec (x, VOIDmode))
2131 *total = outer_code != SET;
2132 if (sh_rep_vec (x, VOIDmode))
2133 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2134 + (outer_code != SET));
2135 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2136 return true;
2138 case PLUS:
2139 case MINUS:
2140 *total = COSTS_N_INSNS (addsubcosts (x));
2141 return true;
2143 case AND:
2144 *total = COSTS_N_INSNS (andcosts (x));
2145 return true;
2147 case MULT:
2148 *total = COSTS_N_INSNS (multcosts (x));
2149 return true;
2151 case ASHIFT:
2152 case ASHIFTRT:
2153 case LSHIFTRT:
2154 *total = COSTS_N_INSNS (shiftcosts (x));
2155 return true;
2157 case DIV:
2158 case UDIV:
2159 case MOD:
2160 case UMOD:
2161 *total = COSTS_N_INSNS (20);
2162 return true;
2164 case PARALLEL:
2165 if (sh_1el_vec (x, VOIDmode))
2166 *total = outer_code != SET;
2167 if (sh_rep_vec (x, VOIDmode))
2168 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2169 + (outer_code != SET));
2170 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2171 return true;
2173 case FLOAT:
2174 case FIX:
2175 *total = 100;
2176 return true;
2178 default:
2179 return false;
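/* Some concrete values this gives on the non-SHmedia targets: a CONST_INT
   such as 5 that satisfies CONST_OK_FOR_I08 is free (0); a constant like
   0x123456 that needs a constant pool load gets a cost of 8; a PLUS of a
   register and a small constant comes out as COSTS_N_INSNS (1) via
   addsubcosts.  */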
2183 /* Compute the cost of an address. For the SH, all valid addresses have
2184 essentially the same cost. Use a slightly higher cost for reg + reg addressing,
2185 since it increases pressure on r0. */
2187 static int
2188 sh_address_cost (rtx X)
2190 return (GET_CODE (X) == PLUS
2191 && ! CONSTANT_P (XEXP (X, 1))
2192 && ! TARGET_SHMEDIA ? 1 : 0);
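/* E.g. an indexed address such as @(r0,r4), i.e. a PLUS whose second operand
   is not constant, costs 1 on non-SHmedia targets, while register indirect
   and register + displacement addresses cost 0.  */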
2195 /* Code to expand a shift. */
2197 void
2198 gen_ashift (int type, int n, rtx reg)
2200 /* Negative values here come from the shift_amounts array. */
2201 if (n < 0)
2203 if (type == ASHIFT)
2204 type = LSHIFTRT;
2205 else
2206 type = ASHIFT;
2207 n = -n;
2210 switch (type)
2212 case ASHIFTRT:
2213 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
2214 break;
2215 case LSHIFTRT:
2216 if (n == 1)
2217 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
2218 else
2219 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
2220 break;
2221 case ASHIFT:
2222 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
2223 break;
2227 /* Same for HImode */
2229 void
2230 gen_ashift_hi (int type, int n, rtx reg)
2232 /* Negative values here come from the shift_amounts array. */
2233 if (n < 0)
2235 if (type == ASHIFT)
2236 type = LSHIFTRT;
2237 else
2238 type = ASHIFT;
2239 n = -n;
2242 switch (type)
2244 case ASHIFTRT:
2245 case LSHIFTRT:
2246 /* We don't have HImode right shift operations because using the
2247 ordinary 32 bit shift instructions for that doesn't generate proper
2248 zero/sign extension.
2249 gen_ashift_hi is only called in contexts where we know that the
2250 sign extension works out correctly. */
2252 int offset = 0;
2253 if (GET_CODE (reg) == SUBREG)
2255 offset = SUBREG_BYTE (reg);
2256 reg = SUBREG_REG (reg);
2258 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
2259 break;
2261 case ASHIFT:
2262 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
2263 break;
2267 /* Output RTL to split a constant shift into its component SH constant
2268 shift instructions. */
2270 void
2271 gen_shifty_op (int code, rtx *operands)
2273 int value = INTVAL (operands[2]);
2274 int max, i;
2276 /* Truncate the shift count in case it is out of bounds. */
2277 value = value & 0x1f;
2279 if (value == 31)
2281 if (code == LSHIFTRT)
2283 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
2284 emit_insn (gen_movt (operands[0]));
2285 return;
2287 else if (code == ASHIFT)
2289 /* There is a two instruction sequence for 31 bit left shifts,
2290 but it requires r0. */
2291 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
2293 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
2294 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
2295 return;
2299 else if (value == 0)
2301 /* This can happen even when optimizing, if there were subregs before
2302 reload. Don't output a nop here, as this is never optimized away;
2303 use a no-op move instead. */
2304 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
2305 return;
2308 max = shift_insns[value];
2309 for (i = 0; i < max; i++)
2310 gen_ashift (code, shift_amounts[value][i], operands[0]);
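/* A minimal usage sketch (purely illustrative; the function name is made up
   and nothing calls it): with the tables above, a constant left shift by 6
   has shift_insns[6] == 3 and shift_amounts[6] == {2, 2, 2}, so gen_shifty_op
   emits three 2-bit left shifts (shll2).  Only operands[0] and operands[2]
   are read.  This assumes it runs during RTL expansion, where emitting insns
   is valid.  */
#if 0
static void
example_expand_shift_by_6 (rtx reg)
{
  rtx ops[3];

  ops[0] = reg;                 /* Shift REG in place.  */
  ops[2] = GEN_INT (6);         /* Constant shift count.  */
  gen_shifty_op (ASHIFT, ops);  /* Emits shll2; shll2; shll2.  */
}
#endif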
2313 /* Same as above, but optimized for values where the topmost bits don't
2314 matter. */
2316 void
2317 gen_shifty_hi_op (int code, rtx *operands)
2319 int value = INTVAL (operands[2]);
2320 int max, i;
2321 void (*gen_fun) (int, int, rtx);
2323 /* This operation is used by and_shl for SImode values with a few
2324 high bits known to be cleared. */
2325 value &= 31;
2326 if (value == 0)
2328 emit_insn (gen_nop ());
2329 return;
2332 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2333 if (code == ASHIFT)
2335 max = ext_shift_insns[value];
2336 for (i = 0; i < max; i++)
2337 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2339 else
2340 /* When shifting right, emit the shifts in reverse order, so that
2341 solitary negative values come first. */
2342 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2343 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2346 /* Output RTL for an arithmetic right shift. */
2348 /* ??? Rewrite to use super-optimizer sequences. */
2351 expand_ashiftrt (rtx *operands)
2353 rtx wrk;
2354 char func[18];
2355 int value;
2357 if (TARGET_SH3)
2359 if (GET_CODE (operands[2]) != CONST_INT)
2361 rtx count = copy_to_mode_reg (SImode, operands[2]);
2362 emit_insn (gen_negsi2 (count, count));
2363 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2364 return 1;
2366 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2367 > 1 + SH_DYNAMIC_SHIFT_COST)
2369 rtx count
2370 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2371 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2372 return 1;
2375 if (GET_CODE (operands[2]) != CONST_INT)
2376 return 0;
2378 value = INTVAL (operands[2]) & 31;
2380 if (value == 31)
2382 /* If we are called from abs expansion, arrange things so that we
2383 can use a single MT instruction that doesn't clobber the source,
2384 if LICM can hoist out the load of the constant zero. */
2385 if (currently_expanding_to_rtl)
2387 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
2388 operands[1]));
2389 emit_insn (gen_mov_neg_si_t (operands[0]));
2390 return 1;
2392 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2393 return 1;
2395 else if (value >= 16 && value <= 19)
2397 wrk = gen_reg_rtx (SImode);
2398 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2399 value -= 16;
2400 while (value--)
2401 gen_ashift (ASHIFTRT, 1, wrk);
2402 emit_move_insn (operands[0], wrk);
2403 return 1;
2405 /* Expand a short sequence inline; for a longer one, call a magic routine. */
2406 else if (value <= 5)
2408 wrk = gen_reg_rtx (SImode);
2409 emit_move_insn (wrk, operands[1]);
2410 while (value--)
2411 gen_ashift (ASHIFTRT, 1, wrk);
2412 emit_move_insn (operands[0], wrk);
2413 return 1;
2416 wrk = gen_reg_rtx (Pmode);
2418 /* Load the value into an arg reg and call a helper. */
2419 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2420 sprintf (func, "__ashiftrt_r4_%d", value);
2421 function_symbol (wrk, func, SFUNC_STATIC);
2422 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2423 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
2424 return 1;
2428 sh_dynamicalize_shift_p (rtx count)
2430 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
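/* In other words: prefer a dynamic (register-count) shift whenever the
   constant sequence would need more than 1 + SH_DYNAMIC_SHIFT_COST insns.
   E.g. a shift by 7 needs 4 insns from the table, so on targets where the
   dynamic shift cost (defined in sh.h) is 1 or 2, it is cheaper to load the
   count into a register and use shld.  */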
2433 /* Try to find a good way to implement the combiner pattern
2434 [(set (match_operand:SI 0 "register_operand" "r")
2435 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2436 (match_operand:SI 2 "const_int_operand" "n"))
2437 (match_operand:SI 3 "const_int_operand" "n"))) .
2438 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2439 return 0 for simple right / left or left/right shift combination.
2440 return 1 for a combination of shifts with zero_extend.
2441 return 2 for a combination of shifts with an AND that needs r0.
2442 return 3 for a combination of shifts with an AND that needs an extra
2443 scratch register, when the three highmost bits of the AND mask are clear.
2444 return 4 for a combination of shifts with an AND that needs an extra
2445 scratch register, when any of the three highmost bits of the AND mask
2446 is set.
2447 If ATTRP is set, store an initial right shift width in ATTRP[0],
2448 and the instruction length in ATTRP[1] . These values are not valid
2449 when returning 0.
2450 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2451 shift_amounts for the last shift value that is to be used before the
2452 sign extend. */
2454 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2456 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2457 int left = INTVAL (left_rtx), right;
2458 int best = 0;
2459 int cost, best_cost = 10000;
2460 int best_right = 0, best_len = 0;
2461 int i;
2462 int can_ext;
2464 if (left < 0 || left > 31)
2465 return 0;
2466 if (GET_CODE (mask_rtx) == CONST_INT)
2467 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2468 else
2469 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2470 /* Can this be expressed as a right shift / left shift pair? */
2471 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2472 right = exact_log2 (lsb);
2473 mask2 = ~(mask + lsb - 1);
2474 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2475 /* mask has no zeroes other than trailing zeroes <==> ! mask2 */
2476 if (! mask2)
2477 best_cost = shift_insns[right] + shift_insns[right + left];
2478 /* mask has no trailing zeroes <==> ! right */
2479 else if (! right && mask2 == ~(lsb2 - 1))
2481 int late_right = exact_log2 (lsb2);
2482 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2484 /* Try to use zero extend. */
2485 if (mask2 == ~(lsb2 - 1))
2487 int width, first;
2489 for (width = 8; width <= 16; width += 8)
2491 /* Can we zero-extend right away? */
2492 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2494 cost
2495 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2496 if (cost < best_cost)
2498 best = 1;
2499 best_cost = cost;
2500 best_right = right;
2501 best_len = cost;
2502 if (attrp)
2503 attrp[2] = -1;
2505 continue;
2507 /* ??? Could try to put zero extend into initial right shift,
2508 or even shift a bit left before the right shift. */
2509 /* Determine value of first part of left shift, to get to the
2510 zero extend cut-off point. */
2511 first = width - exact_log2 (lsb2) + right;
2512 if (first >= 0 && right + left - first >= 0)
2514 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2515 + ext_shift_insns[right + left - first];
2516 if (cost < best_cost)
2518 best = 1;
2519 best_cost = cost;
2520 best_right = right;
2521 best_len = cost;
2522 if (attrp)
2523 attrp[2] = first;
2528 /* Try to use r0 AND pattern */
2529 for (i = 0; i <= 2; i++)
2531 if (i > right)
2532 break;
2533 if (! CONST_OK_FOR_K08 (mask >> i))
2534 continue;
2535 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2536 if (cost < best_cost)
2538 best = 2;
2539 best_cost = cost;
2540 best_right = i;
2541 best_len = cost - 1;
2544 /* Try to use a scratch register to hold the AND operand. */
2545 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2546 for (i = 0; i <= 2; i++)
2548 if (i > right)
2549 break;
2550 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2551 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2552 if (cost < best_cost)
2554 best = 4 - can_ext;
2555 best_cost = cost;
2556 best_right = i;
2557 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2561 if (attrp)
2563 attrp[0] = best_right;
2564 attrp[1] = best_len;
2566 return best;
2569 /* This is used in length attributes of the unnamed instructions
2570 corresponding to shl_and_kind return values of 1 and 2. */
2572 shl_and_length (rtx insn)
2574 rtx set_src, left_rtx, mask_rtx;
2575 int attributes[3];
2577 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2578 left_rtx = XEXP (XEXP (set_src, 0), 1);
2579 mask_rtx = XEXP (set_src, 1);
2580 shl_and_kind (left_rtx, mask_rtx, attributes);
2581 return attributes[1];
2584 /* This is used in length attribute of the and_shl_scratch instruction. */
2587 shl_and_scr_length (rtx insn)
2589 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2590 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2591 rtx op = XEXP (set_src, 0);
2592 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2593 op = XEXP (XEXP (op, 0), 0);
2594 return len + shift_insns[INTVAL (XEXP (op, 1))];
2597 /* Generate rtl for instructions for which shl_and_kind advised a particular
2598 method of generating them, i.e. returned nonzero. */
2601 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2603 int attributes[3];
2604 unsigned HOST_WIDE_INT mask;
2605 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2606 int right, total_shift;
2607 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2609 right = attributes[0];
2610 total_shift = INTVAL (left_rtx) + right;
2611 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2612 switch (kind)
2614 default:
2615 return -1;
2616 case 1:
2618 int first = attributes[2];
2619 rtx operands[3];
2621 if (first < 0)
2623 emit_insn ((mask << right) <= 0xff
2624 ? gen_zero_extendqisi2 (dest,
2625 gen_lowpart (QImode, source))
2626 : gen_zero_extendhisi2 (dest,
2627 gen_lowpart (HImode, source)));
2628 source = dest;
2630 if (source != dest)
2631 emit_insn (gen_movsi (dest, source));
2632 operands[0] = dest;
2633 if (right)
2635 operands[2] = GEN_INT (right);
2636 gen_shifty_hi_op (LSHIFTRT, operands);
2638 if (first > 0)
2640 operands[2] = GEN_INT (first);
2641 gen_shifty_hi_op (ASHIFT, operands);
2642 total_shift -= first;
2643 mask <<= first;
2645 if (first >= 0)
2646 emit_insn (mask <= 0xff
2647 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
2648 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2649 if (total_shift > 0)
2651 operands[2] = GEN_INT (total_shift);
2652 gen_shifty_hi_op (ASHIFT, operands);
2654 break;
2656 case 4:
2657 shift_gen_fun = gen_shifty_op;
2658 case 3:
2659 /* If the topmost bit that matters is set, set the topmost bits
2660 that don't matter. This way, we might be able to get a shorter
2661 signed constant. */
2662 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
2663 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
2664 case 2:
2665 /* Don't expand fine-grained when combining, because that will
2666 make the pattern fail. */
2667 if (currently_expanding_to_rtl
2668 || reload_in_progress || reload_completed)
2670 rtx operands[3];
2672 /* Cases 3 and 4 should be handled by this split
2673 only while combining */
2674 gcc_assert (kind <= 2);
2675 if (right)
2677 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2678 source = dest;
2680 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
2681 if (total_shift)
2683 operands[0] = dest;
2684 operands[1] = dest;
2685 operands[2] = GEN_INT (total_shift);
2686 shift_gen_fun (ASHIFT, operands);
2688 break;
2690 else
2692 int neg = 0;
2693 if (kind != 4 && total_shift < 16)
2695 neg = -ext_shift_amounts[total_shift][1];
2696 if (neg > 0)
2697 neg -= ext_shift_amounts[total_shift][2];
2698 else
2699 neg = 0;
2701 emit_insn (gen_and_shl_scratch (dest, source,
2702 GEN_INT (right),
2703 GEN_INT (mask),
2704 GEN_INT (total_shift + neg),
2705 GEN_INT (neg)));
2706 emit_insn (gen_movsi (dest, dest));
2707 break;
2710 return 0;
2713 /* Try to find a good way to implement the combiner pattern
2714 [(set (match_operand:SI 0 "register_operand" "=r")
2715 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2716 (match_operand:SI 2 "const_int_operand" "n")
2717 (match_operand:SI 3 "const_int_operand" "n")
2718 (const_int 0)))
2719 (clobber (reg:SI T_REG))]
2720 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2721 return 0 for simple left / right shift combination.
2722 return 1 for left shift / 8 bit sign extend / left shift.
2723 return 2 for left shift / 16 bit sign extend / left shift.
2724 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2725 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2726 return 5 for left shift / 16 bit sign extend / right shift
2727 return 6 for < 8 bit sign extend / left shift.
2728 return 7 for < 8 bit sign extend / left shift / single right shift.
2729 If COSTP is nonzero, assign the calculated cost to *COSTP. */
2732 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
2734 int left, size, insize, ext;
2735 int cost = 0, best_cost;
2736 int kind;
2738 left = INTVAL (left_rtx);
2739 size = INTVAL (size_rtx);
2740 insize = size - left;
2741 gcc_assert (insize > 0);
2742 /* Default to left / right shift. */
2743 kind = 0;
2744 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
2745 if (size <= 16)
2747 /* 16 bit shift / sign extend / 16 bit shift */
2748 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2749 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2750 below, by alternative 3 or something even better. */
2751 if (cost < best_cost)
2753 kind = 5;
2754 best_cost = cost;
2757 /* Try a plain sign extend between two shifts. */
2758 for (ext = 16; ext >= insize; ext -= 8)
2760 if (ext <= size)
2762 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2763 if (cost < best_cost)
2765 kind = ext / (unsigned) 8;
2766 best_cost = cost;
2769 /* Check if we can do a sloppy shift with a final signed shift
2770 restoring the sign. */
2771 if (EXT_SHIFT_SIGNED (size - ext))
2772 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2773 /* If not, maybe it's still cheaper to do the second shift sloppy,
2774 and do a final sign extend? */
2775 else if (size <= 16)
2776 cost = ext_shift_insns[ext - insize] + 1
2777 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2778 else
2779 continue;
2780 if (cost < best_cost)
2782 kind = ext / (unsigned) 8 + 2;
2783 best_cost = cost;
2786 /* Check if we can sign extend in r0 */
2787 if (insize < 8)
2789 cost = 3 + shift_insns[left];
2790 if (cost < best_cost)
2792 kind = 6;
2793 best_cost = cost;
2795 /* Try the same with a final signed shift. */
2796 if (left < 31)
2798 cost = 3 + ext_shift_insns[left + 1] + 1;
2799 if (cost < best_cost)
2801 kind = 7;
2802 best_cost = cost;
2806 if (TARGET_SH3)
2808 /* Try to use a dynamic shift. */
2809 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2810 if (cost < best_cost)
2812 kind = 0;
2813 best_cost = cost;
2816 if (costp)
2817 *costp = cost;
2818 return kind;
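/* A worked example: for LEFT == 1 and SIZE == 8, i.e. sign-extracting the
   low 8 bits of a value already shifted left by one, the search above ends
   up with kind 1 at a cost of 2: shift left by ext - insize == 1, then an
   8-bit sign extend, which gen_shl_sext below turns into shll followed by
   exts.b.  */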
2821 /* Function to be used in the length attribute of the instructions
2822 implementing this pattern. */
2825 shl_sext_length (rtx insn)
2827 rtx set_src, left_rtx, size_rtx;
2828 int cost;
2830 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2831 left_rtx = XEXP (XEXP (set_src, 0), 1);
2832 size_rtx = XEXP (set_src, 1);
2833 shl_sext_kind (left_rtx, size_rtx, &cost);
2834 return cost;
2837 /* Generate rtl for this pattern. */
2840 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
2842 int kind;
2843 int left, size, insize, cost;
2844 rtx operands[3];
2846 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2847 left = INTVAL (left_rtx);
2848 size = INTVAL (size_rtx);
2849 insize = size - left;
2850 switch (kind)
2852 case 1:
2853 case 2:
2854 case 3:
2855 case 4:
2857 int ext = kind & 1 ? 8 : 16;
2858 int shift2 = size - ext;
2860 /* Don't expand fine-grained when combining, because that will
2861 make the pattern fail. */
2862 if (! currently_expanding_to_rtl
2863 && ! reload_in_progress && ! reload_completed)
2865 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2866 emit_insn (gen_movsi (dest, source));
2867 break;
2869 if (dest != source)
2870 emit_insn (gen_movsi (dest, source));
2871 operands[0] = dest;
2872 if (ext - insize)
2874 operands[2] = GEN_INT (ext - insize);
2875 gen_shifty_hi_op (ASHIFT, operands);
2877 emit_insn (kind & 1
2878 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2879 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2880 if (kind <= 2)
2882 if (shift2)
2884 operands[2] = GEN_INT (shift2);
2885 gen_shifty_op (ASHIFT, operands);
2888 else
2890 if (shift2 > 0)
2892 if (EXT_SHIFT_SIGNED (shift2))
2894 operands[2] = GEN_INT (shift2 + 1);
2895 gen_shifty_op (ASHIFT, operands);
2896 operands[2] = const1_rtx;
2897 gen_shifty_op (ASHIFTRT, operands);
2898 break;
2900 operands[2] = GEN_INT (shift2);
2901 gen_shifty_hi_op (ASHIFT, operands);
2903 else if (shift2)
2905 operands[2] = GEN_INT (-shift2);
2906 gen_shifty_hi_op (LSHIFTRT, operands);
2908 emit_insn (size <= 8
2909 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2910 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2912 break;
2914 case 5:
2916 int i = 16 - size;
2917 if (! currently_expanding_to_rtl
2918 && ! reload_in_progress && ! reload_completed)
2919 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2920 else
2922 operands[0] = dest;
2923 operands[2] = GEN_INT (16 - insize);
2924 gen_shifty_hi_op (ASHIFT, operands);
2925 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2927 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2928 while (--i >= 0)
2929 gen_ashift (ASHIFTRT, 1, dest);
2930 break;
2932 case 6:
2933 case 7:
2934 /* Don't expand fine-grained when combining, because that will
2935 make the pattern fail. */
2936 if (! currently_expanding_to_rtl
2937 && ! reload_in_progress && ! reload_completed)
2939 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2940 emit_insn (gen_movsi (dest, source));
2941 break;
2943 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2944 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2945 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2946 operands[0] = dest;
2947 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2948 gen_shifty_op (ASHIFT, operands);
2949 if (kind == 7)
2950 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
2951 break;
2952 default:
2953 return -1;
2955 return 0;
2958 /* Prefix a symbol_ref name with "datalabel". */
2961 gen_datalabel_ref (rtx sym)
2963 const char *str;
2965 if (GET_CODE (sym) == LABEL_REF)
2966 return gen_rtx_CONST (GET_MODE (sym),
2967 gen_rtx_UNSPEC (GET_MODE (sym),
2968 gen_rtvec (1, sym),
2969 UNSPEC_DATALABEL));
2971 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
2973 str = XSTR (sym, 0);
2974 /* Share all SYMBOL_REF strings with the same value - that is important
2975 for cse. */
2976 str = IDENTIFIER_POINTER (get_identifier (str));
2977 XSTR (sym, 0) = str;
2979 return sym;
2983 /* The SH cannot load a large constant into a register; constants have to
2984 come from a pc relative load. The reference of a pc relative load
2985 instruction must be less than 1k in front of the instruction. This
2986 means that we often have to dump a constant inside a function, and
2987 generate code to branch around it.
2989 It is important to minimize this, since the branches will slow things
2990 down and make things bigger.
2992 Worst case code looks like:
2994 mov.l L1,rn
2995 bra L2
2997 align
2998 L1: .long value
3002 mov.l L3,rn
3003 bra L4
3005 align
3006 L3: .long value
3010 We fix this by performing a scan before scheduling, which notices which
3011 instructions need to have their operands fetched from the constant table
3012 and builds the table.
3014 The algorithm is:
3016 scan, find an instruction which needs a pcrel move. Look forward, find the
3017 last barrier which is within MAX_COUNT bytes of the requirement.
3018 If there isn't one, make one. Process all the instructions between
3019 the find and the barrier.
3021 In the above example, we can tell that L3 is within 1k of L1, so
3022 the first move can be shrunk from the 3 insn+constant sequence into
3023 just 1 insn, and the constant moved to L3 to make:
3025 mov.l L1,rn
3027 mov.l L3,rn
3028 bra L4
3030 align
3031 L3:.long value
3032 L4:.long value
3034 Then the second move becomes the target for the shortening process. */
3036 typedef struct
3038 rtx value; /* Value in table. */
3039 rtx label; /* Label of value. */
3040 rtx wend; /* End of window. */
3041 enum machine_mode mode; /* Mode of value. */
3043 /* True if this constant is accessed as part of a post-increment
3044 sequence. Note that HImode constants are never accessed in this way. */
3045 bool part_of_sequence_p;
3046 } pool_node;
3048 /* The maximum number of constants that can fit into one pool, since
3049 constants in the range 0..510 are at least 2 bytes long, and in the
3050 range from there to 1018 at least 4 bytes. */
3052 #define MAX_POOL_SIZE 372
3053 static pool_node pool_vector[MAX_POOL_SIZE];
3054 static int pool_size;
3055 static rtx pool_window_label;
3056 static int pool_window_last;
3058 /* ??? If we need a constant in HImode which is the truncated value of a
3059 constant we need in SImode, we could combine the two entries thus saving
3060 two bytes. Is this common enough to be worth the effort of implementing
3061 it? */
3063 /* ??? This stuff should be done at the same time that we shorten branches.
3064 As it is now, we must assume that all branches are the maximum size, and
3065 this causes us to almost always output constant pools sooner than
3066 necessary. */
3068 /* Add a constant to the pool and return its label. */
3070 static rtx
3071 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3073 int i;
3074 rtx lab, new, ref, newref;
3076 /* First see if we've already got it. */
3077 for (i = 0; i < pool_size; i++)
3079 if (x->code == pool_vector[i].value->code
3080 && mode == pool_vector[i].mode)
3082 if (x->code == CODE_LABEL)
3084 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3085 continue;
3087 if (rtx_equal_p (x, pool_vector[i].value))
3089 lab = new = 0;
3090 if (! last_value
3091 || ! i
3092 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3094 new = gen_label_rtx ();
3095 LABEL_REFS (new) = pool_vector[i].label;
3096 pool_vector[i].label = lab = new;
3098 if (lab && pool_window_label)
3100 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
3101 ref = pool_vector[pool_window_last].wend;
3102 LABEL_NEXTREF (newref) = ref;
3103 pool_vector[pool_window_last].wend = newref;
3105 if (new)
3106 pool_window_label = new;
3107 pool_window_last = i;
3108 return lab;
3113 /* Need a new one. */
3114 pool_vector[pool_size].value = x;
3115 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
3117 lab = 0;
3118 pool_vector[pool_size - 1].part_of_sequence_p = true;
3120 else
3121 lab = gen_label_rtx ();
3122 pool_vector[pool_size].mode = mode;
3123 pool_vector[pool_size].label = lab;
3124 pool_vector[pool_size].wend = NULL_RTX;
3125 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
3126 if (lab && pool_window_label)
3128 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
3129 ref = pool_vector[pool_window_last].wend;
3130 LABEL_NEXTREF (newref) = ref;
3131 pool_vector[pool_window_last].wend = newref;
3133 if (lab)
3134 pool_window_label = lab;
3135 pool_window_last = pool_size;
3136 pool_size++;
3137 return lab;
3140 /* Output the literal table. START, if nonzero, is the first instruction
3141 this table is needed for, and also indicates that there is at least one
3142 casesi_worker_2 instruction; we have to emit the operand3 labels from
3143 these insns at a 4-byte aligned position. BARRIER is the barrier
3144 after which we are to place the table. */
3146 static void
3147 dump_table (rtx start, rtx barrier)
3149 rtx scan = barrier;
3150 int i;
3151 int need_align = 1;
3152 rtx lab, ref;
3153 int have_df = 0;
3155 /* Do two passes, first time dump out the HI sized constants. */
3157 for (i = 0; i < pool_size; i++)
3159 pool_node *p = &pool_vector[i];
3161 if (p->mode == HImode)
3163 if (need_align)
3165 scan = emit_insn_after (gen_align_2 (), scan);
3166 need_align = 0;
3168 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3169 scan = emit_label_after (lab, scan);
3170 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
3171 scan);
3172 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3174 lab = XEXP (ref, 0);
3175 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3178 else if (p->mode == DFmode)
3179 have_df = 1;
3182 need_align = 1;
3184 if (start)
3186 scan = emit_insn_after (gen_align_4 (), scan);
3187 need_align = 0;
3188 for (; start != barrier; start = NEXT_INSN (start))
3189 if (GET_CODE (start) == INSN
3190 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
3192 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
3193 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
3195 scan = emit_label_after (lab, scan);
3198 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
3200 rtx align_insn = NULL_RTX;
3202 scan = emit_label_after (gen_label_rtx (), scan);
3203 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3204 need_align = 0;
3206 for (i = 0; i < pool_size; i++)
3208 pool_node *p = &pool_vector[i];
3210 switch (p->mode)
3212 case HImode:
3213 break;
3214 case SImode:
3215 case SFmode:
3216 if (align_insn && !p->part_of_sequence_p)
3218 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3219 emit_label_before (lab, align_insn);
3220 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
3221 align_insn);
3222 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3224 lab = XEXP (ref, 0);
3225 emit_insn_before (gen_consttable_window_end (lab),
3226 align_insn);
3228 delete_insn (align_insn);
3229 align_insn = NULL_RTX;
3230 continue;
3232 else
3234 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3235 scan = emit_label_after (lab, scan);
3236 scan = emit_insn_after (gen_consttable_4 (p->value,
3237 const0_rtx), scan);
3238 need_align = ! need_align;
3240 break;
3241 case DFmode:
3242 if (need_align)
3244 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3245 align_insn = scan;
3246 need_align = 0;
3248 case DImode:
3249 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3250 scan = emit_label_after (lab, scan);
3251 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3252 scan);
3253 break;
3254 default:
3255 gcc_unreachable ();
3258 if (p->mode != HImode)
3260 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3262 lab = XEXP (ref, 0);
3263 scan = emit_insn_after (gen_consttable_window_end (lab),
3264 scan);
3269 pool_size = 0;
3272 for (i = 0; i < pool_size; i++)
3274 pool_node *p = &pool_vector[i];
3276 switch (p->mode)
3278 case HImode:
3279 break;
3280 case SImode:
3281 case SFmode:
3282 if (need_align)
3284 need_align = 0;
3285 scan = emit_label_after (gen_label_rtx (), scan);
3286 scan = emit_insn_after (gen_align_4 (), scan);
3288 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3289 scan = emit_label_after (lab, scan);
3290 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
3291 scan);
3292 break;
3293 case DFmode:
3294 case DImode:
3295 if (need_align)
3297 need_align = 0;
3298 scan = emit_label_after (gen_label_rtx (), scan);
3299 scan = emit_insn_after (gen_align_4 (), scan);
3301 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3302 scan = emit_label_after (lab, scan);
3303 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3304 scan);
3305 break;
3306 default:
3307 gcc_unreachable ();
3310 if (p->mode != HImode)
3312 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3314 lab = XEXP (ref, 0);
3315 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3320 scan = emit_insn_after (gen_consttable_end (), scan);
3321 scan = emit_barrier_after (scan);
3322 pool_size = 0;
3323 pool_window_label = NULL_RTX;
3324 pool_window_last = 0;
3327 /* Return nonzero if constant would be an ok source for a
3328 mov.w instead of a mov.l. */
3330 static int
3331 hi_const (rtx src)
3333 return (GET_CODE (src) == CONST_INT
3334 && INTVAL (src) >= -32768
3335 && INTVAL (src) <= 32767);
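/* E.g. 1000 or -20000 can go into the pool as a 2 byte mov.w constant,
   whereas 40000 or 0x12345 fall outside the signed 16 bit range and need a
   4 byte mov.l entry.  */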
3338 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3340 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
3341 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
3342 need to fix it if the input value is CONST_OK_FOR_I08. */
3344 static int
3345 broken_move (rtx insn)
3347 if (GET_CODE (insn) == INSN)
3349 rtx pat = PATTERN (insn);
3350 if (GET_CODE (pat) == PARALLEL)
3351 pat = XVECEXP (pat, 0, 0);
3352 if (GET_CODE (pat) == SET
3353 /* We can load any 8 bit value if we don't care what the high
3354 order bits end up as. */
3355 && GET_MODE (SET_DEST (pat)) != QImode
3356 && (CONSTANT_P (SET_SRC (pat))
3357 /* Match mova_const. */
3358 || (GET_CODE (SET_SRC (pat)) == UNSPEC
3359 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3360 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
3361 && ! (TARGET_SH2E
3362 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3363 && (fp_zero_operand (SET_SRC (pat))
3364 || fp_one_operand (SET_SRC (pat)))
3365 /* ??? If this is a -m4 or -m4-single compilation, in general
3366 we don't know the current setting of fpscr, so disable fldi.
3367 There is an exception if this was a register-register move
3368 before reload - and hence it was ascertained that we have
3369 single precision setting - and in a post-reload optimization
3370 we changed this to do a constant load. In that case
3371 we don't have an r0 clobber, hence we must use fldi. */
3372 && (! TARGET_SH4 || TARGET_FMOVD
3373 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3374 == SCRATCH))
3375 && GET_CODE (SET_DEST (pat)) == REG
3376 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3377 && ! (TARGET_SH2A
3378 && GET_MODE (SET_DEST (pat)) == SImode
3379 && GET_CODE (SET_SRC (pat)) == CONST_INT
3380 && CONST_OK_FOR_I20 (INTVAL (SET_SRC (pat))))
3381 && (GET_CODE (SET_SRC (pat)) != CONST_INT
3382 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
3383 return 1;
3386 return 0;
3389 static int
3390 mova_p (rtx insn)
3392 return (GET_CODE (insn) == INSN
3393 && GET_CODE (PATTERN (insn)) == SET
3394 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3395 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3396 /* Don't match mova_const. */
3397 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
3400 /* Fix up a mova from a switch that went out of range. */
3401 static void
3402 fixup_mova (rtx mova)
3404 if (! flag_pic)
3406 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3407 INSN_CODE (mova) = -1;
3409 else
3411 rtx worker = mova;
3412 rtx lab = gen_label_rtx ();
3413 rtx wpat, wpat0, wpat1, wsrc, diff;
3417 worker = NEXT_INSN (worker);
3418 gcc_assert (worker
3419 && GET_CODE (worker) != CODE_LABEL
3420 && GET_CODE (worker) != JUMP_INSN);
3421 } while (GET_CODE (worker) == NOTE
3422 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3423 wpat = PATTERN (worker);
3424 wpat0 = XVECEXP (wpat, 0, 0);
3425 wpat1 = XVECEXP (wpat, 0, 1);
3426 wsrc = SET_SRC (wpat0);
3427 PATTERN (worker) = (gen_casesi_worker_2
3428 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3429 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3430 XEXP (wpat1, 0)));
3431 INSN_CODE (worker) = -1;
3432 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3433 gen_rtx_LABEL_REF (Pmode, lab));
3434 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3435 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3436 INSN_CODE (mova) = -1;
3440 /* Find the last barrier from insn FROM which is close enough to hold the
3441 constant pool. If we can't find one, then create one near the end of
3442 the range. */
3444 static rtx
3445 find_barrier (int num_mova, rtx mova, rtx from)
3447 int count_si = 0;
3448 int count_hi = 0;
3449 int found_hi = 0;
3450 int found_si = 0;
3451 int found_di = 0;
3452 int hi_align = 2;
3453 int si_align = 2;
3454 int leading_mova = num_mova;
3455 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3456 int si_limit;
3457 int hi_limit;
3459 /* For HImode: range is 510, add 4 because pc counts from address of
3460 second instruction after this one, subtract 2 for the jump instruction
3461 that we may need to emit before the table, subtract 2 for the instruction
3462 that fills the jump delay slot (in very rare cases, reorg will take an
3463 instruction from after the constant pool or will leave the delay slot
3464 empty). This gives 510.
3465 For SImode: range is 1020, add 4 because pc counts from address of
3466 second instruction after this one, subtract 2 in case pc is 2 byte
3467 aligned, subtract 2 for the jump instruction that we may need to emit
3468 before the table, subtract 2 for the instruction that fills the jump
3469 delay slot. This gives 1018. */
3471 /* The branch will always be shortened now that the reference address for
3472 forward branches is the successor address, thus we need no longer make
3473 adjustments to the [sh]i_limit for -O0. */
3475 si_limit = 1018;
3476 hi_limit = 510;
3478 while (from && count_si < si_limit && count_hi < hi_limit)
3480 int inc = get_attr_length (from);
3481 int new_align = 1;
3483 if (GET_CODE (from) == CODE_LABEL)
3485 if (optimize)
3486 new_align = 1 << label_to_alignment (from);
3487 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3488 new_align = 1 << barrier_align (from);
3489 else
3490 new_align = 1;
3491 inc = 0;
3494 if (GET_CODE (from) == BARRIER)
3497 found_barrier = from;
3499 /* If we are at the end of the function, or in front of an alignment
3500 instruction, we need not insert an extra alignment. We prefer
3501 this kind of barrier. */
3502 if (barrier_align (from) > 2)
3503 good_barrier = from;
3506 if (broken_move (from))
3508 rtx pat, src, dst;
3509 enum machine_mode mode;
3511 pat = PATTERN (from);
3512 if (GET_CODE (pat) == PARALLEL)
3513 pat = XVECEXP (pat, 0, 0);
3514 src = SET_SRC (pat);
3515 dst = SET_DEST (pat);
3516 mode = GET_MODE (dst);
3518 /* We must explicitly check the mode, because sometimes the
3519 front end will generate code to load unsigned constants into
3520 HImode targets without properly sign extending them. */
3521 if (mode == HImode
3522 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3524 found_hi += 2;
3525 /* We put the short constants before the long constants, so
3526 we must count the length of short constants in the range
3527 for the long constants. */
3528 /* ??? This isn't optimal, but is easy to do. */
3529 si_limit -= 2;
3531 else
3533 /* We dump DF/DI constants before SF/SI ones, because
3534 the limit is the same, but the alignment requirements
3535 are higher. We may waste up to 4 additional bytes
3536 for alignment, and the DF/DI constant may have
3537 another SF/SI constant placed before it. */
3538 if (TARGET_SHCOMPACT
3539 && ! found_di
3540 && (mode == DFmode || mode == DImode))
3542 found_di = 1;
3543 si_limit -= 8;
3545 while (si_align > 2 && found_si + si_align - 2 > count_si)
3546 si_align >>= 1;
3547 if (found_si > count_si)
3548 count_si = found_si;
3549 found_si += GET_MODE_SIZE (mode);
3550 if (num_mova)
3551 si_limit -= GET_MODE_SIZE (mode);
3555 if (mova_p (from))
3557 if (! num_mova++)
3559 leading_mova = 0;
3560 mova = from;
3561 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
3563 if (found_si > count_si)
3564 count_si = found_si;
3566 else if (GET_CODE (from) == JUMP_INSN
3567 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3568 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3570 if (num_mova)
3571 num_mova--;
3572 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3574 /* We have just passed the barrier in front of the
3575 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3576 the ADDR_DIFF_VEC is accessed as data, just like our pool
3577 constants, this is a good opportunity to accommodate what
3578 we have gathered so far.
3579 If we waited any longer, we could end up at a barrier in
3580 front of code, which gives worse cache usage for separated
3581 instruction / data caches. */
3582 good_barrier = found_barrier;
3583 break;
3585 else
3587 rtx body = PATTERN (from);
3588 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
3591 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3592 else if (GET_CODE (from) == JUMP_INSN
3593 && ! TARGET_SH2
3594 && ! TARGET_SMALLCODE)
3595 new_align = 4;
3597 if (found_si)
3599 count_si += inc;
3600 if (new_align > si_align)
3602 si_limit -= (count_si - 1) & (new_align - si_align);
3603 si_align = new_align;
3605 count_si = (count_si + new_align - 1) & -new_align;
3607 if (found_hi)
3609 count_hi += inc;
3610 if (new_align > hi_align)
3612 hi_limit -= (count_hi - 1) & (new_align - hi_align);
3613 hi_align = new_align;
3615 count_hi = (count_hi + new_align - 1) & -new_align;
3617 from = NEXT_INSN (from);
3620 if (num_mova)
3622 if (leading_mova)
3624 /* Try as we might, the leading mova is out of range. Change
3625 it into a load (which will become a pcload) and retry. */
3626 fixup_mova (mova);
3627 return find_barrier (0, 0, mova);
3629 else
3631 /* Insert the constant pool table before the mova instruction,
3632 to prevent the mova label reference from going out of range. */
3633 from = mova;
3634 good_barrier = found_barrier = barrier_before_mova;
3638 if (found_barrier)
3640 if (good_barrier && next_real_insn (found_barrier))
3641 found_barrier = good_barrier;
3643 else
3645 /* We didn't find a barrier in time to dump our stuff,
3646 so we'll make one. */
3647 rtx label = gen_label_rtx ();
3649 /* If we exceeded the range, then we must back up over the last
3650 instruction we looked at. Otherwise, we just need to undo the
3651 NEXT_INSN at the end of the loop. */
3652 if (count_hi > hi_limit || count_si > si_limit)
3653 from = PREV_INSN (PREV_INSN (from));
3654 else
3655 from = PREV_INSN (from);
3657 /* Walk back to be just before any jump or label.
3658 Putting it before a label reduces the number of times the branch
3659 around the constant pool table will be hit. Putting it before
3660 a jump makes it more likely that the bra delay slot will be
3661 filled. */
3662 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
3663 || GET_CODE (from) == CODE_LABEL)
3664 from = PREV_INSN (from);
3666 from = emit_jump_insn_after (gen_jump (label), from);
3667 JUMP_LABEL (from) = label;
3668 LABEL_NUSES (label) = 1;
3669 found_barrier = emit_barrier_after (from);
3670 emit_label_after (label, found_barrier);
3673 return found_barrier;
3676 /* If the instruction INSN is implemented by a special function, and we can
3677 positively find the register that is used to call the sfunc, and this
3678 register is not used anywhere else in this instruction - except as the
3679 destination of a set, return this register; else, return 0. */
3681 sfunc_uses_reg (rtx insn)
3683 int i;
3684 rtx pattern, part, reg_part, reg;
3686 if (GET_CODE (insn) != INSN)
3687 return 0;
3688 pattern = PATTERN (insn);
3689 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
3690 return 0;
3692 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3694 part = XVECEXP (pattern, 0, i);
3695 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
3696 reg_part = part;
3698 if (! reg_part)
3699 return 0;
3700 reg = XEXP (reg_part, 0);
3701 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
3703 part = XVECEXP (pattern, 0, i);
3704 if (part == reg_part || GET_CODE (part) == CLOBBER)
3705 continue;
3706 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
3707 && GET_CODE (SET_DEST (part)) == REG)
3708 ? SET_SRC (part) : part)))
3709 return 0;
3711 return reg;
3714 /* See if the only way in which INSN uses REG is by calling it, or by
3715 setting it while calling it. Set *SET to a SET rtx if the register
3716 is set by INSN. */
3718 static int
3719 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
3721 rtx pattern, reg2;
3723 *set = NULL_RTX;
3725 reg2 = sfunc_uses_reg (insn);
3726 if (reg2 && REGNO (reg2) == REGNO (reg))
3728 pattern = single_set (insn);
3729 if (pattern
3730 && GET_CODE (SET_DEST (pattern)) == REG
3731 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3732 *set = pattern;
3733 return 0;
3735 if (GET_CODE (insn) != CALL_INSN)
3737 /* We don't use rtx_equal_p because we don't care if the mode is
3738 different. */
3739 pattern = single_set (insn);
3740 if (pattern
3741 && GET_CODE (SET_DEST (pattern)) == REG
3742 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3744 rtx par, part;
3745 int i;
3747 *set = pattern;
3748 par = PATTERN (insn);
3749 if (GET_CODE (par) == PARALLEL)
3750 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
3752 part = XVECEXP (par, 0, i);
3753 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
3754 return 1;
3756 return reg_mentioned_p (reg, SET_SRC (pattern));
3759 return 1;
3762 pattern = PATTERN (insn);
3764 if (GET_CODE (pattern) == PARALLEL)
3766 int i;
3768 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3769 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
3770 return 1;
3771 pattern = XVECEXP (pattern, 0, 0);
3774 if (GET_CODE (pattern) == SET)
3776 if (reg_mentioned_p (reg, SET_DEST (pattern)))
3778 /* We don't use rtx_equal_p, because we don't care if the
3779 mode is different. */
3780 if (GET_CODE (SET_DEST (pattern)) != REG
3781 || REGNO (reg) != REGNO (SET_DEST (pattern)))
3782 return 1;
3784 *set = pattern;
3787 pattern = SET_SRC (pattern);
3790 if (GET_CODE (pattern) != CALL
3791 || GET_CODE (XEXP (pattern, 0)) != MEM
3792 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
3793 return 1;
3795 return 0;
3798 /* Given X, a pattern of an insn or a part of it, return a mask of used
3799 general registers. Bits 0..15 mean that the respective registers
3800 are used as inputs in the instruction. Bits 16..31 mean that the
3801 registers 0..15, respectively, are used as outputs, or are clobbered.
3802 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
3804 regs_used (rtx x, int is_dest)
3806 enum rtx_code code;
3807 const char *fmt;
3808 int i, used = 0;
3810 if (! x)
3811 return used;
3812 code = GET_CODE (x);
3813 switch (code)
3815 case REG:
3816 if (REGNO (x) < 16)
3817 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3818 << (REGNO (x) + is_dest));
3819 return 0;
3820 case SUBREG:
3822 rtx y = SUBREG_REG (x);
3824 if (GET_CODE (y) != REG)
3825 break;
3826 if (REGNO (y) < 16)
3827 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3828 << (REGNO (y) +
3829 subreg_regno_offset (REGNO (y),
3830 GET_MODE (y),
3831 SUBREG_BYTE (x),
3832 GET_MODE (x)) + is_dest));
3833 return 0;
3835 case SET:
3836 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
3837 case RETURN:
3838 /* If there was a return value, it must have been indicated with USE. */
3839 return 0x00ffff00;
3840 case CLOBBER:
3841 is_dest = 1;
3842 break;
3843 case MEM:
3844 is_dest = 0;
3845 break;
3846 case CALL:
3847 used |= 0x00ff00f0;
3848 break;
3849 default:
3850 break;
3853 fmt = GET_RTX_FORMAT (code);
3855 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3857 if (fmt[i] == 'E')
3859 register int j;
3860 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3861 used |= regs_used (XVECEXP (x, i, j), is_dest);
3863 else if (fmt[i] == 'e')
3864 used |= regs_used (XEXP (x, i), is_dest);
3866 return used;
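/* For instance, for the SImode pattern (set (reg:SI 1) (plus:SI (reg:SI 4)
   (reg:SI 5))) this returns (1 << 17) | (1 << 4) | (1 << 5): r1 appears in
   the output half of the mask, r4 and r5 in the input half (SImode occupies
   a single general register here).  */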
3869 /* Create an instruction that prevents redirection of a conditional branch
3870 to the destination of the JUMP with address ADDR.
3871 If the branch needs to be implemented as an indirect jump, try to find
3872 a scratch register for it.
3873 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3874 If any preceding insn that doesn't fit into a delay slot is good enough,
3875 pass 1. Pass 2 if a definite blocking insn is needed.
3876 -1 is used internally to avoid deep recursion.
3877 If a blocking instruction is made or recognized, return it. */
3879 static rtx
3880 gen_block_redirect (rtx jump, int addr, int need_block)
3882 int dead = 0;
3883 rtx prev = prev_nonnote_insn (jump);
3884 rtx dest;
3886 /* First, check if we already have an instruction that satisfies our need. */
3887 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3889 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3890 return prev;
3891 if (GET_CODE (PATTERN (prev)) == USE
3892 || GET_CODE (PATTERN (prev)) == CLOBBER
3893 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3894 prev = jump;
3895 else if ((need_block &= ~1) < 0)
3896 return prev;
3897 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
3898 need_block = 0;
3900 if (GET_CODE (PATTERN (jump)) == RETURN)
3902 if (! need_block)
3903 return prev;
3904 /* Reorg even does nasty things with return insns that cause branches
3905 to go out of range - see find_end_label and callers. */
3906 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
3908 /* We can't use JUMP_LABEL here because it might be undefined
3909 when not optimizing. */
3910 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
3911 /* If the branch is out of range, try to find a scratch register for it. */
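/* The unsigned comparison below is a range check in disguise: adding 4092
   and comparing against 4092 + 4098 rejects exactly those displacements
   that fall outside [-4092, +4098].  */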
3912 if (optimize
3913 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3914 > 4092 + 4098))
3916 rtx scan;
3917 /* Don't look for the stack pointer as a scratch register,
3918 it would cause trouble if an interrupt occurred. */
3919 unsigned try = 0x7fff, used;
3920 int jump_left = flag_expensive_optimizations + 1;
3922 /* It is likely that the most recent eligible instruction is wanted for
3923 the delay slot. Therefore, find out which registers it uses, and
3924 try to avoid using them. */
3926 for (scan = jump; (scan = PREV_INSN (scan)); )
3928 enum rtx_code code;
3930 if (INSN_DELETED_P (scan))
3931 continue;
3932 code = GET_CODE (scan);
3933 if (code == CODE_LABEL || code == JUMP_INSN)
3934 break;
3935 if (code == INSN
3936 && GET_CODE (PATTERN (scan)) != USE
3937 && GET_CODE (PATTERN (scan)) != CLOBBER
3938 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
3940 try &= ~regs_used (PATTERN (scan), 0);
3941 break;
3944 for (used = dead = 0, scan = JUMP_LABEL (jump);
3945 (scan = NEXT_INSN (scan)); )
3947 enum rtx_code code;
3949 if (INSN_DELETED_P (scan))
3950 continue;
3951 code = GET_CODE (scan);
3952 if (INSN_P (scan))
3954 used |= regs_used (PATTERN (scan), 0);
3955 if (code == CALL_INSN)
3956 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
3957 dead |= (used >> 16) & ~used;
3958 if (dead & try)
3960 dead &= try;
3961 break;
3963 if (code == JUMP_INSN)
3965 if (jump_left-- && simplejump_p (scan))
3966 scan = JUMP_LABEL (scan);
3967 else
3968 break;
3972 /* Mask out the stack pointer again, in case it was
3973 the only 'free' register we have found. */
3974 dead &= 0x7fff;
3976 /* If the immediate destination is still in range, check for possible
3977 threading with a jump beyond the delay slot insn.
3978 Don't check if we are called recursively; the jump has been or will be
3979 checked in a different invocation in that case. */
3981 else if (optimize && need_block >= 0)
3983 rtx next = next_active_insn (next_active_insn (dest));
3984 if (next && GET_CODE (next) == JUMP_INSN
3985 && GET_CODE (PATTERN (next)) == SET
3986 && recog_memoized (next) == CODE_FOR_jump_compact)
3988 dest = JUMP_LABEL (next);
3989 if (dest
3990 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3991 > 4092 + 4098))
3992 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
3996 if (dead)
3998 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
4000 /* It would be nice if we could convert the jump into an indirect
4001 jump / far branch right now, and thus expose all constituent
4002 instructions to further optimization. However, reorg uses
4003 simplejump_p to determine if there is an unconditional jump where
4004 it should try to schedule instructions from the target of the
4005 branch; simplejump_p fails for indirect jumps even if they have
4006 a JUMP_LABEL. */
4007 rtx insn = emit_insn_before (gen_indirect_jump_scratch
4008 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
4009 , jump);
4010 /* ??? We would like this to have the scope of the jump, but that
4011 scope will change when a delay slot insn of an inner scope is added.
4012 Hence, after delay slot scheduling, we'll have to expect
4013 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
4014 the jump. */
4016 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
4017 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
4018 return insn;
4020 else if (need_block)
4021 /* We can't use JUMP_LABEL here because it might be undefined
4022 when not optimizing. */
4023 return emit_insn_before (gen_block_branch_redirect
4024 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
4025 , jump);
4026 return prev;
4029 #define CONDJUMP_MIN -252
4030 #define CONDJUMP_MAX 262
4031 struct far_branch
4033 /* A label (to be placed) in front of the jump
4034 that jumps to our ultimate destination. */
4035 rtx near_label;
4036 /* Where we are going to insert it if we cannot move the jump any farther,
4037 or the jump itself if we have picked up an existing jump. */
4038 rtx insert_place;
4039 /* The ultimate destination. */
4040 rtx far_label;
4041 struct far_branch *prev;
4042 /* If the branch has already been created, its address;
4043 else the address of its first prospective user. */
4044 int address;
4047 static void gen_far_branch (struct far_branch *);
4048 enum mdep_reorg_phase_e mdep_reorg_phase;
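/* A rough sketch of the transformation performed below, for a conditional
   branch at BP->insert_place whose ultimate target is out of range:

        b<inverted-cond>  .Lskip
   .Lnear:                 ! bp->near_label; other distant branches aim here
        bra               .Lfar    ! bp->far_label, or a return
   .Lskip:

   (label names are illustrative only).  */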
4049 static void
4050 gen_far_branch (struct far_branch *bp)
4052 rtx insn = bp->insert_place;
4053 rtx jump;
4054 rtx label = gen_label_rtx ();
4055 int ok;
4057 emit_label_after (label, insn);
4058 if (bp->far_label)
4060 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
4061 LABEL_NUSES (bp->far_label)++;
4063 else
4064 jump = emit_jump_insn_after (gen_return (), insn);
4065 /* Emit a barrier so that reorg knows that any following instructions
4066 are not reachable via a fall-through path.
4067 But don't do this when not optimizing, since we wouldn't suppress the
4068 alignment for the barrier then, and could end up with out-of-range
4069 pc-relative loads. */
4070 if (optimize)
4071 emit_barrier_after (jump);
4072 emit_label_after (bp->near_label, insn);
4073 JUMP_LABEL (jump) = bp->far_label;
4074 ok = invert_jump (insn, label, 1);
4075 gcc_assert (ok);
4077 /* If we are branching around a jump (rather than a return), prevent
4078 reorg from using an insn from the jump target as the delay slot insn -
4079 when reorg did this, it pessimized code (we'd rather hide the delay slot)
4080 and it could cause branches to go out of range. */
4081 if (bp->far_label)
4082 (emit_insn_after
4083 (gen_stuff_delay_slot
4084 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
4085 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
4086 insn));
4087 /* Prevent reorg from undoing our splits. */
4088 gen_block_redirect (jump, bp->address += 2, 2);
4091 /* Fix up ADDR_DIFF_VECs. */
4092 void
4093 fixup_addr_diff_vecs (rtx first)
4095 rtx insn;
4097 for (insn = first; insn; insn = NEXT_INSN (insn))
4099 rtx vec_lab, pat, prev, prevpat, x, braf_label;
4101 if (GET_CODE (insn) != JUMP_INSN
4102 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
4103 continue;
4104 pat = PATTERN (insn);
4105 vec_lab = XEXP (XEXP (pat, 0), 0);
4107 /* Search for the matching casesi_jump_2. */
4108 for (prev = vec_lab; ; prev = PREV_INSN (prev))
4110 if (GET_CODE (prev) != JUMP_INSN)
4111 continue;
4112 prevpat = PATTERN (prev);
4113 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
4114 continue;
4115 x = XVECEXP (prevpat, 0, 1);
4116 if (GET_CODE (x) != USE)
4117 continue;
4118 x = XEXP (x, 0);
4119 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
4120 break;
4122 /* FIXME: This is a bug in the optimizer, but it seems harmless
4123 to just avoid panicking. */
4124 if (!prev)
4125 continue;
4127 /* Emit the reference label of the braf where it belongs, right after
4128 the casesi_jump_2 (i.e. braf). */
4129 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
4130 emit_label_after (braf_label, prev);
4132 /* Fix up the ADDR_DIFF_VEC to be relative
4133 to the reference address of the braf. */
4134 XEXP (XEXP (pat, 0), 0) = braf_label;
4138 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
4139 a barrier. Return the base 2 logarithm of the desired alignment. */
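/* For instance, a return value of 2 requests 1 << 2 == 4-byte alignment,
   as used below in front of an address table (ADDR_DIFF_VEC).  */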
4141 barrier_align (rtx barrier_or_label)
4143 rtx next = next_real_insn (barrier_or_label), pat, prev;
4144 int slot, credit, jump_to_next = 0;
4146 if (! next)
4147 return 0;
4149 pat = PATTERN (next);
4151 if (GET_CODE (pat) == ADDR_DIFF_VEC)
4152 return 2;
4154 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
4155 /* This is a barrier in front of a constant table. */
4156 return 0;
4158 prev = prev_real_insn (barrier_or_label);
4159 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
4161 pat = PATTERN (prev);
4162 /* If this is a very small table, we want to keep the alignment after
4163 the table to the minimum for proper code alignment. */
4164 return ((TARGET_SMALLCODE
4165 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
4166 <= (unsigned) 1 << (CACHE_LOG - 2)))
4167 ? 1 << TARGET_SHMEDIA : align_jumps_log);
4170 if (TARGET_SMALLCODE)
4171 return 0;
4173 if (! TARGET_SH2 || ! optimize)
4174 return align_jumps_log;
4176 /* When fixing up pcloads, a constant table might be inserted just before
4177 the basic block that ends with the barrier. Thus, we can't trust the
4178 instruction lengths before that. */
4179 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
4181 /* Check if there is an immediately preceding branch to the insn beyond
4182 the barrier. We must weigh the cost of discarding useful information
4183 from the current cache line when executing this branch and there is
4184 an alignment, against that of fetching unneeded insns in front of the
4185 branch target when there is no alignment. */
4187 /* There are two delay_slot cases to consider. One is the simple case
4188 where the preceding branch is to the insn beyond the barrier (simple
4189 delay slot filling), and the other is where the preceding branch has
4190 a delay slot that is a duplicate of the insn after the barrier
4191 (fill_eager_delay_slots) and the branch is to the insn after the insn
4192 after the barrier. */
4194 /* PREV is presumed to be the JUMP_INSN for the barrier under
4195 investigation. Skip to the insn before it. */
4196 prev = prev_real_insn (prev);
4198 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
4199 credit >= 0 && prev && GET_CODE (prev) == INSN;
4200 prev = prev_real_insn (prev))
4202 jump_to_next = 0;
4203 if (GET_CODE (PATTERN (prev)) == USE
4204 || GET_CODE (PATTERN (prev)) == CLOBBER)
4205 continue;
4206 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
4208 prev = XVECEXP (PATTERN (prev), 0, 1);
4209 if (INSN_UID (prev) == INSN_UID (next))
4211 /* Delay slot was filled with insn at jump target. */
4212 jump_to_next = 1;
4213 continue;
4217 if (slot &&
4218 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4219 slot = 0;
4220 credit -= get_attr_length (prev);
4222 if (prev
4223 && GET_CODE (prev) == JUMP_INSN
4224 && JUMP_LABEL (prev))
4226 rtx x;
4227 if (jump_to_next
4228 || next_real_insn (JUMP_LABEL (prev)) == next
4229 /* If relax_delay_slots() decides NEXT was redundant
4230 with some previous instruction, it will have
4231 redirected PREV's jump to the following insn. */
4232 || JUMP_LABEL (prev) == next_nonnote_insn (next)
4233 /* There is no upper bound on redundant instructions
4234 that might have been skipped, but we must not put an
4235 alignment where none had been before. */
4236 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
4237 (INSN_P (x)
4238 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
4239 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
4240 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
4242 rtx pat = PATTERN (prev);
4243 if (GET_CODE (pat) == PARALLEL)
4244 pat = XVECEXP (pat, 0, 0);
4245 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
4246 return 0;
4251 return align_jumps_log;
4254 /* If we are inside a phony loop, almost any kind of label can turn up as the
4255 first one in the loop. Aligning a braf label causes incorrect switch
4256 destination addresses; we can detect braf labels because they are
4257 followed by a BARRIER.
4258 Applying loop alignment to small constant or switch tables is a waste
4259 of space, so we suppress this too. */
4261 sh_loop_align (rtx label)
4263 rtx next = label;
4266 next = next_nonnote_insn (next);
4267 while (next && GET_CODE (next) == CODE_LABEL);
4269 if (! next
4270 || ! INSN_P (next)
4271 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
4272 || recog_memoized (next) == CODE_FOR_consttable_2)
4273 return 0;
4275 return align_loops_log;
4278 /* Do a final pass over the function, just before delayed branch
4279 scheduling. */
4281 static void
4282 sh_reorg (void)
4284 rtx first, insn, mova = NULL_RTX;
4285 int num_mova;
4286 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
4287 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
4289 first = get_insns ();
4291 /* We must split call insns before introducing `mova's. If we're
4292 optimizing, they'll have already been split. Otherwise, make
4293 sure we don't split them too late. */
4294 if (! optimize)
4295 split_all_insns_noflow ();
4297 if (TARGET_SHMEDIA)
4298 return;
4300 /* If relaxing, generate pseudo-ops to associate function calls with
4301 the symbols they call. It does no harm to not generate these
4302 pseudo-ops. However, when we can generate them, it enables the
4303 linker to potentially relax the jsr to a bsr, and eliminate the
4304 register load and, possibly, the constant pool entry. */
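/* Roughly, when relaxing (TARGET_RELAX) the output around such a call looks
   like this sketch (label names are illustrative only):

   .LK:
        mov.l   .Lcp,r1         ! load the callee's address from the pool
        ...
        .uses   .LK
        jsr     @r1

   letting the linker turn the jsr into a bsr and drop the load and,
   possibly, the pool entry, as described above.  */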
4306 mdep_reorg_phase = SH_INSERT_USES_LABELS;
4307 if (TARGET_RELAX)
4309 /* Remove all REG_LABEL notes. We want to use them for our own
4310 purposes. This works because none of the remaining passes
4311 need to look at them.
4313 ??? But it may break in the future. We should use a machine
4314 dependent REG_NOTE, or some other approach entirely. */
4315 for (insn = first; insn; insn = NEXT_INSN (insn))
4317 if (INSN_P (insn))
4319 rtx note;
4321 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
4322 remove_note (insn, note);
4326 for (insn = first; insn; insn = NEXT_INSN (insn))
4328 rtx pattern, reg, link, set, scan, dies, label;
4329 int rescan = 0, foundinsn = 0;
4331 if (GET_CODE (insn) == CALL_INSN)
4333 pattern = PATTERN (insn);
4335 if (GET_CODE (pattern) == PARALLEL)
4336 pattern = XVECEXP (pattern, 0, 0);
4337 if (GET_CODE (pattern) == SET)
4338 pattern = SET_SRC (pattern);
4340 if (GET_CODE (pattern) != CALL
4341 || GET_CODE (XEXP (pattern, 0)) != MEM)
4342 continue;
4344 reg = XEXP (XEXP (pattern, 0), 0);
4346 else
4348 reg = sfunc_uses_reg (insn);
4349 if (! reg)
4350 continue;
4353 if (GET_CODE (reg) != REG)
4354 continue;
4356 /* This is a function call via REG. If the only uses of REG
4357 between the time that it is set and the time that it dies
4358 are in function calls, then we can associate all the
4359 function calls with the setting of REG. */
4361 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
4363 if (REG_NOTE_KIND (link) != 0)
4364 continue;
4365 set = single_set (XEXP (link, 0));
4366 if (set && rtx_equal_p (reg, SET_DEST (set)))
4368 link = XEXP (link, 0);
4369 break;
4373 if (! link)
4375 /* ??? Sometimes global register allocation will have
4376 deleted the insn pointed to by LOG_LINKS. Try
4377 scanning backward to find where the register is set. */
4378 for (scan = PREV_INSN (insn);
4379 scan && GET_CODE (scan) != CODE_LABEL;
4380 scan = PREV_INSN (scan))
4382 if (! INSN_P (scan))
4383 continue;
4385 if (! reg_mentioned_p (reg, scan))
4386 continue;
4388 if (noncall_uses_reg (reg, scan, &set))
4389 break;
4391 if (set)
4393 link = scan;
4394 break;
4399 if (! link)
4400 continue;
4402 /* The register is set at LINK. */
4404 /* We can only optimize the function call if the register is
4405 being set to a symbol. In theory, we could sometimes
4406 optimize calls to a constant location, but the assembler
4407 and linker do not support that at present. */
4408 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4409 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4410 continue;
4412 /* Scan forward from LINK to the place where REG dies, and
4413 make sure that the only insns which use REG are
4414 themselves function calls. */
4416 /* ??? This doesn't work for call targets that were allocated
4417 by reload, since there may not be a REG_DEAD note for the
4418 register. */
4420 dies = NULL_RTX;
4421 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4423 rtx scanset;
4425 /* Don't try to trace forward past a CODE_LABEL if we haven't
4426 seen INSN yet. Ordinarily, we will only find the setting insn
4427 in LOG_LINKS if it is in the same basic block. However,
4428 cross-jumping can insert code labels in between the load and
4429 the call, and can result in situations where a single call
4430 insn may have two targets depending on where we came from. */
4432 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4433 break;
4435 if (! INSN_P (scan))
4436 continue;
4438 /* Don't try to trace forward past a JUMP. To optimize
4439 safely, we would have to check that all the
4440 instructions at the jump destination did not use REG. */
4442 if (GET_CODE (scan) == JUMP_INSN)
4443 break;
4445 if (! reg_mentioned_p (reg, scan))
4446 continue;
4448 if (noncall_uses_reg (reg, scan, &scanset))
4449 break;
4451 if (scan == insn)
4452 foundinsn = 1;
4454 if (scan != insn
4455 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4457 /* There is a function call to this register other
4458 than the one we are checking. If we optimize
4459 this call, we need to rescan again below. */
4460 rescan = 1;
4463 /* ??? We shouldn't have to worry about SCANSET here.
4464 We should just be able to check for a REG_DEAD note
4465 on a function call. However, the REG_DEAD notes are
4466 apparently not dependable around libcalls; c-torture
4467 execute/920501-2 is a test case. If SCANSET is set,
4468 then this insn sets the register, so it must have
4469 died earlier. Unfortunately, this will only handle
4470 the cases in which the register is, in fact, set in a
4471 later insn. */
4473 /* ??? We shouldn't have to use FOUNDINSN here.
4474 However, the LOG_LINKS fields are apparently not
4475 entirely reliable around libcalls;
4476 newlib/libm/math/e_pow.c is a test case. Sometimes
4477 an insn will appear in LOG_LINKS even though it is
4478 not the most recent insn which sets the register. */
4480 if (foundinsn
4481 && (scanset
4482 || find_reg_note (scan, REG_DEAD, reg)))
4484 dies = scan;
4485 break;
4489 if (! dies)
4491 /* Either there was a branch, or some insn used REG
4492 other than as a function call address. */
4493 continue;
4496 /* Create a code label, and put it in a REG_LABEL note on
4497 the insn which sets the register, and on each call insn
4498 which uses the register. In final_prescan_insn we look
4499 for the REG_LABEL notes, and output the appropriate label
4500 or pseudo-op. */
4502 label = gen_label_rtx ();
4503 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
4504 REG_NOTES (link));
4505 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
4506 REG_NOTES (insn));
4507 if (rescan)
4509 scan = link;
4512 rtx reg2;
4514 scan = NEXT_INSN (scan);
4515 if (scan != insn
4516 && ((GET_CODE (scan) == CALL_INSN
4517 && reg_mentioned_p (reg, scan))
4518 || ((reg2 = sfunc_uses_reg (scan))
4519 && REGNO (reg2) == REGNO (reg))))
4520 REG_NOTES (scan)
4521 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
4523 while (scan != dies);
4528 if (TARGET_SH2)
4529 fixup_addr_diff_vecs (first);
4531 if (optimize)
4533 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4534 shorten_branches (first);
4536 /* Scan the function looking for move instructions which have to be
4537 changed to pc-relative loads and insert the literal tables. */
4539 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4540 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4542 if (mova_p (insn))
4544 /* ??? basic block reordering can move a switch table dispatch
4545 below the switch table. Check if that has happened.
4546 We only have the addresses available when optimizing; but then,
4547 this check shouldn't be needed when not optimizing. */
4548 rtx label_ref = XVECEXP (SET_SRC (PATTERN (insn)), 0, 0);
4549 if (optimize
4550 && (INSN_ADDRESSES (INSN_UID (insn))
4551 > INSN_ADDRESSES (INSN_UID (XEXP (label_ref, 0)))))
4553 /* Change the mova into a load.
4554 broken_move will then return true for it. */
4555 fixup_mova (insn);
4557 else if (! num_mova++)
4558 mova = insn;
4560 else if (GET_CODE (insn) == JUMP_INSN
4561 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4562 && num_mova)
4564 rtx scan;
4565 int total;
4567 num_mova--;
4569 /* Some code might have been inserted between the mova and
4570 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4571 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4572 total += get_attr_length (scan);
4574 /* The range of mova is 1020; add 4 because pc counts from the address of
4575 the second instruction after this one, and subtract 2 in case pc is
4576 2-byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
4577 cancels out with alignment effects of the mova itself. */
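/* That is, 1020 + 4 - 2 == 1022, hence the threshold below.  */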
4578 if (total > 1022)
4580 /* Change the mova into a load, and restart scanning
4581 there. broken_move will then return true for mova. */
4582 fixup_mova (mova);
4583 insn = mova;
4586 if (broken_move (insn)
4587 || (GET_CODE (insn) == INSN
4588 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
4590 rtx scan;
4591 /* Scan ahead looking for a barrier to stick the constant table
4592 behind. */
4593 rtx barrier = find_barrier (num_mova, mova, insn);
4594 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4595 int need_aligned_label = 0;
4597 if (num_mova && ! mova_p (mova))
4599 /* find_barrier had to change the first mova into a
4600 pcload; thus, we have to start with this new pcload. */
4601 insn = mova;
4602 num_mova = 0;
4604 /* Now find all the moves between the points and modify them. */
4605 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
4607 if (GET_CODE (scan) == CODE_LABEL)
4608 last_float = 0;
4609 if (GET_CODE (scan) == INSN
4610 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
4611 need_aligned_label = 1;
4612 if (broken_move (scan))
4614 rtx *patp = &PATTERN (scan), pat = *patp;
4615 rtx src, dst;
4616 rtx lab;
4617 rtx newsrc;
4618 enum machine_mode mode;
4620 if (GET_CODE (pat) == PARALLEL)
4621 patp = &XVECEXP (pat, 0, 0), pat = *patp;
4622 src = SET_SRC (pat);
4623 dst = SET_DEST (pat);
4624 mode = GET_MODE (dst);
4626 if (mode == SImode && hi_const (src)
4627 && REGNO (dst) != FPUL_REG)
4629 int offset = 0;
4631 mode = HImode;
4632 while (GET_CODE (dst) == SUBREG)
4634 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
4635 GET_MODE (SUBREG_REG (dst)),
4636 SUBREG_BYTE (dst),
4637 GET_MODE (dst));
4638 dst = SUBREG_REG (dst);
4640 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
4642 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
4644 /* This must be an insn that clobbers r0. */
4645 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
4646 XVECLEN (PATTERN (scan), 0)
4647 - 1);
4648 rtx clobber = *clobberp;
4650 gcc_assert (GET_CODE (clobber) == CLOBBER
4651 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
4653 if (last_float
4654 && reg_set_between_p (r0_rtx, last_float_move, scan))
4655 last_float = 0;
4656 if (last_float
4657 && TARGET_SHCOMPACT
4658 && GET_MODE_SIZE (mode) != 4
4659 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
4660 last_float = 0;
4661 lab = add_constant (src, mode, last_float);
4662 if (lab)
4663 emit_insn_before (gen_mova (lab), scan);
4664 else
4666 /* There will be a REG_UNUSED note for r0 on
4667 LAST_FLOAT_MOVE; we have to change it to REG_INC,
4668 otherwise reorg's mark_target_live_regs will not
4669 consider r0 to be used, and we could end up with a delay
4670 slot insn in front of SCAN that clobbers r0. */
4671 rtx note
4672 = find_regno_note (last_float_move, REG_UNUSED, 0);
4674 /* If we are not optimizing, then there may not be
4675 a note. */
4676 if (note)
4677 PUT_MODE (note, REG_INC);
4679 *last_float_addr = r0_inc_rtx;
4681 last_float_move = scan;
4682 last_float = src;
4683 newsrc = gen_const_mem (mode,
4684 (((TARGET_SH4 && ! TARGET_FMOVD)
4685 || REGNO (dst) == FPUL_REG)
4686 ? r0_inc_rtx
4687 : r0_rtx));
4688 last_float_addr = &XEXP (newsrc, 0);
4690 /* Remove the clobber of r0. */
4691 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
4692 gen_rtx_SCRATCH (Pmode));
4694 /* This is a mova needing a label. Create it. */
4695 else if (GET_CODE (src) == UNSPEC
4696 && XINT (src, 1) == UNSPEC_MOVA
4697 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
4699 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
4700 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4701 newsrc = gen_rtx_UNSPEC (SImode,
4702 gen_rtvec (1, newsrc),
4703 UNSPEC_MOVA);
4705 else
4707 lab = add_constant (src, mode, 0);
4708 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4709 newsrc = gen_const_mem (mode, newsrc);
4711 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
4712 INSN_CODE (scan) = -1;
4715 dump_table (need_aligned_label ? insn : 0, barrier);
4716 insn = barrier;
4720 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
4721 INSN_ADDRESSES_FREE ();
4722 split_branches (first);
4724 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4725 also has an effect on the register that holds the address of the sfunc.
4726 Insert an extra dummy insn in front of each sfunc that pretends to
4727 use this register. */
4728 if (flag_delayed_branch)
4730 for (insn = first; insn; insn = NEXT_INSN (insn))
4732 rtx reg = sfunc_uses_reg (insn);
4734 if (! reg)
4735 continue;
4736 emit_insn_before (gen_use_sfunc_addr (reg), insn);
4739 #if 0
4740 /* fpscr is not actually a user variable, but we pretend it is for the
4741 sake of the previous optimization passes, since we want it handled like
4742 one. However, we don't have any debugging information for it, so turn
4743 it into a non-user variable now. */
4744 if (TARGET_SH4)
4745 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4746 #endif
4747 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
4751 get_dest_uid (rtx label, int max_uid)
4753 rtx dest = next_real_insn (label);
4754 int dest_uid;
4755 if (! dest)
4756 /* This can happen for an undefined label. */
4757 return 0;
4758 dest_uid = INSN_UID (dest);
4759 /* If this is a newly created branch redirection blocking instruction,
4760 we cannot index the branch_uid or insn_addresses arrays with its
4761 uid. But then, we won't need to, because the actual destination is
4762 the following branch. */
4763 while (dest_uid >= max_uid)
4765 dest = NEXT_INSN (dest);
4766 dest_uid = INSN_UID (dest);
4768 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
4769 return 0;
4770 return dest_uid;
4773 /* Split condbranches that are out of range. Also add clobbers for
4774 scratch registers that are needed in far jumps.
4775 We do this before delay slot scheduling, so that it can take our
4776 newly created instructions into account. It also allows us to
4777 find branches with common targets more easily. */
4779 static void
4780 split_branches (rtx first)
4782 rtx insn;
4783 struct far_branch **uid_branch, *far_branch_list = 0;
4784 int max_uid = get_max_uid ();
4785 int ok;
4787 /* Find out which branches are out of range. */
4788 shorten_branches (first);
4790 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
4791 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
4793 for (insn = first; insn; insn = NEXT_INSN (insn))
4794 if (! INSN_P (insn))
4795 continue;
4796 else if (INSN_DELETED_P (insn))
4798 /* Shorten_branches would split this instruction again,
4799 so transform it into a note. */
4800 PUT_CODE (insn, NOTE);
4801 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4802 NOTE_SOURCE_FILE (insn) = 0;
4804 else if (GET_CODE (insn) == JUMP_INSN
4805 /* Don't mess with ADDR_DIFF_VEC */
4806 && (GET_CODE (PATTERN (insn)) == SET
4807 || GET_CODE (PATTERN (insn)) == RETURN))
4809 enum attr_type type = get_attr_type (insn);
4810 if (type == TYPE_CBRANCH)
4812 rtx next, beyond;
4814 if (get_attr_length (insn) > 4)
4816 rtx src = SET_SRC (PATTERN (insn));
4817 rtx olabel = XEXP (XEXP (src, 1), 0);
4818 int addr = INSN_ADDRESSES (INSN_UID (insn));
4819 rtx label = 0;
4820 int dest_uid = get_dest_uid (olabel, max_uid);
4821 struct far_branch *bp = uid_branch[dest_uid];
4823 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4824 the label if the LABEL_NUSES count drops to zero. There is
4825 always a jump_optimize pass that sets these values, but it
4826 proceeds to delete unreferenced code, and then if not
4827 optimizing, to un-delete the deleted instructions, thus
4828 leaving labels with too-low use counts. */
4829 if (! optimize)
4831 JUMP_LABEL (insn) = olabel;
4832 LABEL_NUSES (olabel)++;
4834 if (! bp)
4836 bp = (struct far_branch *) alloca (sizeof *bp);
4837 uid_branch[dest_uid] = bp;
4838 bp->prev = far_branch_list;
4839 far_branch_list = bp;
4840 bp->far_label
4841 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
4842 LABEL_NUSES (bp->far_label)++;
4844 else
4846 label = bp->near_label;
4847 if (! label && bp->address - addr >= CONDJUMP_MIN)
4849 rtx block = bp->insert_place;
4851 if (GET_CODE (PATTERN (block)) == RETURN)
4852 block = PREV_INSN (block);
4853 else
4854 block = gen_block_redirect (block,
4855 bp->address, 2);
4856 label = emit_label_after (gen_label_rtx (),
4857 PREV_INSN (block));
4858 bp->near_label = label;
4860 else if (label && ! NEXT_INSN (label))
4862 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4863 bp->insert_place = insn;
4864 else
4865 gen_far_branch (bp);
4868 if (! label
4869 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4871 bp->near_label = label = gen_label_rtx ();
4872 bp->insert_place = insn;
4873 bp->address = addr;
4875 ok = redirect_jump (insn, label, 1);
4876 gcc_assert (ok);
4878 else
4880 /* get_attr_length (insn) == 2 */
4881 /* Check if we have a pattern where reorg wants to redirect
4882 the branch to a label from an unconditional branch that
4883 is too far away. */
4884 /* We can't use JUMP_LABEL here because it might be undefined
4885 when not optimizing. */
4886 /* A syntax error might cause beyond to be NULL_RTX. */
4887 beyond
4888 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4889 0));
4891 if (beyond
4892 && (GET_CODE (beyond) == JUMP_INSN
4893 || ((beyond = next_active_insn (beyond))
4894 && GET_CODE (beyond) == JUMP_INSN))
4895 && GET_CODE (PATTERN (beyond)) == SET
4896 && recog_memoized (beyond) == CODE_FOR_jump_compact
4897 && ((INSN_ADDRESSES
4898 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4899 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4900 > 252 + 258 + 2))
4901 gen_block_redirect (beyond,
4902 INSN_ADDRESSES (INSN_UID (beyond)), 1);
4905 next = next_active_insn (insn);
4907 if ((GET_CODE (next) == JUMP_INSN
4908 || ((next = next_active_insn (next))
4909 && GET_CODE (next) == JUMP_INSN))
4910 && GET_CODE (PATTERN (next)) == SET
4911 && recog_memoized (next) == CODE_FOR_jump_compact
4912 && ((INSN_ADDRESSES
4913 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
4914 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4915 > 252 + 258 + 2))
4916 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
4918 else if (type == TYPE_JUMP || type == TYPE_RETURN)
4920 int addr = INSN_ADDRESSES (INSN_UID (insn));
4921 rtx far_label = 0;
4922 int dest_uid = 0;
4923 struct far_branch *bp;
4925 if (type == TYPE_JUMP)
4927 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
4928 dest_uid = get_dest_uid (far_label, max_uid);
4929 if (! dest_uid)
4931 /* Parse errors can lead to labels outside
4932 the insn stream. */
4933 if (! NEXT_INSN (far_label))
4934 continue;
4936 if (! optimize)
4938 JUMP_LABEL (insn) = far_label;
4939 LABEL_NUSES (far_label)++;
4941 redirect_jump (insn, NULL_RTX, 1);
4942 far_label = 0;
4945 bp = uid_branch[dest_uid];
4946 if (! bp)
4948 bp = (struct far_branch *) alloca (sizeof *bp);
4949 uid_branch[dest_uid] = bp;
4950 bp->prev = far_branch_list;
4951 far_branch_list = bp;
4952 bp->near_label = 0;
4953 bp->far_label = far_label;
4954 if (far_label)
4955 LABEL_NUSES (far_label)++;
4957 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
4958 if (addr - bp->address <= CONDJUMP_MAX)
4959 emit_label_after (bp->near_label, PREV_INSN (insn));
4960 else
4962 gen_far_branch (bp);
4963 bp->near_label = 0;
4965 else
4966 bp->near_label = 0;
4967 bp->address = addr;
4968 bp->insert_place = insn;
4969 if (! far_label)
4970 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
4971 else
4972 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
4975 /* Generate all pending far branches,
4976 and free our references to the far labels. */
4977 while (far_branch_list)
4979 if (far_branch_list->near_label
4980 && ! NEXT_INSN (far_branch_list->near_label))
4981 gen_far_branch (far_branch_list);
4982 if (optimize
4983 && far_branch_list->far_label
4984 && ! --LABEL_NUSES (far_branch_list->far_label))
4985 delete_insn (far_branch_list->far_label);
4986 far_branch_list = far_branch_list->prev;
4989 /* Instruction length information is no longer valid due to the new
4990 instructions that have been generated. */
4991 init_insn_lengths ();
4994 /* Dump out instruction addresses, which is useful for debugging the
4995 constant pool table stuff.
4997 If relaxing, output the label and pseudo-ops used to link together
4998 calls and the instruction which set the registers. */
5000 /* ??? The addresses printed by this routine for insns are nonsense for
5001 insns which are inside of a sequence where none of the inner insns have
5002 variable length. This is because the second pass of shorten_branches
5003 does not bother to update them. */
5005 void
5006 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
5007 int noperands ATTRIBUTE_UNUSED)
5009 if (TARGET_DUMPISIZE)
5010 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
5012 if (TARGET_RELAX)
5014 rtx note;
5016 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
5017 if (note)
5019 rtx pattern;
5021 pattern = PATTERN (insn);
5022 if (GET_CODE (pattern) == PARALLEL)
5023 pattern = XVECEXP (pattern, 0, 0);
5024 switch (GET_CODE (pattern))
5026 case SET:
5027 if (GET_CODE (SET_SRC (pattern)) != CALL
5028 && get_attr_type (insn) != TYPE_SFUNC)
5030 targetm.asm_out.internal_label
5031 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
5032 break;
5034 /* else FALLTHROUGH */
5035 case CALL:
5036 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
5037 CODE_LABEL_NUMBER (XEXP (note, 0)));
5038 break;
5040 default:
5041 gcc_unreachable ();
5047 /* Dump out any constants accumulated in the final pass. These will
5048 only be labels. */
5050 const char *
5051 output_jump_label_table (void)
5053 int i;
5055 if (pool_size)
5057 fprintf (asm_out_file, "\t.align 2\n");
5058 for (i = 0; i < pool_size; i++)
5060 pool_node *p = &pool_vector[i];
5062 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5063 CODE_LABEL_NUMBER (p->label));
5064 output_asm_insn (".long %O0", &p->value);
5066 pool_size = 0;
5069 return "";
5072 /* A full frame looks like:
5074 arg-5
5075 arg-4
5076 [ if current_function_anonymous_args
5077 arg-3
5078 arg-2
5079 arg-1
5080 arg-0 ]
5081 saved-fp
5082 saved-r10
5083 saved-r11
5084 saved-r12
5085 saved-pr
5086 local-n
5088 local-1
5089 local-0 <- fp points here. */
5091 /* Number of bytes pushed for anonymous args, used to pass information
5092 between expand_prologue and expand_epilogue. */
5094 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
5095 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
5096 for an epilogue and a negative value means that it's for a sibcall
5097 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
5098 all the registers that are about to be restored, and hence dead. */
5100 static void
5101 output_stack_adjust (int size, rtx reg, int epilogue_p,
5102 HARD_REG_SET *live_regs_mask)
5104 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
5105 if (size)
5107 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5109 /* This test is bogus, as output_stack_adjust is used to re-align the
5110 stack. */
5111 #if 0
5112 gcc_assert (!(size % align));
5113 #endif
5115 if (CONST_OK_FOR_ADD (size))
5116 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
5117 /* Try to do it with two partial adjustments; however, we must make
5118 sure that the stack is properly aligned at all times, in case
5119 an interrupt occurs between the two partial adjustments. */
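/* For example, with size == 192 and align == 4 this emits two add #96
   instructions (assuming such immediates satisfy CONST_OK_FOR_ADD); the
   first step is rounded down to a multiple of the alignment, so the
   intermediate stack pointer is no less aligned than the final one.  */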
5120 else if (CONST_OK_FOR_ADD (size / 2 & -align)
5121 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
5123 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
5124 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
5126 else
5128 rtx const_reg;
5129 rtx insn;
5130 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
5131 int i;
5133 /* If TEMP is invalid, we could temporarily save a general
5134 register to MACL. However, there is currently no need
5135 to handle this case, so just die when we see it. */
5136 if (epilogue_p < 0
5137 || current_function_interrupt
5138 || ! call_really_used_regs[temp] || fixed_regs[temp])
5139 temp = -1;
5140 if (temp < 0 && ! current_function_interrupt
5141 && (TARGET_SHMEDIA || epilogue_p >= 0))
5143 HARD_REG_SET temps;
5144 COPY_HARD_REG_SET (temps, call_used_reg_set);
5145 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
5146 if (epilogue_p > 0)
5148 int nreg = 0;
5149 if (current_function_return_rtx)
5151 enum machine_mode mode;
5152 mode = GET_MODE (current_function_return_rtx);
5153 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
5154 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
5156 for (i = 0; i < nreg; i++)
5157 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
5158 if (current_function_calls_eh_return)
5160 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
5161 for (i = 0; i <= 3; i++)
5162 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
5165 if (TARGET_SHMEDIA && epilogue_p < 0)
5166 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
5167 CLEAR_HARD_REG_BIT (temps, i);
5168 if (epilogue_p <= 0)
5170 for (i = FIRST_PARM_REG;
5171 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
5172 CLEAR_HARD_REG_BIT (temps, i);
5173 if (cfun->static_chain_decl != NULL)
5174 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
5176 temp = scavenge_reg (&temps);
5178 if (temp < 0 && live_regs_mask)
5179 temp = scavenge_reg (live_regs_mask);
5180 if (temp < 0)
5182 rtx adj_reg, tmp_reg, mem;
5184 /* If we reached here, the most likely case is the (sibcall)
5185 epilogue for non-SHmedia. Put a special push/pop sequence
5186 for such a case as the last resort. This looks lengthy but
5187 would not be a problem because it seems to be very
5188 rare. */
5190 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
5193 /* ??? There is still the slight possibility that r4 or
5194 r5 have been reserved as fixed registers or assigned
5195 as global registers, and they change during an
5196 interrupt. There are possible ways to handle this:
5198 - If we are adjusting the frame pointer (r14), we can do
5199 with a single temp register and an ordinary push / pop
5200 on the stack.
5201 - Grab any call-used or call-saved registers (i.e. not
5202 fixed or globals) for the temps we need. We might
5203 also grab r14 if we are adjusting the stack pointer.
5204 If we can't find enough available registers, issue
5205 a diagnostic and die - the user must have reserved
5206 way too many registers.
5207 But since all this is rather unlikely to happen and
5208 would require extra testing, we just die if r4 / r5
5209 are not available. */
5210 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
5211 && !global_regs[4] && !global_regs[5]);
5213 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
5214 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
5215 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
5216 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
5217 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
5218 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5219 emit_move_insn (mem, tmp_reg);
5220 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
5221 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5222 emit_move_insn (mem, tmp_reg);
5223 emit_move_insn (reg, adj_reg);
5224 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5225 emit_move_insn (adj_reg, mem);
5226 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5227 emit_move_insn (tmp_reg, mem);
5228 return;
5230 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
5232 /* If SIZE is negative, subtract the positive value.
5233 This sometimes allows a constant pool entry to be shared
5234 between prologue and epilogue code. */
5235 if (size < 0)
5237 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
5238 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
5240 else
5242 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
5243 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
5245 if (! epilogue_p)
5246 REG_NOTES (insn)
5247 = (gen_rtx_EXPR_LIST
5248 (REG_FRAME_RELATED_EXPR,
5249 gen_rtx_SET (VOIDmode, reg,
5250 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
5251 REG_NOTES (insn)));
5256 static rtx
5257 frame_insn (rtx x)
5259 x = emit_insn (x);
5260 RTX_FRAME_RELATED_P (x) = 1;
5261 return x;
5264 /* Output RTL to push register RN onto the stack. */
5266 static rtx
5267 push (int rn)
5269 rtx x;
5270 if (rn == FPUL_REG)
5271 x = gen_push_fpul ();
5272 else if (rn == FPSCR_REG)
5273 x = gen_push_fpscr ();
5274 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5275 && FP_OR_XD_REGISTER_P (rn))
5277 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5278 return NULL_RTX;
5279 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
5281 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5282 x = gen_push_e (gen_rtx_REG (SFmode, rn));
5283 else
5284 x = gen_push (gen_rtx_REG (SImode, rn));
5286 x = frame_insn (x);
5287 REG_NOTES (x)
5288 = gen_rtx_EXPR_LIST (REG_INC,
5289 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5290 return x;
5293 /* Output RTL to pop register RN from the stack. */
5295 static void
5296 pop (int rn)
5298 rtx x;
5299 if (rn == FPUL_REG)
5300 x = gen_pop_fpul ();
5301 else if (rn == FPSCR_REG)
5302 x = gen_pop_fpscr ();
5303 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5304 && FP_OR_XD_REGISTER_P (rn))
5306 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5307 return;
5308 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
5310 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5311 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
5312 else
5313 x = gen_pop (gen_rtx_REG (SImode, rn));
5315 x = emit_insn (x);
5316 REG_NOTES (x)
5317 = gen_rtx_EXPR_LIST (REG_INC,
5318 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5321 /* Generate code to push the regs specified in the mask. */
5323 static void
5324 push_regs (HARD_REG_SET *mask, int interrupt_handler)
5326 int i;
5327 int skip_fpscr = 0;
5329 /* Push PR last; this gives better latencies after the prologue, and
5330 candidates for the return delay slot when there are no general
5331 registers pushed. */
5332 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5334 /* If this is an interrupt handler, and the SZ bit varies,
5335 and we have to push any floating point register, we need
5336 to switch to the correct precision first. */
5337 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
5338 && hard_regs_intersect_p (mask, &reg_class_contents[DF_REGS]))
5340 HARD_REG_SET unsaved;
5342 push (FPSCR_REG);
5343 COMPL_HARD_REG_SET (unsaved, *mask);
5344 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
5345 skip_fpscr = 1;
5347 if (i != PR_REG
5348 && (i != FPSCR_REG || ! skip_fpscr)
5349 && TEST_HARD_REG_BIT (*mask, i))
5350 push (i);
5352 if (TEST_HARD_REG_BIT (*mask, PR_REG))
5353 push (PR_REG);
5356 /* Calculate how much extra space is needed to save all callee-saved
5357 target registers.
5358 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5360 static int
5361 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
5363 int reg;
5364 int stack_space = 0;
5365 int interrupt_handler = sh_cfun_interrupt_handler_p ();
5367 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5368 if ((! call_really_used_regs[reg] || interrupt_handler)
5369 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5370 /* Leave space to save this target register on the stack,
5371 in case target register allocation wants to use it. */
5372 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5373 return stack_space;
5376 /* Decide whether we should reserve space for callee-save target registers,
5377 in case target register allocation wants to use them. REGS_SAVED is
5378 the space, in bytes, that is already required for register saves.
5379 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5381 static int
5382 shmedia_reserve_space_for_target_registers_p (int regs_saved,
5383 HARD_REG_SET *live_regs_mask)
5385 if (optimize_size)
5386 return 0;
5387 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
5390 /* Decide how much space to reserve for callee-save target registers
5391 in case target register allocation wants to use them.
5392 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5394 static int
5395 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5397 if (shmedia_space_reserved_for_target_registers)
5398 return shmedia_target_regs_stack_space (live_regs_mask);
5399 else
5400 return 0;
5403 /* Work out the registers which need to be saved, both as a mask and a
5404 count of bytes needed to save them. Return the count.
5406 If doing a pragma interrupt function, then push all regs used by the
5407 function, and if we call another function (we can tell by looking at PR),
5408 make sure that all the regs it clobbers are safe too. */
5410 static int
5411 calc_live_regs (HARD_REG_SET *live_regs_mask)
5413 unsigned int reg;
5414 int count;
5415 int interrupt_handler;
5416 int pr_live, has_call;
5418 interrupt_handler = sh_cfun_interrupt_handler_p ();
5420 CLEAR_HARD_REG_SET (*live_regs_mask);
5421 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
5422 && regs_ever_live[FPSCR_REG])
5423 target_flags &= ~MASK_FPU_SINGLE;
5424 /* If switching to double mode would save a lot of register-save instructions, do that. */
5425 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
5426 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5427 if (regs_ever_live[reg] && regs_ever_live[reg+1]
5428 && (! call_really_used_regs[reg]
5429 || (interrupt_handler && ! pragma_trapa))
5430 && ++count > 2)
5432 target_flags &= ~MASK_FPU_SINGLE;
5433 break;
5435 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5436 knows how to use it. That means the pseudo originally allocated for
5437 the initial value can become the PR_MEDIA_REG hard register, as seen for
5438 execute/20010122-1.c:test9. */
5439 if (TARGET_SHMEDIA)
5440 /* ??? this function is called from initial_elimination_offset, hence we
5441 can't use the result of sh_media_register_for_return here. */
5442 pr_live = sh_pr_n_sets ();
5443 else
5445 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5446 pr_live = (pr_initial
5447 ? (GET_CODE (pr_initial) != REG
5448 || REGNO (pr_initial) != (PR_REG))
5449 : regs_ever_live[PR_REG]);
5450 /* For SHcompact, if not optimizing, we end up with a memory reference
5451 using the return address pointer for __builtin_return_address even
5452 though there is no actual need to put the PR register on the stack. */
5453 pr_live |= regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM];
5455 /* Force PR to be live if the prologue has to call the SHmedia
5456 argument decoder or register saver. */
5457 if (TARGET_SHCOMPACT
5458 && ((current_function_args_info.call_cookie
5459 & ~ CALL_COOKIE_RET_TRAMP (1))
5460 || current_function_has_nonlocal_label))
5461 pr_live = 1;
5462 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
5463 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
5465 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5466 ? pr_live
5467 : (interrupt_handler && ! pragma_trapa)
5468 ? (/* Need to save all the regs ever live. */
5469 (regs_ever_live[reg]
5470 || (call_really_used_regs[reg]
5471 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
5472 || reg == PIC_OFFSET_TABLE_REGNUM)
5473 && has_call)
5474 || (has_call && REGISTER_NATURAL_MODE (reg) == SImode
5475 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5476 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5477 && reg != RETURN_ADDRESS_POINTER_REGNUM
5478 && reg != T_REG && reg != GBR_REG
5479 /* Push fpscr only on targets which have an FPU. */
5480 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5481 : (/* Only push those regs which are used and need to be saved. */
5482 (TARGET_SHCOMPACT
5483 && flag_pic
5484 && current_function_args_info.call_cookie
5485 && reg == PIC_OFFSET_TABLE_REGNUM)
5486 || (regs_ever_live[reg] && ! call_really_used_regs[reg])
5487 || (current_function_calls_eh_return
5488 && (reg == EH_RETURN_DATA_REGNO (0)
5489 || reg == EH_RETURN_DATA_REGNO (1)
5490 || reg == EH_RETURN_DATA_REGNO (2)
5491 || reg == EH_RETURN_DATA_REGNO (3)))
5492 || ((reg == MACL_REG || reg == MACH_REG)
5493 && regs_ever_live[reg]
5494 && sh_cfun_attr_renesas_p ())
5497 SET_HARD_REG_BIT (*live_regs_mask, reg);
5498 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5500 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
5501 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
5503 if (FP_REGISTER_P (reg))
5505 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
5507 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5508 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
5511 else if (XD_REGISTER_P (reg))
5513 /* Must switch to double mode to access these registers. */
5514 target_flags &= ~MASK_FPU_SINGLE;
5519 /* If we have a target register optimization pass after prologue / epilogue
5520 threading, we need to assume all target registers will be live even if
5521 they aren't now. */
5522 if (flag_branch_target_load_optimize2
5523 && TARGET_SAVE_ALL_TARGET_REGS
5524 && shmedia_space_reserved_for_target_registers)
5525 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5526 if ((! call_really_used_regs[reg] || interrupt_handler)
5527 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5529 SET_HARD_REG_BIT (*live_regs_mask, reg);
5530 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5532 /* If this is an interrupt handler, we don't have any call-clobbered
5533 registers we can conveniently use for target register save/restore.
5534 Make sure we save at least one general purpose register when we need
5535 to save target registers. */
5536 if (interrupt_handler
5537 && hard_regs_intersect_p (live_regs_mask,
5538 &reg_class_contents[TARGET_REGS])
5539 && ! hard_regs_intersect_p (live_regs_mask,
5540 &reg_class_contents[GENERAL_REGS]))
5542 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
5543 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
5546 return count;
5549 /* Code to generate prologue and epilogue sequences */
5551 /* PUSHED is the number of bytes that are being pushed on the
5552 stack for register saves. Return the frame size, padded
5553 appropriately so that the stack stays properly aligned. */
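/* Worked example: with STACK_BOUNDARY / BITS_PER_UNIT == 8, pushed == 20
   and a 10-byte frame, ((10 + 20 + 7) & -8) - 20 == 12, so pushed + frame
   == 32 remains a multiple of 8.  */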
5554 static HOST_WIDE_INT
5555 rounded_frame_size (int pushed)
5557 HOST_WIDE_INT size = get_frame_size ();
5558 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5560 return ((size + pushed + align - 1) & -align) - pushed;
5563 /* Choose a call-clobbered target-branch register that remains
5564 unchanged along the whole function. We set it up as the return
5565 value in the prologue. */
5567 sh_media_register_for_return (void)
5569 int regno;
5570 int tr0_used;
5572 if (! current_function_is_leaf)
5573 return -1;
5574 if (lookup_attribute ("interrupt_handler",
5575 DECL_ATTRIBUTES (current_function_decl)))
5576 return -1;
5577 if (sh_cfun_interrupt_handler_p ())
5578 return -1;
5580 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
5582 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
5583 if (call_really_used_regs[regno] && ! regs_ever_live[regno])
5584 return regno;
5586 return -1;
5589 /* The maximum registers we need to save are:
5590 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
5591 - 32 floating point registers (for each pair, we save none,
5592 one single precision value, or a double precision value).
5593 - 8 target registers
5594 - add 1 entry for a delimiter. */
5595 #define MAX_SAVED_REGS (62+32+8)
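/* 62 + 32 + 8 == 102; the save_schedule structure below reserves two extra
   entries for the start and end delimiters described there.  */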
5597 typedef struct save_entry_s
5599 unsigned char reg;
5600 unsigned char mode;
5601 short offset;
5602 } save_entry;
5604 #define MAX_TEMPS 4
5606 /* There will be a delimiter entry with VOIDmode both at the start and the
5607 end of a filled in schedule. The end delimiter has the offset of the
5608 save with the smallest (i.e. most negative) offset. */
5609 typedef struct save_schedule_s
5611 save_entry entries[MAX_SAVED_REGS + 2];
5612 int temps[MAX_TEMPS+1];
5613 } save_schedule;
5615 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
5616 use reverse order. Returns the last entry written to (not counting
5617 the delimiter). OFFSET_BASE is a number to be added to all offset
5618 entries. */
5620 static save_entry *
5621 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
5622 int offset_base)
5624 int align, i;
5625 save_entry *entry = schedule->entries;
5626 int tmpx = 0;
5627 int offset;
5629 if (! current_function_interrupt)
5630 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
5631 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
5632 && ! FUNCTION_ARG_REGNO_P (i)
5633 && i != FIRST_RET_REG
5634 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
5635 && ! (current_function_calls_eh_return
5636 && (i == EH_RETURN_STACKADJ_REGNO
5637 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
5638 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
5639 schedule->temps[tmpx++] = i;
5640 entry->reg = -1;
5641 entry->mode = VOIDmode;
5642 entry->offset = offset_base;
5643 entry++;
5644 /* We loop twice: first, we save 8-byte-aligned registers at the
5645 higher addresses, which are known to be aligned. Then, we
5646 proceed to saving 32-bit registers that don't need 8-byte
5647 alignment.
5648 If this is an interrupt function, all registers that need saving
5649 need to be saved in full. Moreover, we need to postpone saving
5650 target registers until we have saved some general purpose registers
5651 we can then use as scratch registers. */
5652 offset = offset_base;
5653 for (align = 1; align >= 0; align--)
5655 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
5656 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5658 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5659 int reg = i;
5661 if (current_function_interrupt)
5663 if (TARGET_REGISTER_P (i))
5664 continue;
5665 if (GENERAL_REGISTER_P (i))
5666 mode = DImode;
5668 if (mode == SFmode && (i % 2) == 1
5669 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5670 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
5672 mode = DFmode;
5673 i--;
5674 reg--;
5677 /* If we're doing the aligned pass and this is not aligned,
5678 or we're doing the unaligned pass and this is aligned,
5679 skip it. */
5680 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
5681 != align)
5682 continue;
5684 if (current_function_interrupt
5685 && GENERAL_REGISTER_P (i)
5686 && tmpx < MAX_TEMPS)
5687 schedule->temps[tmpx++] = i;
5689 offset -= GET_MODE_SIZE (mode);
5690 entry->reg = i;
5691 entry->mode = mode;
5692 entry->offset = offset;
5693 entry++;
5695 if (align && current_function_interrupt)
5696 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
5697 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5699 offset -= GET_MODE_SIZE (DImode);
5700 entry->reg = i;
5701 entry->mode = DImode;
5702 entry->offset = offset;
5703 entry++;
5706 entry->reg = -1;
5707 entry->mode = VOIDmode;
5708 entry->offset = offset;
5709 schedule->temps[tmpx] = -1;
5710 return entry - 1;
5713 void
5714 sh_expand_prologue (void)
5716 HARD_REG_SET live_regs_mask;
5717 int d, i;
5718 int d_rounding = 0;
5719 int save_flags = target_flags;
5720 int pretend_args;
5722 current_function_interrupt = sh_cfun_interrupt_handler_p ();
5724 /* We have pretend args if we had an object sent partially in registers
5725 and partially on the stack, e.g. a large structure. */
5726 pretend_args = current_function_pretend_args_size;
5727 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
5728 && (NPARM_REGS(SImode)
5729 > current_function_args_info.arg_count[(int) SH_ARG_INT]))
5730 pretend_args = 0;
5731 output_stack_adjust (-pretend_args
5732 - current_function_args_info.stack_regs * 8,
5733 stack_pointer_rtx, 0, NULL);
5735 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
5736 /* We're going to use the PIC register to load the address of the
5737 incoming-argument decoder and/or of the return trampoline from
5738 the GOT, so make sure the PIC register is preserved and
5739 initialized. */
5740 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5742 if (TARGET_SHCOMPACT
5743 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5745 int reg;
5747 /* First, make all registers with incoming arguments that will
5748 be pushed onto the stack live, so that register renaming
5749 doesn't overwrite them. */
5750 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
5751 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
5752 >= NPARM_REGS (SImode) - reg)
5753 for (; reg < NPARM_REGS (SImode); reg++)
5754 emit_insn (gen_shcompact_preserve_incoming_args
5755 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5756 else if (CALL_COOKIE_INT_REG_GET
5757 (current_function_args_info.call_cookie, reg) == 1)
5758 emit_insn (gen_shcompact_preserve_incoming_args
5759 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5761 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
5762 stack_pointer_rtx);
5763 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
5764 GEN_INT (current_function_args_info.call_cookie));
5765 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
5766 gen_rtx_REG (SImode, R0_REG));
5768 else if (TARGET_SHMEDIA)
5770 int tr = sh_media_register_for_return ();
5772 if (tr >= 0)
5774 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
5775 gen_rtx_REG (DImode, PR_MEDIA_REG));
5777 /* ??? We should suppress saving pr when we don't need it, but this
5778 is tricky because of builtin_return_address. */
5780 /* If this function only exits with sibcalls, this copy
5781 will be flagged as dead. */
5782 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5783 const0_rtx,
5784 REG_NOTES (insn));
5788 /* Emit the code for SETUP_VARARGS. */
5789 if (current_function_stdarg)
5791 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
5793 /* Push arg regs as if they'd been provided by caller in stack. */
5794 for (i = 0; i < NPARM_REGS(SImode); i++)
5796 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
5797 rtx insn;
5799 if (i >= (NPARM_REGS(SImode)
5800 - current_function_args_info.arg_count[(int) SH_ARG_INT]
5802 break;
5803 insn = push (rn);
5804 RTX_FRAME_RELATED_P (insn) = 0;
5809 /* If we're supposed to switch stacks at function entry, do so now. */
5810 if (sp_switch)
5811 emit_insn (gen_sp_switch_1 ());
5813 d = calc_live_regs (&live_regs_mask);
5814 /* ??? Maybe we could save some switching if we can move a mode switch
5815 that already happens to be at the function start into the prologue. */
5816 if (target_flags != save_flags && ! current_function_interrupt)
5817 emit_insn (gen_toggle_sz ());
5819 if (TARGET_SH5)
5821 int offset_base, offset;
5822 rtx r0 = NULL_RTX;
5823 int offset_in_r0 = -1;
5824 int sp_in_r0 = 0;
5825 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5826 int total_size, save_size;
5827 save_schedule schedule;
5828 save_entry *entry;
5829 int *tmp_pnt;
5831 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
5832 && ! current_function_interrupt)
5833 r0 = gen_rtx_REG (Pmode, R0_REG);
5835 /* D is the actual number of bytes that we need for saving registers;
5836 however, in initial_elimination_offset we have committed to using
5837 an additional TREGS_SPACE bytes. In order to keep both the
5838 addresses of arguments supplied by the caller and of local
5839 variables valid, we must keep this gap. Place it between the incoming
5840 arguments and the actually saved registers in a bid to optimize
5841 locality of reference. */
5842 total_size = d + tregs_space;
5843 total_size += rounded_frame_size (total_size);
5844 save_size = total_size - rounded_frame_size (d);
5845 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
5846 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5847 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
5849 /* If adjusting the stack in a single step costs nothing extra, do so.
5850 I.e. either if a single addi is enough, or we need a movi anyway,
5851 and we don't exceed the maximum offset range (the test for the
5852 latter is conservative for simplicity). */
5853 if (TARGET_SHMEDIA
5854 && (CONST_OK_FOR_I10 (-total_size)
5855 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
5856 && total_size <= 2044)))
5857 d_rounding = total_size - save_size;
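/* At this point, save_size + d_rounding is the part of the frame
   that is allocated before the register saves below, and the
   remaining rounded_frame_size (d) - d_rounding is allocated
   afterwards; the two stack adjustments together add up to
   total_size.  */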
5859 offset_base = d + d_rounding;
5861 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
5862 0, NULL);
5864 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
5865 tmp_pnt = schedule.temps;
5866 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
5868 enum machine_mode mode = entry->mode;
5869 unsigned int reg = entry->reg;
5870 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
5871 rtx orig_reg_rtx;
5873 offset = entry->offset;
5875 reg_rtx = gen_rtx_REG (mode, reg);
5877 mem_rtx = gen_frame_mem (mode,
5878 gen_rtx_PLUS (Pmode,
5879 stack_pointer_rtx,
5880 GEN_INT (offset)));
5882 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
5884 gcc_assert (r0);
5885 mem_rtx = NULL_RTX;
5887 try_pre_dec:
5889 if (HAVE_PRE_DECREMENT
5890 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
5891 || mem_rtx == NULL_RTX
5892 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5894 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
5896 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
5897 pre_dec_ok);
5899 pre_dec = NULL_RTX;
5901 break;
5903 pre_dec_ok:
5904 mem_rtx = NULL_RTX;
5905 offset += GET_MODE_SIZE (mode);
5907 while (0);
5909 if (mem_rtx != NULL_RTX)
5910 goto addr_ok;
5912 if (offset_in_r0 == -1)
5914 emit_move_insn (r0, GEN_INT (offset));
5915 offset_in_r0 = offset;
5917 else if (offset != offset_in_r0)
5919 emit_move_insn (r0,
5920 gen_rtx_PLUS
5921 (Pmode, r0,
5922 GEN_INT (offset - offset_in_r0)));
5923 offset_in_r0 += offset - offset_in_r0;
5926 if (pre_dec != NULL_RTX)
5928 if (! sp_in_r0)
5930 emit_move_insn (r0,
5931 gen_rtx_PLUS
5932 (Pmode, r0, stack_pointer_rtx));
5933 sp_in_r0 = 1;
5936 offset -= GET_MODE_SIZE (mode);
5937 offset_in_r0 -= GET_MODE_SIZE (mode);
5939 mem_rtx = pre_dec;
5941 else if (sp_in_r0)
5942 mem_rtx = gen_frame_mem (mode, r0);
5943 else
5944 mem_rtx = gen_frame_mem (mode,
5945 gen_rtx_PLUS (Pmode,
5946 stack_pointer_rtx,
5947 r0));
5949 /* We must not use an r0-based address for target-branch
5950 registers or for special registers without pre-dec
5951 memory addresses, since we store their values in r0
5952 first. */
5953 gcc_assert (!TARGET_REGISTER_P (reg)
5954 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
5955 || mem_rtx == pre_dec));
5957 addr_ok:
5958 orig_reg_rtx = reg_rtx;
5959 if (TARGET_REGISTER_P (reg)
5960 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5961 && mem_rtx != pre_dec))
5963 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
5965 emit_move_insn (tmp_reg, reg_rtx);
5967 if (REGNO (tmp_reg) == R0_REG)
5969 offset_in_r0 = -1;
5970 sp_in_r0 = 0;
5971 gcc_assert (!refers_to_regno_p
5972 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
5975 if (*++tmp_pnt <= 0)
5976 tmp_pnt = schedule.temps;
5978 reg_rtx = tmp_reg;
5981 rtx insn;
5983 /* Mark as interesting for dwarf cfi generator */
5984 insn = emit_move_insn (mem_rtx, reg_rtx);
5985 RTX_FRAME_RELATED_P (insn) = 1;
5986 /* If we use an intermediate register for the save, we can't
5987 describe this exactly in CFI as a copy of the to-be-saved
5988 register into the temporary register followed by a store of the
5989 temporary register to the stack, because the temporary register
5990 can have a different natural size than the to-be-saved register.
5991 Thus, we gloss over the intermediate copy and pretend we do
5992 a direct save from the to-be-saved register. */
5993 if (REGNO (reg_rtx) != reg)
5995 rtx set, note_rtx;
5997 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
5998 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
5999 REG_NOTES (insn));
6000 REG_NOTES (insn) = note_rtx;
6003 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
6005 rtx reg_rtx = gen_rtx_REG (mode, reg);
6006 rtx set, note_rtx;
6007 rtx mem_rtx = gen_frame_mem (mode,
6008 gen_rtx_PLUS (Pmode,
6009 stack_pointer_rtx,
6010 GEN_INT (offset)));
6012 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
6013 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6014 REG_NOTES (insn));
6015 REG_NOTES (insn) = note_rtx;
6020 gcc_assert (entry->offset == d_rounding);
6022 else
6023 push_regs (&live_regs_mask, current_function_interrupt);
6025 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
6027 rtx insn = get_last_insn ();
6028 rtx last = emit_insn (gen_GOTaddr2picreg ());
6030 /* Mark these insns as possibly dead. Sometimes, flow2 may
6031 delete all uses of the PIC register. In this case, let it
6032 delete the initialization too. */
6035 insn = NEXT_INSN (insn);
6037 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6038 const0_rtx,
6039 REG_NOTES (insn));
6041 while (insn != last);
6044 if (SHMEDIA_REGS_STACK_ADJUST ())
6046 /* This must NOT go through the PLT, otherwise mach and macl
6047 may be clobbered. */
6048 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6049 (TARGET_FPU_ANY
6050 ? "__GCC_push_shmedia_regs"
6051 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
6052 emit_insn (gen_shmedia_save_restore_regs_compact
6053 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
6056 if (target_flags != save_flags && ! current_function_interrupt)
6058 rtx insn = emit_insn (gen_toggle_sz ());
6060 /* If we're lucky, a mode switch in the function body will
6061 overwrite fpscr, turning this insn dead. Tell flow this
6062 insn is ok to delete. */
6063 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6064 const0_rtx,
6065 REG_NOTES (insn));
6068 target_flags = save_flags;
6070 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
6071 stack_pointer_rtx, 0, NULL);
6073 if (frame_pointer_needed)
6074 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
6076 if (TARGET_SHCOMPACT
6077 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6079 /* This must NOT go through the PLT, otherwise mach and macl
6080 may be clobbered. */
6081 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6082 "__GCC_shcompact_incoming_args", SFUNC_GOT);
6083 emit_insn (gen_shcompact_incoming_args ());
6087 void
6088 sh_expand_epilogue (bool sibcall_p)
6090 HARD_REG_SET live_regs_mask;
6091 int d, i;
6092 int d_rounding = 0;
6094 int save_flags = target_flags;
6095 int frame_size, save_size;
6096 int fpscr_deferred = 0;
6097 int e = sibcall_p ? -1 : 1;
6099 d = calc_live_regs (&live_regs_mask);
6101 save_size = d;
6102 frame_size = rounded_frame_size (d);
6104 if (TARGET_SH5)
6106 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6107 int total_size;
6108 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
6109 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6110 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
6112 total_size = d + tregs_space;
6113 total_size += rounded_frame_size (total_size);
6114 save_size = total_size - frame_size;
6116 /* If adjusting the stack in a single step costs nothing extra, do so.
6117 I.e. either if a single addi is enough, or we need a movi anyway,
6118 and we don't exceed the maximum offset range (the test for the
6119 latter is conservative for simplicity). */
6120 if (TARGET_SHMEDIA
6121 && ! frame_pointer_needed
6122 && (CONST_OK_FOR_I10 (total_size)
6123 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
6124 && total_size <= 2044)))
6125 d_rounding = frame_size;
6127 frame_size -= d_rounding;
6130 if (frame_pointer_needed)
6132 /* We must avoid scheduling the epilogue with previous basic blocks
6133 when exception handling is enabled. See PR/18032. */
6134 if (flag_exceptions)
6135 emit_insn (gen_blockage ());
6136 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
6137 &live_regs_mask);
6139 /* We must avoid moving the stack pointer adjustment past code
6140 which reads from the local frame, else an interrupt could
6141 occur after the SP adjustment and clobber data in the local
6142 frame. */
6143 emit_insn (gen_blockage ());
6144 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
6146 else if (frame_size)
6148 /* We must avoid moving the stack pointer adjustment past code
6149 which reads from the local frame, else an interrupt could
6150 occur after the SP adjustment and clobber data in the local
6151 frame. */
6152 emit_insn (gen_blockage ());
6153 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
6156 if (SHMEDIA_REGS_STACK_ADJUST ())
6158 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6159 (TARGET_FPU_ANY
6160 ? "__GCC_pop_shmedia_regs"
6161 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
6162 /* This must NOT go through the PLT, otherwise mach and macl
6163 may be clobbered. */
6164 emit_insn (gen_shmedia_save_restore_regs_compact
6165 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
6168 /* Pop all the registers. */
6170 if (target_flags != save_flags && ! current_function_interrupt)
6171 emit_insn (gen_toggle_sz ());
6172 if (TARGET_SH5)
6174 int offset_base, offset;
6175 int offset_in_r0 = -1;
6176 int sp_in_r0 = 0;
6177 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
6178 save_schedule schedule;
6179 save_entry *entry;
6180 int *tmp_pnt;
6182 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
6183 offset_base = -entry[1].offset + d_rounding;
6184 tmp_pnt = schedule.temps;
6185 for (; entry->mode != VOIDmode; entry--)
6187 enum machine_mode mode = entry->mode;
6188 int reg = entry->reg;
6189 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
6191 offset = offset_base + entry->offset;
6192 reg_rtx = gen_rtx_REG (mode, reg);
6194 mem_rtx = gen_frame_mem (mode,
6195 gen_rtx_PLUS (Pmode,
6196 stack_pointer_rtx,
6197 GEN_INT (offset)));
6199 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
6201 mem_rtx = NULL_RTX;
6203 try_post_inc:
6205 if (HAVE_POST_INCREMENT
6206 && (offset == offset_in_r0
6207 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
6208 && mem_rtx == NULL_RTX)
6209 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6211 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
6213 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
6214 post_inc_ok);
6216 post_inc = NULL_RTX;
6218 break;
6220 post_inc_ok:
6221 mem_rtx = NULL_RTX;
6223 while (0);
6225 if (mem_rtx != NULL_RTX)
6226 goto addr_ok;
6228 if (offset_in_r0 == -1)
6230 emit_move_insn (r0, GEN_INT (offset));
6231 offset_in_r0 = offset;
6233 else if (offset != offset_in_r0)
6235 emit_move_insn (r0,
6236 gen_rtx_PLUS
6237 (Pmode, r0,
6238 GEN_INT (offset - offset_in_r0)));
6239 offset_in_r0 += offset - offset_in_r0;
6242 if (post_inc != NULL_RTX)
6244 if (! sp_in_r0)
6246 emit_move_insn (r0,
6247 gen_rtx_PLUS
6248 (Pmode, r0, stack_pointer_rtx));
6249 sp_in_r0 = 1;
6252 mem_rtx = post_inc;
6254 offset_in_r0 += GET_MODE_SIZE (mode);
6256 else if (sp_in_r0)
6257 mem_rtx = gen_frame_mem (mode, r0);
6258 else
6259 mem_rtx = gen_frame_mem (mode,
6260 gen_rtx_PLUS (Pmode,
6261 stack_pointer_rtx,
6262 r0));
6264 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6265 || mem_rtx == post_inc);
6267 addr_ok:
6268 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6269 && mem_rtx != post_inc)
6271 insn = emit_move_insn (r0, mem_rtx);
6272 mem_rtx = r0;
6274 else if (TARGET_REGISTER_P (reg))
6276 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
6278 /* Give the scheduler a bit of freedom by using up to
6279 MAX_TEMPS registers in a round-robin fashion. */
6280 insn = emit_move_insn (tmp_reg, mem_rtx);
6281 mem_rtx = tmp_reg;
6282 if (*++tmp_pnt < 0)
6283 tmp_pnt = schedule.temps;
6286 insn = emit_move_insn (reg_rtx, mem_rtx);
6287 if (reg == PR_MEDIA_REG && sh_media_register_for_return () >= 0)
6288 /* This is dead, unless we return with a sibcall. */
6289 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6290 const0_rtx,
6291 REG_NOTES (insn));
6294 gcc_assert (entry->offset + offset_base == d + d_rounding);
6296 else /* ! TARGET_SH5 */
6298 save_size = 0;
6299 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6300 pop (PR_REG);
6301 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
6303 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
6305 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
6306 && hard_regs_intersect_p (&live_regs_mask,
6307 &reg_class_contents[DF_REGS]))
6308 fpscr_deferred = 1;
6309 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
6310 pop (j);
6311 if (j == FIRST_FP_REG && fpscr_deferred)
6312 pop (FPSCR_REG);
6316 if (target_flags != save_flags && ! current_function_interrupt)
6317 emit_insn (gen_toggle_sz ());
6318 target_flags = save_flags;
6320 output_stack_adjust (current_function_pretend_args_size
6321 + save_size + d_rounding
6322 + current_function_args_info.stack_regs * 8,
6323 stack_pointer_rtx, e, NULL);
6325 if (current_function_calls_eh_return)
6326 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
6327 EH_RETURN_STACKADJ_RTX));
6329 /* Switch back to the normal stack if necessary. */
6330 if (sp_switch)
6331 emit_insn (gen_sp_switch_2 ());
6333 /* Tell flow the insn that pops PR isn't dead. */
6334 /* PR_REG will never be live in SHmedia mode, and we don't need to
6335 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6336 by the return pattern. */
6337 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6338 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
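/* Cached result of sh_need_epilogue: 0 means not yet computed for
   the current function, 1 means an epilogue is needed, -1 means the
   epilogue would be empty.  Reset by sh_output_function_epilogue.  */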
6341 static int sh_need_epilogue_known = 0;
6344 sh_need_epilogue (void)
6346 if (! sh_need_epilogue_known)
6348 rtx epilogue;
6350 start_sequence ();
6351 sh_expand_epilogue (0);
6352 epilogue = get_insns ();
6353 end_sequence ();
6354 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
6356 return sh_need_epilogue_known > 0;
6359 /* Emit code to change the current function's return address to RA.
6360 TEMP is available as a scratch register, if needed. */
6362 void
6363 sh_set_return_address (rtx ra, rtx tmp)
6365 HARD_REG_SET live_regs_mask;
6366 int d;
6367 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6368 int pr_offset;
6370 d = calc_live_regs (&live_regs_mask);
6372 /* If pr_reg isn't live, we can set it (or the register given by
6373 sh_media_register_for_return) directly. */
6374 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6376 rtx rr;
6378 if (TARGET_SHMEDIA)
6380 int rr_regno = sh_media_register_for_return ();
6382 if (rr_regno < 0)
6383 rr_regno = pr_reg;
6385 rr = gen_rtx_REG (DImode, rr_regno);
6387 else
6388 rr = gen_rtx_REG (SImode, pr_reg);
6390 emit_insn (GEN_MOV (rr, ra));
6391 /* Tell flow the register for return isn't dead. */
6392 emit_insn (gen_rtx_USE (VOIDmode, rr));
6393 return;
6396 if (TARGET_SH5)
6398 int offset;
6399 save_schedule schedule;
6400 save_entry *entry;
6402 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
6403 offset = entry[1].offset;
6404 for (; entry->mode != VOIDmode; entry--)
6405 if (entry->reg == pr_reg)
6406 goto found;
6408 /* We couldn't find the pr register. */
6409 gcc_unreachable ();
6411 found:
6412 offset = entry->offset - offset;
6413 pr_offset = (rounded_frame_size (d) + offset
6414 + SHMEDIA_REGS_STACK_ADJUST ());
6416 else
6417 pr_offset = rounded_frame_size (d);
6419 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6420 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
6422 tmp = gen_frame_mem (Pmode, tmp);
6423 emit_insn (GEN_MOV (tmp, ra));
6426 /* Clear variables at function end. */
6428 static void
6429 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6430 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6432 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
6433 sh_need_epilogue_known = 0;
6434 sp_switch = NULL_RTX;
6437 static rtx
6438 sh_builtin_saveregs (void)
6440 /* First unnamed integer register. */
6441 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
6442 /* Number of integer registers we need to save. */
6443 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6444 /* First unnamed SFmode float reg */
6445 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6446 /* Number of SFmode float regs to save. */
6447 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6448 rtx regbuf, fpregs;
6449 int bufsize, regno;
6450 HOST_WIDE_INT alias_set;
6452 if (TARGET_SH5)
6454 if (n_intregs)
6456 int pushregs = n_intregs;
6458 while (pushregs < NPARM_REGS (SImode) - 1
6459 && (CALL_COOKIE_INT_REG_GET
6460 (current_function_args_info.call_cookie,
6461 NPARM_REGS (SImode) - pushregs)
6462 == 1))
6464 current_function_args_info.call_cookie
6465 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6466 - pushregs, 1);
6467 pushregs++;
6470 if (pushregs == NPARM_REGS (SImode))
6471 current_function_args_info.call_cookie
6472 |= (CALL_COOKIE_INT_REG (0, 1)
6473 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6474 else
6475 current_function_args_info.call_cookie
6476 |= CALL_COOKIE_STACKSEQ (pushregs);
6478 current_function_pretend_args_size += 8 * n_intregs;
6480 if (TARGET_SHCOMPACT)
6481 return const0_rtx;
6484 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
6486 error ("__builtin_saveregs not supported by this subtarget");
6487 return const0_rtx;
6490 if (TARGET_SHMEDIA)
6491 n_floatregs = 0;
6493 /* Allocate block of memory for the regs. */
6494 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6495 Or can assign_stack_local accept a 0 SIZE argument? */
6496 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
6498 if (TARGET_SHMEDIA)
6499 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
6500 else if (n_floatregs & 1)
6502 rtx addr;
6504 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6505 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
6506 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6507 regbuf = change_address (regbuf, BLKmode, addr);
6509 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
6511 rtx addr, mask;
6513 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6514 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
6515 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
6516 emit_insn (gen_andsi3 (addr, addr, mask));
6517 regbuf = change_address (regbuf, BLKmode, addr);
6519 else
6520 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
6521 alias_set = get_varargs_alias_set ();
6522 set_mem_alias_set (regbuf, alias_set);
6524 /* Save int args.
6525 This is optimized to only save the regs that are necessary. Explicitly
6526 named args need not be saved. */
6527 if (n_intregs > 0)
6528 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
6529 adjust_address (regbuf, BLKmode,
6530 n_floatregs * UNITS_PER_WORD),
6531 n_intregs);
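/* The register save buffer is thus laid out with the SFmode float
   registers in its low part and the integer registers immediately
   above them, at offset n_floatregs * UNITS_PER_WORD.  */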
6533 if (TARGET_SHMEDIA)
6534 /* Return the address of the regbuf. */
6535 return XEXP (regbuf, 0);
6537 /* Save float args.
6538 This is optimized to only save the regs that are necessary. Explicitly
6539 named args need not be saved.
6540 We explicitly build a pointer to the buffer because it halves the insn
6541 count when not optimizing (otherwise the pointer is built for each reg
6542 saved).
6543 We emit the moves in reverse order so that we can use predecrement. */
6545 fpregs = copy_to_mode_reg (Pmode,
6546 plus_constant (XEXP (regbuf, 0),
6547 n_floatregs * UNITS_PER_WORD));
6548 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
6550 rtx mem;
6551 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
6553 emit_insn (gen_addsi3 (fpregs, fpregs,
6554 GEN_INT (-2 * UNITS_PER_WORD)));
6555 mem = change_address (regbuf, DFmode, fpregs);
6556 emit_move_insn (mem,
6557 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
6559 regno = first_floatreg;
6560 if (regno & 1)
6562 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6563 mem = change_address (regbuf, SFmode, fpregs);
6564 emit_move_insn (mem,
6565 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
6566 - (TARGET_LITTLE_ENDIAN != 0)));
6569 else
6570 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
6572 rtx mem;
6574 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6575 mem = change_address (regbuf, SFmode, fpregs);
6576 emit_move_insn (mem,
6577 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
6580 /* Return the address of the regbuf. */
6581 return XEXP (regbuf, 0);
6584 /* Define the `__builtin_va_list' type for the ABI. */
6586 static tree
6587 sh_build_builtin_va_list (void)
6589 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6590 tree record;
6592 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
6593 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6594 return ptr_type_node;
6596 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6598 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
6599 ptr_type_node);
6600 f_next_o_limit = build_decl (FIELD_DECL,
6601 get_identifier ("__va_next_o_limit"),
6602 ptr_type_node);
6603 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
6604 ptr_type_node);
6605 f_next_fp_limit = build_decl (FIELD_DECL,
6606 get_identifier ("__va_next_fp_limit"),
6607 ptr_type_node);
6608 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
6609 ptr_type_node);
6611 DECL_FIELD_CONTEXT (f_next_o) = record;
6612 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
6613 DECL_FIELD_CONTEXT (f_next_fp) = record;
6614 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
6615 DECL_FIELD_CONTEXT (f_next_stack) = record;
6617 TYPE_FIELDS (record) = f_next_o;
6618 TREE_CHAIN (f_next_o) = f_next_o_limit;
6619 TREE_CHAIN (f_next_o_limit) = f_next_fp;
6620 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
6621 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
6623 layout_type (record);
6625 return record;
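/* A rough C equivalent of the record built above (the field meanings
   follow from sh_va_start and sh_gimplify_va_arg_expr below):

       struct __builtin_va_list
       {
         void *__va_next_o;          next integer register argument slot
         void *__va_next_o_limit;    end of the integer register save area
         void *__va_next_fp;         next FP register argument slot
         void *__va_next_fp_limit;   end of the FP register save area
         void *__va_next_stack;      next stack-passed argument
       };
*/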
6628 /* Implement `va_start' for varargs and stdarg. */
6630 void
6631 sh_va_start (tree valist, rtx nextarg)
6633 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6634 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6635 tree t, u;
6636 int nfp, nint;
6638 if (TARGET_SH5)
6640 expand_builtin_saveregs ();
6641 std_expand_builtin_va_start (valist, nextarg);
6642 return;
6645 if ((! TARGET_SH2E && ! TARGET_SH4)
6646 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6648 std_expand_builtin_va_start (valist, nextarg);
6649 return;
6652 f_next_o = TYPE_FIELDS (va_list_type_node);
6653 f_next_o_limit = TREE_CHAIN (f_next_o);
6654 f_next_fp = TREE_CHAIN (f_next_o_limit);
6655 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6656 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6658 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6659 NULL_TREE);
6660 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6661 valist, f_next_o_limit, NULL_TREE);
6662 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
6663 NULL_TREE);
6664 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6665 valist, f_next_fp_limit, NULL_TREE);
6666 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6667 valist, f_next_stack, NULL_TREE);
6669 /* Call __builtin_saveregs. */
6670 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
6671 t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
6672 TREE_SIDE_EFFECTS (t) = 1;
6673 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6675 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
6676 if (nfp < 8)
6677 nfp = 8 - nfp;
6678 else
6679 nfp = 0;
6680 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6681 build_int_cst (NULL_TREE, UNITS_PER_WORD * nfp)));
6682 t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
6683 TREE_SIDE_EFFECTS (t) = 1;
6684 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6686 t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
6687 TREE_SIDE_EFFECTS (t) = 1;
6688 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6690 nint = current_function_args_info.arg_count[SH_ARG_INT];
6691 if (nint < 4)
6692 nint = 4 - nint;
6693 else
6694 nint = 0;
6695 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6696 build_int_cst (NULL_TREE, UNITS_PER_WORD * nint)));
6697 t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
6698 TREE_SIDE_EFFECTS (t) = 1;
6699 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6701 u = make_tree (ptr_type_node, nextarg);
6702 t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
6703 TREE_SIDE_EFFECTS (t) = 1;
6704 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
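/* So, after va_start: next_fp points at the register save area
   returned by __builtin_saveregs, next_fp_limit lies UNITS_PER_WORD
   bytes beyond it for every unnamed FP argument register, next_o
   starts where next_fp_limit ends, next_o_limit lies UNITS_PER_WORD
   bytes beyond next_o for every unnamed integer argument register,
   and next_stack points at the first stack-passed argument.  */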
6707 /* TYPE is a RECORD_TYPE. If there is only a single non-zero-sized
6708 member, return it. */
6709 static tree
6710 find_sole_member (tree type)
6712 tree field, member = NULL_TREE;
6714 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6716 if (TREE_CODE (field) != FIELD_DECL)
6717 continue;
6718 if (!DECL_SIZE (field))
6719 return NULL_TREE;
6720 if (integer_zerop (DECL_SIZE (field)))
6721 continue;
6722 if (member)
6723 return NULL_TREE;
6724 member = field;
6726 return member;
6728 /* Implement `va_arg'. */
6730 static tree
6731 sh_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p,
6732 tree *post_p ATTRIBUTE_UNUSED)
6734 HOST_WIDE_INT size, rsize;
6735 tree tmp, pptr_type_node;
6736 tree addr, lab_over = NULL, result = NULL;
6737 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
6739 if (pass_by_ref)
6740 type = build_pointer_type (type);
6742 size = int_size_in_bytes (type);
6743 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6744 pptr_type_node = build_pointer_type (ptr_type_node);
6746 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
6747 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
6749 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6750 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6751 int pass_as_float;
6752 tree lab_false;
6753 tree member;
6755 f_next_o = TYPE_FIELDS (va_list_type_node);
6756 f_next_o_limit = TREE_CHAIN (f_next_o);
6757 f_next_fp = TREE_CHAIN (f_next_o_limit);
6758 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6759 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6761 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6762 NULL_TREE);
6763 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6764 valist, f_next_o_limit, NULL_TREE);
6765 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
6766 valist, f_next_fp, NULL_TREE);
6767 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6768 valist, f_next_fp_limit, NULL_TREE);
6769 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6770 valist, f_next_stack, NULL_TREE);
6772 /* Structures with a single member with a distinct mode are passed
6773 like their member. This is relevant if the latter has a REAL_TYPE
6774 or COMPLEX_TYPE type. */
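/* For example, struct { double d; } is treated here like a plain
   double, and struct { _Complex float c; } like a plain
   _Complex float.  */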
6775 while (TREE_CODE (type) == RECORD_TYPE
6776 && (member = find_sole_member (type))
6777 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
6778 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
6779 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
6781 tree field_type = TREE_TYPE (member);
6783 if (TYPE_MODE (type) == TYPE_MODE (field_type))
6784 type = field_type;
6785 else
6787 gcc_assert ((TYPE_ALIGN (type)
6788 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
6789 || (TYPE_ALIGN (type)
6790 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
6791 break;
6795 if (TARGET_SH4)
6797 pass_as_float = ((TREE_CODE (type) == REAL_TYPE && size <= 8)
6798 || (TREE_CODE (type) == COMPLEX_TYPE
6799 && TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
6800 && size <= 16));
6802 else
6804 pass_as_float = (TREE_CODE (type) == REAL_TYPE && size == 4);
6807 addr = create_tmp_var (pptr_type_node, NULL);
6808 lab_false = create_artificial_label ();
6809 lab_over = create_artificial_label ();
6811 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
6813 if (pass_as_float)
6815 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
6816 tree cmp;
6817 bool is_double = size == 8 && TREE_CODE (type) == REAL_TYPE;
6819 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
6820 tmp = build2 (MODIFY_EXPR, void_type_node, addr, tmp);
6821 gimplify_and_add (tmp, pre_p);
6823 tmp = build2 (MODIFY_EXPR, ptr_type_node, next_fp_tmp, valist);
6824 gimplify_and_add (tmp, pre_p);
6825 tmp = next_fp_limit;
6826 if (size > 4 && !is_double)
6827 tmp = build2 (PLUS_EXPR, TREE_TYPE (tmp), tmp,
6828 fold_convert (TREE_TYPE (tmp), size_int (4 - size)));
6829 tmp = build (GE_EXPR, boolean_type_node, next_fp_tmp, tmp);
6830 cmp = build (COND_EXPR, void_type_node, tmp,
6831 build (GOTO_EXPR, void_type_node, lab_false),
6832 NULL);
6833 if (!is_double)
6834 gimplify_and_add (cmp, pre_p);
6836 if (TYPE_ALIGN (type) > BITS_PER_WORD || (is_double || size == 16))
6838 tmp = fold_convert (ptr_type_node, size_int (UNITS_PER_WORD));
6839 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp_tmp, tmp);
6840 tmp = build (PLUS_EXPR, ptr_type_node, next_fp_tmp, tmp);
6841 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp_tmp, tmp);
6842 gimplify_and_add (tmp, pre_p);
6844 if (is_double)
6845 gimplify_and_add (cmp, pre_p);
6847 #ifdef FUNCTION_ARG_SCmode_WART
6848 if (TYPE_MODE (type) == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
6850 tree subtype = TREE_TYPE (type);
6851 tree real, imag;
6853 imag
6854 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
6855 imag = get_initialized_tmp_var (imag, pre_p, NULL);
6857 real
6858 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
6859 real = get_initialized_tmp_var (real, pre_p, NULL);
6861 result = build (COMPLEX_EXPR, type, real, imag);
6862 result = get_initialized_tmp_var (result, pre_p, NULL);
6864 #endif /* FUNCTION_ARG_SCmode_WART */
6866 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6867 gimplify_and_add (tmp, pre_p);
6869 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6870 gimplify_and_add (tmp, pre_p);
6872 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6873 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6874 gimplify_and_add (tmp, pre_p);
6875 tmp = build2 (MODIFY_EXPR, ptr_type_node, next_fp_tmp, valist);
6876 gimplify_and_add (tmp, pre_p);
6878 tmp = build2 (MODIFY_EXPR, ptr_type_node, valist, next_fp_tmp);
6879 gimplify_and_add (tmp, post_p);
6880 valist = next_fp_tmp;
6882 else
6884 tmp = fold_convert (ptr_type_node, size_int (rsize));
6885 tmp = build (PLUS_EXPR, ptr_type_node, next_o, tmp);
6886 tmp = build (GT_EXPR, boolean_type_node, tmp, next_o_limit);
6887 tmp = build (COND_EXPR, void_type_node, tmp,
6888 build (GOTO_EXPR, void_type_node, lab_false),
6889 NULL);
6890 gimplify_and_add (tmp, pre_p);
6892 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
6893 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6894 gimplify_and_add (tmp, pre_p);
6896 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6897 gimplify_and_add (tmp, pre_p);
6899 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6900 gimplify_and_add (tmp, pre_p);
6902 if (size > 4 && ! TARGET_SH4)
6904 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
6905 gimplify_and_add (tmp, pre_p);
6908 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6909 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6910 gimplify_and_add (tmp, pre_p);
6913 if (!result)
6915 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6916 gimplify_and_add (tmp, pre_p);
6920 /* ??? In va-sh.h, there had been code to make values larger than
6921 size 8 indirect. This does not match the FUNCTION_ARG macros. */
6923 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
6924 if (result)
6926 tmp = build (MODIFY_EXPR, void_type_node, result, tmp);
6927 gimplify_and_add (tmp, pre_p);
6929 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6930 gimplify_and_add (tmp, pre_p);
6932 else
6933 result = tmp;
6935 if (pass_by_ref)
6936 result = build_va_arg_indirect_ref (result);
6938 return result;
6941 bool
6942 sh_promote_prototypes (tree type)
6944 if (TARGET_HITACHI)
6945 return 0;
6946 if (! type)
6947 return 1;
6948 return ! sh_attr_renesas_p (type);
6951 /* Whether an argument must be passed by reference. On SHcompact, we
6952 pretend that arguments wider than 32 bits that would have been passed in
6953 registers are passed by reference, so that an SHmedia trampoline
6954 loads them into the full 64-bit registers. */
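/* For example, a named long long (DImode) argument that would still
   fit entirely in the integer argument registers is passed by
   reference under this scheme.  */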
6956 static int
6957 shcompact_byref (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6958 tree type, bool named)
6960 unsigned HOST_WIDE_INT size;
6962 if (type)
6963 size = int_size_in_bytes (type);
6964 else
6965 size = GET_MODE_SIZE (mode);
6967 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
6968 && (!named
6969 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
6970 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
6971 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
6972 && size > 4
6973 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
6974 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
6975 return size;
6976 else
6977 return 0;
6980 static bool
6981 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6982 tree type, bool named)
6984 if (targetm.calls.must_pass_in_stack (mode, type))
6985 return true;
6987 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
6988 wants to know about pass-by-reference semantics for incoming
6989 arguments. */
6990 if (! cum)
6991 return false;
6993 if (TARGET_SHCOMPACT)
6995 cum->byref = shcompact_byref (cum, mode, type, named);
6996 return cum->byref != 0;
6999 return false;
7002 static bool
7003 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7004 tree type, bool named ATTRIBUTE_UNUSED)
7006 /* ??? How can it possibly be correct to return true only on the
7007 caller side of the equation? Is there someplace else in the
7008 sh backend that's magically producing the copies? */
7009 return (cum->outgoing
7010 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
7011 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
7014 static int
7015 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7016 tree type, bool named ATTRIBUTE_UNUSED)
7018 int words = 0;
7020 if (!TARGET_SH5
7021 && PASS_IN_REG_P (*cum, mode, type)
7022 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
7023 && (ROUND_REG (*cum, mode)
7024 + (mode != BLKmode
7025 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7026 : ROUND_ADVANCE (int_size_in_bytes (type)))
7027 > NPARM_REGS (mode)))
7028 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
7030 else if (!TARGET_SHCOMPACT
7031 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7032 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
7034 return words * UNITS_PER_WORD;
7038 /* Define where to put the arguments to a function.
7039 Value is zero to push the argument on the stack,
7040 or a hard register in which to store the argument.
7042 MODE is the argument's machine mode.
7043 TYPE is the data type of the argument (as a tree).
7044 This is null for libcalls where that information may
7045 not be available.
7046 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7047 the preceding args and about the function being called.
7048 NAMED is nonzero if this argument is a named parameter
7049 (otherwise it is an extra parameter matching an ellipsis).
7051 On SH the first args are normally in registers
7052 and the rest are pushed. Any arg that starts within the first
7053 NPARM_REGS words is at least partially passed in a register unless
7054 its data type forbids. */
7058 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7059 tree type, int named)
7061 if (! TARGET_SH5 && mode == VOIDmode)
7062 return GEN_INT (ca->renesas_abi ? 1 : 0);
7064 if (! TARGET_SH5
7065 && PASS_IN_REG_P (*ca, mode, type)
7066 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
7068 int regno;
7070 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
7071 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
7073 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
7074 gen_rtx_REG (SFmode,
7075 BASE_ARG_REG (mode)
7076 + (ROUND_REG (*ca, mode) ^ 1)),
7077 const0_rtx);
7078 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
7079 gen_rtx_REG (SFmode,
7080 BASE_ARG_REG (mode)
7081 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
7082 GEN_INT (4));
7083 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
7086 /* If the alignment of a DF value causes an SF register to be
7087 skipped, we will use that skipped register for the next SF
7088 value. */
7089 if ((TARGET_HITACHI || ca->renesas_abi)
7090 && ca->free_single_fp_reg
7091 && mode == SFmode)
7092 return gen_rtx_REG (mode, ca->free_single_fp_reg);
7094 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
7095 ^ (mode == SFmode && TARGET_SH4
7096 && TARGET_LITTLE_ENDIAN != 0
7097 && ! TARGET_HITACHI && ! ca->renesas_abi);
7098 return gen_rtx_REG (mode, regno);
7102 if (TARGET_SH5)
7104 if (mode == VOIDmode && TARGET_SHCOMPACT)
7105 return GEN_INT (ca->call_cookie);
7107 /* The following test assumes unnamed arguments are promoted to
7108 DFmode. */
7109 if (mode == SFmode && ca->free_single_fp_reg)
7110 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
7112 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
7113 && (named || ! ca->prototype_p)
7114 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
7116 if (! ca->prototype_p && TARGET_SHMEDIA)
7117 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
7119 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
7120 FIRST_FP_PARM_REG
7121 + ca->arg_count[(int) SH_ARG_FLOAT]);
7124 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
7125 && (! TARGET_SHCOMPACT
7126 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
7127 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
7128 type, named))))
7130 return gen_rtx_REG (mode, (FIRST_PARM_REG
7131 + ca->arg_count[(int) SH_ARG_INT]));
7134 return 0;
7137 return 0;
7140 /* Update the data in CUM to advance over an argument
7141 of mode MODE and data type TYPE.
7142 (TYPE is null for libcalls where that information may not be
7143 available.) */
7145 void
7146 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7147 tree type, int named)
7149 if (ca->force_mem)
7150 ca->force_mem = 0;
7151 else if (TARGET_SH5)
7153 tree type2 = (ca->byref && type
7154 ? TREE_TYPE (type)
7155 : type);
7156 enum machine_mode mode2 = (ca->byref && type
7157 ? TYPE_MODE (type2)
7158 : mode);
7159 int dwords = ((ca->byref
7160 ? ca->byref
7161 : mode2 == BLKmode
7162 ? int_size_in_bytes (type2)
7163 : GET_MODE_SIZE (mode2)) + 7) / 8;
7164 int numregs = MIN (dwords, NPARM_REGS (SImode)
7165 - ca->arg_count[(int) SH_ARG_INT]);
7167 if (numregs)
7169 ca->arg_count[(int) SH_ARG_INT] += numregs;
7170 if (TARGET_SHCOMPACT
7171 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
7173 ca->call_cookie
7174 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7175 - numregs, 1);
7176 /* N.B. We want this also for outgoing. */
7177 ca->stack_regs += numregs;
7179 else if (ca->byref)
7181 if (! ca->outgoing)
7182 ca->stack_regs += numregs;
7183 ca->byref_regs += numregs;
7184 ca->byref = 0;
7186 ca->call_cookie
7187 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7188 - numregs, 2);
7189 while (--numregs);
7190 ca->call_cookie
7191 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7192 - 1, 1);
7194 else if (dwords > numregs)
7196 int pushregs = numregs;
7198 if (TARGET_SHCOMPACT)
7199 ca->stack_regs += numregs;
7200 while (pushregs < NPARM_REGS (SImode) - 1
7201 && (CALL_COOKIE_INT_REG_GET
7202 (ca->call_cookie,
7203 NPARM_REGS (SImode) - pushregs)
7204 == 1))
7206 ca->call_cookie
7207 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7208 - pushregs, 1);
7209 pushregs++;
7211 if (numregs == NPARM_REGS (SImode))
7212 ca->call_cookie
7213 |= CALL_COOKIE_INT_REG (0, 1)
7214 | CALL_COOKIE_STACKSEQ (numregs - 1);
7215 else
7216 ca->call_cookie
7217 |= CALL_COOKIE_STACKSEQ (numregs);
7220 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
7221 && (named || ! ca->prototype_p))
7223 if (mode2 == SFmode && ca->free_single_fp_reg)
7224 ca->free_single_fp_reg = 0;
7225 else if (ca->arg_count[(int) SH_ARG_FLOAT]
7226 < NPARM_REGS (SFmode))
7228 int numfpregs
7229 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
7230 NPARM_REGS (SFmode)
7231 - ca->arg_count[(int) SH_ARG_FLOAT]);
7233 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
7235 if (TARGET_SHCOMPACT && ! ca->prototype_p)
7237 if (ca->outgoing && numregs > 0)
7240 ca->call_cookie
7241 |= (CALL_COOKIE_INT_REG
7242 (ca->arg_count[(int) SH_ARG_INT]
7243 - numregs + ((numfpregs - 2) / 2),
7244 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
7245 - numfpregs) / 2));
7247 while (numfpregs -= 2);
7249 else if (mode2 == SFmode && (named)
7250 && (ca->arg_count[(int) SH_ARG_FLOAT]
7251 < NPARM_REGS (SFmode)))
7252 ca->free_single_fp_reg
7253 = FIRST_FP_PARM_REG - numfpregs
7254 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
7257 return;
7260 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
7262 /* Note that we've used the skipped register. */
7263 if (mode == SFmode && ca->free_single_fp_reg)
7265 ca->free_single_fp_reg = 0;
7266 return;
7268 /* When we have a DF after an SF, there's an SF register that gets
7269 skipped in order to align the DF value. We note this skipped
7270 register, because the next SF value will use it, and not the
7271 SF register that follows the DF. */
7272 if (mode == DFmode
7273 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
7275 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
7276 + BASE_ARG_REG (mode));
7280 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
7281 || PASS_IN_REG_P (*ca, mode, type))
7282 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
7283 = (ROUND_REG (*ca, mode)
7284 + (mode == BLKmode
7285 ? ROUND_ADVANCE (int_size_in_bytes (type))
7286 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
7289 /* The Renesas calling convention doesn't quite fit into this scheme since
7290 the structure value address is passed like an invisible argument, but
7291 one that is always passed in memory. */
7292 static rtx
7293 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
7295 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7296 return 0;
7297 return gen_rtx_REG (Pmode, 2);
7300 /* Worker function for TARGET_RETURN_IN_MEMORY. */
7302 static bool
7303 sh_return_in_memory (tree type, tree fndecl)
7305 if (TARGET_SH5)
7307 if (TYPE_MODE (type) == BLKmode)
7308 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
7309 else
7310 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
7312 else
7314 return (TYPE_MODE (type) == BLKmode
7315 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7316 && TREE_CODE (type) == RECORD_TYPE));
7320 /* We actually emit the code in sh_expand_prologue. We used to use
7321 a static variable to flag that we need to emit this code, but that
7322 doesn't work when inlining, when functions are deferred and then emitted
7323 later. Fortunately, we already have two flags that are part of struct
7324 function that tell whether a function uses varargs or stdarg. */
7325 static void
7326 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
7327 enum machine_mode mode,
7328 tree type,
7329 int *pretend_arg_size,
7330 int second_time ATTRIBUTE_UNUSED)
7332 gcc_assert (current_function_stdarg);
7333 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7335 int named_parm_regs, anon_parm_regs;
7337 named_parm_regs = (ROUND_REG (*ca, mode)
7338 + (mode == BLKmode
7339 ? ROUND_ADVANCE (int_size_in_bytes (type))
7340 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
7341 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
7342 if (anon_parm_regs > 0)
7343 *pretend_arg_size = anon_parm_regs * 4;
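/* For example, for a function declared as f (int a, ...), one named
   integer parameter leaves NPARM_REGS (SImode) - 1 argument registers
   for the anonymous arguments, so 4 * (NPARM_REGS (SImode) - 1) bytes
   of pretend arguments are reserved.  */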
7347 static bool
7348 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
7350 return TARGET_SH5;
7353 static bool
7354 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
7356 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
7360 /* Define the offset between two registers, one to be eliminated, and
7361 the other its replacement, at the start of a routine. */
7364 initial_elimination_offset (int from, int to)
7366 int regs_saved;
7367 int regs_saved_rounding = 0;
7368 int total_saved_regs_space;
7369 int total_auto_space;
7370 int save_flags = target_flags;
7371 int copy_flags;
7372 HARD_REG_SET live_regs_mask;
7374 shmedia_space_reserved_for_target_registers = false;
7375 regs_saved = calc_live_regs (&live_regs_mask);
7376 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
7378 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
7380 shmedia_space_reserved_for_target_registers = true;
7381 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
7384 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
7385 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7386 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
7388 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
7389 copy_flags = target_flags;
7390 target_flags = save_flags;
7392 total_saved_regs_space = regs_saved + regs_saved_rounding;
7394 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7395 return total_saved_regs_space + total_auto_space
7396 + current_function_args_info.byref_regs * 8;
7398 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7399 return total_saved_regs_space + total_auto_space
7400 + current_function_args_info.byref_regs * 8;
7402 /* Initial gap between fp and sp is 0. */
7403 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7404 return 0;
7406 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7407 return rounded_frame_size (0);
7409 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7410 return rounded_frame_size (0);
7412 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
7413 && (to == HARD_FRAME_POINTER_REGNUM
7414 || to == STACK_POINTER_REGNUM));
7415 if (TARGET_SH5)
7417 int n = total_saved_regs_space;
7418 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7419 save_schedule schedule;
7420 save_entry *entry;
7422 n += total_auto_space;
7424 /* If it wasn't saved, there's not much we can do. */
7425 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7426 return n;
7428 target_flags = copy_flags;
7430 sh5_schedule_saves (&live_regs_mask, &schedule, n);
7431 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7432 if (entry->reg == pr_reg)
7434 target_flags = save_flags;
7435 return entry->offset;
7437 gcc_unreachable ();
7439 else
7440 return total_auto_space;
7443 /* Handle machine specific pragmas to be semi-compatible with Renesas
7444 compiler. */
7446 void
7447 sh_pr_interrupt (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7449 pragma_interrupt = 1;
7452 void
7453 sh_pr_trapa (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7455 pragma_interrupt = pragma_trapa = 1;
7458 void
7459 sh_pr_nosave_low_regs (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7461 pragma_nosave_low_regs = 1;
7464 /* Generate an 'interrupt_handler' attribute for decls. */
7466 static void
7467 sh_insert_attributes (tree node, tree *attributes)
7469 if (! pragma_interrupt
7470 || TREE_CODE (node) != FUNCTION_DECL)
7471 return;
7473 /* We are only interested in declarations. */
7474 if (!DECL_P (node))
7475 return;
7477 /* Add an 'interrupt_handler' attribute. */
7478 * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
7480 return;
7483 /* Supported attributes:
7485 interrupt_handler -- specifies this function is an interrupt handler.
7487 sp_switch -- specifies an alternate stack for an interrupt handler
7488 to run on.
7490 trap_exit -- use a trapa to exit an interrupt function instead of
7491 an rte instruction.
7493 renesas -- use Renesas calling/layout conventions (functions and
7494 structures).
7498 const struct attribute_spec sh_attribute_table[] =
7500 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
7501 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7502 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
7503 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
7504 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
7505 #ifdef SYMBIAN
7506 /* Symbian support adds three new attributes:
7507 dllexport - for exporting a function/variable that will live in a dll
7508 dllimport - for importing a function/variable from a dll
7510 Microsoft allows multiple declspecs in one __declspec, separating
7511 them with spaces. We do NOT support this. Instead, use __declspec
7512 multiple times. */
7513 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7514 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7515 #endif
7516 { NULL, 0, 0, false, false, false, NULL }
7519 /* Handle an "interrupt_handler" attribute; arguments as in
7520 struct attribute_spec.handler. */
7521 static tree
7522 sh_handle_interrupt_handler_attribute (tree *node, tree name,
7523 tree args ATTRIBUTE_UNUSED,
7524 int flags ATTRIBUTE_UNUSED,
7525 bool *no_add_attrs)
7527 if (TREE_CODE (*node) != FUNCTION_DECL)
7529 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7530 IDENTIFIER_POINTER (name));
7531 *no_add_attrs = true;
7533 else if (TARGET_SHCOMPACT)
7535 error ("attribute interrupt_handler is not compatible with -m5-compact");
7536 *no_add_attrs = true;
7539 return NULL_TREE;
7542 /* Handle an "sp_switch" attribute; arguments as in
7543 struct attribute_spec.handler. */
7544 static tree
7545 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
7546 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7548 if (TREE_CODE (*node) != FUNCTION_DECL)
7550 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7551 IDENTIFIER_POINTER (name));
7552 *no_add_attrs = true;
7554 else if (!pragma_interrupt)
7556 /* The sp_switch attribute only has meaning for interrupt functions. */
7557 warning (OPT_Wattributes, "%qs attribute only applies to "
7558 "interrupt functions", IDENTIFIER_POINTER (name));
7559 *no_add_attrs = true;
7561 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
7563 /* The argument must be a constant string. */
7564 warning (OPT_Wattributes, "%qs attribute argument not a string constant",
7565 IDENTIFIER_POINTER (name));
7566 *no_add_attrs = true;
7568 else
7570 const char *s = ggc_strdup (TREE_STRING_POINTER (TREE_VALUE (args)));
7571 sp_switch = gen_rtx_SYMBOL_REF (VOIDmode, s);
7574 return NULL_TREE;
7577 /* Handle an "trap_exit" attribute; arguments as in
7578 struct attribute_spec.handler. */
7579 static tree
7580 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
7581 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7583 if (TREE_CODE (*node) != FUNCTION_DECL)
7585 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7586 IDENTIFIER_POINTER (name));
7587 *no_add_attrs = true;
7589 else if (!pragma_interrupt)
7591 /* The trap_exit attribute only has meaning for interrupt functions. */
7592 warning (OPT_Wattributes, "%qs attribute only applies to "
7593 "interrupt functions", IDENTIFIER_POINTER (name));
7594 *no_add_attrs = true;
7596 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
7598 /* The argument must be a constant integer. */
7599 warning (OPT_Wattributes, "%qs attribute argument not an "
7600 "integer constant", IDENTIFIER_POINTER (name));
7601 *no_add_attrs = true;
7603 else
7605 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
7608 return NULL_TREE;
7611 static tree
7612 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
7613 tree name ATTRIBUTE_UNUSED,
7614 tree args ATTRIBUTE_UNUSED,
7615 int flags ATTRIBUTE_UNUSED,
7616 bool *no_add_attrs ATTRIBUTE_UNUSED)
7618 return NULL_TREE;
7621 /* True if __attribute__((renesas)) or -mrenesas. */
7623 sh_attr_renesas_p (tree td)
7625 if (TARGET_HITACHI)
7626 return 1;
7627 if (td == 0)
7628 return 0;
7629 if (DECL_P (td))
7630 td = TREE_TYPE (td);
7631 if (td == error_mark_node)
7632 return 0;
7633 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
7634 != NULL_TREE);
7637 /* True if __attribute__((renesas)) or -mrenesas, for the current
7638 function. */
7640 sh_cfun_attr_renesas_p (void)
7642 return sh_attr_renesas_p (current_function_decl);
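/* Return true if the current function has the "interrupt_handler"
attribute.  */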
7646 sh_cfun_interrupt_handler_p (void)
7648 return (lookup_attribute ("interrupt_handler",
7649 DECL_ATTRIBUTES (current_function_decl))
7650 != NULL_TREE);
7653 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
7655 static const char *
7656 sh_check_pch_target_flags (int old_flags)
7658 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
7659 | MASK_SH_E | MASK_HARD_SH4
7660 | MASK_FPU_SINGLE | MASK_SH4))
7661 return _("created and used with different architectures / ABIs");
7662 if ((old_flags ^ target_flags) & MASK_HITACHI)
7663 return _("created and used with different ABIs");
7664 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
7665 return _("created and used with different endianness");
7666 return NULL;
7669 /* Predicates used by the templates. */
7671 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
7672 Used only in general_movsrc_operand. */
7675 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7677 switch (REGNO (op))
7679 case PR_REG:
7680 case MACL_REG:
7681 case MACH_REG:
7682 return 1;
7684 return 0;
7687 /* Nonzero if OP is a floating point value with value 0.0. */
7690 fp_zero_operand (rtx op)
7692 REAL_VALUE_TYPE r;
7694 if (GET_MODE (op) != SFmode)
7695 return 0;
7697 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7698 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
7701 /* Nonzero if OP is a floating point value with value 1.0. */
7704 fp_one_operand (rtx op)
7706 REAL_VALUE_TYPE r;
7708 if (GET_MODE (op) != SFmode)
7709 return 0;
7711 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7712 return REAL_VALUES_EQUAL (r, dconst1);
7715 /* For -m4 and -m4-single-only, mode switching is used. If we are
7716 compiling without -mfmovd, movsf_ie isn't taken into account for
7717 mode switching. We could check in machine_dependent_reorg for
7718 cases where we know we are in single precision mode, but there is
7720 no interface to find that out during reload, so we must avoid
7720 choosing an fldi alternative during reload and thus failing to
7721 allocate a scratch register for the constant loading. */
7723 fldi_ok (void)
7725 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
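/* Return nonzero if OP is a MEM, or, when targeting SH4, a CONST_DOUBLE.  */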
7729 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7731 enum rtx_code code = GET_CODE (op);
7732 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
7735 /* Return the TLS type for TLS symbols, 0 otherwise. */
7737 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7739 if (GET_CODE (op) != SYMBOL_REF)
7740 return 0;
7741 return SYMBOL_REF_TLS_MODEL (op);
7744 /* Return the destination address of a branch. */
7746 static int
7747 branch_dest (rtx branch)
7749 rtx dest = SET_SRC (PATTERN (branch));
7750 int dest_uid;
7752 if (GET_CODE (dest) == IF_THEN_ELSE)
7753 dest = XEXP (dest, 1);
7754 dest = XEXP (dest, 0);
7755 dest_uid = INSN_UID (dest);
7756 return INSN_ADDRESSES (dest_uid);
7759 /* Return nonzero if REG is not used after INSN.
7760 We assume REG is a reload reg, and therefore does
7761 not live past labels. It may live past calls or jumps though. */
7763 reg_unused_after (rtx reg, rtx insn)
7765 enum rtx_code code;
7766 rtx set;
7768 /* If the reg is set by this instruction, then it is safe for our
7769 case. Disregard the case where this is a store to memory, since
7770 we are checking a register used in the store address. */
7771 set = single_set (insn);
7772 if (set && GET_CODE (SET_DEST (set)) != MEM
7773 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7774 return 1;
7776 while ((insn = NEXT_INSN (insn)))
7778 rtx set;
7779 if (!INSN_P (insn))
7780 continue;
7782 code = GET_CODE (insn);
7784 #if 0
7785 /* If this is a label that existed before reload, then the register
7786 is dead here. However, if this is a label added by reorg, then
7787 the register may still be live here. We can't tell the difference,
7788 so we just ignore labels completely. */
7789 if (code == CODE_LABEL)
7790 return 1;
7791 /* else */
7792 #endif
7794 if (code == JUMP_INSN)
7795 return 0;
7797 /* If this is a sequence, we must handle them all at once.
7798 We could have for instance a call that sets the target register,
7799 and an insn in a delay slot that uses the register. In this case,
7800 we must return 0. */
7801 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
7803 int i;
7804 int retval = 0;
7806 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
7808 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
7809 rtx set = single_set (this_insn);
7811 if (GET_CODE (this_insn) == CALL_INSN)
7812 code = CALL_INSN;
7813 else if (GET_CODE (this_insn) == JUMP_INSN)
7815 if (INSN_ANNULLED_BRANCH_P (this_insn))
7816 return 0;
7817 code = JUMP_INSN;
7820 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7821 return 0;
7822 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7824 if (GET_CODE (SET_DEST (set)) != MEM)
7825 retval = 1;
7826 else
7827 return 0;
7829 if (set == 0
7830 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
7831 return 0;
7833 if (retval == 1)
7834 return 1;
7835 else if (code == JUMP_INSN)
7836 return 0;
7839 set = single_set (insn);
7840 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7841 return 0;
7842 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7843 return GET_CODE (SET_DEST (set)) != MEM;
7844 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
7845 return 0;
7847 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
7848 return 1;
7850 return 1;
7853 #include "ggc.h"
7855 static GTY(()) rtx fpscr_rtx;
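/* Return the cached PSImode rtx for the FPSCR register, creating it on
first use.  */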
7857 get_fpscr_rtx (void)
7859 if (! fpscr_rtx)
7861 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
7862 REG_USERVAR_P (fpscr_rtx) = 1;
7863 mark_user_reg (fpscr_rtx);
7865 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
7866 mark_user_reg (fpscr_rtx);
7867 return fpscr_rtx;
7870 static GTY(()) tree fpscr_values;
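/* Emit insns that load entry INDEX of the external __fpscr_values array
into the FPSCR register.  SCRATCH holds the address when no new pseudos
may be created.  */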
7872 static void
7873 emit_fpu_switch (rtx scratch, int index)
7875 rtx dst, src;
7877 if (fpscr_values == NULL)
7879 tree t;
7881 t = build_index_type (integer_one_node);
7882 t = build_array_type (integer_type_node, t);
7883 t = build_decl (VAR_DECL, get_identifier ("__fpscr_values"), t);
7884 DECL_ARTIFICIAL (t) = 1;
7885 DECL_IGNORED_P (t) = 1;
7886 DECL_EXTERNAL (t) = 1;
7887 TREE_STATIC (t) = 1;
7888 TREE_PUBLIC (t) = 1;
7889 TREE_USED (t) = 1;
7891 fpscr_values = t;
7894 src = DECL_RTL (fpscr_values);
7895 if (no_new_pseudos)
7897 emit_move_insn (scratch, XEXP (src, 0));
7898 if (index != 0)
7899 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
7900 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
7902 else
7903 src = adjust_address (src, PSImode, index * 4);
7905 dst = get_fpscr_rtx ();
7906 emit_move_insn (dst, src);
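/* Helpers for emitting SFmode / DFmode arithmetic; the expand_* variants
pass get_fpscr_rtx () as the final operand expected by the generated
patterns.  */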
7909 void
7910 emit_sf_insn (rtx pat)
7912 emit_insn (pat);
7915 void
7916 emit_df_insn (rtx pat)
7918 emit_insn (pat);
7921 void
7922 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
7924 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7927 void
7928 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
7930 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
7931 get_fpscr_rtx ()));
7934 void
7935 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
7937 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7940 void
7941 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
7943 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
7944 get_fpscr_rtx ()));
7947 /* ??? gcc does flow analysis strictly after common subexpression
7948 elimination. As a result, common subexpression elimination fails
7949 when there are some intervening statements setting the same register.
7950 If we did nothing about this, this would hurt the precision switching
7951 for SH4 badly. There is some cse after reload, but it is unable to
7952 undo the extra register pressure from the unused instructions, and
7953 it cannot remove auto-increment loads.
7955 A C code example that shows this flow/cse weakness for (at least) SH
7956 and sparc (as of gcc ss-970706) is this:
7958 double
7959 f(double a)
7961 double d;
7962 d = 0.1;
7963 a += d;
7964 d = 1.1;
7965 d = 0.1;
7966 a *= d;
7967 return a;
7970 So we add another pass before common subexpression elimination, to
7971 remove assignments that are dead due to a following assignment in the
7972 same basic block. */
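/* Helper for the dead-assignment removal pass described above: clear the
REG_SET_BLOCK entries for every register used by X.  */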
7974 static void
7975 mark_use (rtx x, rtx *reg_set_block)
7977 enum rtx_code code;
7979 if (! x)
7980 return;
7981 code = GET_CODE (x);
7982 switch (code)
7984 case REG:
7986 int regno = REGNO (x);
7987 int nregs = (regno < FIRST_PSEUDO_REGISTER
7988 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
7989 : 1);
7992 reg_set_block[regno + nregs - 1] = 0;
7994 while (--nregs);
7995 break;
7997 case SET:
7999 rtx dest = SET_DEST (x);
8001 if (GET_CODE (dest) == SUBREG)
8002 dest = SUBREG_REG (dest);
8003 if (GET_CODE (dest) != REG)
8004 mark_use (dest, reg_set_block);
8005 mark_use (SET_SRC (x), reg_set_block);
8006 break;
8008 case CLOBBER:
8009 break;
8010 default:
8012 const char *fmt = GET_RTX_FORMAT (code);
8013 int i, j;
8014 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8016 if (fmt[i] == 'e')
8017 mark_use (XEXP (x, i), reg_set_block);
8018 else if (fmt[i] == 'E')
8019 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8020 mark_use (XVECEXP (x, i, j), reg_set_block);
8022 break;
8027 static rtx get_free_reg (HARD_REG_SET);
8029 /* This function returns a register to use for loading the address from
8030 which to load the fpscr. Currently it always returns r1 or r7, but when we are
8031 able to use pseudo registers after combine, or have a better mechanism
8032 for choosing a register, it should be done here. */
8033 /* REGS_LIVE is the liveness information for the point for which we
8034 need this allocation. In some bare-bones exit blocks, r1 is live at the
8035 start. We can even have all of r0..r3 being live:
8036 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8037 The INSN before which new insns are placed will clobber the register
8038 we return. If a basic block consists only of setting the return value
8039 register to a pseudo and using that register, the return value is not
8040 live before or after this block, yet we'll insert our insns right in
8041 the middle. */
8043 static rtx
8044 get_free_reg (HARD_REG_SET regs_live)
8046 if (! TEST_HARD_REG_BIT (regs_live, 1))
8047 return gen_rtx_REG (Pmode, 1);
8049 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8050 there shouldn't be anything but a jump before the function end. */
8051 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
8052 return gen_rtx_REG (Pmode, 7);
8055 /* This function will set the fpscr from memory.
8056 MODE is the mode we are setting it to. */
8057 void
8058 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8060 enum attr_fp_mode fp_mode = mode;
8061 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
8062 rtx addr_reg = get_free_reg (regs_live);
8064 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
8067 /* Is the given character a logical line separator for the assembler? */
8068 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8069 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
8070 #endif
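/* Return the number of bytes by which the length of INSN must be adjusted:
2 for an unfilled delay slot, plus the extra size of any sh-dsp
parallel-processing insns found in an asm template.  */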
8073 sh_insn_length_adjustment (rtx insn)
8075 /* Instructions with unfilled delay slots take up an extra two bytes for
8076 the nop in the delay slot. */
8077 if (((GET_CODE (insn) == INSN
8078 && GET_CODE (PATTERN (insn)) != USE
8079 && GET_CODE (PATTERN (insn)) != CLOBBER)
8080 || GET_CODE (insn) == CALL_INSN
8081 || (GET_CODE (insn) == JUMP_INSN
8082 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8083 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8084 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8085 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8086 return 2;
8088 /* SH2e has a bug that prevents the use of annulled branches, so if
8089 the delay slot is not filled, we'll have to put a NOP in it. */
8090 if (sh_cpu == CPU_SH2E
8091 && GET_CODE (insn) == JUMP_INSN
8092 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8093 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8094 && get_attr_type (insn) == TYPE_CBRANCH
8095 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8096 return 2;
8098 /* sh-dsp parallel processing insns take four bytes instead of two. */
8100 if (GET_CODE (insn) == INSN)
8102 int sum = 0;
8103 rtx body = PATTERN (insn);
8104 const char *template;
8105 char c;
8106 int maybe_label = 1;
8108 if (GET_CODE (body) == ASM_INPUT)
8109 template = XSTR (body, 0);
8110 else if (asm_noperands (body) >= 0)
8111 template
8112 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
8113 else
8114 return 0;
8117 int ppi_adjust = 0;
8120 c = *template++;
8121 while (c == ' ' || c == '\t');
8122 /* all sh-dsp parallel-processing insns start with p.
8123 The only non-ppi sh insn starting with p is pref.
8124 The only ppi starting with pr is prnd. */
8125 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
8126 ppi_adjust = 2;
8127 /* The repeat pseudo-insn expands to three insns, a total of
8128 six bytes in size. */
8129 else if ((c == 'r' || c == 'R')
8130 && ! strncasecmp ("epeat", template, 5))
8131 ppi_adjust = 4;
8132 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
8134 /* If this is a label, it is obviously not a ppi insn. */
8135 if (c == ':' && maybe_label)
8137 ppi_adjust = 0;
8138 break;
8140 else if (c == '\'' || c == '"')
8141 maybe_label = 0;
8142 c = *template++;
8144 sum += ppi_adjust;
8145 maybe_label = c != ':';
8147 while (c);
8148 return sum;
8150 return 0;
8153 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8154 isn't protected by a PIC unspec. */
8156 nonpic_symbol_mentioned_p (rtx x)
8158 register const char *fmt;
8159 register int i;
8161 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8162 || GET_CODE (x) == PC)
8163 return 1;
8165 /* We don't want to look into the possible MEM location of a
8166 CONST_DOUBLE, since we're not going to use it, in general. */
8167 if (GET_CODE (x) == CONST_DOUBLE)
8168 return 0;
8170 if (GET_CODE (x) == UNSPEC
8171 && (XINT (x, 1) == UNSPEC_PIC
8172 || XINT (x, 1) == UNSPEC_GOT
8173 || XINT (x, 1) == UNSPEC_GOTOFF
8174 || XINT (x, 1) == UNSPEC_GOTPLT
8175 || XINT (x, 1) == UNSPEC_GOTTPOFF
8176 || XINT (x, 1) == UNSPEC_DTPOFF
8177 || XINT (x, 1) == UNSPEC_PLT))
8178 return 0;
8180 fmt = GET_RTX_FORMAT (GET_CODE (x));
8181 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8183 if (fmt[i] == 'E')
8185 register int j;
8187 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8188 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8189 return 1;
8191 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8192 return 1;
8195 return 0;
8198 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8199 @GOTOFF in `reg'. */
8201 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8202 rtx reg)
8204 if (tls_symbolic_operand (orig, Pmode))
8205 return orig;
8207 if (GET_CODE (orig) == LABEL_REF
8208 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8210 if (reg == 0)
8211 reg = gen_reg_rtx (Pmode);
8213 emit_insn (gen_symGOTOFF2reg (reg, orig));
8214 return reg;
8216 else if (GET_CODE (orig) == SYMBOL_REF)
8218 if (reg == 0)
8219 reg = gen_reg_rtx (Pmode);
8221 emit_insn (gen_symGOT2reg (reg, orig));
8222 return reg;
8224 return orig;
8227 /* Mark the use of a constant in the literal table. If the constant
8228 has multiple labels, make it unique. */
8229 static rtx
8230 mark_constant_pool_use (rtx x)
8232 rtx insn, lab, pattern;
8234 if (x == NULL)
8235 return x;
8237 switch (GET_CODE (x))
8239 case LABEL_REF:
8240 x = XEXP (x, 0);
8241 case CODE_LABEL:
8242 break;
8243 default:
8244 return x;
8247 /* Get the first label in the list of labels for the same constant
8248 and delete the other labels in the list. */
8249 lab = x;
8250 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8252 if (GET_CODE (insn) != CODE_LABEL
8253 || LABEL_REFS (insn) != NEXT_INSN (insn))
8254 break;
8255 lab = insn;
8258 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8259 INSN_DELETED_P (insn) = 1;
8261 /* Mark constants in a window. */
8262 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8264 if (GET_CODE (insn) != INSN)
8265 continue;
8267 pattern = PATTERN (insn);
8268 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8269 continue;
8271 switch (XINT (pattern, 1))
8273 case UNSPECV_CONST2:
8274 case UNSPECV_CONST4:
8275 case UNSPECV_CONST8:
8276 XVECEXP (pattern, 0, 1) = const1_rtx;
8277 break;
8278 case UNSPECV_WINDOW_END:
8279 if (XVECEXP (pattern, 0, 0) == x)
8280 return lab;
8281 break;
8282 case UNSPECV_CONST_END:
8283 return lab;
8284 default:
8285 break;
8289 return lab;
8292 /* Return true if it's possible to redirect BRANCH1 to the destination
8293 of an unconditional jump BRANCH2. We only want to do this if the
8294 resulting branch will have a short displacement. */
8296 sh_can_redirect_branch (rtx branch1, rtx branch2)
8298 if (flag_expensive_optimizations && simplejump_p (branch2))
8300 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8301 rtx insn;
8302 int distance;
8304 for (distance = 0, insn = NEXT_INSN (branch1);
8305 insn && distance < 256;
8306 insn = PREV_INSN (insn))
8308 if (insn == dest)
8309 return 1;
8310 else
8311 distance += get_attr_length (insn);
8313 for (distance = 0, insn = NEXT_INSN (branch1);
8314 insn && distance < 256;
8315 insn = NEXT_INSN (insn))
8317 if (insn == dest)
8318 return 1;
8319 else
8320 distance += get_attr_length (insn);
8323 return 0;
8326 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8328 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8329 unsigned int new_reg)
8331 /* Interrupt functions can only use registers that have already been
8332 saved by the prologue, even if they would normally be
8333 call-clobbered. */
8335 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
8336 return 0;
8338 return 1;
8341 /* Function to update the integer COST
8342 based on the relationship between INSN that is dependent on
8343 DEP_INSN through the dependence LINK. The default is to make no
8344 adjustment to COST. This can be used for example to specify to
8345 the scheduler that an output- or anti-dependence does not incur
8346 the same cost as a data-dependence. The return value should be
8347 the new value for COST. */
8348 static int
8349 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8351 rtx reg, use_pat;
8353 if (TARGET_SHMEDIA)
8355 /* On SHmedia, if the dependence is an anti-dependence or
8356 output-dependence, there is no cost. */
8357 if (REG_NOTE_KIND (link) != 0)
8359 /* However, dependencies between target register loads and
8360 uses of the register in a subsequent block that are separated
8361 by a conditional branch are not modelled - we have to make do with
8362 the anti-dependency between the target register load and the
8363 conditional branch that ends the current block. */
8364 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8365 && GET_CODE (PATTERN (dep_insn)) == SET
8366 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
8367 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
8368 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
8370 int orig_cost = cost;
8371 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
8372 rtx target = ((! note
8373 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
8374 ? insn : JUMP_LABEL (insn));
8375 /* On the likely path, the branch costs 1; on the unlikely path,
8376 it costs 3. */
8377 cost--;
8379 target = next_active_insn (target);
8380 while (target && ! flow_dependent_p (target, dep_insn)
8381 && --cost > 0);
8382 /* If two branches are executed in immediate succession, with the
8383 first branch properly predicted, this causes a stall at the
8384 second branch, hence we won't need the target for the
8385 second branch for two cycles after the launch of the first
8386 branch. */
8387 if (cost > orig_cost - 2)
8388 cost = orig_cost - 2;
8390 else
8391 cost = 0;
8394 else if (get_attr_is_mac_media (insn)
8395 && get_attr_is_mac_media (dep_insn))
8396 cost = 1;
8398 else if (! reload_completed
8399 && GET_CODE (PATTERN (insn)) == SET
8400 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
8401 && GET_CODE (PATTERN (dep_insn)) == SET
8402 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
8403 && cost < 4)
8404 cost = 4;
8405 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
8406 that is needed at the target. */
8407 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
8408 && ! flow_dependent_p (insn, dep_insn))
8409 cost--;
8411 else if (REG_NOTE_KIND (link) == 0)
8413 enum attr_type dep_type, type;
8415 if (recog_memoized (insn) < 0
8416 || recog_memoized (dep_insn) < 0)
8417 return cost;
8419 dep_type = get_attr_type (dep_insn);
8420 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
8421 cost--;
8422 if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
8423 && (type = get_attr_type (insn)) != TYPE_CALL
8424 && type != TYPE_SFUNC)
8425 cost--;
8427 /* The only input for a call that is timing-critical is the
8428 function's address. */
8429 if (GET_CODE (insn) == CALL_INSN
8431 rtx call = PATTERN (insn);
8433 if (GET_CODE (call) == PARALLEL)
8434 call = XVECEXP (call, 0, 0);
8435 if (GET_CODE (call) == SET)
8436 call = SET_SRC (call);
8437 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
8438 /* sibcalli_thunk uses a symbol_ref in an unspec. */
8439 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
8440 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
8441 cost = 0;
8443 /* Likewise, the most timing-critical input for an sfunc call
8444 is the function address. However, sfuncs typically start
8445 using their arguments pretty quickly.
8446 Assume a four cycle delay before they are needed. */
8447 /* All sfunc calls are parallels with at least four components.
8448 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8449 else if (GET_CODE (PATTERN (insn)) == PARALLEL
8450 && XVECLEN (PATTERN (insn), 0) >= 4
8451 && (reg = sfunc_uses_reg (insn)))
8453 if (! reg_set_p (reg, dep_insn))
8454 cost -= 4;
8456 /* When the preceding instruction loads the shift amount of
8457 the following SHAD/SHLD, the latency of the load is increased
8458 by 1 cycle. */
8459 else if (TARGET_SH4
8460 && get_attr_type (insn) == TYPE_DYN_SHIFT
8461 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
8462 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
8463 XEXP (SET_SRC (single_set (insn)),
8464 1)))
8465 cost++;
8466 /* When an LS group instruction with a latency of less than
8467 3 cycles is followed by a double-precision floating-point
8468 instruction, FIPR, or FTRV, the latency of the first
8469 instruction is increased to 3 cycles. */
8470 else if (cost < 3
8471 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
8472 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
8473 cost = 3;
8474 /* The lsw register of a double-precision computation is ready one
8475 cycle earlier. */
8476 else if (reload_completed
8477 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
8478 && (use_pat = single_set (insn))
8479 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
8480 SET_SRC (use_pat)))
8481 cost -= 1;
8483 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
8484 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
8485 cost -= 1;
8487 /* An anti-dependence penalty of two applies if the first insn is a double
8488 precision fadd / fsub / fmul. */
8489 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8490 && recog_memoized (dep_insn) >= 0
8491 && get_attr_type (dep_insn) == TYPE_DFP_ARITH
8492 /* A lot of alleged anti-flow dependences are fake,
8493 so check this one is real. */
8494 && flow_dependent_p (dep_insn, insn))
8495 cost = 2;
8498 return cost;
8501 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
8502 if DEP_INSN is anti-flow dependent on INSN. */
8503 static int
8504 flow_dependent_p (rtx insn, rtx dep_insn)
8506 rtx tmp = PATTERN (insn);
8508 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
8509 return tmp == NULL_RTX;
8512 /* A helper function for flow_dependent_p called through note_stores. */
8513 static void
8514 flow_dependent_p_1 (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
8516 rtx * pinsn = (rtx *) data;
8518 if (*pinsn && reg_referenced_p (x, *pinsn))
8519 *pinsn = NULL_RTX;
8522 /* For use by sh_allocate_initial_value. Note that sh.md contains some
8523 'special function' patterns (type sfunc) that clobber pr, but that
8524 do not look like function calls to leaf_function_p. Hence we must
8525 do this extra check. */
8526 static int
8527 sh_pr_n_sets (void)
8529 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
8532 /* Return where to allocate pseudo for a given hard register initial
8533 value. */
8534 static rtx
8535 sh_allocate_initial_value (rtx hard_reg)
8537 rtx x;
8539 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
8541 if (current_function_is_leaf
8542 && ! sh_pr_n_sets ()
8543 && ! (TARGET_SHCOMPACT
8544 && ((current_function_args_info.call_cookie
8545 & ~ CALL_COOKIE_RET_TRAMP (1))
8546 || current_function_has_nonlocal_label)))
8547 x = hard_reg;
8548 else
8549 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
8551 else
8552 x = NULL_RTX;
8554 return x;
8557 /* This function returns "2" to indicate dual issue for the SH4
8558 processor. To be used by the DFA pipeline description. */
8559 static int
8560 sh_issue_rate (void)
8562 if (TARGET_SUPERSCALAR)
8563 return 2;
8564 else
8565 return 1;
8568 /* Functions for ready queue reordering for sched1. */
8570 /* Get weight for mode for a set x. */
8571 static short
8572 find_set_regmode_weight (rtx x, enum machine_mode mode)
8574 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
8575 return 1;
8576 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
8578 if (GET_CODE (SET_DEST (x)) == REG)
8580 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
8581 return 1;
8582 else
8583 return 0;
8585 return 1;
8587 return 0;
8590 /* Get regmode weight for insn. */
8591 static short
8592 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
8594 short reg_weight = 0;
8595 rtx x;
8597 /* Increment weight for each register born here. */
8598 x = PATTERN (insn);
8599 reg_weight += find_set_regmode_weight (x, mode);
8600 if (GET_CODE (x) == PARALLEL)
8602 int j;
8603 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
8605 x = XVECEXP (PATTERN (insn), 0, j);
8606 reg_weight += find_set_regmode_weight (x, mode);
8609 /* Decrement weight for each register that dies here. */
8610 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
8612 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
8614 rtx note = XEXP (x, 0);
8615 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
8616 reg_weight--;
8619 return reg_weight;
8622 /* Calculate regmode weights for all insns of a basic block. */
8623 static void
8624 find_regmode_weight (int b, enum machine_mode mode)
8626 rtx insn, next_tail, head, tail;
8628 get_block_head_tail (b, &head, &tail);
8629 next_tail = NEXT_INSN (tail);
8631 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
8633 /* Handle register life information. */
8634 if (!INSN_P (insn))
8635 continue;
8637 if (mode == SFmode)
8638 INSN_REGMODE_WEIGHT (insn, mode) =
8639 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
8640 else if (mode == SImode)
8641 INSN_REGMODE_WEIGHT (insn, mode) =
8642 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
8646 /* Comparison function for ready queue sorting. */
8647 static int
8648 rank_for_reorder (const void *x, const void *y)
8650 rtx tmp = *(const rtx *) y;
8651 rtx tmp2 = *(const rtx *) x;
8653 /* The insn in a schedule group should be issued first. */
8654 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
8655 return SCHED_GROUP_P (tmp2) ? 1 : -1;
8657 /* If insns are equally good, sort by INSN_LUID (original insn order); this
8658 minimizes instruction movement, thus minimizing sched's effect on
8659 register pressure. */
8660 return INSN_LUID (tmp) - INSN_LUID (tmp2);
8663 /* Resort the array A in which only element at index N may be out of order. */
8664 static void
8665 swap_reorder (rtx *a, int n)
8667 rtx insn = a[n - 1];
8668 int i = n - 2;
8670 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
8672 a[i + 1] = a[i];
8673 i -= 1;
8675 a[i + 1] = insn;
8678 #define SCHED_REORDER(READY, N_READY) \
8679 do \
8681 if ((N_READY) == 2) \
8682 swap_reorder (READY, N_READY); \
8683 else if ((N_READY) > 2) \
8684 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
8686 while (0)
8688 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
8689 macro. */
8690 static void
8691 ready_reorder (rtx *ready, int nready)
8693 SCHED_REORDER (ready, nready);
8696 /* Calculate regmode weights for all insns of all basic block. */
8697 static void
8698 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
8699 int verbose ATTRIBUTE_UNUSED,
8700 int old_max_uid)
8702 basic_block b;
8704 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
8705 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
8707 FOR_EACH_BB_REVERSE (b)
8709 find_regmode_weight (b->index, SImode);
8710 find_regmode_weight (b->index, SFmode);
8713 CURR_REGMODE_PRESSURE (SImode) = 0;
8714 CURR_REGMODE_PRESSURE (SFmode) = 0;
8718 /* Cleanup. */
8719 static void
8720 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
8721 int verbose ATTRIBUTE_UNUSED)
8723 if (regmode_weight[0])
8725 free (regmode_weight[0]);
8726 regmode_weight[0] = NULL;
8728 if (regmode_weight[1])
8730 free (regmode_weight[1]);
8731 regmode_weight[1] = NULL;
8735 /* Cache the can_issue_more so that we can return it from reorder2. Also,
8736 keep count of register pressures on SImode and SFmode. */
8737 static int
8738 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
8739 int sched_verbose ATTRIBUTE_UNUSED,
8740 rtx insn,
8741 int can_issue_more)
8743 if (GET_CODE (PATTERN (insn)) != USE
8744 && GET_CODE (PATTERN (insn)) != CLOBBER)
8745 cached_can_issue_more = can_issue_more - 1;
8746 else
8747 cached_can_issue_more = can_issue_more;
8749 if (reload_completed)
8750 return cached_can_issue_more;
8752 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
8753 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
8755 return cached_can_issue_more;
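/* Reset the SImode and SFmode register pressure counters at the start of
each scheduling pass.  */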
8758 static void
8759 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
8760 int verbose ATTRIBUTE_UNUSED,
8761 int veclen ATTRIBUTE_UNUSED)
8763 CURR_REGMODE_PRESSURE (SImode) = 0;
8764 CURR_REGMODE_PRESSURE (SFmode) = 0;
8767 /* Some magic numbers. */
8768 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8769 functions that already have high pressure on r0. */
8770 #define R0_MAX_LIFE_REGIONS 2
8771 #define R0_MAX_LIVE_LENGTH 12
8772 /* Register Pressure thresholds for SImode and SFmode registers. */
8773 #define SIMODE_MAX_WEIGHT 5
8774 #define SFMODE_MAX_WEIGHT 10
8776 /* Return true if the pressure is high for MODE. */
8777 static short
8778 high_pressure (enum machine_mode mode)
8780 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8781 functions that already have high pressure on r0. */
8782 if ((REG_N_SETS (0) - REG_N_DEATHS (0)) >= R0_MAX_LIFE_REGIONS
8783 && REG_LIVE_LENGTH (0) >= R0_MAX_LIVE_LENGTH)
8784 return 1;
8786 if (mode == SFmode)
8787 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
8788 else
8789 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
8792 /* Reorder ready queue if register pressure is high. */
8793 static int
8794 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
8795 int sched_verbose ATTRIBUTE_UNUSED,
8796 rtx *ready,
8797 int *n_readyp,
8798 int clock_var ATTRIBUTE_UNUSED)
8800 if (reload_completed)
8801 return sh_issue_rate ();
8803 if (high_pressure (SFmode) || high_pressure (SImode))
8805 ready_reorder (ready, *n_readyp);
8808 return sh_issue_rate ();
8811 /* Skip cycles if the current register pressure is high. */
8812 static int
8813 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
8814 int sched_verbose ATTRIBUTE_UNUSED,
8815 rtx *ready ATTRIBUTE_UNUSED,
8816 int *n_readyp ATTRIBUTE_UNUSED,
8817 int clock_var ATTRIBUTE_UNUSED)
8819 if (reload_completed)
8820 return cached_can_issue_more;
8822 if (high_pressure (SFmode) || high_pressure (SImode))
8823 skip_cycles = 1;
8825 return cached_can_issue_more;
8828 /* Skip cycles without sorting the ready queue. This will move insns from
8829 Q->R. If this is the last cycle we are skipping, allow sorting of the ready
8830 queue by sh_reorder. */
8832 /* Generally, skipping this many cycles is sufficient for all insns to move
8833 from Q -> R. */
8834 #define MAX_SKIPS 8
8836 static int
8837 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
8838 int sched_verbose ATTRIBUTE_UNUSED,
8839 rtx insn ATTRIBUTE_UNUSED,
8840 int last_clock_var,
8841 int clock_var,
8842 int *sort_p)
8844 if (reload_completed)
8845 return 0;
8847 if (skip_cycles)
8849 if ((clock_var - last_clock_var) < MAX_SKIPS)
8851 *sort_p = 0;
8852 return 1;
8854 /* If this is the last cycle we are skipping, allow reordering of R. */
8855 if ((clock_var - last_clock_var) == MAX_SKIPS)
8857 *sort_p = 1;
8858 return 1;
8862 skip_cycles = 0;
8864 return 0;
8867 /* SHmedia requires registers for branches, so we can't generate new
8868 branches past reload. */
8869 static bool
8870 sh_cannot_modify_jumps_p (void)
8872 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
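/* Return the register class used for branch target registers: TARGET_REGS
on SHmedia, NO_REGS otherwise.  */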
8875 static int
8876 sh_target_reg_class (void)
8878 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
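/* Tell the branch target register load optimizer whether it may use
callee-saved target registers.  Allow this only when space has been
reserved for them, and when the function has many live registers or
contains a nested loop, a loop with a call, or a loop with more than
four labels.  */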
8881 static bool
8882 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
8884 HARD_REG_SET dummy;
8885 rtx insn;
8887 if (! shmedia_space_reserved_for_target_registers)
8888 return 0;
8889 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
8890 return 0;
8891 if (calc_live_regs (&dummy) >= 6 * 8)
8892 return 1;
8893 /* This is a borderline case. See if we have a nested loop, or a loop
8894 with a call, or with more than 4 labels inside. */
8895 for (insn = get_insns(); insn; insn = NEXT_INSN (insn))
8897 if (GET_CODE (insn) == NOTE
8898 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
8900 int labels = 0;
8904 insn = NEXT_INSN (insn);
8905 if ((GET_CODE (insn) == NOTE
8906 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
8907 || GET_CODE (insn) == CALL_INSN
8908 || (GET_CODE (insn) == CODE_LABEL && ++labels > 4))
8909 return 1;
8911 while (GET_CODE (insn) != NOTE
8912 || NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_END);
8915 return 0;
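/* Use the Microsoft-compatible bit-field layout for SH5 and for the
Renesas / Hitachi ABIs.  */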
8918 static bool
8919 sh_ms_bitfield_layout_p (tree record_type ATTRIBUTE_UNUSED)
8921 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
8925 On the SH1..SH4, the trampoline looks like
8926 2 0002 D202 mov.l l2,r2
8927 1 0000 D301 mov.l l1,r3
8928 3 0004 422B jmp @r2
8929 4 0006 0009 nop
8930 5 0008 00000000 l1: .long area
8931 6 000c 00000000 l2: .long function
8933 SH5 (compact) uses r1 instead of r3 for the static chain. */
8936 /* Emit RTL insns to initialize the variable parts of a trampoline.
8937 FNADDR is an RTX for the address of the function's pure code.
8938 CXT is an RTX for the static chain value for the function. */
8940 void
8941 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
8943 rtx tramp_mem = gen_frame_mem (BLKmode, tramp);
8945 if (TARGET_SHMEDIA64)
8947 rtx tramp_templ;
8948 int fixed_len;
8950 rtx movi1 = GEN_INT (0xcc000010);
8951 rtx shori1 = GEN_INT (0xc8000010);
8952 rtx src, dst;
8954 /* The following trampoline works within a +- 128 KB range for cxt:
8955 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
8956 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
8957 gettr tr1,r1; blink tr0,r63 */
8958 /* Address rounding makes it hard to compute the exact bounds of the
8959 offset for this trampoline, but we have a rather generous offset
8960 range, so frame_offset should do fine as an upper bound. */
8961 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
8963 /* ??? could optimize this trampoline initialization
8964 by writing DImode words with two insns each. */
8965 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
8966 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
8967 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
8968 insn = gen_rtx_AND (DImode, insn, mask);
8969 /* Or in ptb/u .,tr1 pattern */
8970 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
8971 insn = force_operand (insn, NULL_RTX);
8972 insn = gen_lowpart (SImode, insn);
8973 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
8974 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
8975 insn = gen_rtx_AND (DImode, insn, mask);
8976 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
8977 insn = gen_lowpart (SImode, insn);
8978 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
8979 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
8980 insn = gen_rtx_AND (DImode, insn, mask);
8981 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
8982 insn = gen_lowpart (SImode, insn);
8983 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
8984 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
8985 insn = gen_rtx_AND (DImode, insn, mask);
8986 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
8987 insn = gen_lowpart (SImode, insn);
8988 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
8989 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
8990 insn = gen_rtx_AND (DImode, insn, mask);
8991 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
8992 insn = gen_lowpart (SImode, insn);
8993 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
8994 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
8995 GEN_INT (0x6bf10600));
8996 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
8997 GEN_INT (0x4415fc10));
8998 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
8999 GEN_INT (0x4401fff0));
9000 emit_insn (gen_ic_invalidate_line (tramp));
9001 return;
9003 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
9004 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
9006 tramp_templ = gen_datalabel_ref (tramp_templ);
9007 dst = tramp_mem;
9008 src = gen_const_mem (BLKmode, tramp_templ);
9009 set_mem_align (dst, 256);
9010 set_mem_align (src, 64);
9011 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
9013 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
9014 emit_move_insn (adjust_address (tramp_mem, Pmode,
9015 fixed_len + GET_MODE_SIZE (Pmode)),
9016 cxt);
9017 emit_insn (gen_ic_invalidate_line (tramp));
9018 return;
9020 else if (TARGET_SHMEDIA)
9022 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
9023 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
9024 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
9025 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
9026 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
9027 rotated 10 right, and the higher 16 bits of every 32 selected. */
9028 rtx movishori
9029 = force_reg (V2HImode, (simplify_gen_subreg
9030 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
9031 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
9032 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
9034 tramp = force_reg (Pmode, tramp);
9035 fnaddr = force_reg (SImode, fnaddr);
9036 cxt = force_reg (SImode, cxt);
9037 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
9038 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
9039 movishori));
9040 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
9041 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9042 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
9043 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
9044 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
9045 gen_rtx_SUBREG (V2HImode, cxt, 0),
9046 movishori));
9047 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
9048 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9049 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
9050 if (TARGET_LITTLE_ENDIAN)
9052 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
9053 emit_insn (gen_mextr4 (quad2, cxtload, blink));
9055 else
9057 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
9058 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
9060 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
9061 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
9062 emit_insn (gen_ic_invalidate_line (tramp));
9063 return;
9065 else if (TARGET_SHCOMPACT)
9067 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
9068 return;
9070 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
9071 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
9072 SImode));
9073 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
9074 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
9075 SImode));
9076 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
9077 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
9078 if (TARGET_HARVARD)
9080 if (TARGET_USERMODE)
9081 emit_library_call (function_symbol (NULL, "__ic_invalidate",
9082 FUNCTION_ORDINARY),
9083 0, VOIDmode, 1, tramp, SImode);
9084 else
9085 emit_insn (gen_ic_invalidate_line (tramp));
9089 /* FIXME: This is overly conservative. A SHcompact function that
9090 receives arguments ``by reference'' will have them stored in its
9091 own stack frame, so it must not pass pointers or references to
9092 these arguments to other functions by means of sibling calls. */
9093 /* If PIC, we cannot make sibling calls to global functions
9094 because the PLT requires r12 to be live. */
9095 static bool
9096 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9098 return (1
9099 && (! TARGET_SHCOMPACT
9100 || current_function_args_info.stack_regs == 0)
9101 && ! sh_cfun_interrupt_handler_p ()
9102 && (! flag_pic
9103 || (decl && ! TREE_PUBLIC (decl))
9104 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
9107 /* Machine specific built-in functions. */
9109 struct builtin_description
9111 const enum insn_code icode;
9112 const char *const name;
9113 int signature;
9116 /* Describe number and signedness of arguments; arg[0] == result
9117 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
9118 /* 9: 64 bit pointer, 10: 32 bit pointer */
9119 static const char signature_args[][4] =
9121 #define SH_BLTIN_V2SI2 0
9122 { 4, 4 },
9123 #define SH_BLTIN_V4HI2 1
9124 { 4, 4 },
9125 #define SH_BLTIN_V2SI3 2
9126 { 4, 4, 4 },
9127 #define SH_BLTIN_V4HI3 3
9128 { 4, 4, 4 },
9129 #define SH_BLTIN_V8QI3 4
9130 { 4, 4, 4 },
9131 #define SH_BLTIN_MAC_HISI 5
9132 { 1, 4, 4, 1 },
9133 #define SH_BLTIN_SH_HI 6
9134 { 4, 4, 1 },
9135 #define SH_BLTIN_SH_SI 7
9136 { 4, 4, 1 },
9137 #define SH_BLTIN_V4HI2V2SI 8
9138 { 4, 4, 4 },
9139 #define SH_BLTIN_V4HI2V8QI 9
9140 { 4, 4, 4 },
9141 #define SH_BLTIN_SISF 10
9142 { 4, 2 },
9143 #define SH_BLTIN_LDUA_L 11
9144 { 2, 10 },
9145 #define SH_BLTIN_LDUA_Q 12
9146 { 1, 10 },
9147 #define SH_BLTIN_STUA_L 13
9148 { 0, 10, 2 },
9149 #define SH_BLTIN_STUA_Q 14
9150 { 0, 10, 1 },
9151 #define SH_BLTIN_LDUA_L64 15
9152 { 2, 9 },
9153 #define SH_BLTIN_LDUA_Q64 16
9154 { 1, 9 },
9155 #define SH_BLTIN_STUA_L64 17
9156 { 0, 9, 2 },
9157 #define SH_BLTIN_STUA_Q64 18
9158 { 0, 9, 1 },
9159 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
9160 #define SH_BLTIN_2 19
9161 #define SH_BLTIN_SU 19
9162 { 1, 2 },
9163 #define SH_BLTIN_3 20
9164 #define SH_BLTIN_SUS 20
9165 { 2, 2, 1 },
9166 #define SH_BLTIN_PSSV 21
9167 { 0, 8, 2, 2 },
9168 #define SH_BLTIN_XXUU 22
9169 #define SH_BLTIN_UUUU 22
9170 { 1, 1, 1, 1 },
9171 #define SH_BLTIN_PV 23
9172 { 0, 8 },
9174 /* mcmv: operands considered unsigned. */
9175 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9176 /* mperm: control value considered unsigned int. */
9177 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9178 /* mshards_q: returns signed short. */
9179 /* nsb: takes long long arg, returns unsigned char. */
9180 static const struct builtin_description bdesc[] =
9182 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
9183 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
9184 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
9185 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
9186 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
9187 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
9188 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
9189 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9190 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
9191 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
9192 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
9193 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
9194 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
9195 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
9196 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
9197 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
9198 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
9199 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
9200 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3 },
9201 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3 },
9202 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3 },
9203 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3 },
9204 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3 },
9205 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3 },
9206 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3 },
9207 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
9208 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
9209 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
9210 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
9211 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
9212 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
9213 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
9214 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
9215 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
9216 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
9217 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
9218 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
9219 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
9220 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
9221 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
9222 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
9223 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
9224 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
9225 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
9226 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
9227 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
9228 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
9229 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
9230 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
9231 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
9232 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
9233 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
9234 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
9235 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
9236 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
9237 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
9238 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
9239 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
9240 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
9241 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
9242 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
9243 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3 },
9244 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2 },
9245 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2 },
9246 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
9247 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9248 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9249 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9250 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9251 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9252 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9253 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9254 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9255 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64 },
9256 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64 },
9257 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64 },
9258 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64 },
9259 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64 },
9260 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64 },
9261 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64 },
9262 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64 },
9263 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
9264 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
9265 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
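/* Register the SHmedia built-in functions described in bdesc, sharing the
function type between builtins that use the same signature.  */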
9268 static void
9269 sh_media_init_builtins (void)
9271 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
9272 const struct builtin_description *d;
9274 memset (shared, 0, sizeof shared);
9275 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
9277 tree type, arg_type = 0;
9278 int signature = d->signature;
9279 int i;
9281 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
9282 type = shared[signature];
9283 else
9285 int has_result = signature_args[signature][0] != 0;
9287 if ((signature_args[signature][1] & 8)
9288 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
9289 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
9290 continue;
9291 if (! TARGET_FPU_ANY
9292 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
9293 continue;
9294 type = void_list_node;
9295 for (i = 3; ; i--)
9297 int arg = signature_args[signature][i];
9298 int opno = i - 1 + has_result;
9300 if (arg & 8)
9301 arg_type = ptr_type_node;
9302 else if (arg)
9303 arg_type = (*lang_hooks.types.type_for_mode)
9304 (insn_data[d->icode].operand[opno].mode,
9305 (arg & 1));
9306 else if (i)
9307 continue;
9308 else
9309 arg_type = void_type_node;
9310 if (i == 0)
9311 break;
9312 type = tree_cons (NULL_TREE, arg_type, type);
9314 type = build_function_type (arg_type, type);
9315 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
9316 shared[signature] = type;
9318 lang_hooks.builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
9319 NULL, NULL_TREE);
9323 /* Implements target hook vector_mode_supported_p. */
9324 bool
9325 sh_vector_mode_supported_p (enum machine_mode mode)
9327 if (TARGET_FPU_ANY
9328 && ((mode == V2SFmode)
9329 || (mode == V4SFmode)
9330 || (mode == V16SFmode)))
9331 return true;
9333 else if (TARGET_SHMEDIA
9334 && ((mode == V8QImode)
9335 || (mode == V2HImode)
9336 || (mode == V4HImode)
9337 || (mode == V2SImode)))
9338 return true;
9340 return false;
9343 /* Implements target hook dwarf_calling_convention. Return an enum
9344 of dwarf_calling_convention. */
9346 sh_dwarf_calling_convention (tree func)
9348 if (sh_attr_renesas_p (func))
9349 return DW_CC_GNU_renesas_sh;
9351 return DW_CC_normal;
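/* Set up the target-specific built-in functions; currently only SHmedia
defines any.  */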
9354 static void
9355 sh_init_builtins (void)
9357 if (TARGET_SHMEDIA)
9358 sh_media_init_builtins ();
9361 /* Expand an expression EXP that calls a built-in function,
9362 with result going to TARGET if that's convenient
9363 (and in mode MODE if that's convenient).
9364 SUBTARGET may be used as the target for computing one of EXP's operands.
9365 IGNORE is nonzero if the value is to be ignored. */
9367 static rtx
9368 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9369 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
9371 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
9372 tree arglist = TREE_OPERAND (exp, 1);
9373 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9374 const struct builtin_description *d = &bdesc[fcode];
9375 enum insn_code icode = d->icode;
9376 int signature = d->signature;
9377 enum machine_mode tmode = VOIDmode;
9378 int nop = 0, i;
9379 rtx op[4];
9380 rtx pat = 0;
9382 if (signature_args[signature][0])
9384 if (ignore)
9385 return 0;
9387 tmode = insn_data[icode].operand[0].mode;
9388 if (! target
9389 || GET_MODE (target) != tmode
9390 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9391 target = gen_reg_rtx (tmode);
9392 op[nop++] = target;
9394 else
9395 target = 0;
9397 for (i = 1; i <= 3; i++, nop++)
9399 tree arg;
9400 enum machine_mode opmode, argmode;
9401 tree optype;
9403 if (! signature_args[signature][i])
9404 break;
9405 arg = TREE_VALUE (arglist);
9406 if (arg == error_mark_node)
9407 return const0_rtx;
9408 arglist = TREE_CHAIN (arglist);
9409 if (signature_args[signature][i] & 8)
9411 opmode = ptr_mode;
9412 optype = ptr_type_node;
9414 else
9416 opmode = insn_data[icode].operand[nop].mode;
9417 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
9419 argmode = TYPE_MODE (TREE_TYPE (arg));
9420 if (argmode != opmode)
9421 arg = build1 (NOP_EXPR, optype, arg);
9422 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
9423 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
9424 op[nop] = copy_to_mode_reg (opmode, op[nop]);
9427 switch (nop)
9429 case 1:
9430 pat = (*insn_data[d->icode].genfun) (op[0]);
9431 break;
9432 case 2:
9433 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
9434 break;
9435 case 3:
9436 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
9437 break;
9438 case 4:
9439 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
9440 break;
9441 default:
9442 gcc_unreachable ();
9444 if (! pat)
9445 return 0;
9446 emit_insn (pat);
9447 return target;
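/* Expand a V2SF unary operation CODE by applying the corresponding SFmode
operation to each of the two vector elements.  */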
9450 void
9451 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
9453 rtx sel0 = const0_rtx;
9454 rtx sel1 = const1_rtx;
9455 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
9456 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
9458 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
9459 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
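/* Likewise for a V2SF binary operation CODE on OP1 and OP2.  */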
9462 void
9463 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
9465 rtx sel0 = const0_rtx;
9466 rtx sel1 = const1_rtx;
9467 rtx (*fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
9468 = gen_binary_sf_op;
9469 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
9471 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
9472 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
9475 /* Return the class of registers for which a mode change from FROM to TO
9476 is invalid. */
9477 bool
9478 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9479 enum reg_class class)
9481 /* We want to enable the use of SUBREGs as a means to
9482 VEC_SELECT a single element of a vector. */
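/* E.g. (subreg:SF (reg:V2SF r) 0) extracts one lane without an
   explicit vec_select.  */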
9483 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
9484 return (reg_classes_intersect_p (GENERAL_REGS, class));
9486 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
9488 if (TARGET_LITTLE_ENDIAN)
9490 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
9491 return reg_classes_intersect_p (DF_REGS, class);
9493 else
9495 if (GET_MODE_SIZE (from) < 8)
9496 return reg_classes_intersect_p (DF_HI_REGS, class);
9499 return 0;
9503 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
9504 that label is used. */
9506 void
9507 sh_mark_label (rtx address, int nuses)
9509 if (GOTOFF_P (address))
9511 /* Extract the label or symbol. */
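/* A GOTOFF operand has the form (const (unspec [(label_ref L)] ...)),
   possibly with an additional (plus ... (const_int N)) inside the CONST;
   peel those wrappers off to reach the label_ref or symbol_ref inside
   the unspec vector.  */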
9512 address = XEXP (address, 0);
9513 if (GET_CODE (address) == PLUS)
9514 address = XEXP (address, 0);
9515 address = XVECEXP (address, 0, 0);
9517 if (GET_CODE (address) == LABEL_REF
9518 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
9519 LABEL_NUSES (XEXP (address, 0)) += nuses;
9522 /* Compute extra cost of moving data between one register class
9523 and another. */
9525 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
9526 uses this information. Hence, the general register <-> floating point
9527 register information here is not used for SFmode. */
9529 int
9530 sh_register_move_cost (enum machine_mode mode,
9531 enum reg_class srcclass, enum reg_class dstclass)
9533 if (dstclass == T_REGS || dstclass == PR_REGS)
9534 return 10;
9536 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
9537 return 4;
9539 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
9540 && REGCLASS_HAS_FP_REG (srcclass)
9541 && REGCLASS_HAS_FP_REG (dstclass))
9542 return 4;
9544 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
9545 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
9547 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
9548 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
9549 return 9;
9551 if ((REGCLASS_HAS_FP_REG (dstclass)
9552 && REGCLASS_HAS_GENERAL_REG (srcclass))
9553 || (REGCLASS_HAS_GENERAL_REG (dstclass)
9554 && REGCLASS_HAS_FP_REG (srcclass)))
9555 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
9556 * ((GET_MODE_SIZE (mode) + 7) / 8U));
9558 if ((dstclass == FPUL_REGS
9559 && REGCLASS_HAS_GENERAL_REG (srcclass))
9560 || (srcclass == FPUL_REGS
9561 && REGCLASS_HAS_GENERAL_REG (dstclass)))
9562 return 5;
9564 if ((dstclass == FPUL_REGS
9565 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
9566 || (srcclass == FPUL_REGS
9567 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
9568 return 7;
9570 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9571 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9572 return 20;
9574 /* ??? ptabs faults on (value & 0x3) == 0x3 */
9575 if (TARGET_SHMEDIA
9576 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
9578 if (sh_gettrcost >= 0)
9579 return sh_gettrcost;
9580 else if (!TARGET_PT_FIXED)
9581 return 100;
9584 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9585 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9586 return 4;
9588 if (TARGET_SHMEDIA
9589 || (TARGET_FMOVD
9590 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
9591 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
9592 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
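/* Otherwise assume a cost of 2 per SImode-sized word moved; e.g. a DFmode
   move between general registers on SH4 comes out as 4.  */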
9594 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
9597 static rtx emit_load_ptr (rtx, rtx);
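/* Load a pointer-sized value from ADDR into REG, sign-extending it to
   Pmode when Pmode and ptr_mode differ.  */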
9599 static rtx
9600 emit_load_ptr (rtx reg, rtx addr)
9602 rtx mem = gen_const_mem (ptr_mode, addr);
9604 if (Pmode != ptr_mode)
9605 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
9606 return emit_move_insn (reg, mem);
9609 static void
9610 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
9611 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
9612 tree function)
9614 CUMULATIVE_ARGS cum;
9615 int structure_value_byref = 0;
9616 rtx this, this_value, sibcall, insns, funexp;
9617 tree funtype = TREE_TYPE (function);
9618 int simple_add = CONST_OK_FOR_ADD (delta);
9619 int did_load = 0;
9620 rtx scratch0, scratch1, scratch2;
9621 unsigned i;
9623 reload_completed = 1;
9624 epilogue_completed = 1;
9625 no_new_pseudos = 1;
9626 current_function_uses_only_leaf_regs = 1;
9627 reset_block_changes ();
9629 emit_note (NOTE_INSN_PROLOGUE_END);
9631 /* Find the "this" pointer. We have such a wide range of ABIs for the
9632 SH that it's best to do this completely machine independently.
9633 "this" is passed as first argument, unless a structure return pointer
9634 comes first, in which case "this" comes second. */
9635 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
9636 #ifndef PCC_STATIC_STRUCT_RETURN
9637 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
9638 structure_value_byref = 1;
9639 #endif /* not PCC_STATIC_STRUCT_RETURN */
9640 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
9642 tree ptype = build_pointer_type (TREE_TYPE (funtype));
9644 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
9646 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
9648 /* For SHcompact, we only have r0 for a scratch register: r1 is the
9649 static chain pointer (even if you can't have nested virtual functions
9650 right now, someone might implement them sometime), and the rest of the
9651 registers are used for argument passing, are callee-saved, or reserved. */
9652 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
9653 -ffixed-reg has been used. */
9654 if (! call_used_regs[0] || fixed_regs[0])
9655 error ("r0 needs to be available as a call-clobbered register");
9656 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
9657 if (! TARGET_SH5)
9659 if (call_used_regs[1] && ! fixed_regs[1])
9660 scratch1 = gen_rtx_REG (ptr_mode, 1);
9661 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
9662 to the location where struct values are to be returned. */
9663 if (call_used_regs[3] && ! fixed_regs[3])
9664 scratch2 = gen_rtx_REG (Pmode, 3);
9666 else if (TARGET_SHMEDIA)
9668 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
9669 if (i != REGNO (scratch0) &&
9670 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
9672 scratch1 = gen_rtx_REG (ptr_mode, i);
9673 break;
9675 if (scratch1 == scratch0)
9676 error ("Need a second call-clobbered general purpose register");
9677 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
9678 if (call_used_regs[i] && ! fixed_regs[i])
9680 scratch2 = gen_rtx_REG (Pmode, i);
9681 break;
9683 if (scratch2 == scratch0)
9684 error ("Need a call-clobbered target register");
9687 this_value = plus_constant (this, delta);
9688 if (vcall_offset
9689 && (simple_add || scratch0 != scratch1)
9690 && strict_memory_address_p (ptr_mode, this_value))
9692 emit_load_ptr (scratch0, this_value);
9693 did_load = 1;
9696 if (!delta)
9697 ; /* Do nothing. */
9698 else if (simple_add)
9699 emit_move_insn (this, this_value);
9700 else
9702 emit_move_insn (scratch1, GEN_INT (delta));
9703 emit_insn (gen_add2_insn (this, scratch1));
9706 if (vcall_offset)
9708 rtx offset_addr;
9710 if (!did_load)
9711 emit_load_ptr (scratch0, this);
9713 offset_addr = plus_constant (scratch0, vcall_offset);
9714 if (strict_memory_address_p (ptr_mode, offset_addr))
9715 ; /* Do nothing. */
9716 else if (! TARGET_SH5 && scratch0 != scratch1)
9718 /* scratch0 != scratch1, and we have indexed loads. Get better
9719 schedule by loading the offset into r1 and using an indexed
9720 load - then the load of r1 can issue before the load from
9721 (this + delta) finishes. */
9722 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9723 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
9725 else if (CONST_OK_FOR_ADD (vcall_offset))
9727 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
9728 offset_addr = scratch0;
9730 else if (scratch0 != scratch1)
9732 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9733 emit_insn (gen_add2_insn (scratch0, scratch1));
9734 offset_addr = scratch0;
9736 else
9737 gcc_unreachable (); /* FIXME */
9738 emit_load_ptr (scratch0, offset_addr);
9740 if (Pmode != ptr_mode)
9741 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
9742 emit_insn (gen_add2_insn (this, scratch0));
9745 /* Generate a tail call to the target function. */
9746 if (! TREE_USED (function))
9748 assemble_external (function);
9749 TREE_USED (function) = 1;
9751 funexp = XEXP (DECL_RTL (function), 0);
9752 /* If the function is overridden, so is the thunk, hence we don't
9753 need GOT addressing even if this is a public symbol. */
9754 #if 0
9755 if (TARGET_SH1 && ! flag_weak)
9756 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
9757 else
9758 #endif
9759 if (TARGET_SH2 && flag_pic)
9761 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
9762 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
9764 else
9766 if (TARGET_SHMEDIA && flag_pic)
9768 funexp = gen_sym2PIC (funexp);
9769 PUT_MODE (funexp, Pmode);
9771 emit_move_insn (scratch2, funexp);
9772 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
9773 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
9775 sibcall = emit_call_insn (sibcall);
9776 SIBLING_CALL_P (sibcall) = 1;
9777 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
9778 emit_barrier ();
9780 /* Run just enough of rest_of_compilation to do scheduling and get
9781 the insns emitted. Note that use_thunk calls
9782 assemble_start_function and assemble_end_function. */
9784 insn_locators_initialize ();
9785 insns = get_insns ();
9787 if (optimize > 0)
9789 /* Initialize the bitmap obstacks. */
9790 bitmap_obstack_initialize (NULL);
9791 bitmap_obstack_initialize (&reg_obstack);
9792 if (! cfun->cfg)
9793 init_flow ();
9794 rtl_register_cfg_hooks ();
9795 init_rtl_bb_info (ENTRY_BLOCK_PTR);
9796 init_rtl_bb_info (EXIT_BLOCK_PTR);
9797 ENTRY_BLOCK_PTR->flags |= BB_RTL;
9798 EXIT_BLOCK_PTR->flags |= BB_RTL;
9799 find_basic_blocks (insns);
9801 if (flag_schedule_insns_after_reload)
9803 life_analysis (dump_file, PROP_FINAL);
9805 split_all_insns (1);
9807 schedule_insns (dump_file);
9809 /* We must split jmp insn in PIC case. */
9810 else if (flag_pic)
9811 split_all_insns_noflow ();
9814 sh_reorg ();
9816 if (optimize > 0 && flag_delayed_branch)
9817 dbr_schedule (insns, dump_file);
9819 shorten_branches (insns);
9820 final_start_function (insns, file, 1);
9821 final (insns, file, 1);
9822 final_end_function ();
9824 if (optimize > 0)
9826 /* Release all memory allocated by flow. */
9827 free_basic_block_vars ();
9829 /* Release the bitmap obstacks. */
9830 bitmap_obstack_release (&reg_obstack);
9831 bitmap_obstack_release (NULL);
9834 reload_completed = 0;
9835 epilogue_completed = 0;
9836 no_new_pseudos = 0;
9839 rtx
9840 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
9842 rtx sym;
9844 /* If this is not an ordinary function, the name usually comes from a
9845 string literal or an sprintf buffer. Make sure we use the same
9846 string consistently, so that cse will be able to unify address loads. */
9847 if (kind != FUNCTION_ORDINARY)
9848 name = IDENTIFIER_POINTER (get_identifier (name));
9849 sym = gen_rtx_SYMBOL_REF (Pmode, name);
9850 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
9851 if (flag_pic)
9852 switch (kind)
9854 case FUNCTION_ORDINARY:
9855 break;
9856 case SFUNC_GOT:
9858 rtx reg = target ? target : gen_reg_rtx (Pmode);
9860 emit_insn (gen_symGOT2reg (reg, sym));
9861 sym = reg;
9862 break;
9864 case SFUNC_STATIC:
9866 /* ??? To allow cse to work, we use GOTOFF relocations.
9867 We could add combiner patterns to transform this into
9868 straight pc-relative calls with sym2PIC / bsrf when
9869 label load and function call are still 1:1 and in the
9870 same basic block during combine. */
9871 rtx reg = target ? target : gen_reg_rtx (Pmode);
9873 emit_insn (gen_symGOTOFF2reg (reg, sym));
9874 sym = reg;
9875 break;
9878 if (target && sym != target)
9880 emit_move_insn (target, sym);
9881 return target;
9883 return sym;
9886 /* Find the number of a general purpose register in S. */
9887 static int
9888 scavenge_reg (HARD_REG_SET *s)
9890 int r;
9891 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
9892 if (TEST_HARD_REG_BIT (*s, r))
9893 return r;
9894 return -1;
9897 rtx
9898 sh_get_pr_initial_val (void)
9900 rtx val;
9902 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
9903 PR register on SHcompact, because it might be clobbered by the prologue.
9904 We check first if that is known to be the case. */
9905 if (TARGET_SHCOMPACT
9906 && ((current_function_args_info.call_cookie
9907 & ~ CALL_COOKIE_RET_TRAMP (1))
9908 || current_function_has_nonlocal_label))
9909 return gen_frame_mem (SImode, return_address_pointer_rtx);
9911 /* If we haven't finished rtl generation, there might be a nonlocal label
9912 that we haven't seen yet.
9913 ??? get_hard_reg_initial_val fails if it is called while no_new_pseudos
9914 is set, unless it has been called before for the same register. And even
9915 then, we end up in trouble if we didn't use the register in the same
9916 basic block before. So call get_hard_reg_initial_val now and wrap it
9917 in an unspec if we might need to replace it. */
9918 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
9919 combine can put the pseudo returned by get_hard_reg_initial_val into
9920 instructions that need a general purpose register, which will fail to
9921 be recognized when the pseudo becomes allocated to PR. */
9922 val
9923 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
9924 if (TARGET_SH1)
9925 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
9926 return val;
9929 int
9930 sh_expand_t_scc (enum rtx_code code, rtx target)
9932 rtx result = target;
9933 HOST_WIDE_INT val;
9935 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
9936 || GET_CODE (sh_compare_op1) != CONST_INT)
9937 return 0;
9938 if (GET_CODE (result) != REG)
9939 result = gen_reg_rtx (SImode);
9940 val = INTVAL (sh_compare_op1);
9941 if ((code == EQ && val == 1) || (code == NE && val == 0))
9942 emit_insn (gen_movt (result));
9943 else if ((code == EQ && val == 0) || (code == NE && val == 1))
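/* Compute 1 - T: after the clobber, subc with identical operands leaves
   -T in RESULT, and adding 1 yields the negated T bit.  */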
9945 emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
9946 emit_insn (gen_subc (result, result, result));
9947 emit_insn (gen_addsi3 (result, result, const1_rtx));
9949 else if (code == EQ || code == NE)
9950 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
9951 else
9952 return 0;
9953 if (result != target)
9954 emit_move_insn (target, result);
9955 return 1;
9958 /* INSN is an sfunc; return the rtx that describes the address used. */
9959 static rtx
9960 extract_sfunc_addr (rtx insn)
9962 rtx pattern, part = NULL_RTX;
9963 int len, i;
9965 pattern = PATTERN (insn);
9966 len = XVECLEN (pattern, 0);
9967 for (i = 0; i < len; i++)
9969 part = XVECEXP (pattern, 0, i);
9970 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
9971 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
9972 return XEXP (part, 0);
9974 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
9975 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
9978 /* Verify that the register in use_sfunc_addr still agrees with the address
9979 used in the sfunc. This prevents fill_slots_from_thread from changing
9980 use_sfunc_addr.
9981 INSN is the use_sfunc_addr instruction, and REG is the register it
9982 guards. */
9983 int
9984 check_use_sfunc_addr (rtx insn, rtx reg)
9986 /* Search for the sfunc. It should really come right after INSN. */
9987 while ((insn = NEXT_INSN (insn)))
9989 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
9990 break;
9991 if (! INSN_P (insn))
9992 continue;
9994 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
9995 insn = XVECEXP (PATTERN (insn), 0, 0);
9996 if (GET_CODE (PATTERN (insn)) != PARALLEL
9997 || get_attr_type (insn) != TYPE_SFUNC)
9998 continue;
9999 return rtx_equal_p (extract_sfunc_addr (insn), reg);
10001 gcc_unreachable ();
10004 /* This function returns a constant rtx that represents 2**15 / pi in
10005 SFmode. It's used to scale SFmode angles, in radians, to a
10006 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
10007 maps to 0x10000. */
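/* For example, an angle of pi/2 radians scales to
   (pi/2) * 2**15/pi = 0x4000, i.e. a quarter of a full circle.  */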
10009 static GTY(()) rtx sh_fsca_sf2int_rtx;
10011 rtx
10012 sh_fsca_sf2int (void)
10014 if (! sh_fsca_sf2int_rtx)
10016 REAL_VALUE_TYPE rv;
10018 real_from_string (&rv, "10430.378350470453");
10019 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
10022 return sh_fsca_sf2int_rtx;
10025 /* This function returns a constant rtx that represents 2**15 / pi in
10026 DFmode. It's used to scale DFmode angles, in radians, to a
10027 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
10028 maps to 0x10000. */
10030 static GTY(()) rtx sh_fsca_df2int_rtx;
10032 rtx
10033 sh_fsca_df2int (void)
10035 if (! sh_fsca_df2int_rtx)
10037 REAL_VALUE_TYPE rv;
10039 real_from_string (&rv, "10430.378350470453");
10040 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
10043 return sh_fsca_df2int_rtx;
10046 /* This function returns a constant rtx that represents pi / 2**15 in
10047 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
10048 of a full circle back to an SFmode value, i.e., 0x10000 maps to
10049 2*pi. */
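/* This is the reciprocal of the sh_fsca_sf2int constant; e.g. the
   fixed-point value 0x4000 scales back to 0x4000 * pi/2**15 = pi/2
   radians.  */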
10051 static GTY(()) rtx sh_fsca_int2sf_rtx;
10053 rtx
10054 sh_fsca_int2sf (void)
10056 if (! sh_fsca_int2sf_rtx)
10058 REAL_VALUE_TYPE rv;
10060 real_from_string (&rv, "9.587379924285257e-5");
10061 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
10064 return sh_fsca_int2sf_rtx;
10067 /* Initialize the CUMULATIVE_ARGS structure. */
10069 void
10070 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
10071 tree fntype,
10072 rtx libname ATTRIBUTE_UNUSED,
10073 tree fndecl,
10074 signed int n_named_args,
10075 enum machine_mode mode)
10077 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
10078 pcum->free_single_fp_reg = 0;
10079 pcum->stack_regs = 0;
10080 pcum->byref_regs = 0;
10081 pcum->byref = 0;
10082 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
10084 /* XXX - Should we check TARGET_HITACHI here ??? */
10085 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
10087 if (fntype)
10089 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
10090 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
10091 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
10092 pcum->arg_count [(int) SH_ARG_INT]
10093 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
10095 pcum->call_cookie
10096 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10097 && pcum->arg_count [(int) SH_ARG_INT] == 0
10098 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
10099 ? int_size_in_bytes (TREE_TYPE (fntype))
10100 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
10101 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
10102 == FIRST_RET_REG));
10104 else
10106 pcum->arg_count [(int) SH_ARG_INT] = 0;
10107 pcum->prototype_p = FALSE;
10108 if (mode != VOIDmode)
10110 pcum->call_cookie =
10111 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10112 && GET_MODE_SIZE (mode) > 4
10113 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
10115 /* If the default ABI is the Renesas ABI then all library
10116 calls must assume that the library will be using the
10117 Renesas ABI. So if the function would return its result
10118 in memory then we must force the address of this memory
10119 block onto the stack. Ideally we would like to call
10120 targetm.calls.return_in_memory() here but we do not have
10121 the TYPE or the FNDECL available so we synthesize the
10122 contents of that function as best we can. */
10123 pcum->force_mem =
10124 (TARGET_DEFAULT & MASK_HITACHI)
10125 && (mode == BLKmode
10126 || (GET_MODE_SIZE (mode) > 4
10127 && !(mode == DFmode
10128 && TARGET_FPU_DOUBLE)));
10130 else
10132 pcum->call_cookie = 0;
10133 pcum->force_mem = FALSE;
10138 /* Determine if two hard register sets intersect.
10139 Return 1 if they do. */
10141 static int
10142 hard_regs_intersect_p (HARD_REG_SET *a, HARD_REG_SET *b)
10144 HARD_REG_SET c;
10145 COPY_HARD_REG_SET (c, *a);
10146 AND_HARD_REG_SET (c, *b);
10147 GO_IF_HARD_REG_SUBSET (c, reg_class_contents[(int) NO_REGS], lose);
10148 return 1;
10149 lose:
10150 return 0;
10153 #ifdef TARGET_ADJUST_UNROLL_MAX
10154 static int
10155 sh_adjust_unroll_max (struct loop * loop, int insn_count,
10156 int max_unrolled_insns, int strength_reduce_p,
10157 int unroll_type)
10159 /* This doesn't work in 4.0 because the old unroller & loop.h is gone. */
10160 if (TARGET_ADJUST_UNROLL && TARGET_SHMEDIA)
10162 /* Throttle back loop unrolling so that the costs of using more
10163 targets than the eight target registers we have don't outweigh
10164 the benefits of unrolling. */
10165 rtx insn;
10166 int n_labels = 0, n_calls = 0, n_exit_dest = 0, n_inner_loops = -1;
10167 int n_barriers = 0;
10168 rtx dest;
10169 int i;
10170 rtx exit_dest[8];
10171 int threshold;
10172 int unroll_benefit = 0, mem_latency = 0;
10173 int base_cost, best_cost, cost;
10174 int factor, best_factor;
10175 int n_dest;
10176 unsigned max_iterations = 32767;
10177 int n_iterations;
10178 int need_precond = 0, precond = 0;
10179 basic_block * bbs = get_loop_body (loop);
10180 struct niter_desc *desc;
10182 /* Assume that all labels inside the loop are used from inside the
10183 loop. If the loop has multiple entry points, it is unlikely to
10184 be unrolled anyway.
10185 Also assume that all calls are to different functions. That is
10186 somewhat pessimistic, but if you have lots of calls, unrolling the
10187 loop is not likely to gain you much in the first place. */
10188 i = loop->num_nodes - 1;
10189 for (insn = BB_HEAD (bbs[i]); ; )
10191 if (GET_CODE (insn) == CODE_LABEL)
10192 n_labels++;
10193 else if (GET_CODE (insn) == CALL_INSN)
10194 n_calls++;
10195 else if (GET_CODE (insn) == NOTE
10196 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
10197 n_inner_loops++;
10198 else if (GET_CODE (insn) == BARRIER)
10199 n_barriers++;
10200 if (insn != BB_END (bbs[i]))
10201 insn = NEXT_INSN (insn);
10202 else if (--i >= 0)
10203 insn = BB_HEAD (bbs[i]);
10204 else
10205 break;
10207 free (bbs);
10208 /* One label for the loop top is normal, and it won't be duplicated by
10209 unrolling. */
10210 if (n_labels <= 1)
10211 return max_unrolled_insns;
10212 if (n_inner_loops > 0)
10213 return 0;
10214 for (dest = loop->exit_labels; dest && n_exit_dest < 8;
10215 dest = LABEL_NEXTREF (dest))
10217 for (i = n_exit_dest - 1;
10218 i >= 0 && XEXP (dest, 0) != XEXP (exit_dest[i], 0); i--);
10219 if (i < 0)
10220 exit_dest[n_exit_dest++] = dest;
10222 /* If the loop top and call and exit destinations are enough to fill up
10223 the target registers, we're unlikely to do any more damage by
10224 unrolling. */
10225 if (n_calls + n_exit_dest >= 7)
10226 return max_unrolled_insns;
10228 /* ??? In the new loop unroller, there is no longer any strength
10229 reduction information available. Thus, when it comes to unrolling,
10230 we know the cost of everything, but we know the value of nothing. */
10231 #if 0
10232 if (strength_reduce_p
10233 && (unroll_type == LPT_UNROLL_RUNTIME
10234 || unroll_type == LPT_UNROLL_CONSTANT
10235 || unroll_type == LPT_PEEL_COMPLETELY))
10237 struct loop_ivs *ivs = LOOP_IVS (loop);
10238 struct iv_class *bl;
10240 /* We'll save one compare-and-branch in each loop body copy
10241 but the last one. */
10242 unroll_benefit = 1;
10243 /* Assess the benefit of removing biv & giv updates. */
10244 for (bl = ivs->list; bl; bl = bl->next)
10246 rtx increment = biv_total_increment (bl);
10247 struct induction *v;
10249 if (increment && GET_CODE (increment) == CONST_INT)
10251 unroll_benefit++;
10252 for (v = bl->giv; v; v = v->next_iv)
10254 if (! v->ignore && v->same == 0
10255 && GET_CODE (v->mult_val) == CONST_INT)
10256 unroll_benefit++;
10257 /* If this giv uses an array, try to determine
10258 a maximum iteration count from the size of the
10259 array. This need not be correct all the time,
10260 but should not be too far off the mark too often. */
10261 while (v->giv_type == DEST_ADDR)
10263 rtx mem = PATTERN (v->insn);
10264 tree mem_expr, type, size_tree;
10266 if (GET_CODE (SET_SRC (mem)) == MEM)
10267 mem = SET_SRC (mem);
10268 else if (GET_CODE (SET_DEST (mem)) == MEM)
10269 mem = SET_DEST (mem);
10270 else
10271 break;
10272 mem_expr = MEM_EXPR (mem);
10273 if (! mem_expr)
10274 break;
10275 type = TREE_TYPE (mem_expr);
10276 if (TREE_CODE (type) != ARRAY_TYPE
10277 || ! TYPE_SIZE (type) || ! TYPE_SIZE_UNIT (type))
10278 break;
10279 size_tree = fold (build (TRUNC_DIV_EXPR,
10280 bitsizetype,
10281 TYPE_SIZE (type),
10282 TYPE_SIZE_UNIT (type)));
10283 if (TREE_CODE (size_tree) == INTEGER_CST
10284 && ! TREE_INT_CST_HIGH (size_tree)
10285 && TREE_INT_CST_LOW (size_tree) < max_iterations)
10286 max_iterations = TREE_INT_CST_LOW (size_tree);
10287 break;
10293 #else /* 0 */
10294 /* Assume there is at least some benefit. */
10295 unroll_benefit = 1;
10296 #endif /* 0 */
10298 desc = get_simple_loop_desc (loop);
10299 n_iterations = desc->const_iter ? desc->niter : 0;
10300 max_iterations
10301 = max_iterations < desc->niter_max ? max_iterations : desc->niter_max;
10303 if (! strength_reduce_p || ! n_iterations)
10304 need_precond = 1;
10305 if (! n_iterations)
10307 n_iterations
10308 = max_iterations < 3 ? max_iterations : max_iterations * 3 / 4;
10309 if (! n_iterations)
10310 return 0;
10312 #if 0 /* ??? See above - missing induction variable information. */
10313 while (unroll_benefit > 1) /* no loop */
10315 /* We include the benefit of biv/ giv updates. Check if some or
10316 all of these updates are likely to fit into a scheduling
10317 bubble of a load.
10318 We check for the following case:
10319 - All the insns leading to the first JUMP_INSN are in a strict
10320 dependency chain.
10321 - there is at least one memory reference in them.
10323 When we find such a pattern, we assume that we can hide as many
10324 updates as the total of the load latency is, if we have an
10325 unroll factor of at least two. We might or might not also do
10326 this without unrolling, so rather than considering this as an
10327 extra unroll benefit, discount it in the unroll benefits of unroll
10328 factors higher than two. */
10330 rtx set, last_set;
10332 insn = next_active_insn (loop->start);
10333 last_set = single_set (insn);
10334 if (! last_set)
10335 break;
10336 if (GET_CODE (SET_SRC (last_set)) == MEM)
10337 mem_latency += 2;
10338 for (insn = NEXT_INSN (insn); insn != end; insn = NEXT_INSN (insn))
10340 if (! INSN_P (insn))
10341 continue;
10342 if (GET_CODE (insn) == JUMP_INSN)
10343 break;
10344 if (! reg_referenced_p (SET_DEST (last_set), PATTERN (insn)))
10346 /* Check if this is a to-be-reduced giv insn. */
10347 struct loop_ivs *ivs = LOOP_IVS (loop);
10348 struct iv_class *bl;
10349 struct induction *v;
10350 for (bl = ivs->list; bl; bl = bl->next)
10352 if (bl->biv->insn == insn)
10353 goto is_biv;
10354 for (v = bl->giv; v; v = v->next_iv)
10355 if (v->insn == insn)
10356 goto is_giv;
10358 mem_latency--;
10359 is_biv:
10360 is_giv:
10361 continue;
10363 set = single_set (insn);
10364 if (! set)
10365 continue;
10366 if (GET_CODE (SET_SRC (set)) == MEM)
10367 mem_latency += 2;
10368 last_set = set;
10370 if (mem_latency < 0)
10371 mem_latency = 0;
10372 else if (mem_latency > unroll_benefit - 1)
10373 mem_latency = unroll_benefit - 1;
10374 break;
10376 #endif /* 0 */
10377 if (n_labels + (unroll_benefit + n_labels * 8) / n_iterations
10378 <= unroll_benefit)
10379 return max_unrolled_insns;
10381 n_dest = n_labels + n_calls + n_exit_dest;
10382 base_cost = n_dest <= 8 ? 0 : n_dest - 7;
10383 best_cost = 0;
10384 best_factor = 1;
10385 if (n_barriers * 2 > n_labels - 1)
10386 n_barriers = (n_labels - 1) / 2;
10387 for (factor = 2; factor <= 8; factor++)
10389 /* Bump up preconditioning cost for each power of two. */
10390 if (! (factor & (factor-1)))
10391 precond += 4;
10392 /* When preconditioning, only powers of two will be considered. */
10393 else if (need_precond)
10394 continue;
10395 n_dest = ((unroll_type != LPT_PEEL_COMPLETELY)
10396 + (n_labels - 1) * factor + n_calls + n_exit_dest
10397 - (n_barriers * factor >> 1)
10398 + need_precond);
10399 cost
10400 = ((n_dest <= 8 ? 0 : n_dest - 7)
10401 - base_cost * factor
10402 - ((factor > 2 ? unroll_benefit - mem_latency : unroll_benefit)
10403 * (factor - (unroll_type != LPT_PEEL_COMPLETELY)))
10404 + ((unroll_benefit + 1 + (n_labels - 1) * factor)
10405 / n_iterations));
10406 if (need_precond)
10407 cost += (precond + unroll_benefit * factor / 2) / n_iterations;
10408 if (cost < best_cost)
10410 best_cost = cost;
10411 best_factor = factor;
10414 threshold = best_factor * insn_count;
10415 if (max_unrolled_insns > threshold)
10416 max_unrolled_insns = threshold;
10418 return max_unrolled_insns;
10420 #endif /* TARGET_ADJUST_UNROLL_MAX */
10422 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
10423 not descend into CONST_DOUBLE when doing the replacement.
10425 Note that copying is not done so X must not be shared unless all copies
10426 are to be modified.
10428 This is like replace_rtx, except that we operate on N_REPLACEMENTS
10429 replacements simultaneously - FROM(n) is replacements[n*2] and to(n) is
10430 replacements[n*2+1] - and that we take mode changes into account.
10432 If a replacement is ambiguous, return NULL_RTX.
10434 If MODIFY is zero, don't modify any rtl in place,
10435 just return zero or nonzero for failure / success. */
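/* For example, a single replacement pair { (reg:SI 4), (reg:SI 5) }
   rewrites every occurrence of register 4 in X as register 5; a use that
   only partially overlaps a FROM register cannot be translated and makes
   the result NULL_RTX.  */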
10437 rtx
10438 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
10440 int i, j;
10441 const char *fmt;
10443 /* The following prevents infinite loops when we change a MEM inside a
10444 CONST_DOUBLE into the same CONST_DOUBLE. */
10445 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
10446 return x;
10448 for (i = n_replacements - 1; i >= 0 ; i--)
10449 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
10450 return replacements[i*2+1];
10452 /* Allow this function to make replacements in EXPR_LISTs. */
10453 if (x == 0)
10454 return 0;
10456 if (GET_CODE (x) == SUBREG)
10458 rtx new = replace_n_hard_rtx (SUBREG_REG (x), replacements,
10459 n_replacements, modify);
10461 if (GET_CODE (new) == CONST_INT)
10463 x = simplify_subreg (GET_MODE (x), new,
10464 GET_MODE (SUBREG_REG (x)),
10465 SUBREG_BYTE (x));
10466 if (! x)
10467 abort ();
10469 else if (modify)
10470 SUBREG_REG (x) = new;
10472 return x;
10474 else if (GET_CODE (x) == REG)
10476 unsigned regno = REGNO (x);
10477 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
10478 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
10479 rtx result = NULL_RTX;
10481 for (i = n_replacements - 1; i >= 0; i--)
10483 rtx from = replacements[i*2];
10484 rtx to = replacements[i*2+1];
10485 unsigned from_regno, from_nregs, to_regno, new_regno;
10487 if (GET_CODE (from) != REG)
10488 continue;
10489 from_regno = REGNO (from);
10490 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
10491 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
10492 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
10494 if (regno < from_regno
10495 || regno + nregs > from_regno + nregs
10496 || GET_CODE (to) != REG
10497 || result)
10498 return NULL_RTX;
10499 to_regno = REGNO (to);
10500 if (to_regno < FIRST_PSEUDO_REGISTER)
10502 new_regno = regno + to_regno - from_regno;
10503 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
10504 != nregs)
10505 return NULL_RTX;
10506 result = gen_rtx_REG (GET_MODE (x), new_regno);
10508 else if (GET_MODE (x) <= GET_MODE (to))
10509 result = gen_lowpart_common (GET_MODE (x), to);
10510 else
10511 result = gen_lowpart_SUBREG (GET_MODE (x), to);
10514 return result ? result : x;
10516 else if (GET_CODE (x) == ZERO_EXTEND)
10518 rtx new = replace_n_hard_rtx (XEXP (x, 0), replacements,
10519 n_replacements, modify);
10521 if (GET_CODE (new) == CONST_INT)
10523 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
10524 new, GET_MODE (XEXP (x, 0)));
10525 if (! x)
10526 abort ();
10528 else if (modify)
10529 XEXP (x, 0) = new;
10531 return x;
10534 fmt = GET_RTX_FORMAT (GET_CODE (x));
10535 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10537 rtx new;
10539 if (fmt[i] == 'e')
10541 new = replace_n_hard_rtx (XEXP (x, i), replacements,
10542 n_replacements, modify);
10543 if (!new)
10544 return NULL_RTX;
10545 if (modify)
10546 XEXP (x, i) = new;
10548 else if (fmt[i] == 'E')
10549 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10551 new = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
10552 n_replacements, modify);
10553 if (!new)
10554 return NULL_RTX;
10555 if (modify)
10556 XVECEXP (x, i, j) = new;
10560 return x;
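/* Return an rtx for the value of X truncated to MODE.  When X is itself
   a sign or zero extension, the extension is bypassed where possible:
   its operand is returned, truncated, or re-extended to MODE as
   appropriate.  NEED_SIGN_EXT requires that the result be usable as a
   sign-extended value.  */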
10563 rtx
10564 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
10566 enum rtx_code code = TRUNCATE;
10568 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
10570 rtx inner = XEXP (x, 0);
10571 enum machine_mode inner_mode = GET_MODE (inner);
10573 if (inner_mode == mode)
10574 return inner;
10575 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
10576 x = inner;
10577 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
10578 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
10580 code = GET_CODE (x);
10581 x = inner;
10584 return gen_rtx_fmt_e (code, mode, x);
10587 /* Called via for_each_rtx after reload, to clean up truncates of
10588 registers that span multiple actual hard registers. */
10589 int
10590 shmedia_cleanup_truncate (rtx *p, void *n_changes)
10592 rtx x = *p, reg;
10594 if (GET_CODE (x) != TRUNCATE)
10595 return 0;
10596 reg = XEXP (x, 0);
10597 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && GET_CODE (reg) == REG)
10599 enum machine_mode reg_mode = GET_MODE (reg);
10600 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
10601 subreg_lowpart_offset (DImode, reg_mode));
10602 *(int*) n_changes += 1;
10603 return -1;
10605 return 0;
10608 /* Load and store depend on the highpart of the address. However,
10609 set_attr_alternative does not give well-defined results before reload,
10610 so we must look at the rtl ourselves to see if any of the feeding
10611 registers is used in a memref. */
10613 /* Called by sh_contains_memref_p via for_each_rtx. */
10614 static int
10615 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
10617 return (GET_CODE (*loc) == MEM);
10620 /* Return nonzero iff INSN contains a MEM. */
10621 int
10622 sh_contains_memref_p (rtx insn)
10624 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
10627 /* FNADDR is the MEM expression from a call expander. Return an address
10628 to use in an SHmedia insn pattern. */
10629 rtx
10630 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
10632 int is_sym;
10634 fnaddr = XEXP (fnaddr, 0);
10635 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
10636 if (flag_pic && is_sym)
10638 if (! SYMBOL_REF_LOCAL_P (fnaddr))
10640 rtx reg = gen_reg_rtx (Pmode);
10642 /* We must not use GOTPLT for sibcalls, because PIC_REG
10643 must be restored before the PLT code gets to run. */
10644 if (is_sibcall)
10645 emit_insn (gen_symGOT2reg (reg, fnaddr));
10646 else
10647 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
10648 fnaddr = reg;
10650 else
10652 fnaddr = gen_sym2PIC (fnaddr);
10653 PUT_MODE (fnaddr, Pmode);
10656 /* If ptabs might trap, make this visible to the rest of the compiler.
10657 We generally assume that symbols pertain to valid locations, but
10658 it is possible to generate invalid symbols with asm or linker tricks.
10659 In a list of functions where each returns its successor, an invalid
10660 symbol might denote an empty list. */
10661 if (!TARGET_PT_FIXED
10662 && (!is_sym || TARGET_INVALID_SYMBOLS)
10663 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
10665 rtx tr = gen_reg_rtx (PDImode);
10667 emit_insn (gen_ptabs (tr, fnaddr));
10668 fnaddr = tr;
10670 else if (! target_reg_operand (fnaddr, Pmode))
10671 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
10672 return fnaddr;
10675 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
10677 /* This defines the storage for the variable part of a -mboard= option.
10678 It is only required when using the sh-superh-elf target. */
10679 #ifdef _SUPERH_H
10680 const char * boardtype = "7750p2";
10681 const char * osruntime = "bare";
10682 #endif
10684 #include "gt-sh.h"