[official-gcc.git] / gcc / config / sh / sh.c
1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
22 Boston, MA 02110-1301, USA. */
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "insn-config.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "flags.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "function.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "output.h"
38 #include "insn-attr.h"
39 #include "toplev.h"
40 #include "recog.h"
41 #include "c-pragma.h"
42 #include "integrate.h"
43 #include "dwarf2.h"
44 #include "tm_p.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "real.h"
48 #include "langhooks.h"
49 #include "basic-block.h"
50 #include "cfglayout.h"
51 #include "intl.h"
52 #include "sched-int.h"
53 #include "ggc.h"
54 #include "tree-gimple.h"
55 #include "cfgloop.h"
58 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
60 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
61 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
63 /* These are some macros to abstract register modes. */
64 #define CONST_OK_FOR_ADD(size) \
65 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
66 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
67 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
68 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
70 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
71 int current_function_interrupt;
73 /* ??? The pragma interrupt support will not work for SH3. */
74 /* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
75 output code for the next function appropriate for an interrupt handler. */
76 int pragma_interrupt;
78 /* This is set by the trap_exit attribute for functions. It specifies
79 a trap number to be used in a trapa instruction at function exit
80 (instead of an rte instruction). */
81 int trap_exit;
83 /* This is used by the sp_switch attribute for functions. It specifies
84 a variable holding the address of the stack the interrupt function
85 should switch to/from at entry/exit. */
86 rtx sp_switch;
88 /* This is set by #pragma trapa, and is similar to the above, except that
89 the compiler doesn't emit code to preserve all registers. */
90 static int pragma_trapa;
92 /* This is set by #pragma nosave_low_regs. This is useful on the SH3,
93 which has a separate set of low regs for User and Supervisor modes.
94 This should only be used for the lowest level of interrupts. Higher levels
95 of interrupts must save the registers in case they themselves are
96 interrupted. */
97 int pragma_nosave_low_regs;
99 /* This is used for communication between TARGET_SETUP_INCOMING_VARARGS and
100 sh_expand_prologue. */
101 int current_function_anonymous_args;
103 /* Global variables for machine-dependent things. */
105 /* Which cpu are we scheduling for. */
106 enum processor_type sh_cpu;
108 /* Definitions used in ready queue reordering for first scheduling pass. */
110 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
111 static short *regmode_weight[2];
113 /* Total SFmode and SImode weights of scheduled insns. */
114 static int curr_regmode_pressure[2];
116 /* If true, skip cycles for Q -> R movement. */
117 static int skip_cycles = 0;
119 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
120 and returned from sh_reorder2. */
121 static short cached_can_issue_more;
123 /* Saved operands from the last compare to use when we generate an scc
124 or bcc insn. */
126 rtx sh_compare_op0;
127 rtx sh_compare_op1;
129 /* Provides the class number of the smallest class containing
130 reg number. */
132 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
134 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
137 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
138 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
139 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
140 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
141 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
142 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
143 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
144 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
145 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
146 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
147 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
148 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
149 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
150 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
153 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
154 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
155 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
156 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
157 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
158 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
159 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
160 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
161 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
162 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
163 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
164 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
165 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
166 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
167 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
168 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
169 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
170 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
171 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
172 GENERAL_REGS, GENERAL_REGS,
175 char sh_register_names[FIRST_PSEUDO_REGISTER] \
176 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
178 char sh_additional_register_names[ADDREGNAMES_SIZE] \
179 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
180 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
182 /* Provide reg_class from a letter such as appears in the machine
183 description. *: target independently reserved letter.
184 reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */
186 enum reg_class reg_class_from_letter[] =
188 /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
189 /* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
190 /* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
191 /* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
192 /* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
193 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
194 /* y */ FPUL_REGS, /* z */ R0_REGS
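/* Editorial example (not part of the original source): the table above is
   what gives sh.md constraint letters their meaning for this port.  For
   instance an operand constrained with "z" must end up in r0 (R0_REGS),
   "f" allows any floating point register (FP_REGS), and letters marked with
   a star in the comments are reserved by the target-independent parts of
   the compiler, so they map to NO_REGS here.  */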
197 int assembler_dialect;
199 static bool shmedia_space_reserved_for_target_registers;
201 static bool sh_handle_option (size_t, const char *, int);
202 static void split_branches (rtx);
203 static int branch_dest (rtx);
204 static void force_into (rtx, rtx);
205 static void print_slot (rtx);
206 static rtx add_constant (rtx, enum machine_mode, rtx);
207 static void dump_table (rtx, rtx);
208 static int hi_const (rtx);
209 static int broken_move (rtx);
210 static int mova_p (rtx);
211 static rtx find_barrier (int, rtx, rtx);
212 static int noncall_uses_reg (rtx, rtx, rtx *);
213 static rtx gen_block_redirect (rtx, int, int);
214 static void sh_reorg (void);
215 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
216 static rtx frame_insn (rtx);
217 static rtx push (int);
218 static void pop (int);
219 static void push_regs (HARD_REG_SET *, int);
220 static int calc_live_regs (HARD_REG_SET *);
221 static void mark_use (rtx, rtx *);
222 static HOST_WIDE_INT rounded_frame_size (int);
223 static rtx mark_constant_pool_use (rtx);
224 const struct attribute_spec sh_attribute_table[];
225 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
226 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
227 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
228 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
229 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
230 static void sh_insert_attributes (tree, tree *);
231 static const char *sh_check_pch_target_flags (int);
232 static int sh_adjust_cost (rtx, rtx, rtx, int);
233 static int sh_issue_rate (void);
234 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
235 static short find_set_regmode_weight (rtx, enum machine_mode);
236 static short find_insn_regmode_weight (rtx, enum machine_mode);
237 static void find_regmode_weight (int, enum machine_mode);
238 static void sh_md_init_global (FILE *, int, int);
239 static void sh_md_finish_global (FILE *, int);
240 static int rank_for_reorder (const void *, const void *);
241 static void swap_reorder (rtx *, int);
242 static void ready_reorder (rtx *, int);
243 static short high_pressure (enum machine_mode);
244 static int sh_reorder (FILE *, int, rtx *, int *, int);
245 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
246 static void sh_md_init (FILE *, int, int);
247 static int sh_variable_issue (FILE *, int, rtx, int);
249 static bool sh_function_ok_for_sibcall (tree, tree);
251 static bool sh_cannot_modify_jumps_p (void);
252 static int sh_target_reg_class (void);
253 static bool sh_optimize_target_register_callee_saved (bool);
254 static bool sh_ms_bitfield_layout_p (tree);
256 static void sh_init_builtins (void);
257 static void sh_media_init_builtins (void);
258 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
259 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
260 static void sh_file_start (void);
261 static int flow_dependent_p (rtx, rtx);
262 static void flow_dependent_p_1 (rtx, rtx, void *);
263 static int shiftcosts (rtx);
264 static int andcosts (rtx);
265 static int addsubcosts (rtx);
266 static int multcosts (rtx);
267 static bool unspec_caller_rtx_p (rtx);
268 static bool sh_cannot_copy_insn_p (rtx);
269 static bool sh_rtx_costs (rtx, int, int, int *);
270 static int sh_address_cost (rtx);
271 #ifdef TARGET_ADJUST_UNROLL_MAX
272 static int sh_adjust_unroll_max (struct loop *, int, int, int, int);
273 #endif
274 static int sh_pr_n_sets (void);
275 static rtx sh_allocate_initial_value (rtx);
276 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
277 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
278 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
279 static int scavenge_reg (HARD_REG_SET *s);
280 struct save_schedule_s;
281 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
282 struct save_schedule_s *, int);
284 static rtx sh_struct_value_rtx (tree, int);
285 static bool sh_return_in_memory (tree, tree);
286 static rtx sh_builtin_saveregs (void);
287 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
288 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
289 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
290 static tree sh_build_builtin_va_list (void);
291 static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
292 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
293 tree, bool);
294 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
295 tree, bool);
296 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
297 tree, bool);
298 static int sh_dwarf_calling_convention (tree);
299 static int hard_regs_intersect_p (HARD_REG_SET *, HARD_REG_SET *);
302 /* Initialize the GCC target structure. */
303 #undef TARGET_ATTRIBUTE_TABLE
304 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
306 /* The next two are used for debug info when compiling with -gdwarf. */
307 #undef TARGET_ASM_UNALIGNED_HI_OP
308 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
309 #undef TARGET_ASM_UNALIGNED_SI_OP
310 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
312 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
313 #undef TARGET_ASM_UNALIGNED_DI_OP
314 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
315 #undef TARGET_ASM_ALIGNED_DI_OP
316 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
318 #undef TARGET_ASM_FUNCTION_EPILOGUE
319 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
321 #undef TARGET_ASM_OUTPUT_MI_THUNK
322 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
324 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
325 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
327 #undef TARGET_ASM_FILE_START
328 #define TARGET_ASM_FILE_START sh_file_start
329 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
330 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
332 #undef TARGET_DEFAULT_TARGET_FLAGS
333 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
334 #undef TARGET_HANDLE_OPTION
335 #define TARGET_HANDLE_OPTION sh_handle_option
337 #undef TARGET_INSERT_ATTRIBUTES
338 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
340 #undef TARGET_SCHED_ADJUST_COST
341 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
343 #undef TARGET_SCHED_ISSUE_RATE
344 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
346 /* The next 5 hooks have been implemented to re-enable sched1. With the
347 help of these macros we limit the movement of insns in sched1 so as to
348 reduce the register pressure. The overall idea is to keep count of the SImode
349 and SFmode regs required by already scheduled insns. When these counts
350 cross certain threshold values, priority is given to insns that free registers.
351 The insn that frees registers is most likely the insn with the lowest
352 LUID (original insn order), but such an insn might be sitting in the stalled
353 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
354 up to a maximum of 8 cycles so that such insns may move from Q -> R.
356 The descriptions of the hooks are as follows:
358 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
359 scheduler; it is called inside the sched_init function just after the
360 find_insn_reg_weights call. It is used to calculate the SImode
361 and SFmode weights of the insns of basic blocks, much like what
362 find_insn_reg_weights does.
363 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
365 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
366 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
367 (Q)->(R).
369 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
370 high; reorder the ready queue so that the insn with lowest LUID will be
371 issued next.
373 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
374 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
376 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
377 can be returned from TARGET_SCHED_REORDER2.
379 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
381 #undef TARGET_SCHED_DFA_NEW_CYCLE
382 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
384 #undef TARGET_SCHED_INIT_GLOBAL
385 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
387 #undef TARGET_SCHED_FINISH_GLOBAL
388 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
390 #undef TARGET_SCHED_VARIABLE_ISSUE
391 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
393 #undef TARGET_SCHED_REORDER
394 #define TARGET_SCHED_REORDER sh_reorder
396 #undef TARGET_SCHED_REORDER2
397 #define TARGET_SCHED_REORDER2 sh_reorder2
399 #undef TARGET_SCHED_INIT
400 #define TARGET_SCHED_INIT sh_md_init
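/* Editorial sketch (not in the original source): putting the hooks just
   documented and defined above together, the flow per scheduling region is
   roughly

     sh_md_init           clear the CURR_REGMODE_PRESSURE counters
     sh_md_init_global    compute the per-insn SImode/SFmode weights
     sh_reorder/_reorder2 when pressure is high, sort the ready queue by
                          LUID so register-freeing insns issue first
     sh_dfa_new_cycle     when sh_reorder2 signals high pressure, skip up
                          to 8 cycles (skip_cycles) so queued insns reach R
     sh_variable_issue    cache can_issue_more for sh_reorder2

   This is only a summary of the comment above; the authoritative logic is
   in the corresponding sh_* functions later in this file.  */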
402 #undef TARGET_CANNOT_MODIFY_JUMPS_P
403 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
404 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
405 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
406 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
407 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
408 sh_optimize_target_register_callee_saved
410 #undef TARGET_MS_BITFIELD_LAYOUT_P
411 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
413 #undef TARGET_INIT_BUILTINS
414 #define TARGET_INIT_BUILTINS sh_init_builtins
415 #undef TARGET_EXPAND_BUILTIN
416 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
418 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
419 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
421 #undef TARGET_CANNOT_COPY_INSN_P
422 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
423 #undef TARGET_RTX_COSTS
424 #define TARGET_RTX_COSTS sh_rtx_costs
425 #undef TARGET_ADDRESS_COST
426 #define TARGET_ADDRESS_COST sh_address_cost
427 #undef TARGET_ALLOCATE_INITIAL_VALUE
428 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
430 #undef TARGET_MACHINE_DEPENDENT_REORG
431 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
433 #ifdef HAVE_AS_TLS
434 #undef TARGET_HAVE_TLS
435 #define TARGET_HAVE_TLS true
436 #endif
438 #undef TARGET_PROMOTE_PROTOTYPES
439 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
440 #undef TARGET_PROMOTE_FUNCTION_ARGS
441 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
442 #undef TARGET_PROMOTE_FUNCTION_RETURN
443 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
445 #undef TARGET_STRUCT_VALUE_RTX
446 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
447 #undef TARGET_RETURN_IN_MEMORY
448 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
450 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
451 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
452 #undef TARGET_SETUP_INCOMING_VARARGS
453 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
454 #undef TARGET_STRICT_ARGUMENT_NAMING
455 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
456 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
457 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
458 #undef TARGET_MUST_PASS_IN_STACK
459 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
460 #undef TARGET_PASS_BY_REFERENCE
461 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
462 #undef TARGET_CALLEE_COPIES
463 #define TARGET_CALLEE_COPIES sh_callee_copies
464 #undef TARGET_ARG_PARTIAL_BYTES
465 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
467 #undef TARGET_BUILD_BUILTIN_VA_LIST
468 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
469 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
470 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
472 #undef TARGET_VECTOR_MODE_SUPPORTED_P
473 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
475 #undef TARGET_CHECK_PCH_TARGET_FLAGS
476 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
478 #undef TARGET_DWARF_CALLING_CONVENTION
479 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
481 /* Return regmode weight for insn. */
482 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
484 /* Return current register pressure for regmode. */
485 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
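/* Editorial note (illustrative, not from the original source): these two
   macros are the accounting primitives behind the scheduling hooks above.
   A typical update would look like

     CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);

   i.e. an insn's precomputed SImode weight is added to the running SImode
   pressure (and likewise for SFmode).  The actual call sites are in the
   sh_md_* and sh_reorder* functions later in this file.  */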
487 #ifdef SYMBIAN
489 #undef TARGET_ENCODE_SECTION_INFO
490 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
491 #undef TARGET_STRIP_NAME_ENCODING
492 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
493 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
494 #define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class
496 #endif /* SYMBIAN */
498 #ifdef TARGET_ADJUST_UNROLL_MAX
499 #undef TARGET_ADJUST_UNROLL_MAX
500 #define TARGET_ADJUST_UNROLL_MAX sh_adjust_unroll_max
501 #endif
503 #undef TARGET_SECONDARY_RELOAD
504 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
506 struct gcc_target targetm = TARGET_INITIALIZER;
508 /* Implement TARGET_HANDLE_OPTION. */
510 static bool
511 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
512 int value ATTRIBUTE_UNUSED)
514 switch (code)
516 case OPT_m1:
517 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
518 return true;
520 case OPT_m2:
521 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
522 return true;
524 case OPT_m2a:
525 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
526 return true;
528 case OPT_m2a_nofpu:
529 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
530 return true;
532 case OPT_m2a_single:
533 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
534 return true;
536 case OPT_m2a_single_only:
537 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
538 return true;
540 case OPT_m2e:
541 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
542 return true;
544 case OPT_m3:
545 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
546 return true;
548 case OPT_m3e:
549 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
550 return true;
552 case OPT_m4:
553 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
554 return true;
556 case OPT_m4_nofpu:
557 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
558 return true;
560 case OPT_m4_single:
561 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
562 return true;
564 case OPT_m4_single_only:
565 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
566 return true;
568 case OPT_m4a:
569 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
570 return true;
572 case OPT_m4a_nofpu:
573 case OPT_m4al:
574 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
575 return true;
577 case OPT_m4a_single:
578 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
579 return true;
581 case OPT_m4a_single_only:
582 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
583 return true;
585 case OPT_m5_32media:
586 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
587 return true;
589 case OPT_m5_32media_nofpu:
590 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
591 return true;
593 case OPT_m5_64media:
594 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
595 return true;
597 case OPT_m5_64media_nofpu:
598 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
599 return true;
601 case OPT_m5_compact:
602 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
603 return true;
605 case OPT_m5_compact_nofpu:
606 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
607 return true;
609 default:
610 return true;
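/* Editorial example (not part of the original source): each -mN case above
   simply swaps the architecture selection bits while leaving all other
   target flags alone, e.g. for -m2a-single-only:

     target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;

   so when several -mN options appear on the command line, the last one
   determines the architecture bits.  */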
614 /* Print the operand address in x to the stream. */
616 void
617 print_operand_address (FILE *stream, rtx x)
619 switch (GET_CODE (x))
621 case REG:
622 case SUBREG:
623 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
624 break;
626 case PLUS:
628 rtx base = XEXP (x, 0);
629 rtx index = XEXP (x, 1);
631 switch (GET_CODE (index))
633 case CONST_INT:
634 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
635 reg_names[true_regnum (base)]);
636 break;
638 case REG:
639 case SUBREG:
641 int base_num = true_regnum (base);
642 int index_num = true_regnum (index);
644 fprintf (stream, "@(r0,%s)",
645 reg_names[MAX (base_num, index_num)]);
646 break;
649 default:
650 gcc_unreachable ();
653 break;
655 case PRE_DEC:
656 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
657 break;
659 case POST_INC:
660 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
661 break;
663 default:
664 x = mark_constant_pool_use (x);
665 output_addr_const (stream, x);
666 break;
670 /* Print operand x (an rtx) in assembler syntax to file stream
671 according to modifier code.
673 '.' print a .s if insn needs delay slot
674 ',' print LOCAL_LABEL_PREFIX
675 '@' print trapa, rte or rts, depending on trap_exit and whether the function is an interrupt handler
676 '#' output a nop if there is nothing to put in the delay slot
677 ''' print likelihood suffix (/u for unlikely).
678 '>' print branch target if -fverbose-asm
679 'O' print a constant without the #
680 'R' print the LSW of a dp value - changes if in little endian
681 'S' print the MSW of a dp value - changes if in little endian
682 'T' print the next word of a dp value - same as 'R' in big endian mode.
683 'M' print an `x' if `m' will print `base,index'.
684 'N' print 'r63' if the operand is (const_int 0).
685 'd' print a V2SF reg as dN instead of fpN.
686 'm' print a pair `base,offset' or `base,index', for LD and ST.
687 'U' Likewise for {LD,ST}{HI,LO}.
688 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
689 'o' output an operator. */
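/* Editorial example (not part of the original source): these modifiers are
   used from the output templates in this file and in sh.md.  For instance,
   output_movedouble () below returns

     "sts mach,%S0\n\tsts macl,%R0"

   when the source is the MACH/MACL pair: %S0 prints the register holding
   the most significant word of operand 0 and %R0 the one holding the least
   significant word, following the MSW/LSW macros and hence the target
   endianness.  */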
691 void
692 print_operand (FILE *stream, rtx x, int code)
694 int regno;
695 enum machine_mode mode;
697 switch (code)
699 case '.':
700 if (final_sequence
701 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
702 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
703 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
704 break;
705 case ',':
706 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
707 break;
708 case '@':
709 if (trap_exit)
710 fprintf (stream, "trapa #%d", trap_exit);
711 else if (sh_cfun_interrupt_handler_p ())
712 fprintf (stream, "rte");
713 else
714 fprintf (stream, "rts");
715 break;
716 case '#':
717 /* Output a nop if there's nothing in the delay slot. */
718 if (dbr_sequence_length () == 0)
719 fprintf (stream, "\n\tnop");
720 break;
721 case '\'':
723 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
725 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
726 fputs ("/u", stream);
727 break;
729 case '>':
730 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
732 fputs ("\t! target: ", stream);
733 output_addr_const (stream, JUMP_LABEL (current_output_insn));
735 break;
736 case 'O':
737 x = mark_constant_pool_use (x);
738 output_addr_const (stream, x);
739 break;
740 /* N.B.: %R / %S / %T adjust memory addresses by four.
741 For SHMEDIA, that means they can be used to access the first and
742 second 32 bit part of a 64 bit (or larger) value that
743 might be held in floating point registers or memory.
744 While they can be used to access 64 bit parts of a larger value
745 held in general purpose registers, that won't work with memory,
746 nor with fp registers, since the frxx names are used. */
747 case 'R':
748 if (REG_P (x) || GET_CODE (x) == SUBREG)
750 regno = true_regnum (x);
751 regno += FP_REGISTER_P (regno) ? 1 : LSW;
752 fputs (reg_names[regno], (stream));
754 else if (MEM_P (x))
756 x = adjust_address (x, SImode, 4 * LSW);
757 print_operand_address (stream, XEXP (x, 0));
759 else
761 rtx sub = NULL_RTX;
763 mode = GET_MODE (x);
764 if (mode == VOIDmode)
765 mode = DImode;
766 if (GET_MODE_SIZE (mode) >= 8)
767 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
768 if (sub)
769 print_operand (stream, sub, 0);
770 else
771 output_operand_lossage ("invalid operand to %%R");
773 break;
774 case 'S':
775 if (REG_P (x) || GET_CODE (x) == SUBREG)
777 regno = true_regnum (x);
778 regno += FP_REGISTER_P (regno) ? 0 : MSW;
779 fputs (reg_names[regno], (stream));
781 else if (MEM_P (x))
783 x = adjust_address (x, SImode, 4 * MSW);
784 print_operand_address (stream, XEXP (x, 0));
786 else
788 rtx sub = NULL_RTX;
790 mode = GET_MODE (x);
791 if (mode == VOIDmode)
792 mode = DImode;
793 if (GET_MODE_SIZE (mode) >= 8)
794 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
795 if (sub)
796 print_operand (stream, sub, 0);
797 else
798 output_operand_lossage ("invalid operand to %%S");
800 break;
801 case 'T':
802 /* Next word of a double. */
803 switch (GET_CODE (x))
805 case REG:
806 fputs (reg_names[REGNO (x) + 1], (stream));
807 break;
808 case MEM:
809 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
810 && GET_CODE (XEXP (x, 0)) != POST_INC)
811 x = adjust_address (x, SImode, 4);
812 print_operand_address (stream, XEXP (x, 0));
813 break;
814 default:
815 break;
817 break;
818 case 'o':
819 switch (GET_CODE (x))
821 case PLUS: fputs ("add", stream); break;
822 case MINUS: fputs ("sub", stream); break;
823 case MULT: fputs ("mul", stream); break;
824 case DIV: fputs ("div", stream); break;
825 case EQ: fputs ("eq", stream); break;
826 case NE: fputs ("ne", stream); break;
827 case GT: case LT: fputs ("gt", stream); break;
828 case GE: case LE: fputs ("ge", stream); break;
829 case GTU: case LTU: fputs ("gtu", stream); break;
830 case GEU: case LEU: fputs ("geu", stream); break;
831 default:
832 break;
834 break;
835 case 'M':
836 if (GET_CODE (x) == MEM
837 && GET_CODE (XEXP (x, 0)) == PLUS
838 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
839 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
840 fputc ('x', stream);
841 break;
843 case 'm':
844 gcc_assert (GET_CODE (x) == MEM);
845 x = XEXP (x, 0);
846 /* Fall through. */
847 case 'U':
848 switch (GET_CODE (x))
850 case REG:
851 case SUBREG:
852 print_operand (stream, x, 0);
853 fputs (", 0", stream);
854 break;
856 case PLUS:
857 print_operand (stream, XEXP (x, 0), 0);
858 fputs (", ", stream);
859 print_operand (stream, XEXP (x, 1), 0);
860 break;
862 default:
863 gcc_unreachable ();
865 break;
867 case 'd':
868 gcc_assert (GET_CODE (x) == REG && GET_MODE (x) == V2SFmode);
870 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
871 break;
873 case 'N':
874 if (x == CONST0_RTX (GET_MODE (x)))
876 fprintf ((stream), "r63");
877 break;
879 goto default_output;
880 case 'u':
881 if (GET_CODE (x) == CONST_INT)
883 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
884 break;
886 /* Fall through. */
888 default_output:
889 default:
890 regno = 0;
891 mode = GET_MODE (x);
893 switch (GET_CODE (x))
895 case TRUNCATE:
897 rtx inner = XEXP (x, 0);
898 int offset = 0;
899 enum machine_mode inner_mode;
901 /* We might see SUBREGs with vector mode registers inside. */
902 if (GET_CODE (inner) == SUBREG
903 && (GET_MODE_SIZE (GET_MODE (inner))
904 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
905 && subreg_lowpart_p (inner))
906 inner = SUBREG_REG (inner);
907 if (GET_CODE (inner) == CONST_INT)
909 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
910 goto default_output;
912 inner_mode = GET_MODE (inner);
913 if (GET_CODE (inner) == SUBREG
914 && (GET_MODE_SIZE (GET_MODE (inner))
915 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
916 && GET_CODE (SUBREG_REG (inner)) == REG)
918 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
919 GET_MODE (SUBREG_REG (inner)),
920 SUBREG_BYTE (inner),
921 GET_MODE (inner));
922 inner = SUBREG_REG (inner);
924 if (GET_CODE (inner) != REG || GET_MODE_SIZE (inner_mode) > 8)
925 abort ();
926 /* Floating point register pairs are always big endian;
927 general purpose registers are 64 bit wide. */
928 regno = REGNO (inner);
929 regno = (HARD_REGNO_NREGS (regno, inner_mode)
930 - HARD_REGNO_NREGS (regno, mode))
931 + offset;
932 x = inner;
933 goto reg;
935 case SIGN_EXTEND:
936 x = XEXP (x, 0);
937 goto reg;
938 /* FIXME: We need this on SHmedia32 because reload generates
939 some sign-extended HI or QI loads into DImode registers
940 but, because Pmode is SImode, the address ends up with a
941 subreg:SI of the DImode register. Maybe reload should be
942 fixed so as to apply alter_subreg to such loads? */
943 case IF_THEN_ELSE:
944 gcc_assert (trapping_target_operand (x, VOIDmode));
945 x = XEXP (XEXP (x, 2), 0);
946 goto default_output;
947 case SUBREG:
948 gcc_assert (SUBREG_BYTE (x) == 0
949 && GET_CODE (SUBREG_REG (x)) == REG);
951 x = SUBREG_REG (x);
952 /* Fall through. */
954 reg:
955 case REG:
956 regno += REGNO (x);
957 if (FP_REGISTER_P (regno)
958 && mode == V16SFmode)
959 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
960 else if (FP_REGISTER_P (REGNO (x))
961 && mode == V4SFmode)
962 fprintf ((stream), "fv%s", reg_names[regno] + 2);
963 else if (GET_CODE (x) == REG
964 && mode == V2SFmode)
965 fprintf ((stream), "fp%s", reg_names[regno] + 2);
966 else if (FP_REGISTER_P (REGNO (x))
967 && GET_MODE_SIZE (mode) > 4)
968 fprintf ((stream), "d%s", reg_names[regno] + 1);
969 else
970 fputs (reg_names[regno], (stream));
971 break;
973 case MEM:
974 output_address (XEXP (x, 0));
975 break;
977 case CONST:
978 if (TARGET_SHMEDIA
979 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
980 && (GET_MODE (XEXP (x, 0)) == DImode
981 || GET_MODE (XEXP (x, 0)) == SImode)
982 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
983 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
985 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
987 fputc ('(', stream);
988 if (GET_CODE (val) == ASHIFTRT)
990 fputc ('(', stream);
991 if (GET_CODE (XEXP (val, 0)) == CONST)
992 fputc ('(', stream);
993 output_addr_const (stream, XEXP (val, 0));
994 if (GET_CODE (XEXP (val, 0)) == CONST)
995 fputc (')', stream);
996 fputs (" >> ", stream);
997 output_addr_const (stream, XEXP (val, 1));
998 fputc (')', stream);
1000 else
1002 if (GET_CODE (val) == CONST)
1003 fputc ('(', stream);
1004 output_addr_const (stream, val);
1005 if (GET_CODE (val) == CONST)
1006 fputc (')', stream);
1008 fputs (" & 65535)", stream);
1009 break;
1012 /* Fall through. */
1013 default:
1014 if (TARGET_SH1)
1015 fputc ('#', stream);
1016 output_addr_const (stream, x);
1017 break;
1019 break;
1023 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1024 static void
1025 force_into (rtx value, rtx target)
1027 value = force_operand (value, target);
1028 if (! rtx_equal_p (value, target))
1029 emit_insn (gen_move_insn (target, value));
1032 /* Emit code to perform a block move. Choose the best method.
1034 OPERANDS[0] is the destination.
1035 OPERANDS[1] is the source.
1036 OPERANDS[2] is the size.
1037 OPERANDS[3] is the alignment safe to use. */
1040 expand_block_move (rtx *operands)
1042 int align = INTVAL (operands[3]);
1043 int constp = (GET_CODE (operands[2]) == CONST_INT);
1044 int bytes = (constp ? INTVAL (operands[2]) : 0);
1046 if (! constp)
1047 return 0;
1049 /* If we could use mov.l to move words and dest is word-aligned, we
1050 can use movua.l for loads and still generate a relatively short
1051 and efficient sequence. */
1052 if (TARGET_SH4A_ARCH && align < 4
1053 && MEM_ALIGN (operands[0]) >= 32
1054 && can_move_by_pieces (bytes, 32))
1056 rtx dest = copy_rtx (operands[0]);
1057 rtx src = copy_rtx (operands[1]);
1058 /* We could use different pseudos for each copied word, but
1059 since movua can only load into r0, it's kind of
1060 pointless. */
1061 rtx temp = gen_reg_rtx (SImode);
1062 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1063 int copied = 0;
1065 while (copied + 4 <= bytes)
1067 rtx to = adjust_address (dest, SImode, copied);
1068 rtx from = adjust_automodify_address (src, SImode, src_addr, copied);
1070 emit_insn (gen_movua (temp, from));
1071 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1072 emit_move_insn (to, temp);
1073 copied += 4;
1076 if (copied < bytes)
1077 move_by_pieces (adjust_address (dest, BLKmode, copied),
1078 adjust_automodify_address (src, BLKmode,
1079 src_addr, copied),
1080 bytes - copied, align, 0);
1082 return 1;
1085 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1086 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1087 if (align < 4 || (bytes % 4 != 0))
1088 return 0;
1090 if (TARGET_HARD_SH4)
1092 if (bytes < 12)
1093 return 0;
1094 else if (bytes == 12)
1096 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1097 rtx r4 = gen_rtx_REG (SImode, 4);
1098 rtx r5 = gen_rtx_REG (SImode, 5);
1100 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1101 force_into (XEXP (operands[0], 0), r4);
1102 force_into (XEXP (operands[1], 0), r5);
1103 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1104 return 1;
1106 else if (! TARGET_SMALLCODE)
1108 const char *entry_name;
1109 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1110 int dwords;
1111 rtx r4 = gen_rtx_REG (SImode, 4);
1112 rtx r5 = gen_rtx_REG (SImode, 5);
1113 rtx r6 = gen_rtx_REG (SImode, 6);
1115 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1116 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1117 force_into (XEXP (operands[0], 0), r4);
1118 force_into (XEXP (operands[1], 0), r5);
1120 dwords = bytes >> 3;
1121 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1122 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1123 return 1;
1125 else
1126 return 0;
1128 if (bytes < 64)
1130 char entry[30];
1131 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1132 rtx r4 = gen_rtx_REG (SImode, 4);
1133 rtx r5 = gen_rtx_REG (SImode, 5);
1135 sprintf (entry, "__movmemSI%d", bytes);
1136 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1137 force_into (XEXP (operands[0], 0), r4);
1138 force_into (XEXP (operands[1], 0), r5);
1139 emit_insn (gen_block_move_real (func_addr_rtx));
1140 return 1;
1143 /* This is the same number of bytes as a memcpy call, but to a different
1144 less common function name, so this will occasionally use more space. */
1145 if (! TARGET_SMALLCODE)
1147 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1148 int final_switch, while_loop;
1149 rtx r4 = gen_rtx_REG (SImode, 4);
1150 rtx r5 = gen_rtx_REG (SImode, 5);
1151 rtx r6 = gen_rtx_REG (SImode, 6);
1153 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1154 force_into (XEXP (operands[0], 0), r4);
1155 force_into (XEXP (operands[1], 0), r5);
1157 /* r6 controls the size of the move. 16 is decremented from it
1158 for each 64 bytes moved. Then the negative bit left over is used
1159 as an index into a list of move instructions. e.g., a 72 byte move
1160 would be set up with size(r6) = 14, for one iteration through the
1161 big while loop, and a switch of -2 for the last part. */
1163 final_switch = 16 - ((bytes / 4) % 16);
1164 while_loop = ((bytes / 4) / 16 - 1) * 16;
1165 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1166 emit_insn (gen_block_lump_real (func_addr_rtx));
1167 return 1;
1170 return 0;
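/* Editorial worked example (not part of the original source) for the
   __movmem call above: for a 72 byte copy, bytes / 4 = 18 longwords, so

     final_switch = 16 - (18 % 16)       = 14
     while_loop   = ((18 / 16) - 1) * 16 = 0

   and r6 is loaded with 0 + 14 = 14, matching the "size(r6) = 14" case in
   the comment: 16 is subtracted for the one pass through the big while
   loop, leaving -2 as the switch index for the trailing 8 bytes.  */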
1173 /* Prepare operands for a move define_expand; specifically, one of the
1174 operands must be in a register. */
1177 prepare_move_operands (rtx operands[], enum machine_mode mode)
1179 if ((mode == SImode || mode == DImode)
1180 && flag_pic
1181 && ! ((mode == Pmode || mode == ptr_mode)
1182 && tls_symbolic_operand (operands[1], Pmode) != 0))
1184 rtx temp;
1185 if (SYMBOLIC_CONST_P (operands[1]))
1187 if (GET_CODE (operands[0]) == MEM)
1188 operands[1] = force_reg (Pmode, operands[1]);
1189 else if (TARGET_SHMEDIA
1190 && GET_CODE (operands[1]) == LABEL_REF
1191 && target_reg_operand (operands[0], mode))
1192 /* It's ok. */;
1193 else
1195 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
1196 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1199 else if (GET_CODE (operands[1]) == CONST
1200 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1201 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1203 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
1204 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1205 mode, temp);
1206 operands[1] = expand_binop (mode, add_optab, temp,
1207 XEXP (XEXP (operands[1], 0), 1),
1208 no_new_pseudos ? temp
1209 : gen_reg_rtx (Pmode),
1210 0, OPTAB_LIB_WIDEN);
1214 if (! reload_in_progress && ! reload_completed)
1216 /* Copy the source to a register if both operands aren't registers. */
1217 if (! register_operand (operands[0], mode)
1218 && ! sh_register_operand (operands[1], mode))
1219 operands[1] = copy_to_mode_reg (mode, operands[1]);
1221 if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
1223 /* This is like change_address_1 (operands[0], mode, 0, 1),
1224 except that we can't use that function because it is static. */
1225 rtx new = change_address (operands[0], mode, 0);
1226 MEM_COPY_ATTRIBUTES (new, operands[0]);
1227 operands[0] = new;
1230 /* This case can happen while generating code to move the result
1231 of a library call to the target. Reject `st r0,@(rX,rY)' because
1232 reload will fail to find a spill register for rX, since r0 is already
1233 being used for the source. */
1234 else if (TARGET_SH1
1235 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1236 && GET_CODE (operands[0]) == MEM
1237 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1238 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
1239 operands[1] = copy_to_mode_reg (mode, operands[1]);
1242 if (mode == Pmode || mode == ptr_mode)
1244 rtx op0, op1, opc;
1245 enum tls_model tls_kind;
1247 op0 = operands[0];
1248 op1 = operands[1];
1249 if (GET_CODE (op1) == CONST
1250 && GET_CODE (XEXP (op1, 0)) == PLUS
1251 && tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode))
1253 opc = XEXP (XEXP (op1, 0), 1);
1254 op1 = XEXP (XEXP (op1, 0), 0);
1256 else
1257 opc = NULL_RTX;
1259 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
1261 rtx tga_op1, tga_ret, tmp, tmp2;
1263 switch (tls_kind)
1265 case TLS_MODEL_GLOBAL_DYNAMIC:
1266 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1267 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1268 op1 = tga_ret;
1269 break;
1271 case TLS_MODEL_LOCAL_DYNAMIC:
1272 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1273 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1275 tmp = gen_reg_rtx (Pmode);
1276 emit_move_insn (tmp, tga_ret);
1278 if (register_operand (op0, Pmode))
1279 tmp2 = op0;
1280 else
1281 tmp2 = gen_reg_rtx (Pmode);
1283 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1284 op1 = tmp2;
1285 break;
1287 case TLS_MODEL_INITIAL_EXEC:
1288 if (! flag_pic)
1290 /* Don't schedule insns for getting GOT address when
1291 the first scheduling is enabled, to avoid spill
1292 failures for R0. */
1293 if (flag_schedule_insns)
1294 emit_insn (gen_blockage ());
1295 emit_insn (gen_GOTaddr2picreg ());
1296 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode,
1297 PIC_REG)));
1298 if (flag_schedule_insns)
1299 emit_insn (gen_blockage ());
1301 tga_op1 = no_new_pseudos ? op0 : gen_reg_rtx (Pmode);
1302 tmp = gen_sym2GOTTPOFF (op1);
1303 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1304 op1 = tga_op1;
1305 break;
1307 case TLS_MODEL_LOCAL_EXEC:
1308 tmp2 = gen_reg_rtx (Pmode);
1309 emit_insn (gen_load_gbr (tmp2));
1310 tmp = gen_reg_rtx (Pmode);
1311 emit_insn (gen_symTPOFF2reg (tmp, op1));
1313 if (register_operand (op0, Pmode))
1314 op1 = op0;
1315 else
1316 op1 = gen_reg_rtx (Pmode);
1318 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1319 break;
1321 default:
1322 gcc_unreachable ();
1324 if (opc)
1325 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1326 operands[1] = op1;
1330 return 0;
1333 /* Prepare the operands for an scc instruction; make sure that the
1334 compare has been done. */
1336 prepare_scc_operands (enum rtx_code code)
1338 rtx t_reg = gen_rtx_REG (SImode, T_REG);
1339 enum rtx_code oldcode = code;
1340 enum machine_mode mode;
1342 /* First need a compare insn. */
1343 switch (code)
1345 case NE:
1346 /* It isn't possible to handle this case. */
1347 gcc_unreachable ();
1348 case LT:
1349 code = GT;
1350 break;
1351 case LE:
1352 code = GE;
1353 break;
1354 case LTU:
1355 code = GTU;
1356 break;
1357 case LEU:
1358 code = GEU;
1359 break;
1360 default:
1361 break;
1363 if (code != oldcode)
1365 rtx tmp = sh_compare_op0;
1366 sh_compare_op0 = sh_compare_op1;
1367 sh_compare_op1 = tmp;
1370 mode = GET_MODE (sh_compare_op0);
1371 if (mode == VOIDmode)
1372 mode = GET_MODE (sh_compare_op1);
1374 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1375 if ((code != EQ && code != NE
1376 && (sh_compare_op1 != const0_rtx
1377 || code == GTU || code == GEU || code == LTU || code == LEU))
1378 || (mode == DImode && sh_compare_op1 != const0_rtx)
1379 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1380 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1382 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1383 (mode == SFmode ? emit_sf_insn : emit_df_insn)
1384 (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
1385 gen_rtx_SET (VOIDmode, t_reg,
1386 gen_rtx_fmt_ee (code, SImode,
1387 sh_compare_op0, sh_compare_op1)),
1388 gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
1389 else
1390 emit_insn (gen_rtx_SET (VOIDmode, t_reg,
1391 gen_rtx_fmt_ee (code, SImode,
1392 sh_compare_op0, sh_compare_op1)));
1394 return t_reg;
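/* Editorial example (not part of the original source): for a comparison
   such as (lt:SI a b), the switch above rewrites the code to GT and swaps
   sh_compare_op0/sh_compare_op1, since only the "greater" forms of compare
   are handled directly; the insn then emitted sets T_REG to (gt:SI b a),
   which is equivalent, and the caller tests the T bit.  */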
1397 /* Called from the md file, set up the operands of a compare instruction. */
1399 void
1400 from_compare (rtx *operands, int code)
1402 enum machine_mode mode = GET_MODE (sh_compare_op0);
1403 rtx insn;
1404 if (mode == VOIDmode)
1405 mode = GET_MODE (sh_compare_op1);
1406 if (code != EQ
1407 || mode == DImode
1408 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1410 /* Force args into regs, since we can't use constants here. */
1411 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1412 if (sh_compare_op1 != const0_rtx
1413 || code == GTU || code == GEU
1414 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1415 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1417 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
1419 from_compare (operands, GT);
1420 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
1422 else
1423 insn = gen_rtx_SET (VOIDmode,
1424 gen_rtx_REG (SImode, T_REG),
1425 gen_rtx_fmt_ee (code, SImode,
1426 sh_compare_op0, sh_compare_op1));
1427 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1429 insn = gen_rtx_PARALLEL (VOIDmode,
1430 gen_rtvec (2, insn,
1431 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
1432 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
1434 else
1435 emit_insn (insn);
1438 /* Functions to output assembly code. */
1440 /* Return a sequence of instructions to perform DI or DF move.
1442 Since the SH cannot move a DI or DF in one instruction, we have
1443 to take care when we see overlapping source and dest registers. */
1445 const char *
1446 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
1447 enum machine_mode mode)
1449 rtx dst = operands[0];
1450 rtx src = operands[1];
1452 if (GET_CODE (dst) == MEM
1453 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
1454 return "mov.l %T1,%0\n\tmov.l %1,%0";
1456 if (register_operand (dst, mode)
1457 && register_operand (src, mode))
1459 if (REGNO (src) == MACH_REG)
1460 return "sts mach,%S0\n\tsts macl,%R0";
1462 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1463 when mov.d r1,r0 do r1->r0 then r2->r1. */
1465 if (REGNO (src) + 1 == REGNO (dst))
1466 return "mov %T1,%T0\n\tmov %1,%0";
1467 else
1468 return "mov %1,%0\n\tmov %T1,%T0";
1470 else if (GET_CODE (src) == CONST_INT)
1472 if (INTVAL (src) < 0)
1473 output_asm_insn ("mov #-1,%S0", operands);
1474 else
1475 output_asm_insn ("mov #0,%S0", operands);
1477 return "mov %1,%R0";
1479 else if (GET_CODE (src) == MEM)
1481 int ptrreg = -1;
1482 int dreg = REGNO (dst);
1483 rtx inside = XEXP (src, 0);
1485 switch (GET_CODE (inside))
1487 case REG:
1488 ptrreg = REGNO (inside);
1489 break;
1491 case SUBREG:
1492 ptrreg = subreg_regno (inside);
1493 break;
1495 case PLUS:
1496 ptrreg = REGNO (XEXP (inside, 0));
1497 /* ??? A r0+REG address shouldn't be possible here, because it isn't
1498 an offsettable address. Unfortunately, offsettable addresses use
1499 QImode to check the offset, and a QImode offsettable address
1500 requires r0 for the other operand, which is not currently
1501 supported, so we can't use the 'o' constraint.
1502 Thus we must check for and handle r0+REG addresses here.
1503 We punt for now, since this is likely very rare. */
1504 gcc_assert (GET_CODE (XEXP (inside, 1)) != REG);
1505 break;
1507 case LABEL_REF:
1508 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1509 case POST_INC:
1510 return "mov.l %1,%0\n\tmov.l %1,%T0";
1511 default:
1512 gcc_unreachable ();
1515 /* Work out the safe way to copy. Copy into the second half first. */
1516 if (dreg == ptrreg)
1517 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1520 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1523 /* Print an instruction which would have gone into a delay slot after
1524 another instruction, but couldn't because the other instruction expanded
1525 into a sequence where putting the slot insn at the end wouldn't work. */
1527 static void
1528 print_slot (rtx insn)
1530 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
1532 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
1535 const char *
1536 output_far_jump (rtx insn, rtx op)
1538 struct { rtx lab, reg, op; } this;
1539 rtx braf_base_lab = NULL_RTX;
1540 const char *jump;
1541 int far;
1542 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1543 rtx prev;
1545 this.lab = gen_label_rtx ();
1547 if (TARGET_SH2
1548 && offset >= -32764
1549 && offset - get_attr_length (insn) <= 32766)
1551 far = 0;
1552 jump = "mov.w %O0,%1; braf %1";
1554 else
1556 far = 1;
1557 if (flag_pic)
1559 if (TARGET_SH2)
1560 jump = "mov.l %O0,%1; braf %1";
1561 else
1562 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1564 else
1565 jump = "mov.l %O0,%1; jmp @%1";
1567 /* If we have a scratch register available, use it. */
1568 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1569 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1571 this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1572 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1573 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1574 output_asm_insn (jump, &this.lab);
1575 if (dbr_sequence_length ())
1576 print_slot (final_sequence);
1577 else
1578 output_asm_insn ("nop", 0);
1580 else
1582 /* Output the delay slot insn first if any. */
1583 if (dbr_sequence_length ())
1584 print_slot (final_sequence);
1586 this.reg = gen_rtx_REG (SImode, 13);
1587 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1588 Fortunately, MACL is fixed and call-clobbered, and we never
1589 need its value across jumps, so save r13 in it instead of in
1590 the stack. */
1591 if (TARGET_SH5)
1592 output_asm_insn ("lds r13, macl", 0);
1593 else
1594 output_asm_insn ("mov.l r13,@-r15", 0);
1595 output_asm_insn (jump, &this.lab);
1596 if (TARGET_SH5)
1597 output_asm_insn ("sts macl, r13", 0);
1598 else
1599 output_asm_insn ("mov.l @r15+,r13", 0);
1601 if (far && flag_pic && TARGET_SH2)
1603 braf_base_lab = gen_label_rtx ();
1604 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1605 CODE_LABEL_NUMBER (braf_base_lab));
1607 if (far)
1608 output_asm_insn (".align 2", 0);
1609 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1610 this.op = op;
1611 if (far && flag_pic)
1613 if (TARGET_SH2)
1614 this.lab = braf_base_lab;
1615 output_asm_insn (".long %O2-%O0", &this.lab);
1617 else
1618 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
1619 return "";
1622 /* Local label counter, used for constants in the pool and inside
1623 pattern branches. */
1625 static int lf = 100;
1627 /* Output code for ordinary branches. */
1629 const char *
1630 output_branch (int logic, rtx insn, rtx *operands)
1632 switch (get_attr_length (insn))
1634 case 6:
1635 /* This can happen if filling the delay slot has caused a forward
1636 branch to exceed its range (we could reverse it, but only
1637 when we know we won't overextend other branches; this should
1638 best be handled by relaxation).
1639 It can also happen when other condbranches hoist delay slot insn
1640 from their destination, thus leading to code size increase.
1641 But the branch will still be in the range -4092..+4098 bytes. */
1643 if (! TARGET_RELAX)
1645 int label = lf++;
1646 /* The call to print_slot will clobber the operands. */
1647 rtx op0 = operands[0];
1649 /* If the instruction in the delay slot is annulled (true), then
1650 there is no delay slot where we can put it now. The only safe
1651 place for it is after the label. final will do that by default. */
1653 if (final_sequence
1654 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1655 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1657 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
1658 ASSEMBLER_DIALECT ? "/" : ".", label);
1659 print_slot (final_sequence);
1661 else
1662 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
1664 output_asm_insn ("bra\t%l0", &op0);
1665 fprintf (asm_out_file, "\tnop\n");
1666 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1668 return "";
1670 /* When relaxing, handle this like a short branch. The linker
1671 will fix it up if it still doesn't fit after relaxation. */
1672 case 2:
1673 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
1675 /* These are for SH2e, in which we have to account for the
1676 extra nop because of the hardware bug in annulled branches. */
1677 case 8:
1678 if (! TARGET_RELAX)
1680 int label = lf++;
1682 gcc_assert (!final_sequence
1683 || !(INSN_ANNULLED_BRANCH_P
1684 (XVECEXP (final_sequence, 0, 0))));
1685 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
1686 logic ? "f" : "t",
1687 ASSEMBLER_DIALECT ? "/" : ".", label);
1688 fprintf (asm_out_file, "\tnop\n");
1689 output_asm_insn ("bra\t%l0", operands);
1690 fprintf (asm_out_file, "\tnop\n");
1691 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1693 return "";
1695 /* When relaxing, fall through. */
1696 case 4:
1698 char buffer[10];
1700 sprintf (buffer, "b%s%ss\t%%l0",
1701 logic ? "t" : "f",
1702 ASSEMBLER_DIALECT ? "/" : ".");
1703 output_asm_insn (buffer, &operands[0]);
1704 return "nop";
1707 default:
1708 /* There should be no longer branches now - that would
1709 indicate that something has destroyed the branches set
1710 up in machine_dependent_reorg. */
1711 gcc_unreachable ();
1715 const char *
1716 output_branchy_insn (enum rtx_code code, const char *template,
1717 rtx insn, rtx *operands)
1719 rtx next_insn = NEXT_INSN (insn);
1721 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
1723 rtx src = SET_SRC (PATTERN (next_insn));
1724 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
1726 /* Following branch not taken */
1727 operands[9] = gen_label_rtx ();
1728 emit_label_after (operands[9], next_insn);
1729 INSN_ADDRESSES_NEW (operands[9],
1730 INSN_ADDRESSES (INSN_UID (next_insn))
1731 + get_attr_length (next_insn));
1732 return template;
1734 else
1736 int offset = (branch_dest (next_insn)
1737 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
1738 if (offset >= -252 && offset <= 258)
1740 if (GET_CODE (src) == IF_THEN_ELSE)
1741 /* branch_true */
1742 src = XEXP (src, 1);
1743 operands[9] = src;
1744 return template;
1748 operands[9] = gen_label_rtx ();
1749 emit_label_after (operands[9], insn);
1750 INSN_ADDRESSES_NEW (operands[9],
1751 INSN_ADDRESSES (INSN_UID (insn))
1752 + get_attr_length (insn));
1753 return template;
1756 const char *
1757 output_ieee_ccmpeq (rtx insn, rtx *operands)
1759 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
1760 insn, operands);
1763 /* Output the start of the assembler file. */
1765 static void
1766 sh_file_start (void)
1768 default_file_start ();
1770 #ifdef SYMBIAN
1771 /* Declare the .directive section before it is used. */
1772 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
1773 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
1774 #endif
1776 if (TARGET_ELF)
1777 /* We need to show the text section with the proper
1778 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
1779 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
1780 will complain. We can teach GAS specifically about the
1781 default attributes for our choice of text section, but
1782 then we would have to change GAS again if/when we change
1783 the text section name. */
1784 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
1785 else
1786 /* Switch to the data section so that the coffsem symbol
1787 isn't in the text section. */
1788 data_section ();
1790 if (TARGET_LITTLE_ENDIAN)
1791 fputs ("\t.little\n", asm_out_file);
1793 if (!TARGET_ELF)
1795 if (TARGET_SHCOMPACT)
1796 fputs ("\t.mode\tSHcompact\n", asm_out_file);
1797 else if (TARGET_SHMEDIA)
1798 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1799 TARGET_SHMEDIA64 ? 64 : 32);
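/* Editorial example (not part of the original source): for a little-endian,
   non-ELF, 32-bit SHmedia compilation, sh_file_start () above emits
   something like

       .little
       .mode   SHmedia
       .abi    32

   at the top of the assembly output, after the default_file_start ()
   boilerplate and the section switch.  */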
1803 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
1805 static bool
1806 unspec_caller_rtx_p (rtx pat)
1808 switch (GET_CODE (pat))
1810 case CONST:
1811 return unspec_caller_rtx_p (XEXP (pat, 0));
1812 case PLUS:
1813 case MINUS:
1814 if (unspec_caller_rtx_p (XEXP (pat, 0)))
1815 return true;
1816 return unspec_caller_rtx_p (XEXP (pat, 1));
1817 case UNSPEC:
1818 if (XINT (pat, 1) == UNSPEC_CALLER)
1819 return true;
1820 default:
1821 break;
1824 return false;
1827 /* Indicate that INSN cannot be duplicated. This is true for insn
1828 that generates a unique label. */
1830 static bool
1831 sh_cannot_copy_insn_p (rtx insn)
1833 rtx pat;
1835 if (!reload_completed || !flag_pic)
1836 return false;
1838 if (GET_CODE (insn) != INSN)
1839 return false;
1840 if (asm_noperands (insn) >= 0)
1841 return false;
1843 pat = PATTERN (insn);
1844 if (GET_CODE (pat) != SET)
1845 return false;
1846 pat = SET_SRC (pat);
1848 if (unspec_caller_rtx_p (pat))
1849 return true;
1851 return false;
1854 /* Actual number of instructions used to make a shift by N. */
1855 static const char ashiftrt_insns[] =
1856 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
1858 /* Left shift and logical right shift are the same. */
1859 static const char shift_insns[] =
1860 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1862 /* Individual shift amounts needed to get the above length sequences.
1863 One bit right shifts clobber the T bit, so when possible, put one bit
1864 shifts in the middle of the sequence, so the ends are eligible for
1865 branch delay slots. */
1866 static const short shift_amounts[32][5] = {
1867 {0}, {1}, {2}, {2, 1},
1868 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
1869 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1870 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
1871 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1872 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1873 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1874 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
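/* For example, a constant shift by 13 is emitted as shift_amounts[13]
   = {8, 2, 1, 2}, i.e. four partial shifts of 8, 2, 1 and 2 bits,
   matching shift_insns[13] == 4.  The single-bit shift sits in the
   middle of the sequence, so the first and last insns stay available
   for branch delay slots, as described above.  */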
1876 /* Likewise, but for shift amounts < 16, up to three highmost bits
1877 might be clobbered. This is typically used when combined with some
1878 kind of sign or zero extension. */
1880 static const char ext_shift_insns[] =
1881 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1883 static const short ext_shift_amounts[32][4] = {
1884 {0}, {1}, {2}, {2, 1},
1885 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
1886 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1887 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
1888 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1889 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1890 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1891 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
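/* For example, ext_shift_amounts[6] = {8, -2} implements a shift left
   by 6 as a shift left by 8 followed by a logical right shift by 2;
   this clobbers the two highmost result bits but takes only
   ext_shift_insns[6] == 2 insns instead of shift_insns[6] == 3.  */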
1893 /* Assuming we have a value that has been sign-extended by at least one bit,
1894 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
1895 to shift it by N without data loss, and quicker than by other means? */
1896 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
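/* ((n) | 8) == 15 is true exactly for n == 7 and n == 15, the two
   shift counts whose ext_shift_amounts sequences ({8, -1} and {16, -1})
   end in a single one-bit right shift that can be turned into an
   arithmetic shift as described above.  */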
1898 /* This is used in length attributes in sh.md to help compute the length
1899 of arbitrary constant shift instructions. */
1902 shift_insns_rtx (rtx insn)
1904 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1905 int shift_count = INTVAL (XEXP (set_src, 1));
1906 enum rtx_code shift_code = GET_CODE (set_src);
1908 switch (shift_code)
1910 case ASHIFTRT:
1911 return ashiftrt_insns[shift_count];
1912 case LSHIFTRT:
1913 case ASHIFT:
1914 return shift_insns[shift_count];
1915 default:
1916 gcc_unreachable ();
1920 /* Return the cost of a shift. */
1922 static inline int
1923 shiftcosts (rtx x)
1925 int value;
1927 if (TARGET_SHMEDIA)
1928 return 1;
1930 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
1932 if (GET_MODE (x) == DImode
1933 && GET_CODE (XEXP (x, 1)) == CONST_INT
1934 && INTVAL (XEXP (x, 1)) == 1)
1935 return 2;
1937 /* Everything else is invalid, because there is no pattern for it. */
1938 return 10000;
1940 /* If shift by a non constant, then this will be expensive. */
1941 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1942 return SH_DYNAMIC_SHIFT_COST;
1944 value = INTVAL (XEXP (x, 1));
1946 /* Otherwise, return the true cost in instructions. */
1947 if (GET_CODE (x) == ASHIFTRT)
1949 int cost = ashiftrt_insns[value];
1950 /* If SH3, then we put the constant in a reg and use shad. */
1951 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1952 cost = 1 + SH_DYNAMIC_SHIFT_COST;
1953 return cost;
1955 else
1956 return shift_insns[value];
1959 /* Return the cost of an AND operation. */
1961 static inline int
1962 andcosts (rtx x)
1964 int i;
1966 /* ANDing with a register is a single-cycle `and' instruction. */
1967 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1968 return 1;
1970 i = INTVAL (XEXP (x, 1));
1972 if (TARGET_SHMEDIA)
1974 if ((GET_CODE (XEXP (x, 1)) == CONST_INT
1975 && CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1976 || EXTRA_CONSTRAINT_C16 (XEXP (x, 1)))
1977 return 1;
1978 else
1979 return 2;
1982 /* These constants are single cycle extu.[bw] instructions. */
1983 if (i == 0xff || i == 0xffff)
1984 return 1;
1985 /* Constants that can be used in an and immediate instruction in a single
1986 cycle, but this requires r0, so make it a little more expensive. */
1987 if (CONST_OK_FOR_K08 (i))
1988 return 2;
1989 /* Constants that can be loaded with a mov immediate and an and.
1990 This case is probably unnecessary. */
1991 if (CONST_OK_FOR_I08 (i))
1992 return 2;
1993 /* Any other constant requires a 2-cycle pc-relative load plus an and.
1994 This case is probably unnecessary. */
1995 return 3;
1998 /* Return the cost of an addition or a subtraction. */
2000 static inline int
2001 addsubcosts (rtx x)
2003 /* Adding a register is a single cycle insn. */
2004 if (GET_CODE (XEXP (x, 1)) == REG
2005 || GET_CODE (XEXP (x, 1)) == SUBREG)
2006 return 1;
2008 /* Likewise for small constants. */
2009 if (GET_CODE (XEXP (x, 1)) == CONST_INT
2010 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2011 return 1;
2013 if (TARGET_SHMEDIA)
2014 switch (GET_CODE (XEXP (x, 1)))
2016 case CONST:
2017 case LABEL_REF:
2018 case SYMBOL_REF:
2019 return TARGET_SHMEDIA64 ? 5 : 3;
2021 case CONST_INT:
2022 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2023 return 2;
2024 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2025 return 3;
2026 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2027 return 4;
2029 /* Fall through. */
2030 default:
2031 return 5;
2034 /* Any other constant requires a 2 cycle pc-relative load plus an
2035 addition. */
2036 return 3;
2039 /* Return the cost of a multiply. */
2040 static inline int
2041 multcosts (rtx x ATTRIBUTE_UNUSED)
2043 if (sh_multcost >= 0)
2044 return sh_multcost;
2045 if (TARGET_SHMEDIA)
2046 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2047 accept constants. Ideally, we would use a cost of one or two and
2048 add the cost of the operand, but disregard the latter when inside loops
2049 and loop invariant code motion is still to follow.
2050 Using a multiply first and splitting it later if it's a loss
2051 doesn't work because of different sign / zero extension semantics
2052 of multiplies vs. shifts. */
2053 return TARGET_SMALLCODE ? 2 : 3;
2055 if (TARGET_SH2)
2057 /* We have a mul insn, so we can never take more than the mul and the
2058 read of the mac reg, but count more because of the latency and extra
2059 reg usage. */
2060 if (TARGET_SMALLCODE)
2061 return 2;
2062 return 3;
2065 /* If we're aiming at small code, then just count the number of
2066 insns in a multiply call sequence. */
2067 if (TARGET_SMALLCODE)
2068 return 5;
2070 /* Otherwise count all the insns in the routine we'd be calling too. */
2071 return 20;
2074 /* Compute a (partial) cost for rtx X. Return true if the complete
2075 cost has been computed, and false if subexpressions should be
2076 scanned. In either case, *TOTAL contains the cost result. */
2078 static bool
2079 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
2081 switch (code)
2083 case CONST_INT:
2084 if (TARGET_SHMEDIA)
2086 if (INTVAL (x) == 0)
2087 *total = 0;
2088 else if (outer_code == AND && and_operand ((x), DImode))
2089 *total = 0;
2090 else if ((outer_code == IOR || outer_code == XOR
2091 || outer_code == PLUS)
2092 && CONST_OK_FOR_I10 (INTVAL (x)))
2093 *total = 0;
2094 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2095 *total = COSTS_N_INSNS (outer_code != SET);
2096 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2097 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2098 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2099 *total = COSTS_N_INSNS (3);
2100 else
2101 *total = COSTS_N_INSNS (4);
2102 return true;
2104 if (CONST_OK_FOR_I08 (INTVAL (x)))
2105 *total = 0;
2106 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2107 && CONST_OK_FOR_K08 (INTVAL (x)))
2108 *total = 1;
2109 else
2110 *total = 8;
2111 return true;
2113 case CONST:
2114 case LABEL_REF:
2115 case SYMBOL_REF:
2116 if (TARGET_SHMEDIA64)
2117 *total = COSTS_N_INSNS (4);
2118 else if (TARGET_SHMEDIA32)
2119 *total = COSTS_N_INSNS (2);
2120 else
2121 *total = 5;
2122 return true;
2124 case CONST_DOUBLE:
2125 if (TARGET_SHMEDIA)
2126 *total = COSTS_N_INSNS (4);
2127 else
2128 *total = 10;
2129 return true;
2130 case CONST_VECTOR:
2131 if (x == CONST0_RTX (GET_MODE (x)))
2132 *total = 0;
2133 else if (sh_1el_vec (x, VOIDmode))
2134 *total = outer_code != SET;
2135 if (sh_rep_vec (x, VOIDmode))
2136 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2137 + (outer_code != SET));
2138 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2139 return true;
2141 case PLUS:
2142 case MINUS:
2143 *total = COSTS_N_INSNS (addsubcosts (x));
2144 return true;
2146 case AND:
2147 *total = COSTS_N_INSNS (andcosts (x));
2148 return true;
2150 case MULT:
2151 *total = COSTS_N_INSNS (multcosts (x));
2152 return true;
2154 case ASHIFT:
2155 case ASHIFTRT:
2156 case LSHIFTRT:
2157 *total = COSTS_N_INSNS (shiftcosts (x));
2158 return true;
2160 case DIV:
2161 case UDIV:
2162 case MOD:
2163 case UMOD:
2164 *total = COSTS_N_INSNS (20);
2165 return true;
2167 case PARALLEL:
2168 if (sh_1el_vec (x, VOIDmode))
2169 *total = outer_code != SET;
2170 if (sh_rep_vec (x, VOIDmode))
2171 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2172 + (outer_code != SET));
2173 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2174 return true;
2176 case FLOAT:
2177 case FIX:
2178 *total = 100;
2179 return true;
2181 default:
2182 return false;
2186 /* Compute the cost of an address. For the SH, all valid addresses are
2187 the same cost. Use a slightly higher cost for reg + reg addressing,
2188 since it increases pressure on r0. */
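/* Hence reg+reg addresses (@(r0,rn)) cost 1 on the non-SHmedia targets,
   while @rn and @(disp,rn) addresses, and all SHmedia addresses, cost 0.  */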
2190 static int
2191 sh_address_cost (rtx X)
2193 return (GET_CODE (X) == PLUS
2194 && ! CONSTANT_P (XEXP (X, 1))
2195 && ! TARGET_SHMEDIA ? 1 : 0);
2198 /* Code to expand a shift. */
2200 void
2201 gen_ashift (int type, int n, rtx reg)
2203 /* Negative values here come from the shift_amounts array. */
2204 if (n < 0)
2206 if (type == ASHIFT)
2207 type = LSHIFTRT;
2208 else
2209 type = ASHIFT;
2210 n = -n;
2213 switch (type)
2215 case ASHIFTRT:
2216 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
2217 break;
2218 case LSHIFTRT:
2219 if (n == 1)
2220 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
2221 else
2222 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
2223 break;
2224 case ASHIFT:
2225 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
2226 break;
2230 /* Same for HImode */
2232 void
2233 gen_ashift_hi (int type, int n, rtx reg)
2235 /* Negative values here come from the shift_amounts array. */
2236 if (n < 0)
2238 if (type == ASHIFT)
2239 type = LSHIFTRT;
2240 else
2241 type = ASHIFT;
2242 n = -n;
2245 switch (type)
2247 case ASHIFTRT:
2248 case LSHIFTRT:
2249 /* We don't have HImode right shift operations because using the
2250 ordinary 32 bit shift instructions for that doesn't generate proper
2251 zero/sign extension.
2252 gen_ashift_hi is only called in contexts where we know that the
2253 sign extension works out correctly. */
2255 int offset = 0;
2256 if (GET_CODE (reg) == SUBREG)
2258 offset = SUBREG_BYTE (reg);
2259 reg = SUBREG_REG (reg);
2261 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
2262 break;
2264 case ASHIFT:
2265 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
2266 break;
2270 /* Output RTL to split a constant shift into its component SH constant
2271 shift instructions. */
2273 void
2274 gen_shifty_op (int code, rtx *operands)
2276 int value = INTVAL (operands[2]);
2277 int max, i;
2279 /* Truncate the shift count in case it is out of bounds. */
2280 value = value & 0x1f;
2282 if (value == 31)
2284 if (code == LSHIFTRT)
2286 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
2287 emit_insn (gen_movt (operands[0]));
2288 return;
2290 else if (code == ASHIFT)
2292 /* There is a two instruction sequence for 31 bit left shifts,
2293 but it requires r0. */
2294 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
2296 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
2297 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
2298 return;
2302 else if (value == 0)
2304 /* This can happen even when optimizing, if there were subregs before
2305 reload. Don't output a nop here, as this is never optimized away;
2306 use a no-op move instead. */
2307 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
2308 return;
2311 max = shift_insns[value];
2312 for (i = 0; i < max; i++)
2313 gen_ashift (code, shift_amounts[value][i], operands[0]);
2316 /* Same as above, but optimized for values where the topmost bits don't
2317 matter. */
2319 void
2320 gen_shifty_hi_op (int code, rtx *operands)
2322 int value = INTVAL (operands[2]);
2323 int max, i;
2324 void (*gen_fun) (int, int, rtx);
2326 /* This operation is used by and_shl for SImode values with a few
2327 high bits known to be cleared. */
2328 value &= 31;
2329 if (value == 0)
2331 emit_insn (gen_nop ());
2332 return;
2335 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2336 if (code == ASHIFT)
2338 max = ext_shift_insns[value];
2339 for (i = 0; i < max; i++)
2340 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2342 else
2343 /* When shifting right, emit the shifts in reverse order, so that
2344 solitary negative values come first. */
2345 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2346 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2349 /* Output RTL for an arithmetic right shift. */
2351 /* ??? Rewrite to use super-optimizer sequences. */
2354 expand_ashiftrt (rtx *operands)
2356 rtx wrk;
2357 char func[18];
2358 int value;
2360 if (TARGET_SH3)
2362 if (GET_CODE (operands[2]) != CONST_INT)
2364 rtx count = copy_to_mode_reg (SImode, operands[2]);
2365 emit_insn (gen_negsi2 (count, count));
2366 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2367 return 1;
2369 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2370 > 1 + SH_DYNAMIC_SHIFT_COST)
2372 rtx count
2373 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2374 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2375 return 1;
2378 if (GET_CODE (operands[2]) != CONST_INT)
2379 return 0;
2381 value = INTVAL (operands[2]) & 31;
2383 if (value == 31)
2385 /* If we are called from abs expansion, arrange things so that
2386 we can use a single MT instruction that doesn't clobber the source,
2387 if LICM can hoist out the load of the constant zero. */
2388 if (currently_expanding_to_rtl)
2390 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
2391 operands[1]));
2392 emit_insn (gen_mov_neg_si_t (operands[0]));
2393 return 1;
2395 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2396 return 1;
2398 else if (value >= 16 && value <= 19)
2400 wrk = gen_reg_rtx (SImode);
2401 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2402 value -= 16;
2403 while (value--)
2404 gen_ashift (ASHIFTRT, 1, wrk);
2405 emit_move_insn (operands[0], wrk);
2406 return 1;
2408 /* Expand a short sequence inline; for longer shifts, call a magic routine. */
2409 else if (value <= 5)
2411 wrk = gen_reg_rtx (SImode);
2412 emit_move_insn (wrk, operands[1]);
2413 while (value--)
2414 gen_ashift (ASHIFTRT, 1, wrk);
2415 emit_move_insn (operands[0], wrk);
2416 return 1;
2419 wrk = gen_reg_rtx (Pmode);
2421 /* Load the value into an arg reg and call a helper. */
2422 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2423 sprintf (func, "__ashiftrt_r4_%d", value);
2424 function_symbol (wrk, func, SFUNC_STATIC);
2425 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2426 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
2427 return 1;
2431 sh_dynamicalize_shift_p (rtx count)
2433 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
2436 /* Try to find a good way to implement the combiner pattern
2437 [(set (match_operand:SI 0 "register_operand" "r")
2438 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2439 (match_operand:SI 2 "const_int_operand" "n"))
2440 (match_operand:SI 3 "const_int_operand" "n"))) .
2441 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2442 return 0 for simple right / left or left/right shift combination.
2443 return 1 for a combination of shifts with zero_extend.
2444 return 2 for a combination of shifts with an AND that needs r0.
2445 return 3 for a combination of shifts with an AND that needs an extra
2446 scratch register, when the three highmost bits of the AND mask are clear.
2447 return 4 for a combination of shifts with an AND that needs an extra
2448 scratch register, when any of the three highmost bits of the AND mask
2449 is set.
2450 If ATTRP is set, store an initial right shift width in ATTRP[0],
2451 and the instruction length in ATTRP[1] . These values are not valid
2452 when returning 0.
2453 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2454 shift_amounts for the last shift value that is to be used before the
2455 sign extend. */
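/* For instance, with LEFT_RTX == 2 and MASK_RTX == 0x3fc, the combined
   operation (x << 2) & 0x3fc keeps only the low byte of x shifted left
   by two, so a zero extension followed by a two-bit left shift is the
   cheapest implementation and shl_and_kind returns 1.  */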
2457 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2459 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2460 int left = INTVAL (left_rtx), right;
2461 int best = 0;
2462 int cost, best_cost = 10000;
2463 int best_right = 0, best_len = 0;
2464 int i;
2465 int can_ext;
2467 if (left < 0 || left > 31)
2468 return 0;
2469 if (GET_CODE (mask_rtx) == CONST_INT)
2470 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2471 else
2472 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2473 /* Can this be expressed as a right shift / left shift pair? */
2474 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2475 right = exact_log2 (lsb);
2476 mask2 = ~(mask + lsb - 1);
2477 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2478 /* mask has no zeroes except trailing zeroes <==> ! mask2 */
2479 if (! mask2)
2480 best_cost = shift_insns[right] + shift_insns[right + left];
2481 /* mask has no trailing zeroes <==> ! right */
2482 else if (! right && mask2 == ~(lsb2 - 1))
2484 int late_right = exact_log2 (lsb2);
2485 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2487 /* Try to use zero extend. */
2488 if (mask2 == ~(lsb2 - 1))
2490 int width, first;
2492 for (width = 8; width <= 16; width += 8)
2494 /* Can we zero-extend right away? */
2495 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2497 cost
2498 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2499 if (cost < best_cost)
2501 best = 1;
2502 best_cost = cost;
2503 best_right = right;
2504 best_len = cost;
2505 if (attrp)
2506 attrp[2] = -1;
2508 continue;
2510 /* ??? Could try to put zero extend into initial right shift,
2511 or even shift a bit left before the right shift. */
2512 /* Determine value of first part of left shift, to get to the
2513 zero extend cut-off point. */
2514 first = width - exact_log2 (lsb2) + right;
2515 if (first >= 0 && right + left - first >= 0)
2517 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2518 + ext_shift_insns[right + left - first];
2519 if (cost < best_cost)
2521 best = 1;
2522 best_cost = cost;
2523 best_right = right;
2524 best_len = cost;
2525 if (attrp)
2526 attrp[2] = first;
2531 /* Try to use r0 AND pattern */
2532 for (i = 0; i <= 2; i++)
2534 if (i > right)
2535 break;
2536 if (! CONST_OK_FOR_K08 (mask >> i))
2537 continue;
2538 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2539 if (cost < best_cost)
2541 best = 2;
2542 best_cost = cost;
2543 best_right = i;
2544 best_len = cost - 1;
2547 /* Try to use a scratch register to hold the AND operand. */
2548 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2549 for (i = 0; i <= 2; i++)
2551 if (i > right)
2552 break;
2553 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2554 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2555 if (cost < best_cost)
2557 best = 4 - can_ext;
2558 best_cost = cost;
2559 best_right = i;
2560 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2564 if (attrp)
2566 attrp[0] = best_right;
2567 attrp[1] = best_len;
2569 return best;
2572 /* This is used in length attributes of the unnamed instructions
2573 corresponding to shl_and_kind return values of 1 and 2. */
2575 shl_and_length (rtx insn)
2577 rtx set_src, left_rtx, mask_rtx;
2578 int attributes[3];
2580 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2581 left_rtx = XEXP (XEXP (set_src, 0), 1);
2582 mask_rtx = XEXP (set_src, 1);
2583 shl_and_kind (left_rtx, mask_rtx, attributes);
2584 return attributes[1];
2587 /* This is used in length attribute of the and_shl_scratch instruction. */
2590 shl_and_scr_length (rtx insn)
2592 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2593 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2594 rtx op = XEXP (set_src, 0);
2595 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2596 op = XEXP (XEXP (op, 0), 0);
2597 return len + shift_insns[INTVAL (XEXP (op, 1))];
2600 /* Generate rtl for instructions for which shl_and_kind advised a particular
2601 method of generating them, i.e. returned zero. */
2604 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2606 int attributes[3];
2607 unsigned HOST_WIDE_INT mask;
2608 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2609 int right, total_shift;
2610 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2612 right = attributes[0];
2613 total_shift = INTVAL (left_rtx) + right;
2614 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2615 switch (kind)
2617 default:
2618 return -1;
2619 case 1:
2621 int first = attributes[2];
2622 rtx operands[3];
2624 if (first < 0)
2626 emit_insn ((mask << right) <= 0xff
2627 ? gen_zero_extendqisi2 (dest,
2628 gen_lowpart (QImode, source))
2629 : gen_zero_extendhisi2 (dest,
2630 gen_lowpart (HImode, source)));
2631 source = dest;
2633 if (source != dest)
2634 emit_insn (gen_movsi (dest, source));
2635 operands[0] = dest;
2636 if (right)
2638 operands[2] = GEN_INT (right);
2639 gen_shifty_hi_op (LSHIFTRT, operands);
2641 if (first > 0)
2643 operands[2] = GEN_INT (first);
2644 gen_shifty_hi_op (ASHIFT, operands);
2645 total_shift -= first;
2646 mask <<= first;
2648 if (first >= 0)
2649 emit_insn (mask <= 0xff
2650 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
2651 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2652 if (total_shift > 0)
2654 operands[2] = GEN_INT (total_shift);
2655 gen_shifty_hi_op (ASHIFT, operands);
2657 break;
2659 case 4:
2660 shift_gen_fun = gen_shifty_op;
2661 case 3:
2662 /* If the topmost bit that matters is set, set the topmost bits
2663 that don't matter. This way, we might be able to get a shorter
2664 signed constant. */
2665 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
2666 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
2667 case 2:
2668 /* Don't expand fine-grained when combining, because that will
2669 make the pattern fail. */
2670 if (currently_expanding_to_rtl
2671 || reload_in_progress || reload_completed)
2673 rtx operands[3];
2675 /* Cases 3 and 4 should be handled by this split
2676 only while combining */
2677 gcc_assert (kind <= 2);
2678 if (right)
2680 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2681 source = dest;
2683 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
2684 if (total_shift)
2686 operands[0] = dest;
2687 operands[1] = dest;
2688 operands[2] = GEN_INT (total_shift);
2689 shift_gen_fun (ASHIFT, operands);
2691 break;
2693 else
2695 int neg = 0;
2696 if (kind != 4 && total_shift < 16)
2698 neg = -ext_shift_amounts[total_shift][1];
2699 if (neg > 0)
2700 neg -= ext_shift_amounts[total_shift][2];
2701 else
2702 neg = 0;
2704 emit_insn (gen_and_shl_scratch (dest, source,
2705 GEN_INT (right),
2706 GEN_INT (mask),
2707 GEN_INT (total_shift + neg),
2708 GEN_INT (neg)));
2709 emit_insn (gen_movsi (dest, dest));
2710 break;
2713 return 0;
2716 /* Try to find a good way to implement the combiner pattern
2717 [(set (match_operand:SI 0 "register_operand" "=r")
2718 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2719 (match_operand:SI 2 "const_int_operand" "n")
2720 (match_operand:SI 3 "const_int_operand" "n")
2721 (const_int 0)))
2722 (clobber (reg:SI T_REG))]
2723 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2724 return 0 for simple left / right shift combination.
2725 return 1 for left shift / 8 bit sign extend / left shift.
2726 return 2 for left shift / 16 bit sign extend / left shift.
2727 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2728 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2729 return 5 for left shift / 16 bit sign extend / right shift
2730 return 6 for < 8 bit sign extend / left shift.
2731 return 7 for < 8 bit sign extend / left shift / single right shift.
2732 If COSTP is nonzero, assign the calculated cost to *COSTP. */
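/* For instance, with LEFT_RTX == 2 and SIZE_RTX == 10 the pattern
   sign-extends the low ten bits of (x << 2), which is just the low byte
   of x sign-extended and shifted left by two; an 8 bit sign extend
   followed by a two-bit left shift suffices, so shl_sext_kind returns 1
   (left shift / 8 bit sign extend / left shift, with the first shift
   empty).  */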
2735 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
2737 int left, size, insize, ext;
2738 int cost = 0, best_cost;
2739 int kind;
2741 left = INTVAL (left_rtx);
2742 size = INTVAL (size_rtx);
2743 insize = size - left;
2744 gcc_assert (insize > 0);
2745 /* Default to left / right shift. */
2746 kind = 0;
2747 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
2748 if (size <= 16)
2750 /* 16 bit shift / sign extend / 16 bit shift */
2751 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2752 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2753 below, by alternative 3 or something even better. */
2754 if (cost < best_cost)
2756 kind = 5;
2757 best_cost = cost;
2760 /* Try a plain sign extend between two shifts. */
2761 for (ext = 16; ext >= insize; ext -= 8)
2763 if (ext <= size)
2765 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2766 if (cost < best_cost)
2768 kind = ext / (unsigned) 8;
2769 best_cost = cost;
2772 /* Check if we can do a sloppy shift with a final signed shift
2773 restoring the sign. */
2774 if (EXT_SHIFT_SIGNED (size - ext))
2775 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2776 /* If not, maybe it's still cheaper to do the second shift sloppy,
2777 and do a final sign extend? */
2778 else if (size <= 16)
2779 cost = ext_shift_insns[ext - insize] + 1
2780 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2781 else
2782 continue;
2783 if (cost < best_cost)
2785 kind = ext / (unsigned) 8 + 2;
2786 best_cost = cost;
2789 /* Check if we can sign extend in r0 */
2790 if (insize < 8)
2792 cost = 3 + shift_insns[left];
2793 if (cost < best_cost)
2795 kind = 6;
2796 best_cost = cost;
2798 /* Try the same with a final signed shift. */
2799 if (left < 31)
2801 cost = 3 + ext_shift_insns[left + 1] + 1;
2802 if (cost < best_cost)
2804 kind = 7;
2805 best_cost = cost;
2809 if (TARGET_SH3)
2811 /* Try to use a dynamic shift. */
2812 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2813 if (cost < best_cost)
2815 kind = 0;
2816 best_cost = cost;
2819 if (costp)
2820 *costp = cost;
2821 return kind;
2824 /* Function to be used in the length attribute of the instructions
2825 implementing this pattern. */
2828 shl_sext_length (rtx insn)
2830 rtx set_src, left_rtx, size_rtx;
2831 int cost;
2833 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2834 left_rtx = XEXP (XEXP (set_src, 0), 1);
2835 size_rtx = XEXP (set_src, 1);
2836 shl_sext_kind (left_rtx, size_rtx, &cost);
2837 return cost;
2840 /* Generate rtl for this pattern */
2843 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
2845 int kind;
2846 int left, size, insize, cost;
2847 rtx operands[3];
2849 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2850 left = INTVAL (left_rtx);
2851 size = INTVAL (size_rtx);
2852 insize = size - left;
2853 switch (kind)
2855 case 1:
2856 case 2:
2857 case 3:
2858 case 4:
2860 int ext = kind & 1 ? 8 : 16;
2861 int shift2 = size - ext;
2863 /* Don't expand fine-grained when combining, because that will
2864 make the pattern fail. */
2865 if (! currently_expanding_to_rtl
2866 && ! reload_in_progress && ! reload_completed)
2868 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2869 emit_insn (gen_movsi (dest, source));
2870 break;
2872 if (dest != source)
2873 emit_insn (gen_movsi (dest, source));
2874 operands[0] = dest;
2875 if (ext - insize)
2877 operands[2] = GEN_INT (ext - insize);
2878 gen_shifty_hi_op (ASHIFT, operands);
2880 emit_insn (kind & 1
2881 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2882 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2883 if (kind <= 2)
2885 if (shift2)
2887 operands[2] = GEN_INT (shift2);
2888 gen_shifty_op (ASHIFT, operands);
2891 else
2893 if (shift2 > 0)
2895 if (EXT_SHIFT_SIGNED (shift2))
2897 operands[2] = GEN_INT (shift2 + 1);
2898 gen_shifty_op (ASHIFT, operands);
2899 operands[2] = const1_rtx;
2900 gen_shifty_op (ASHIFTRT, operands);
2901 break;
2903 operands[2] = GEN_INT (shift2);
2904 gen_shifty_hi_op (ASHIFT, operands);
2906 else if (shift2)
2908 operands[2] = GEN_INT (-shift2);
2909 gen_shifty_hi_op (LSHIFTRT, operands);
2911 emit_insn (size <= 8
2912 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2913 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2915 break;
2917 case 5:
2919 int i = 16 - size;
2920 if (! currently_expanding_to_rtl
2921 && ! reload_in_progress && ! reload_completed)
2922 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2923 else
2925 operands[0] = dest;
2926 operands[2] = GEN_INT (16 - insize);
2927 gen_shifty_hi_op (ASHIFT, operands);
2928 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2930 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2931 while (--i >= 0)
2932 gen_ashift (ASHIFTRT, 1, dest);
2933 break;
2935 case 6:
2936 case 7:
2937 /* Don't expand fine-grained when combining, because that will
2938 make the pattern fail. */
2939 if (! currently_expanding_to_rtl
2940 && ! reload_in_progress && ! reload_completed)
2942 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2943 emit_insn (gen_movsi (dest, source));
2944 break;
2946 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2947 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2948 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2949 operands[0] = dest;
2950 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2951 gen_shifty_op (ASHIFT, operands);
2952 if (kind == 7)
2953 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
2954 break;
2955 default:
2956 return -1;
2958 return 0;
2961 /* Prefix a symbol_ref name with "datalabel". */
2964 gen_datalabel_ref (rtx sym)
2966 const char *str;
2968 if (GET_CODE (sym) == LABEL_REF)
2969 return gen_rtx_CONST (GET_MODE (sym),
2970 gen_rtx_UNSPEC (GET_MODE (sym),
2971 gen_rtvec (1, sym),
2972 UNSPEC_DATALABEL));
2974 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
2976 str = XSTR (sym, 0);
2977 /* Share all SYMBOL_REF strings with the same value - that is important
2978 for cse. */
2979 str = IDENTIFIER_POINTER (get_identifier (str));
2980 XSTR (sym, 0) = str;
2982 return sym;
2986 /* The SH cannot load a large constant into a register, constants have to
2987 come from a pc relative load. The reference of a pc relative load
2988 instruction must be less than 1k in front of the instruction. This
2989 means that we often have to dump a constant inside a function, and
2990 generate code to branch around it.
2992 It is important to minimize this, since the branches will slow things
2993 down and make things bigger.
2995 Worst case code looks like:
2997 mov.l L1,rn
2998 bra L2
3000 align
3001 L1: .long value
3005 mov.l L3,rn
3006 bra L4
3008 align
3009 L3: .long value
3013 We fix this by performing a scan before scheduling, which notices which
3014 instructions need to have their operands fetched from the constant table
3015 and builds the table.
3017 The algorithm is:
3019 scan, find an instruction which needs a pcrel move. Look forward, find the
3020 last barrier which is within MAX_COUNT bytes of the requirement.
3021 If there isn't one, make one. Process all the instructions between
3022 the find and the barrier.
3024 In the above example, we can tell that L3 is within 1k of L1, so
3025 the first move can be shrunk from the 3 insn+constant sequence into
3026 just 1 insn, and the constant moved to L3 to make:
3028 mov.l L1,rn
3030 mov.l L3,rn
3031 bra L4
3033 align
3034 L3:.long value
3035 L4:.long value
3037 Then the second move becomes the target for the shortening process. */
3039 typedef struct
3041 rtx value; /* Value in table. */
3042 rtx label; /* Label of value. */
3043 rtx wend; /* End of window. */
3044 enum machine_mode mode; /* Mode of value. */
3046 /* True if this constant is accessed as part of a post-increment
3047 sequence. Note that HImode constants are never accessed in this way. */
3048 bool part_of_sequence_p;
3049 } pool_node;
3051 /* The maximum number of constants that can fit into one pool, since
3052 constants in the range 0..510 are at least 2 bytes long, and in the
3053 range from there to 1018 at least 4 bytes. */
3055 #define MAX_POOL_SIZE 372
3056 static pool_node pool_vector[MAX_POOL_SIZE];
3057 static int pool_size;
3058 static rtx pool_window_label;
3059 static int pool_window_last;
3061 /* ??? If we need a constant in HImode which is the truncated value of a
3062 constant we need in SImode, we could combine the two entries thus saving
3063 two bytes. Is this common enough to be worth the effort of implementing
3064 it? */
3066 /* ??? This stuff should be done at the same time that we shorten branches.
3067 As it is now, we must assume that all branches are the maximum size, and
3068 this causes us to almost always output constant pools sooner than
3069 necessary. */
3071 /* Add a constant to the pool and return its label. */
3073 static rtx
3074 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3076 int i;
3077 rtx lab, new, ref, newref;
3079 /* First see if we've already got it. */
3080 for (i = 0; i < pool_size; i++)
3082 if (x->code == pool_vector[i].value->code
3083 && mode == pool_vector[i].mode)
3085 if (x->code == CODE_LABEL)
3087 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3088 continue;
3090 if (rtx_equal_p (x, pool_vector[i].value))
3092 lab = new = 0;
3093 if (! last_value
3094 || ! i
3095 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3097 new = gen_label_rtx ();
3098 LABEL_REFS (new) = pool_vector[i].label;
3099 pool_vector[i].label = lab = new;
3101 if (lab && pool_window_label)
3103 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
3104 ref = pool_vector[pool_window_last].wend;
3105 LABEL_NEXTREF (newref) = ref;
3106 pool_vector[pool_window_last].wend = newref;
3108 if (new)
3109 pool_window_label = new;
3110 pool_window_last = i;
3111 return lab;
3116 /* Need a new one. */
3117 pool_vector[pool_size].value = x;
3118 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
3120 lab = 0;
3121 pool_vector[pool_size - 1].part_of_sequence_p = true;
3123 else
3124 lab = gen_label_rtx ();
3125 pool_vector[pool_size].mode = mode;
3126 pool_vector[pool_size].label = lab;
3127 pool_vector[pool_size].wend = NULL_RTX;
3128 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
3129 if (lab && pool_window_label)
3131 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
3132 ref = pool_vector[pool_window_last].wend;
3133 LABEL_NEXTREF (newref) = ref;
3134 pool_vector[pool_window_last].wend = newref;
3136 if (lab)
3137 pool_window_label = lab;
3138 pool_window_last = pool_size;
3139 pool_size++;
3140 return lab;
3143 /* Output the literal table. START, if nonzero, is the first instruction
3144 this table is needed for, and also indicates that there is at least one
3145 casesi_worker_2 instruction; we have to emit the operand3 labels from
3146 these insns at a 4-byte aligned position. BARRIER is the barrier
3147 after which we are to place the table. */
3149 static void
3150 dump_table (rtx start, rtx barrier)
3152 rtx scan = barrier;
3153 int i;
3154 int need_align = 1;
3155 rtx lab, ref;
3156 int have_df = 0;
3158 /* Do two passes, first time dump out the HI sized constants. */
3160 for (i = 0; i < pool_size; i++)
3162 pool_node *p = &pool_vector[i];
3164 if (p->mode == HImode)
3166 if (need_align)
3168 scan = emit_insn_after (gen_align_2 (), scan);
3169 need_align = 0;
3171 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3172 scan = emit_label_after (lab, scan);
3173 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
3174 scan);
3175 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3177 lab = XEXP (ref, 0);
3178 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3181 else if (p->mode == DFmode)
3182 have_df = 1;
3185 need_align = 1;
3187 if (start)
3189 scan = emit_insn_after (gen_align_4 (), scan);
3190 need_align = 0;
3191 for (; start != barrier; start = NEXT_INSN (start))
3192 if (GET_CODE (start) == INSN
3193 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
3195 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
3196 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
3198 scan = emit_label_after (lab, scan);
3201 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
3203 rtx align_insn = NULL_RTX;
3205 scan = emit_label_after (gen_label_rtx (), scan);
3206 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3207 need_align = 0;
3209 for (i = 0; i < pool_size; i++)
3211 pool_node *p = &pool_vector[i];
3213 switch (p->mode)
3215 case HImode:
3216 break;
3217 case SImode:
3218 case SFmode:
3219 if (align_insn && !p->part_of_sequence_p)
3221 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3222 emit_label_before (lab, align_insn);
3223 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
3224 align_insn);
3225 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3227 lab = XEXP (ref, 0);
3228 emit_insn_before (gen_consttable_window_end (lab),
3229 align_insn);
3231 delete_insn (align_insn);
3232 align_insn = NULL_RTX;
3233 continue;
3235 else
3237 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3238 scan = emit_label_after (lab, scan);
3239 scan = emit_insn_after (gen_consttable_4 (p->value,
3240 const0_rtx), scan);
3241 need_align = ! need_align;
3243 break;
3244 case DFmode:
3245 if (need_align)
3247 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3248 align_insn = scan;
3249 need_align = 0;
3251 case DImode:
3252 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3253 scan = emit_label_after (lab, scan);
3254 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3255 scan);
3256 break;
3257 default:
3258 gcc_unreachable ();
3261 if (p->mode != HImode)
3263 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3265 lab = XEXP (ref, 0);
3266 scan = emit_insn_after (gen_consttable_window_end (lab),
3267 scan);
3272 pool_size = 0;
3275 for (i = 0; i < pool_size; i++)
3277 pool_node *p = &pool_vector[i];
3279 switch (p->mode)
3281 case HImode:
3282 break;
3283 case SImode:
3284 case SFmode:
3285 if (need_align)
3287 need_align = 0;
3288 scan = emit_label_after (gen_label_rtx (), scan);
3289 scan = emit_insn_after (gen_align_4 (), scan);
3291 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3292 scan = emit_label_after (lab, scan);
3293 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
3294 scan);
3295 break;
3296 case DFmode:
3297 case DImode:
3298 if (need_align)
3300 need_align = 0;
3301 scan = emit_label_after (gen_label_rtx (), scan);
3302 scan = emit_insn_after (gen_align_4 (), scan);
3304 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3305 scan = emit_label_after (lab, scan);
3306 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3307 scan);
3308 break;
3309 default:
3310 gcc_unreachable ();
3313 if (p->mode != HImode)
3315 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3317 lab = XEXP (ref, 0);
3318 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3323 scan = emit_insn_after (gen_consttable_end (), scan);
3324 scan = emit_barrier_after (scan);
3325 pool_size = 0;
3326 pool_window_label = NULL_RTX;
3327 pool_window_last = 0;
3330 /* Return nonzero if constant would be an ok source for a
3331 mov.w instead of a mov.l. */
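/* mov.w @(disp,pc),rn loads a sign-extended 16-bit value, so such a
   constant needs only a 2-byte slot in the constant pool instead of 4.  */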
3333 static int
3334 hi_const (rtx src)
3336 return (GET_CODE (src) == CONST_INT
3337 && INTVAL (src) >= -32768
3338 && INTVAL (src) <= 32767);
3341 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3343 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
3344 CONST_DOUBLE input value is CONST_OK_FOR_I08. For an SFmode move, we don't
3345 need to fix it if the input value is CONST_OK_FOR_I08. */
3347 static int
3348 broken_move (rtx insn)
3350 if (GET_CODE (insn) == INSN)
3352 rtx pat = PATTERN (insn);
3353 if (GET_CODE (pat) == PARALLEL)
3354 pat = XVECEXP (pat, 0, 0);
3355 if (GET_CODE (pat) == SET
3356 /* We can load any 8 bit value if we don't care what the high
3357 order bits end up as. */
3358 && GET_MODE (SET_DEST (pat)) != QImode
3359 && (CONSTANT_P (SET_SRC (pat))
3360 /* Match mova_const. */
3361 || (GET_CODE (SET_SRC (pat)) == UNSPEC
3362 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3363 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
3364 && ! (TARGET_SH2E
3365 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3366 && (fp_zero_operand (SET_SRC (pat))
3367 || fp_one_operand (SET_SRC (pat)))
3368 /* ??? If this is a -m4 or -m4-single compilation, in general
3369 we don't know the current setting of fpscr, so disable fldi.
3370 There is an exception if this was a register-register move
3371 before reload - and hence it was ascertained that we have
3372 single precision setting - and in a post-reload optimization
3373 we changed this to do a constant load. In that case
3374 we don't have an r0 clobber, hence we must use fldi. */
3375 && (! TARGET_SH4 || TARGET_FMOVD
3376 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3377 == SCRATCH))
3378 && GET_CODE (SET_DEST (pat)) == REG
3379 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3380 && ! (TARGET_SH2A
3381 && GET_MODE (SET_DEST (pat)) == SImode
3382 && GET_CODE (SET_SRC (pat)) == CONST_INT
3383 && CONST_OK_FOR_I20 (INTVAL (SET_SRC (pat))))
3384 && (GET_CODE (SET_SRC (pat)) != CONST_INT
3385 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
3386 return 1;
3389 return 0;
3392 static int
3393 mova_p (rtx insn)
3395 return (GET_CODE (insn) == INSN
3396 && GET_CODE (PATTERN (insn)) == SET
3397 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3398 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3399 /* Don't match mova_const. */
3400 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
3403 /* Fix up a mova from a switch that went out of range. */
3404 static void
3405 fixup_mova (rtx mova)
3407 if (! flag_pic)
3409 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3410 INSN_CODE (mova) = -1;
3412 else
3414 rtx worker = mova;
3415 rtx lab = gen_label_rtx ();
3416 rtx wpat, wpat0, wpat1, wsrc, diff;
3420 worker = NEXT_INSN (worker);
3421 gcc_assert (worker
3422 && GET_CODE (worker) != CODE_LABEL
3423 && GET_CODE (worker) != JUMP_INSN);
3424 } while (GET_CODE (worker) == NOTE
3425 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3426 wpat = PATTERN (worker);
3427 wpat0 = XVECEXP (wpat, 0, 0);
3428 wpat1 = XVECEXP (wpat, 0, 1);
3429 wsrc = SET_SRC (wpat0);
3430 PATTERN (worker) = (gen_casesi_worker_2
3431 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3432 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3433 XEXP (wpat1, 0)));
3434 INSN_CODE (worker) = -1;
3435 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3436 gen_rtx_LABEL_REF (Pmode, lab));
3437 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3438 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3439 INSN_CODE (mova) = -1;
3443 /* Find the last barrier from insn FROM which is close enough to hold the
3444 constant pool. If we can't find one, then create one near the end of
3445 the range. */
3447 static rtx
3448 find_barrier (int num_mova, rtx mova, rtx from)
3450 int count_si = 0;
3451 int count_hi = 0;
3452 int found_hi = 0;
3453 int found_si = 0;
3454 int found_di = 0;
3455 int hi_align = 2;
3456 int si_align = 2;
3457 int leading_mova = num_mova;
3458 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3459 int si_limit;
3460 int hi_limit;
3462 /* For HImode: range is 510, add 4 because pc counts from address of
3463 second instruction after this one, subtract 2 for the jump instruction
3464 that we may need to emit before the table, subtract 2 for the instruction
3465 that fills the jump delay slot (in very rare cases, reorg will take an
3466 instruction from after the constant pool or will leave the delay slot
3467 empty). This gives 510.
3468 For SImode: range is 1020, add 4 because pc counts from address of
3469 second instruction after this one, subtract 2 in case pc is 2 byte
3470 aligned, subtract 2 for the jump instruction that we may need to emit
3471 before the table, subtract 2 for the instruction that fills the jump
3472 delay slot. This gives 1018. */
3474 /* The branch will always be shortened now that the reference address for
3475 forward branches is the successor address, thus we need no longer make
3476 adjustments to the [sh]i_limit for -O0. */
3478 si_limit = 1018;
3479 hi_limit = 510;
3481 while (from && count_si < si_limit && count_hi < hi_limit)
3483 int inc = get_attr_length (from);
3484 int new_align = 1;
3486 if (GET_CODE (from) == CODE_LABEL)
3488 if (optimize)
3489 new_align = 1 << label_to_alignment (from);
3490 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3491 new_align = 1 << barrier_align (from);
3492 else
3493 new_align = 1;
3494 inc = 0;
3497 if (GET_CODE (from) == BARRIER)
3500 found_barrier = from;
3502 /* If we are at the end of the function, or in front of an alignment
3503 instruction, we need not insert an extra alignment. We prefer
3504 this kind of barrier. */
3505 if (barrier_align (from) > 2)
3506 good_barrier = from;
3509 if (broken_move (from))
3511 rtx pat, src, dst;
3512 enum machine_mode mode;
3514 pat = PATTERN (from);
3515 if (GET_CODE (pat) == PARALLEL)
3516 pat = XVECEXP (pat, 0, 0);
3517 src = SET_SRC (pat);
3518 dst = SET_DEST (pat);
3519 mode = GET_MODE (dst);
3521 /* We must explicitly check the mode, because sometimes the
3522 front end will generate code to load unsigned constants into
3523 HImode targets without properly sign extending them. */
3524 if (mode == HImode
3525 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3527 found_hi += 2;
3528 /* We put the short constants before the long constants, so
3529 we must count the length of short constants in the range
3530 for the long constants. */
3531 /* ??? This isn't optimal, but is easy to do. */
3532 si_limit -= 2;
3534 else
3536 /* We dump DF/DI constants before SF/SI ones, because
3537 the limit is the same, but the alignment requirements
3538 are higher. We may waste up to 4 additional bytes
3539 for alignment, and the DF/DI constant may have
3540 another SF/SI constant placed before it. */
3541 if (TARGET_SHCOMPACT
3542 && ! found_di
3543 && (mode == DFmode || mode == DImode))
3545 found_di = 1;
3546 si_limit -= 8;
3548 while (si_align > 2 && found_si + si_align - 2 > count_si)
3549 si_align >>= 1;
3550 if (found_si > count_si)
3551 count_si = found_si;
3552 found_si += GET_MODE_SIZE (mode);
3553 if (num_mova)
3554 si_limit -= GET_MODE_SIZE (mode);
3558 if (mova_p (from))
3560 if (! num_mova++)
3562 leading_mova = 0;
3563 mova = from;
3564 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
3566 if (found_si > count_si)
3567 count_si = found_si;
3569 else if (GET_CODE (from) == JUMP_INSN
3570 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3571 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3573 if (num_mova)
3574 num_mova--;
3575 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3577 /* We have just passed the barrier in front of the
3578 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3579 the ADDR_DIFF_VEC is accessed as data, just like our pool
3580 constants, this is a good opportunity to accommodate what
3581 we have gathered so far.
3582 If we waited any longer, we could end up at a barrier in
3583 front of code, which gives worse cache usage for separated
3584 instruction / data caches. */
3585 good_barrier = found_barrier;
3586 break;
3588 else
3590 rtx body = PATTERN (from);
3591 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
3594 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3595 else if (GET_CODE (from) == JUMP_INSN
3596 && ! TARGET_SH2
3597 && ! TARGET_SMALLCODE)
3598 new_align = 4;
3600 if (found_si)
3602 count_si += inc;
3603 if (new_align > si_align)
3605 si_limit -= (count_si - 1) & (new_align - si_align);
3606 si_align = new_align;
3608 count_si = (count_si + new_align - 1) & -new_align;
3610 if (found_hi)
3612 count_hi += inc;
3613 if (new_align > hi_align)
3615 hi_limit -= (count_hi - 1) & (new_align - hi_align);
3616 hi_align = new_align;
3618 count_hi = (count_hi + new_align - 1) & -new_align;
3620 from = NEXT_INSN (from);
3623 if (num_mova)
3625 if (leading_mova)
3627 /* Try as we might, the leading mova is out of range. Change
3628 it into a load (which will become a pcload) and retry. */
3629 fixup_mova (mova);
3630 return find_barrier (0, 0, mova);
3632 else
3634 /* Insert the constant pool table before the mova instruction,
3635 to prevent the mova label reference from going out of range. */
3636 from = mova;
3637 good_barrier = found_barrier = barrier_before_mova;
3641 if (found_barrier)
3643 if (good_barrier && next_real_insn (found_barrier))
3644 found_barrier = good_barrier;
3646 else
3648 /* We didn't find a barrier in time to dump our stuff,
3649 so we'll make one. */
3650 rtx label = gen_label_rtx ();
3652 /* If we exceeded the range, then we must back up over the last
3653 instruction we looked at. Otherwise, we just need to undo the
3654 NEXT_INSN at the end of the loop. */
3655 if (count_hi > hi_limit || count_si > si_limit)
3656 from = PREV_INSN (PREV_INSN (from));
3657 else
3658 from = PREV_INSN (from);
3660 /* Walk back to be just before any jump or label.
3661 Putting it before a label reduces the number of times the branch
3662 around the constant pool table will be hit. Putting it before
3663 a jump makes it more likely that the bra delay slot will be
3664 filled. */
3665 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
3666 || GET_CODE (from) == CODE_LABEL)
3667 from = PREV_INSN (from);
3669 from = emit_jump_insn_after (gen_jump (label), from);
3670 JUMP_LABEL (from) = label;
3671 LABEL_NUSES (label) = 1;
3672 found_barrier = emit_barrier_after (from);
3673 emit_label_after (label, found_barrier);
3676 return found_barrier;
3679 /* If the instruction INSN is implemented by a special function, and we can
3680 positively find the register that is used to call the sfunc, and this
3681 register is not used anywhere else in this instruction - except as the
3682 destination of a set, return this register; else, return 0. */
3684 sfunc_uses_reg (rtx insn)
3686 int i;
3687 rtx pattern, part, reg_part, reg;
3689 if (GET_CODE (insn) != INSN)
3690 return 0;
3691 pattern = PATTERN (insn);
3692 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
3693 return 0;
3695 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3697 part = XVECEXP (pattern, 0, i);
3698 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
3699 reg_part = part;
3701 if (! reg_part)
3702 return 0;
3703 reg = XEXP (reg_part, 0);
3704 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
3706 part = XVECEXP (pattern, 0, i);
3707 if (part == reg_part || GET_CODE (part) == CLOBBER)
3708 continue;
3709 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
3710 && GET_CODE (SET_DEST (part)) == REG)
3711 ? SET_SRC (part) : part)))
3712 return 0;
3714 return reg;
3717 /* See if the only way in which INSN uses REG is by calling it, or by
3718 setting it while calling it. Set *SET to a SET rtx if the register
3719 is set by INSN. */
3721 static int
3722 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
3724 rtx pattern, reg2;
3726 *set = NULL_RTX;
3728 reg2 = sfunc_uses_reg (insn);
3729 if (reg2 && REGNO (reg2) == REGNO (reg))
3731 pattern = single_set (insn);
3732 if (pattern
3733 && GET_CODE (SET_DEST (pattern)) == REG
3734 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3735 *set = pattern;
3736 return 0;
3738 if (GET_CODE (insn) != CALL_INSN)
3740 /* We don't use rtx_equal_p because we don't care if the mode is
3741 different. */
3742 pattern = single_set (insn);
3743 if (pattern
3744 && GET_CODE (SET_DEST (pattern)) == REG
3745 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3747 rtx par, part;
3748 int i;
3750 *set = pattern;
3751 par = PATTERN (insn);
3752 if (GET_CODE (par) == PARALLEL)
3753 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
3755 part = XVECEXP (par, 0, i);
3756 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
3757 return 1;
3759 return reg_mentioned_p (reg, SET_SRC (pattern));
3762 return 1;
3765 pattern = PATTERN (insn);
3767 if (GET_CODE (pattern) == PARALLEL)
3769 int i;
3771 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3772 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
3773 return 1;
3774 pattern = XVECEXP (pattern, 0, 0);
3777 if (GET_CODE (pattern) == SET)
3779 if (reg_mentioned_p (reg, SET_DEST (pattern)))
3781 /* We don't use rtx_equal_p, because we don't care if the
3782 mode is different. */
3783 if (GET_CODE (SET_DEST (pattern)) != REG
3784 || REGNO (reg) != REGNO (SET_DEST (pattern)))
3785 return 1;
3787 *set = pattern;
3790 pattern = SET_SRC (pattern);
3793 if (GET_CODE (pattern) != CALL
3794 || GET_CODE (XEXP (pattern, 0)) != MEM
3795 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
3796 return 1;
3798 return 0;
3801 /* Given a X, a pattern of an insn or a part of it, return a mask of used
3802 general registers. Bits 0..15 mean that the respective registers
3803 are used as inputs in the instruction. Bits 16..31 mean that the
3804 registers 0..15, respectively, are used as outputs, or are clobbered.
3805 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
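/* E.g. for the body of an "add r1,r4" insn, (set (reg:SI 4)
   (plus:SI (reg:SI 4) (reg:SI 1))), this returns bits 1 and 4 for the
   inputs plus bit 4 + 16 for the output, i.e. 0x100012.  */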
3807 regs_used (rtx x, int is_dest)
3809 enum rtx_code code;
3810 const char *fmt;
3811 int i, used = 0;
3813 if (! x)
3814 return used;
3815 code = GET_CODE (x);
3816 switch (code)
3818 case REG:
3819 if (REGNO (x) < 16)
3820 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3821 << (REGNO (x) + is_dest));
3822 return 0;
3823 case SUBREG:
3825 rtx y = SUBREG_REG (x);
3827 if (GET_CODE (y) != REG)
3828 break;
3829 if (REGNO (y) < 16)
3830 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3831 << (REGNO (y) +
3832 subreg_regno_offset (REGNO (y),
3833 GET_MODE (y),
3834 SUBREG_BYTE (x),
3835 GET_MODE (x)) + is_dest));
3836 return 0;
3838 case SET:
3839 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
3840 case RETURN:
3841 /* If there was a return value, it must have been indicated with USE. */
3842 return 0x00ffff00;
3843 case CLOBBER:
3844 is_dest = 1;
3845 break;
3846 case MEM:
3847 is_dest = 0;
3848 break;
3849 case CALL:
3850 used |= 0x00ff00f0;
3851 break;
3852 default:
3853 break;
3856 fmt = GET_RTX_FORMAT (code);
3858 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3860 if (fmt[i] == 'E')
3862 register int j;
3863 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3864 used |= regs_used (XVECEXP (x, i, j), is_dest);
3866 else if (fmt[i] == 'e')
3867 used |= regs_used (XEXP (x, i), is_dest);
3869 return used;
3872 /* Create an instruction that prevents redirection of a conditional branch
3873 to the destination of the JUMP with address ADDR.
3874 If the branch needs to be implemented as an indirect jump, try to find
3875 a scratch register for it.
3876 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3877 If any preceding insn that doesn't fit into a delay slot is good enough,
3878 pass 1. Pass 2 if a definite blocking insn is needed.
3879 -1 is used internally to avoid deep recursion.
3880 If a blocking instruction is made or recognized, return it. */
3882 static rtx
3883 gen_block_redirect (rtx jump, int addr, int need_block)
3885 int dead = 0;
3886 rtx prev = prev_nonnote_insn (jump);
3887 rtx dest;
3889 /* First, check if we already have an instruction that satisfies our need. */
3890 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3892 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3893 return prev;
3894 if (GET_CODE (PATTERN (prev)) == USE
3895 || GET_CODE (PATTERN (prev)) == CLOBBER
3896 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3897 prev = jump;
3898 else if ((need_block &= ~1) < 0)
3899 return prev;
3900 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
3901 need_block = 0;
3903 if (GET_CODE (PATTERN (jump)) == RETURN)
3905 if (! need_block)
3906 return prev;
3907 /* Reorg even does nasty things with return insns that cause branches
3908 to go out of range - see find_end_label and callers. */
3909 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
3911 /* We can't use JUMP_LABEL here because it might be undefined
3912 when not optimizing. */
3913 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
3914 /* If the branch is out of range, try to find a scratch register for it. */
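/* The unsigned comparison below flags the branch as out of range when the
   destination lies more than 4098 bytes ahead of, or more than 4092 bytes
   behind, ADDR.  */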
3915 if (optimize
3916 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3917 > 4092 + 4098))
3919 rtx scan;
3920 /* Don't look for the stack pointer as a scratch register;
3921 it would cause trouble if an interrupt occurred. */
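/* 0x7fff covers r0..r14; bit 15, the stack pointer, is left clear.  */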
3922 unsigned try = 0x7fff, used;
3923 int jump_left = flag_expensive_optimizations + 1;
3925 /* It is likely that the most recent eligible instruction is wanted for
3926 the delay slot. Therefore, find out which registers it uses, and
3927 try to avoid using them. */
3929 for (scan = jump; (scan = PREV_INSN (scan)); )
3931 enum rtx_code code;
3933 if (INSN_DELETED_P (scan))
3934 continue;
3935 code = GET_CODE (scan);
3936 if (code == CODE_LABEL || code == JUMP_INSN)
3937 break;
3938 if (code == INSN
3939 && GET_CODE (PATTERN (scan)) != USE
3940 && GET_CODE (PATTERN (scan)) != CLOBBER
3941 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
3943 try &= ~regs_used (PATTERN (scan), 0);
3944 break;
3947 for (used = dead = 0, scan = JUMP_LABEL (jump);
3948 (scan = NEXT_INSN (scan)); )
3950 enum rtx_code code;
3952 if (INSN_DELETED_P (scan))
3953 continue;
3954 code = GET_CODE (scan);
3955 if (INSN_P (scan))
3957 used |= regs_used (PATTERN (scan), 0);
3958 if (code == CALL_INSN)
3959 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
3960 dead |= (used >> 16) & ~used;
3961 if (dead & try)
3963 dead &= try;
3964 break;
3966 if (code == JUMP_INSN)
3968 if (jump_left-- && simplejump_p (scan))
3969 scan = JUMP_LABEL (scan);
3970 else
3971 break;
3975 /* Mask out the stack pointer again, in case it was
3976 the only 'free' register we have found. */
3977 dead &= 0x7fff;
3979 /* If the immediate destination is still in range, check for possible
3980 threading with a jump beyond the delay slot insn.
3981 Don't check if we are called recursively; the jump has been or will be
3982 checked in a different invocation in that case. */
3984 else if (optimize && need_block >= 0)
3986 rtx next = next_active_insn (next_active_insn (dest));
3987 if (next && GET_CODE (next) == JUMP_INSN
3988 && GET_CODE (PATTERN (next)) == SET
3989 && recog_memoized (next) == CODE_FOR_jump_compact)
3991 dest = JUMP_LABEL (next);
3992 if (dest
3993 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3994 > 4092 + 4098))
3995 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
3999 if (dead)
4001 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
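/* dead & -dead isolates the lowest set bit, so exact_log2 yields the
   lowest-numbered register that was found to be dead.  */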
4003 /* It would be nice if we could convert the jump into an indirect
4004 jump / far branch right now, thus exposing all constituent
4005 instructions to further optimization. However, reorg uses
4006 simplejump_p to determine if there is an unconditional jump where
4007 it should try to schedule instructions from the target of the
4008 branch; simplejump_p fails for indirect jumps even if they have
4009 a JUMP_LABEL. */
4010 rtx insn = emit_insn_before (gen_indirect_jump_scratch
4011 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
4012 , jump);
4013 /* ??? We would like this to have the scope of the jump, but that
4014 scope will change when a delay slot insn of an inner scope is added.
4015 Hence, after delay slot scheduling, we'll have to expect
4016 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
4017 the jump. */
4019 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
4020 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
4021 return insn;
4023 else if (need_block)
4024 /* We can't use JUMP_LABEL here because it might be undefined
4025 when not optimizing. */
4026 return emit_insn_before (gen_block_branch_redirect
4027 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
4028 , jump);
4029 return prev;
4032 #define CONDJUMP_MIN -252
4033 #define CONDJUMP_MAX 262
4034 struct far_branch
4036 /* A label (to be placed) in front of the jump
4037 that jumps to our ultimate destination. */
4038 rtx near_label;
4039 /* Where we are going to insert it if we cannot move the jump any farther,
4040 or the jump itself if we have picked up an existing jump. */
4041 rtx insert_place;
4042 /* The ultimate destination. */
4043 rtx far_label;
4044 struct far_branch *prev;
4045 /* If the branch has already been created, its address;
4046 else the address of its first prospective user. */
4047 int address;
4050 static void gen_far_branch (struct far_branch *);
4051 enum mdep_reorg_phase_e mdep_reorg_phase;
4052 static void
4053 gen_far_branch (struct far_branch *bp)
4055 rtx insn = bp->insert_place;
4056 rtx jump;
4057 rtx label = gen_label_rtx ();
4058 int ok;
4060 emit_label_after (label, insn);
4061 if (bp->far_label)
4063 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
4064 LABEL_NUSES (bp->far_label)++;
4066 else
4067 jump = emit_jump_insn_after (gen_return (), insn);
4068 /* Emit a barrier so that reorg knows that any following instructions
4069 are not reachable via a fall-through path.
4070 But don't do this when not optimizing, since we wouldn't suppress the
4071 alignment for the barrier then, and could end up with out-of-range
4072 pc-relative loads. */
4073 if (optimize)
4074 emit_barrier_after (jump);
4075 emit_label_after (bp->near_label, insn);
4076 JUMP_LABEL (jump) = bp->far_label;
4077 ok = invert_jump (insn, label, 1);
4078 gcc_assert (ok);
4080 /* If we are branching around a jump (rather than a return), prevent
4081 reorg from using an insn from the jump target as the delay slot insn -
4082 when reorg did this, it pessimized code (we'd rather hide the delay slot)
4083 and it could cause branches to go out of range. */
4084 if (bp->far_label)
4085 (emit_insn_after
4086 (gen_stuff_delay_slot
4087 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
4088 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
4089 insn));
4090 /* Prevent reorg from undoing our splits. */
4091 gen_block_redirect (jump, bp->address += 2, 2);
4094 /* Fix up ADDR_DIFF_VECs. */
4095 void
4096 fixup_addr_diff_vecs (rtx first)
4098 rtx insn;
4100 for (insn = first; insn; insn = NEXT_INSN (insn))
4102 rtx vec_lab, pat, prev, prevpat, x, braf_label;
4104 if (GET_CODE (insn) != JUMP_INSN
4105 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
4106 continue;
4107 pat = PATTERN (insn);
4108 vec_lab = XEXP (XEXP (pat, 0), 0);
4110 /* Search the matching casesi_jump_2. */
4111 for (prev = vec_lab; ; prev = PREV_INSN (prev))
4113 if (GET_CODE (prev) != JUMP_INSN)
4114 continue;
4115 prevpat = PATTERN (prev);
4116 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
4117 continue;
4118 x = XVECEXP (prevpat, 0, 1);
4119 if (GET_CODE (x) != USE)
4120 continue;
4121 x = XEXP (x, 0);
4122 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
4123 break;
4125 /* FIXME: This is a bug in the optimizer, but it seems harmless
4126 to just avoid panicking. */
4127 if (!prev)
4128 continue;
4130 /* Emit the reference label of the braf where it belongs, right after
4131 the casesi_jump_2 (i.e. braf). */
4132 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
4133 emit_label_after (braf_label, prev);
4135 /* Fix up the ADDR_DIFF_VEC to be relative
4136 to the reference address of the braf. */
4137 XEXP (XEXP (pat, 0), 0) = braf_label;
4141 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
4142 a barrier. Return the base 2 logarithm of the desired alignment. */
4144 barrier_align (rtx barrier_or_label)
4146 rtx next = next_real_insn (barrier_or_label), pat, prev;
4147 int slot, credit, jump_to_next = 0;
4149 if (! next)
4150 return 0;
4152 pat = PATTERN (next);
4154 if (GET_CODE (pat) == ADDR_DIFF_VEC)
4155 return 2;
4157 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
4158 /* This is a barrier in front of a constant table. */
4159 return 0;
4161 prev = prev_real_insn (barrier_or_label);
4162 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
4164 pat = PATTERN (prev);
4165 /* If this is a very small table, we want to keep the alignment after
4166 the table to the minimum for proper code alignment. */
4167 return ((TARGET_SMALLCODE
4168 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
4169 <= (unsigned) 1 << (CACHE_LOG - 2)))
4170 ? 1 << TARGET_SHMEDIA : align_jumps_log);
4173 if (TARGET_SMALLCODE)
4174 return 0;
4176 if (! TARGET_SH2 || ! optimize)
4177 return align_jumps_log;
4179 /* When fixing up pcloads, a constant table might be inserted just before
4180 the basic block that ends with the barrier. Thus, we can't trust the
4181 instruction lengths before that. */
4182 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
4184 /* Check if there is an immediately preceding branch to the insn beyond
4185 the barrier. We must weigh the cost of discarding useful information
4186 from the current cache line when executing this branch and there is
4187 an alignment, against that of fetching unneeded insns in front of the
4188 branch target when there is no alignment. */
4190 /* There are two delay_slot cases to consider. One is the simple case
4191 where the preceding branch is to the insn beyond the barrier (simple
4192 delay slot filling), and the other is where the preceding branch has
4193 a delay slot that is a duplicate of the insn after the barrier
4194 (fill_eager_delay_slots) and the branch is to the insn after the insn
4195 after the barrier. */
4197 /* PREV is presumed to be the JUMP_INSN for the barrier under
4198 investigation. Skip to the insn before it. */
4199 prev = prev_real_insn (prev);
4201 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
4202 credit >= 0 && prev && GET_CODE (prev) == INSN;
4203 prev = prev_real_insn (prev))
4205 jump_to_next = 0;
4206 if (GET_CODE (PATTERN (prev)) == USE
4207 || GET_CODE (PATTERN (prev)) == CLOBBER)
4208 continue;
4209 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
4211 prev = XVECEXP (PATTERN (prev), 0, 1);
4212 if (INSN_UID (prev) == INSN_UID (next))
4214 /* Delay slot was filled with insn at jump target. */
4215 jump_to_next = 1;
4216 continue;
4220 if (slot &&
4221 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4222 slot = 0;
4223 credit -= get_attr_length (prev);
4225 if (prev
4226 && GET_CODE (prev) == JUMP_INSN
4227 && JUMP_LABEL (prev))
4229 rtx x;
4230 if (jump_to_next
4231 || next_real_insn (JUMP_LABEL (prev)) == next
4232 /* If relax_delay_slots() decides NEXT was redundant
4233 with some previous instruction, it will have
4234 redirected PREV's jump to the following insn. */
4235 || JUMP_LABEL (prev) == next_nonnote_insn (next)
4236 /* There is no upper bound on redundant instructions
4237 that might have been skipped, but we must not put an
4238 alignment where none had been before. */
4239 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
4240 (INSN_P (x)
4241 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
4242 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
4243 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
4245 rtx pat = PATTERN (prev);
4246 if (GET_CODE (pat) == PARALLEL)
4247 pat = XVECEXP (pat, 0, 0);
4248 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
4249 return 0;
4254 return align_jumps_log;
4257 /* If we are inside a phony loop, almost any kind of label can turn up as the
4258 first one in the loop. Aligning a braf label causes incorrect switch
4259 destination addresses; we can detect braf labels because they are
4260 followed by a BARRIER.
4261 Applying loop alignment to small constant or switch tables is a waste
4262 of space, so we suppress this too. */
4264 sh_loop_align (rtx label)
4266 rtx next = label;
4269 next = next_nonnote_insn (next);
4270 while (next && GET_CODE (next) == CODE_LABEL);
4272 if (! next
4273 || ! INSN_P (next)
4274 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
4275 || recog_memoized (next) == CODE_FOR_consttable_2)
4276 return 0;
4278 return align_loops_log;
4281 /* Do a final pass over the function, just before delayed branch
4282 scheduling. */
4284 static void
4285 sh_reorg (void)
4287 rtx first, insn, mova = NULL_RTX;
4288 int num_mova;
4289 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
4290 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
4292 first = get_insns ();
4294 /* We must split call insns before introducing `mova's. If we're
4295 optimizing, they'll have already been split. Otherwise, make
4296 sure we don't split them too late. */
4297 if (! optimize)
4298 split_all_insns_noflow ();
4300 if (TARGET_SHMEDIA)
4301 return;
4303 /* If relaxing, generate pseudo-ops to associate function calls with
4304 the symbols they call. It does no harm to not generate these
4305 pseudo-ops. However, when we can generate them, it enables the
4306 linker to potentially relax the jsr to a bsr, and eliminate the
4307 register load and, possibly, the constant pool entry. */
4309 mdep_reorg_phase = SH_INSERT_USES_LABELS;
4310 if (TARGET_RELAX)
4312 /* Remove all REG_LABEL notes. We want to use them for our own
4313 purposes. This works because none of the remaining passes
4314 need to look at them.
4316 ??? But it may break in the future. We should use a machine
4317 dependent REG_NOTE, or some other approach entirely. */
4318 for (insn = first; insn; insn = NEXT_INSN (insn))
4320 if (INSN_P (insn))
4322 rtx note;
4324 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
4325 remove_note (insn, note);
4329 for (insn = first; insn; insn = NEXT_INSN (insn))
4331 rtx pattern, reg, link, set, scan, dies, label;
4332 int rescan = 0, foundinsn = 0;
4334 if (GET_CODE (insn) == CALL_INSN)
4336 pattern = PATTERN (insn);
4338 if (GET_CODE (pattern) == PARALLEL)
4339 pattern = XVECEXP (pattern, 0, 0);
4340 if (GET_CODE (pattern) == SET)
4341 pattern = SET_SRC (pattern);
4343 if (GET_CODE (pattern) != CALL
4344 || GET_CODE (XEXP (pattern, 0)) != MEM)
4345 continue;
4347 reg = XEXP (XEXP (pattern, 0), 0);
4349 else
4351 reg = sfunc_uses_reg (insn);
4352 if (! reg)
4353 continue;
4356 if (GET_CODE (reg) != REG)
4357 continue;
4359 /* This is a function call via REG. If the only uses of REG
4360 between the time that it is set and the time that it dies
4361 are in function calls, then we can associate all the
4362 function calls with the setting of REG. */
4364 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
4366 if (REG_NOTE_KIND (link) != 0)
4367 continue;
4368 set = single_set (XEXP (link, 0));
4369 if (set && rtx_equal_p (reg, SET_DEST (set)))
4371 link = XEXP (link, 0);
4372 break;
4376 if (! link)
4378 /* ??? Sometimes global register allocation will have
4379 deleted the insn pointed to by LOG_LINKS. Try
4380 scanning backward to find where the register is set. */
4381 for (scan = PREV_INSN (insn);
4382 scan && GET_CODE (scan) != CODE_LABEL;
4383 scan = PREV_INSN (scan))
4385 if (! INSN_P (scan))
4386 continue;
4388 if (! reg_mentioned_p (reg, scan))
4389 continue;
4391 if (noncall_uses_reg (reg, scan, &set))
4392 break;
4394 if (set)
4396 link = scan;
4397 break;
4402 if (! link)
4403 continue;
4405 /* The register is set at LINK. */
4407 /* We can only optimize the function call if the register is
4408 being set to a symbol. In theory, we could sometimes
4409 optimize calls to a constant location, but the assembler
4410 and linker do not support that at present. */
4411 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4412 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4413 continue;
4415 /* Scan forward from LINK to the place where REG dies, and
4416 make sure that the only insns which use REG are
4417 themselves function calls. */
4419 /* ??? This doesn't work for call targets that were allocated
4420 by reload, since there may not be a REG_DEAD note for the
4421 register. */
4423 dies = NULL_RTX;
4424 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4426 rtx scanset;
4428 /* Don't try to trace forward past a CODE_LABEL if we haven't
4429 seen INSN yet. Ordinarily, we will only find the setting insn
4430 in LOG_LINKS if it is in the same basic block. However,
4431 cross-jumping can insert code labels in between the load and
4432 the call, and can result in situations where a single call
4433 insn may have two targets depending on where we came from. */
4435 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4436 break;
4438 if (! INSN_P (scan))
4439 continue;
4441 /* Don't try to trace forward past a JUMP. To optimize
4442 safely, we would have to check that all the
4443 instructions at the jump destination did not use REG. */
4445 if (GET_CODE (scan) == JUMP_INSN)
4446 break;
4448 if (! reg_mentioned_p (reg, scan))
4449 continue;
4451 if (noncall_uses_reg (reg, scan, &scanset))
4452 break;
4454 if (scan == insn)
4455 foundinsn = 1;
4457 if (scan != insn
4458 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4460 /* There is a function call to this register other
4461 than the one we are checking. If we optimize
4462 this call, we need to rescan again below. */
4463 rescan = 1;
4466 /* ??? We shouldn't have to worry about SCANSET here.
4467 We should just be able to check for a REG_DEAD note
4468 on a function call. However, the REG_DEAD notes are
4469 apparently not dependable around libcalls; c-torture
4470 execute/920501-2 is a test case. If SCANSET is set,
4471 then this insn sets the register, so it must have
4472 died earlier. Unfortunately, this will only handle
4473 the cases in which the register is, in fact, set in a
4474 later insn. */
4476 /* ??? We shouldn't have to use FOUNDINSN here.
4477 However, the LOG_LINKS fields are apparently not
4478 entirely reliable around libcalls;
4479 newlib/libm/math/e_pow.c is a test case. Sometimes
4480 an insn will appear in LOG_LINKS even though it is
4481 not the most recent insn which sets the register. */
4483 if (foundinsn
4484 && (scanset
4485 || find_reg_note (scan, REG_DEAD, reg)))
4487 dies = scan;
4488 break;
4492 if (! dies)
4494 /* Either there was a branch, or some insn used REG
4495 other than as a function call address. */
4496 continue;
4499 /* Create a code label, and put it in a REG_LABEL note on
4500 the insn which sets the register, and on each call insn
4501 which uses the register. In final_prescan_insn we look
4502 for the REG_LABEL notes, and output the appropriate label
4503 or pseudo-op. */
4505 label = gen_label_rtx ();
4506 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
4507 REG_NOTES (link));
4508 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
4509 REG_NOTES (insn));
4510 if (rescan)
4512 scan = link;
4515 rtx reg2;
4517 scan = NEXT_INSN (scan);
4518 if (scan != insn
4519 && ((GET_CODE (scan) == CALL_INSN
4520 && reg_mentioned_p (reg, scan))
4521 || ((reg2 = sfunc_uses_reg (scan))
4522 && REGNO (reg2) == REGNO (reg))))
4523 REG_NOTES (scan)
4524 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
4526 while (scan != dies);
4531 if (TARGET_SH2)
4532 fixup_addr_diff_vecs (first);
4534 if (optimize)
4536 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4537 shorten_branches (first);
4539 /* Scan the function looking for move instructions which have to be
4540 changed to pc-relative loads and insert the literal tables. */
4542 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4543 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4545 if (mova_p (insn))
4547 /* ??? basic block reordering can move a switch table dispatch
4548 below the switch table. Check if that has happened.
4549 We only have the addresses available when optimizing; but then,
4550 this check shouldn't be needed when not optimizing. */
4551 rtx label_ref = XVECEXP (SET_SRC (PATTERN (insn)), 0, 0);
4552 if (optimize
4553 && (INSN_ADDRESSES (INSN_UID (insn))
4554 > INSN_ADDRESSES (INSN_UID (XEXP (label_ref, 0)))))
4556 /* Change the mova into a load.
4557 broken_move will then return true for it. */
4558 fixup_mova (insn);
4560 else if (! num_mova++)
4561 mova = insn;
4563 else if (GET_CODE (insn) == JUMP_INSN
4564 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4565 && num_mova)
4567 rtx scan;
4568 int total;
4570 num_mova--;
4572 /* Some code might have been inserted between the mova and
4573 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4574 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4575 total += get_attr_length (scan);
4577 /* The range of mova is 1020; add 4 because the pc counts from the address
4578 of the second instruction after this one, and subtract 2 in case the pc
4579 is 2 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
4580 cancels out with alignment effects of the mova itself. */
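/* That is, 1020 + 4 - 2 = 1022, the limit tested below.  */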
4581 if (total > 1022)
4583 /* Change the mova into a load, and restart scanning
4584 there. broken_move will then return true for mova. */
4585 fixup_mova (mova);
4586 insn = mova;
4589 if (broken_move (insn)
4590 || (GET_CODE (insn) == INSN
4591 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
4593 rtx scan;
4594 /* Scan ahead looking for a barrier to stick the constant table
4595 behind. */
4596 rtx barrier = find_barrier (num_mova, mova, insn);
4597 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4598 int need_aligned_label = 0;
4600 if (num_mova && ! mova_p (mova))
4602 /* find_barrier had to change the first mova into a
4603 pcload; thus, we have to start with this new pcload. */
4604 insn = mova;
4605 num_mova = 0;
4607 /* Now find all the moves between the points and modify them. */
4608 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
4610 if (GET_CODE (scan) == CODE_LABEL)
4611 last_float = 0;
4612 if (GET_CODE (scan) == INSN
4613 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
4614 need_aligned_label = 1;
4615 if (broken_move (scan))
4617 rtx *patp = &PATTERN (scan), pat = *patp;
4618 rtx src, dst;
4619 rtx lab;
4620 rtx newsrc;
4621 enum machine_mode mode;
4623 if (GET_CODE (pat) == PARALLEL)
4624 patp = &XVECEXP (pat, 0, 0), pat = *patp;
4625 src = SET_SRC (pat);
4626 dst = SET_DEST (pat);
4627 mode = GET_MODE (dst);
4629 if (mode == SImode && hi_const (src)
4630 && REGNO (dst) != FPUL_REG)
4632 int offset = 0;
4634 mode = HImode;
4635 while (GET_CODE (dst) == SUBREG)
4637 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
4638 GET_MODE (SUBREG_REG (dst)),
4639 SUBREG_BYTE (dst),
4640 GET_MODE (dst));
4641 dst = SUBREG_REG (dst);
4643 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
4645 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
4647 /* This must be an insn that clobbers r0. */
4648 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
4649 XVECLEN (PATTERN (scan), 0)
4650 - 1);
4651 rtx clobber = *clobberp;
4653 gcc_assert (GET_CODE (clobber) == CLOBBER
4654 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
4656 if (last_float
4657 && reg_set_between_p (r0_rtx, last_float_move, scan))
4658 last_float = 0;
4659 if (last_float
4660 && TARGET_SHCOMPACT
4661 && GET_MODE_SIZE (mode) != 4
4662 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
4663 last_float = 0;
4664 lab = add_constant (src, mode, last_float);
4665 if (lab)
4666 emit_insn_before (gen_mova (lab), scan);
4667 else
4669 /* There will be a REG_UNUSED note for r0 on
4670 LAST_FLOAT_MOVE; we have to change it to REG_INC,
4671 otherwise reorg:mark_target_live_regs will not
4672 consider r0 to be used, and we would end up with a delay
4673 slot insn in front of SCAN that clobbers r0. */
4674 rtx note
4675 = find_regno_note (last_float_move, REG_UNUSED, 0);
4677 /* If we are not optimizing, then there may not be
4678 a note. */
4679 if (note)
4680 PUT_MODE (note, REG_INC);
4682 *last_float_addr = r0_inc_rtx;
4684 last_float_move = scan;
4685 last_float = src;
4686 newsrc = gen_const_mem (mode,
4687 (((TARGET_SH4 && ! TARGET_FMOVD)
4688 || REGNO (dst) == FPUL_REG)
4689 ? r0_inc_rtx
4690 : r0_rtx));
4691 last_float_addr = &XEXP (newsrc, 0);
4693 /* Remove the clobber of r0. */
4694 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
4695 gen_rtx_SCRATCH (Pmode));
4697 /* This is a mova needing a label. Create it. */
4698 else if (GET_CODE (src) == UNSPEC
4699 && XINT (src, 1) == UNSPEC_MOVA
4700 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
4702 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
4703 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4704 newsrc = gen_rtx_UNSPEC (SImode,
4705 gen_rtvec (1, newsrc),
4706 UNSPEC_MOVA);
4708 else
4710 lab = add_constant (src, mode, 0);
4711 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4712 newsrc = gen_const_mem (mode, newsrc);
4714 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
4715 INSN_CODE (scan) = -1;
4718 dump_table (need_aligned_label ? insn : 0, barrier);
4719 insn = barrier;
4723 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
4724 INSN_ADDRESSES_FREE ();
4725 split_branches (first);
4727 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4728 also has an effect on the register that holds the address of the sfunc.
4729 Insert an extra dummy insn in front of each sfunc that pretends to
4730 use this register. */
4731 if (flag_delayed_branch)
4733 for (insn = first; insn; insn = NEXT_INSN (insn))
4735 rtx reg = sfunc_uses_reg (insn);
4737 if (! reg)
4738 continue;
4739 emit_insn_before (gen_use_sfunc_addr (reg), insn);
4742 #if 0
4743 /* fpscr is not actually a user variable, but we pretend it is for the
4744 sake of the previous optimization passes, since we want it handled like
4745 one. However, we don't have any debugging information for it, so turn
4746 it into a non-user variable now. */
4747 if (TARGET_SH4)
4748 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4749 #endif
4750 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
4754 get_dest_uid (rtx label, int max_uid)
4756 rtx dest = next_real_insn (label);
4757 int dest_uid;
4758 if (! dest)
4759 /* This can happen for an undefined label. */
4760 return 0;
4761 dest_uid = INSN_UID (dest);
4762 /* If this is a newly created branch redirection blocking instruction,
4763 we cannot index the branch_uid or insn_addresses arrays with its
4764 uid. But then, we won't need to, because the actual destination is
4765 the following branch. */
4766 while (dest_uid >= max_uid)
4768 dest = NEXT_INSN (dest);
4769 dest_uid = INSN_UID (dest);
4771 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
4772 return 0;
4773 return dest_uid;
4776 /* Split condbranches that are out of range. Also add clobbers for
4777 scratch registers that are needed in far jumps.
4778 We do this before delay slot scheduling, so that it can take our
4779 newly created instructions into account. It also allows us to
4780 find branches with common targets more easily. */
4782 static void
4783 split_branches (rtx first)
4785 rtx insn;
4786 struct far_branch **uid_branch, *far_branch_list = 0;
4787 int max_uid = get_max_uid ();
4788 int ok;
4790 /* Find out which branches are out of range. */
4791 shorten_branches (first);
4793 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
4794 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
4796 for (insn = first; insn; insn = NEXT_INSN (insn))
4797 if (! INSN_P (insn))
4798 continue;
4799 else if (INSN_DELETED_P (insn))
4801 /* Shorten_branches would split this instruction again,
4802 so transform it into a note. */
4803 PUT_CODE (insn, NOTE);
4804 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4805 NOTE_SOURCE_FILE (insn) = 0;
4807 else if (GET_CODE (insn) == JUMP_INSN
4808 /* Don't mess with ADDR_DIFF_VEC */
4809 && (GET_CODE (PATTERN (insn)) == SET
4810 || GET_CODE (PATTERN (insn)) == RETURN))
4812 enum attr_type type = get_attr_type (insn);
4813 if (type == TYPE_CBRANCH)
4815 rtx next, beyond;
4817 if (get_attr_length (insn) > 4)
4819 rtx src = SET_SRC (PATTERN (insn));
4820 rtx olabel = XEXP (XEXP (src, 1), 0);
4821 int addr = INSN_ADDRESSES (INSN_UID (insn));
4822 rtx label = 0;
4823 int dest_uid = get_dest_uid (olabel, max_uid);
4824 struct far_branch *bp = uid_branch[dest_uid];
4826 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4827 the label if the LABEL_NUSES count drops to zero. There is
4828 always a jump_optimize pass that sets these values, but it
4829 proceeds to delete unreferenced code, and then if not
4830 optimizing, to un-delete the deleted instructions, thus
4831 leaving labels with use counts that are too low. */
4832 if (! optimize)
4834 JUMP_LABEL (insn) = olabel;
4835 LABEL_NUSES (olabel)++;
4837 if (! bp)
4839 bp = (struct far_branch *) alloca (sizeof *bp);
4840 uid_branch[dest_uid] = bp;
4841 bp->prev = far_branch_list;
4842 far_branch_list = bp;
4843 bp->far_label
4844 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
4845 LABEL_NUSES (bp->far_label)++;
4847 else
4849 label = bp->near_label;
4850 if (! label && bp->address - addr >= CONDJUMP_MIN)
4852 rtx block = bp->insert_place;
4854 if (GET_CODE (PATTERN (block)) == RETURN)
4855 block = PREV_INSN (block);
4856 else
4857 block = gen_block_redirect (block,
4858 bp->address, 2);
4859 label = emit_label_after (gen_label_rtx (),
4860 PREV_INSN (block));
4861 bp->near_label = label;
4863 else if (label && ! NEXT_INSN (label))
4865 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4866 bp->insert_place = insn;
4867 else
4868 gen_far_branch (bp);
4871 if (! label
4872 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4874 bp->near_label = label = gen_label_rtx ();
4875 bp->insert_place = insn;
4876 bp->address = addr;
4878 ok = redirect_jump (insn, label, 1);
4879 gcc_assert (ok);
4881 else
4883 /* get_attr_length (insn) == 2 */
4884 /* Check if we have a pattern where reorg wants to redirect
4885 the branch to a label from an unconditional branch that
4886 is too far away. */
4887 /* We can't use JUMP_LABEL here because it might be undefined
4888 when not optimizing. */
4889 /* A syntax error might cause beyond to be NULL_RTX. */
4890 beyond
4891 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4892 0));
4894 if (beyond
4895 && (GET_CODE (beyond) == JUMP_INSN
4896 || ((beyond = next_active_insn (beyond))
4897 && GET_CODE (beyond) == JUMP_INSN))
4898 && GET_CODE (PATTERN (beyond)) == SET
4899 && recog_memoized (beyond) == CODE_FOR_jump_compact
4900 && ((INSN_ADDRESSES
4901 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4902 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4903 > 252 + 258 + 2))
4904 gen_block_redirect (beyond,
4905 INSN_ADDRESSES (INSN_UID (beyond)), 1);
4908 next = next_active_insn (insn);
4910 if ((GET_CODE (next) == JUMP_INSN
4911 || ((next = next_active_insn (next))
4912 && GET_CODE (next) == JUMP_INSN))
4913 && GET_CODE (PATTERN (next)) == SET
4914 && recog_memoized (next) == CODE_FOR_jump_compact
4915 && ((INSN_ADDRESSES
4916 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
4917 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4918 > 252 + 258 + 2))
4919 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
4921 else if (type == TYPE_JUMP || type == TYPE_RETURN)
4923 int addr = INSN_ADDRESSES (INSN_UID (insn));
4924 rtx far_label = 0;
4925 int dest_uid = 0;
4926 struct far_branch *bp;
4928 if (type == TYPE_JUMP)
4930 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
4931 dest_uid = get_dest_uid (far_label, max_uid);
4932 if (! dest_uid)
4934 /* Parse errors can lead to labels outside
4935 the insn stream. */
4936 if (! NEXT_INSN (far_label))
4937 continue;
4939 if (! optimize)
4941 JUMP_LABEL (insn) = far_label;
4942 LABEL_NUSES (far_label)++;
4944 redirect_jump (insn, NULL_RTX, 1);
4945 far_label = 0;
4948 bp = uid_branch[dest_uid];
4949 if (! bp)
4951 bp = (struct far_branch *) alloca (sizeof *bp);
4952 uid_branch[dest_uid] = bp;
4953 bp->prev = far_branch_list;
4954 far_branch_list = bp;
4955 bp->near_label = 0;
4956 bp->far_label = far_label;
4957 if (far_label)
4958 LABEL_NUSES (far_label)++;
4960 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
4961 if (addr - bp->address <= CONDJUMP_MAX)
4962 emit_label_after (bp->near_label, PREV_INSN (insn));
4963 else
4965 gen_far_branch (bp);
4966 bp->near_label = 0;
4968 else
4969 bp->near_label = 0;
4970 bp->address = addr;
4971 bp->insert_place = insn;
4972 if (! far_label)
4973 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
4974 else
4975 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
4978 /* Generate all pending far branches,
4979 and free our references to the far labels. */
4980 while (far_branch_list)
4982 if (far_branch_list->near_label
4983 && ! NEXT_INSN (far_branch_list->near_label))
4984 gen_far_branch (far_branch_list);
4985 if (optimize
4986 && far_branch_list->far_label
4987 && ! --LABEL_NUSES (far_branch_list->far_label))
4988 delete_insn (far_branch_list->far_label);
4989 far_branch_list = far_branch_list->prev;
4992 /* Instruction length information is no longer valid due to the new
4993 instructions that have been generated. */
4994 init_insn_lengths ();
4997 /* Dump out instruction addresses, which is useful for debugging the
4998 constant pool table stuff.
5000 If relaxing, output the label and pseudo-ops used to link together
5001 calls and the instruction which set the registers. */
5003 /* ??? The addresses printed by this routine for insns are nonsense for
5004 insns which are inside of a sequence where none of the inner insns have
5005 variable length. This is because the second pass of shorten_branches
5006 does not bother to update them. */
5008 void
5009 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
5010 int noperands ATTRIBUTE_UNUSED)
5012 if (TARGET_DUMPISIZE)
5013 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
5015 if (TARGET_RELAX)
5017 rtx note;
5019 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
5020 if (note)
5022 rtx pattern;
5024 pattern = PATTERN (insn);
5025 if (GET_CODE (pattern) == PARALLEL)
5026 pattern = XVECEXP (pattern, 0, 0);
5027 switch (GET_CODE (pattern))
5029 case SET:
5030 if (GET_CODE (SET_SRC (pattern)) != CALL
5031 && get_attr_type (insn) != TYPE_SFUNC)
5033 targetm.asm_out.internal_label
5034 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
5035 break;
5037 /* else FALLTHROUGH */
5038 case CALL:
5039 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
5040 CODE_LABEL_NUMBER (XEXP (note, 0)));
5041 break;
5043 default:
5044 gcc_unreachable ();
5050 /* Dump out any constants accumulated in the final pass. These will
5051 only be labels. */
5053 const char *
5054 output_jump_label_table (void)
5056 int i;
5058 if (pool_size)
5060 fprintf (asm_out_file, "\t.align 2\n");
5061 for (i = 0; i < pool_size; i++)
5063 pool_node *p = &pool_vector[i];
5065 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5066 CODE_LABEL_NUMBER (p->label));
5067 output_asm_insn (".long %O0", &p->value);
5069 pool_size = 0;
5072 return "";
5075 /* A full frame looks like:
5077 arg-5
5078 arg-4
5079 [ if current_function_anonymous_args
5080 arg-3
5081 arg-2
5082 arg-1
5083 arg-0 ]
5084 saved-fp
5085 saved-r10
5086 saved-r11
5087 saved-r12
5088 saved-pr
5089 local-n
5091 local-1
5092 local-0 <- fp points here. */
5094 /* Number of bytes pushed for anonymous args, used to pass information
5095 between expand_prologue and expand_epilogue. */
5097 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
5098 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
5099 for an epilogue and a negative value means that it's for a sibcall
5100 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
5101 all the registers that are about to be restored, and hence dead. */
5103 static void
5104 output_stack_adjust (int size, rtx reg, int epilogue_p,
5105 HARD_REG_SET *live_regs_mask)
5107 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
5108 if (size)
5110 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5112 /* This test is bogus, as output_stack_adjust is used to re-align the
5113 stack. */
5114 #if 0
5115 gcc_assert (!(size % align));
5116 #endif
5118 if (CONST_OK_FOR_ADD (size))
5119 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
5120 /* Try to do it with two partial adjustments; however, we must make
5121 sure that the stack is properly aligned at all times, in case
5122 an interrupt occurs between the two partial adjustments. */
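/* For example, with an 8-byte boundary, a SIZE of 200 is split into
   adjustments of 96 (200 / 2 rounded down to the boundary) and 104;
   both are multiples of the boundary, so the stack stays aligned
   between the two adds.  */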
5123 else if (CONST_OK_FOR_ADD (size / 2 & -align)
5124 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
5126 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
5127 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
5129 else
5131 rtx const_reg;
5132 rtx insn;
5133 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
5134 int i;
5136 /* If TEMP is invalid, we could temporarily save a general
5137 register to MACL. However, there is currently no need
5138 to handle this case, so just die when we see it. */
5139 if (epilogue_p < 0
5140 || current_function_interrupt
5141 || ! call_really_used_regs[temp] || fixed_regs[temp])
5142 temp = -1;
5143 if (temp < 0 && ! current_function_interrupt
5144 && (TARGET_SHMEDIA || epilogue_p >= 0))
5146 HARD_REG_SET temps;
5147 COPY_HARD_REG_SET (temps, call_used_reg_set);
5148 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
5149 if (epilogue_p > 0)
5151 int nreg = 0;
5152 if (current_function_return_rtx)
5154 enum machine_mode mode;
5155 mode = GET_MODE (current_function_return_rtx);
5156 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
5157 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
5159 for (i = 0; i < nreg; i++)
5160 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
5161 if (current_function_calls_eh_return)
5163 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
5164 for (i = 0; i <= 3; i++)
5165 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
5168 if (TARGET_SHMEDIA && epilogue_p < 0)
5169 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
5170 CLEAR_HARD_REG_BIT (temps, i);
5171 if (epilogue_p <= 0)
5173 for (i = FIRST_PARM_REG;
5174 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
5175 CLEAR_HARD_REG_BIT (temps, i);
5176 if (cfun->static_chain_decl != NULL)
5177 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
5179 temp = scavenge_reg (&temps);
5181 if (temp < 0 && live_regs_mask)
5182 temp = scavenge_reg (live_regs_mask);
5183 if (temp < 0)
5185 rtx adj_reg, tmp_reg, mem;
5187 /* If we reached here, the most likely case is the (sibcall)
5188 epilogue for non-SHmedia. Put a special push/pop sequence
5189 for such a case as the last resort. This looks lengthy but
5190 would not be a problem because it seems to be very
5191 rare. */
5193 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
5196 /* ??? There is still the slight possibility that r4 or
5197 r5 have been reserved as fixed registers or assigned
5198 as global registers, and they change during an
5199 interrupt. There are possible ways to handle this:
5201 - If we are adjusting the frame pointer (r14), we can do
5202 with a single temp register and an ordinary push / pop
5203 on the stack.
5204 - Grab any call-used or call-saved registers (i.e. not
5205 fixed or globals) for the temps we need. We might
5206 also grab r14 if we are adjusting the stack pointer.
5207 If we can't find enough available registers, issue
5208 a diagnostic and die - the user must have reserved
5209 way too many registers.
5210 But since all this is rather unlikely to happen and
5211 would require extra testing, we just die if r4 / r5
5212 are not available. */
5213 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
5214 && !global_regs[4] && !global_regs[5]);
5216 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
5217 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
5218 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
5219 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
5220 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
5221 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5222 emit_move_insn (mem, tmp_reg);
5223 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
5224 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5225 emit_move_insn (mem, tmp_reg);
5226 emit_move_insn (reg, adj_reg);
5227 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5228 emit_move_insn (adj_reg, mem);
5229 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5230 emit_move_insn (tmp_reg, mem);
5231 return;
5233 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
5235 /* If SIZE is negative, subtract the positive value.
5236 This sometimes allows a constant pool entry to be shared
5237 between prologue and epilogue code. */
5238 if (size < 0)
5240 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
5241 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
5243 else
5245 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
5246 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
5248 if (! epilogue_p)
5249 REG_NOTES (insn)
5250 = (gen_rtx_EXPR_LIST
5251 (REG_FRAME_RELATED_EXPR,
5252 gen_rtx_SET (VOIDmode, reg,
5253 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
5254 REG_NOTES (insn)));
5259 static rtx
5260 frame_insn (rtx x)
5262 x = emit_insn (x);
5263 RTX_FRAME_RELATED_P (x) = 1;
5264 return x;
5267 /* Output RTL to push register RN onto the stack. */
5269 static rtx
5270 push (int rn)
5272 rtx x;
5273 if (rn == FPUL_REG)
5274 x = gen_push_fpul ();
5275 else if (rn == FPSCR_REG)
5276 x = gen_push_fpscr ();
5277 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5278 && FP_OR_XD_REGISTER_P (rn))
5280 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5281 return NULL_RTX;
5282 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
5284 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5285 x = gen_push_e (gen_rtx_REG (SFmode, rn));
5286 else
5287 x = gen_push (gen_rtx_REG (SImode, rn));
5289 x = frame_insn (x);
5290 REG_NOTES (x)
5291 = gen_rtx_EXPR_LIST (REG_INC,
5292 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5293 return x;
5296 /* Output RTL to pop register RN from the stack. */
5298 static void
5299 pop (int rn)
5301 rtx x;
5302 if (rn == FPUL_REG)
5303 x = gen_pop_fpul ();
5304 else if (rn == FPSCR_REG)
5305 x = gen_pop_fpscr ();
5306 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5307 && FP_OR_XD_REGISTER_P (rn))
5309 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5310 return;
5311 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
5313 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5314 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
5315 else
5316 x = gen_pop (gen_rtx_REG (SImode, rn));
5318 x = emit_insn (x);
5319 REG_NOTES (x)
5320 = gen_rtx_EXPR_LIST (REG_INC,
5321 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5324 /* Generate code to push the regs specified in the mask. */
5326 static void
5327 push_regs (HARD_REG_SET *mask, int interrupt_handler)
5329 int i;
5330 int skip_fpscr = 0;
5332 /* Push PR last; this gives better latencies after the prologue, and
5333 candidates for the return delay slot when there are no general
5334 registers pushed. */
5335 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5337 /* If this is an interrupt handler, and the SZ bit varies,
5338 and we have to push any floating point register, we need
5339 to switch to the correct precision first. */
5340 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
5341 && hard_regs_intersect_p (mask, &reg_class_contents[DF_REGS]))
5343 HARD_REG_SET unsaved;
5345 push (FPSCR_REG);
5346 COMPL_HARD_REG_SET (unsaved, *mask);
5347 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
5348 skip_fpscr = 1;
5350 if (i != PR_REG
5351 && (i != FPSCR_REG || ! skip_fpscr)
5352 && TEST_HARD_REG_BIT (*mask, i))
5353 push (i);
5355 if (TEST_HARD_REG_BIT (*mask, PR_REG))
5356 push (PR_REG);
5359 /* Calculate how much extra space is needed to save all callee-saved
5360 target registers.
5361 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5363 static int
5364 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
5366 int reg;
5367 int stack_space = 0;
5368 int interrupt_handler = sh_cfun_interrupt_handler_p ();
5370 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5371 if ((! call_really_used_regs[reg] || interrupt_handler)
5372 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5373 /* Leave space to save this target register on the stack,
5374 in case target register allocation wants to use it. */
5375 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5376 return stack_space;
5379 /* Decide whether we should reserve space for callee-save target registers,
5380 in case target register allocation wants to use them. REGS_SAVED is
5381 the space, in bytes, that is already required for register saves.
5382 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5384 static int
5385 shmedia_reserve_space_for_target_registers_p (int regs_saved,
5386 HARD_REG_SET *live_regs_mask)
5388 if (optimize_size)
5389 return 0;
5390 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
5393 /* Decide how much space to reserve for callee-save target registers
5394 in case target register allocation wants to use them.
5395 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5397 static int
5398 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5400 if (shmedia_space_reserved_for_target_registers)
5401 return shmedia_target_regs_stack_space (live_regs_mask);
5402 else
5403 return 0;
5406 /* Work out the registers which need to be saved, both as a mask and a
5407 count of saved words. Return the count.
5409 If doing a pragma interrupt function, then push all regs used by the
5410 function, and if we call another function (we can tell by looking at PR),
5411 make sure that all the regs it clobbers are safe too. */
5413 static int
5414 calc_live_regs (HARD_REG_SET *live_regs_mask)
5416 unsigned int reg;
5417 int count;
5418 int interrupt_handler;
5419 int pr_live, has_call;
5421 interrupt_handler = sh_cfun_interrupt_handler_p ();
5423 CLEAR_HARD_REG_SET (*live_regs_mask);
5424 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
5425 && regs_ever_live[FPSCR_REG])
5426 target_flags &= ~MASK_FPU_SINGLE;
5427 /* If we can avoid a lot of register saves by switching to double mode, do that. */
5428 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
5429 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5430 if (regs_ever_live[reg] && regs_ever_live[reg+1]
5431 && (! call_really_used_regs[reg]
5432 || (interrupt_handler && ! pragma_trapa))
5433 && ++count > 2)
5435 target_flags &= ~MASK_FPU_SINGLE;
5436 break;
5438 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5439 knows how to use it. That means the pseudo originally allocated for
5440 the initial value can become the PR_MEDIA_REG hard register, as seen for
5441 execute/20010122-1.c:test9. */
5442 if (TARGET_SHMEDIA)
5443 /* ??? this function is called from initial_elimination_offset, hence we
5444 can't use the result of sh_media_register_for_return here. */
5445 pr_live = sh_pr_n_sets ();
5446 else
5448 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5449 pr_live = (pr_initial
5450 ? (GET_CODE (pr_initial) != REG
5451 || REGNO (pr_initial) != (PR_REG))
5452 : regs_ever_live[PR_REG]);
5453 /* For SHcompact, if not optimizing, we end up with a memory reference
5454 using the return address pointer for __builtin_return_address even
5455 though there is no actual need to put the PR register on the stack. */
5456 pr_live |= regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM];
5458 /* Force PR to be live if the prologue has to call the SHmedia
5459 argument decoder or register saver. */
5460 if (TARGET_SHCOMPACT
5461 && ((current_function_args_info.call_cookie
5462 & ~ CALL_COOKIE_RET_TRAMP (1))
5463 || current_function_has_nonlocal_label))
5464 pr_live = 1;
5465 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
5466 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
5468 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5469 ? pr_live
5470 : (interrupt_handler && ! pragma_trapa)
5471 ? (/* Need to save all the regs ever live. */
5472 (regs_ever_live[reg]
5473 || (call_really_used_regs[reg]
5474 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
5475 || reg == PIC_OFFSET_TABLE_REGNUM)
5476 && has_call)
5477 || (has_call && REGISTER_NATURAL_MODE (reg) == SImode
5478 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5479 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5480 && reg != RETURN_ADDRESS_POINTER_REGNUM
5481 && reg != T_REG && reg != GBR_REG
5482 /* Push fpscr only on targets which have an FPU. */
5483 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5484 : (/* Only push those regs which are used and need to be saved. */
5485 (TARGET_SHCOMPACT
5486 && flag_pic
5487 && current_function_args_info.call_cookie
5488 && reg == PIC_OFFSET_TABLE_REGNUM)
5489 || (regs_ever_live[reg] && ! call_really_used_regs[reg])
5490 || (current_function_calls_eh_return
5491 && (reg == EH_RETURN_DATA_REGNO (0)
5492 || reg == EH_RETURN_DATA_REGNO (1)
5493 || reg == EH_RETURN_DATA_REGNO (2)
5494 || reg == EH_RETURN_DATA_REGNO (3)))
5495 || ((reg == MACL_REG || reg == MACH_REG)
5496 && regs_ever_live[reg]
5497 && sh_cfun_attr_renesas_p ())
5500 SET_HARD_REG_BIT (*live_regs_mask, reg);
5501 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5503 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
5504 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
5506 if (FP_REGISTER_P (reg))
5508 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
5510 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5511 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
5514 else if (XD_REGISTER_P (reg))
5516 /* Must switch to double mode to access these registers. */
5517 target_flags &= ~MASK_FPU_SINGLE;
5522 /* If we have a target register optimization pass after prologue / epilogue
5523 threading, we need to assume all target registers will be live even if
5524 they aren't now. */
5525 if (flag_branch_target_load_optimize2
5526 && TARGET_SAVE_ALL_TARGET_REGS
5527 && shmedia_space_reserved_for_target_registers)
5528 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5529 if ((! call_really_used_regs[reg] || interrupt_handler)
5530 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5532 SET_HARD_REG_BIT (*live_regs_mask, reg);
5533 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5535 /* If this is an interrupt handler, we don't have any call-clobbered
5536 registers we can conveniently use for target register save/restore.
5537 Make sure we save at least one general purpose register when we need
5538 to save target registers. */
5539 if (interrupt_handler
5540 && hard_regs_intersect_p (live_regs_mask,
5541 &reg_class_contents[TARGET_REGS])
5542 && ! hard_regs_intersect_p (live_regs_mask,
5543 &reg_class_contents[GENERAL_REGS]))
5545 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
5546 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
5549 return count;
5552 /* Code to generate prologue and epilogue sequences */
5554 /* PUSHED is the number of bytes that are being pushed on the
5555 stack for register saves. Return the frame size, padded
5556 appropriately so that the stack stays properly aligned. */
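/* For example, with an 8-byte boundary, a frame size of 10 and 12 bytes of
   pushed registers give ((10 + 12 + 7) & -8) - 12 = 12, so saves plus frame
   round up to a multiple of the boundary.  */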
5557 static HOST_WIDE_INT
5558 rounded_frame_size (int pushed)
5560 HOST_WIDE_INT size = get_frame_size ();
5561 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5563 return ((size + pushed + align - 1) & -align) - pushed;
5566 /* Choose a call-clobbered target-branch register that remains
5567 unchanged along the whole function. We set it up as the return
5568 value in the prologue. */
5570 sh_media_register_for_return (void)
5572 int regno;
5573 int tr0_used;
5575 if (! current_function_is_leaf)
5576 return -1;
5577 if (lookup_attribute ("interrupt_handler",
5578 DECL_ATTRIBUTES (current_function_decl)))
5579 return -1;
5580 if (sh_cfun_interrupt_handler_p ())
5581 return -1;
5583 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
5585 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
5586 if (call_really_used_regs[regno] && ! regs_ever_live[regno])
5587 return regno;
5589 return -1;
5592 /* The maximum registers we need to save are:
5593 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
5594 - 32 floating point registers (for each pair, we save none,
5595 one single precision value, or a double precision value).
5596 - 8 target registers
5597 - add 1 entry for a delimiter. */
5598 #define MAX_SAVED_REGS (62+32+8)
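/* The start and end delimiter entries are provided for by the + 2 in the
   save_schedule entries array below.  */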
5600 typedef struct save_entry_s
5602 unsigned char reg;
5603 unsigned char mode;
5604 short offset;
5605 } save_entry;
5607 #define MAX_TEMPS 4
5609 /* There will be a delimiter entry with VOIDmode both at the start and the
5610 end of a filled in schedule. The end delimiter has the offset of the
5611 save with the smallest (i.e. most negative) offset. */
5612 typedef struct save_schedule_s
5614 save_entry entries[MAX_SAVED_REGS + 2];
5615 int temps[MAX_TEMPS+1];
5616 } save_schedule;
5618 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
5619 use reverse order. Returns the last entry written to (not counting
5620 the delimiter). OFFSET_BASE is a number to be added to all offset
5621 entries. */
5623 static save_entry *
5624 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
5625 int offset_base)
5627 int align, i;
5628 save_entry *entry = schedule->entries;
5629 int tmpx = 0;
5630 int offset;
5632 if (! current_function_interrupt)
5633 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
5634 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
5635 && ! FUNCTION_ARG_REGNO_P (i)
5636 && i != FIRST_RET_REG
5637 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
5638 && ! (current_function_calls_eh_return
5639 && (i == EH_RETURN_STACKADJ_REGNO
5640 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
5641 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
5642 schedule->temps[tmpx++] = i;
5643 entry->reg = -1;
5644 entry->mode = VOIDmode;
5645 entry->offset = offset_base;
5646 entry++;
5647 /* We loop twice: first, we save 8-byte aligned registers at the
5648 higher addresses, which are known to be aligned. Then, we
5649 proceed to saving 32-bit registers that don't need 8-byte
5650 alignment.
5651 If this is an interrupt function, all registers that need saving
5652 need to be saved in full. Moreover, we need to postpone saving
5653 target registers till we have saved some general purpose registers
5654 we can then use as scratch registers. */
5655 offset = offset_base;
5656 for (align = 1; align >= 0; align--)
5658 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
5659 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5661 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5662 int reg = i;
5664 if (current_function_interrupt)
5666 if (TARGET_REGISTER_P (i))
5667 continue;
5668 if (GENERAL_REGISTER_P (i))
5669 mode = DImode;
5671 if (mode == SFmode && (i % 2) == 1
5672 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5673 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
5675 mode = DFmode;
5676 i--;
5677 reg--;
5680 /* If we're doing the aligned pass and this is not aligned,
5681 or we're doing the unaligned pass and this is aligned,
5682 skip it. */
5683 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
5684 != align)
5685 continue;
5687 if (current_function_interrupt
5688 && GENERAL_REGISTER_P (i)
5689 && tmpx < MAX_TEMPS)
5690 schedule->temps[tmpx++] = i;
5692 offset -= GET_MODE_SIZE (mode);
5693 entry->reg = i;
5694 entry->mode = mode;
5695 entry->offset = offset;
5696 entry++;
5698 if (align && current_function_interrupt)
5699 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
5700 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5702 offset -= GET_MODE_SIZE (DImode);
5703 entry->reg = i;
5704 entry->mode = DImode;
5705 entry->offset = offset;
5706 entry++;
5709 entry->reg = -1;
5710 entry->mode = VOIDmode;
5711 entry->offset = offset;
5712 schedule->temps[tmpx] = -1;
5713 return entry - 1;
5716 void
5717 sh_expand_prologue (void)
5719 HARD_REG_SET live_regs_mask;
5720 int d, i;
5721 int d_rounding = 0;
5722 int save_flags = target_flags;
5723 int pretend_args;
5725 current_function_interrupt = sh_cfun_interrupt_handler_p ();
5727 /* We have pretend args if we had an object sent partially in registers
5728 and partially on the stack, e.g. a large structure. */
5729 pretend_args = current_function_pretend_args_size;
5730 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
5731 && (NPARM_REGS(SImode)
5732 > current_function_args_info.arg_count[(int) SH_ARG_INT]))
5733 pretend_args = 0;
5734 output_stack_adjust (-pretend_args
5735 - current_function_args_info.stack_regs * 8,
5736 stack_pointer_rtx, 0, NULL);
5738 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
5739 /* We're going to use the PIC register to load the address of the
5740 incoming-argument decoder and/or of the return trampoline from
5741 the GOT, so make sure the PIC register is preserved and
5742 initialized. */
5743 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5745 if (TARGET_SHCOMPACT
5746 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5748 int reg;
5750 /* First, make all registers with incoming arguments that will
5751 be pushed onto the stack live, so that register renaming
5752 doesn't overwrite them. */
5753 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
5754 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
5755 >= NPARM_REGS (SImode) - reg)
5756 for (; reg < NPARM_REGS (SImode); reg++)
5757 emit_insn (gen_shcompact_preserve_incoming_args
5758 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5759 else if (CALL_COOKIE_INT_REG_GET
5760 (current_function_args_info.call_cookie, reg) == 1)
5761 emit_insn (gen_shcompact_preserve_incoming_args
5762 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5764 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
5765 stack_pointer_rtx);
5766 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
5767 GEN_INT (current_function_args_info.call_cookie));
5768 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
5769 gen_rtx_REG (SImode, R0_REG));
5771 else if (TARGET_SHMEDIA)
5773 int tr = sh_media_register_for_return ();
5775 if (tr >= 0)
5777 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
5778 gen_rtx_REG (DImode, PR_MEDIA_REG));
5780 /* ??? We should suppress saving pr when we don't need it, but this
5781 is tricky because of builtin_return_address. */
5783 /* If this function only exits with sibcalls, this copy
5784 will be flagged as dead. */
5785 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5786 const0_rtx,
5787 REG_NOTES (insn));
5791 /* Emit the code for SETUP_VARARGS. */
5792 if (current_function_stdarg)
5794 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
5797 /* Push arg regs as if they'd been provided by the caller on the stack. */
5797 for (i = 0; i < NPARM_REGS(SImode); i++)
5799 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
5800 rtx insn;
5802 if (i >= (NPARM_REGS(SImode)
5803 - current_function_args_info.arg_count[(int) SH_ARG_INT]
5805 break;
5806 insn = push (rn);
5807 RTX_FRAME_RELATED_P (insn) = 0;
5812 /* If we're supposed to switch stacks at function entry, do so now. */
5813 if (sp_switch)
5814 emit_insn (gen_sp_switch_1 ());
5816 d = calc_live_regs (&live_regs_mask);
5817 /* ??? Maybe we could save some switching if we can move a mode switch
5818 that already happens to be at the function start into the prologue. */
5819 if (target_flags != save_flags && ! current_function_interrupt)
5820 emit_insn (gen_toggle_sz ());
5822 if (TARGET_SH5)
5824 int offset_base, offset;
5825 rtx r0 = NULL_RTX;
5826 int offset_in_r0 = -1;
5827 int sp_in_r0 = 0;
5828 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5829 int total_size, save_size;
5830 save_schedule schedule;
5831 save_entry *entry;
5832 int *tmp_pnt;
5834 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
5835 && ! current_function_interrupt)
5836 r0 = gen_rtx_REG (Pmode, R0_REG);
5838 /* D is the actual number of bytes that we need for saving registers;
5839 however, in initial_elimination_offset we have committed to using
5840 an additional TREGS_SPACE amount of bytes. In order to keep both
5841 the addresses of arguments supplied by the caller and local variables
5842 valid, we must keep this gap. Place it between the incoming
5843 arguments and the actually saved registers in a bid to optimize
5844 locality of reference. */
5845 total_size = d + tregs_space;
5846 total_size += rounded_frame_size (total_size);
5847 save_size = total_size - rounded_frame_size (d);
5848 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
5849 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5850 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
5852 /* If adjusting the stack in a single step costs nothing extra, do so.
5853 I.e. either if a single addi is enough, or we need a movi anyway,
5854 and we don't exceed the maximum offset range (the test for the
5855 latter is conservative for simplicity). */
5856 if (TARGET_SHMEDIA
5857 && (CONST_OK_FOR_I10 (-total_size)
5858 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
5859 && total_size <= 2044)))
5860 d_rounding = total_size - save_size;
5862 offset_base = d + d_rounding;
5864 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
5865 0, NULL);
5867 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
5868 tmp_pnt = schedule.temps;
5869 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
5871 enum machine_mode mode = entry->mode;
5872 unsigned int reg = entry->reg;
5873 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
5874 rtx orig_reg_rtx;
5876 offset = entry->offset;
5878 reg_rtx = gen_rtx_REG (mode, reg);
5880 mem_rtx = gen_frame_mem (mode,
5881 gen_rtx_PLUS (Pmode,
5882 stack_pointer_rtx,
5883 GEN_INT (offset)));
5885 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
5887 gcc_assert (r0);
5888 mem_rtx = NULL_RTX;
5890 try_pre_dec:
5892 if (HAVE_PRE_DECREMENT
5893 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
5894 || mem_rtx == NULL_RTX
5895 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5897 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
5899 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
5900 pre_dec_ok);
5902 pre_dec = NULL_RTX;
5904 break;
5906 pre_dec_ok:
5907 mem_rtx = NULL_RTX;
5908 offset += GET_MODE_SIZE (mode);
5910 while (0);
5912 if (mem_rtx != NULL_RTX)
5913 goto addr_ok;
5915 if (offset_in_r0 == -1)
5917 emit_move_insn (r0, GEN_INT (offset));
5918 offset_in_r0 = offset;
5920 else if (offset != offset_in_r0)
5922 emit_move_insn (r0,
5923 gen_rtx_PLUS
5924 (Pmode, r0,
5925 GEN_INT (offset - offset_in_r0)));
5926 offset_in_r0 += offset - offset_in_r0;
5929 if (pre_dec != NULL_RTX)
5931 if (! sp_in_r0)
5933 emit_move_insn (r0,
5934 gen_rtx_PLUS
5935 (Pmode, r0, stack_pointer_rtx));
5936 sp_in_r0 = 1;
5939 offset -= GET_MODE_SIZE (mode);
5940 offset_in_r0 -= GET_MODE_SIZE (mode);
5942 mem_rtx = pre_dec;
5944 else if (sp_in_r0)
5945 mem_rtx = gen_frame_mem (mode, r0);
5946 else
5947 mem_rtx = gen_frame_mem (mode,
5948 gen_rtx_PLUS (Pmode,
5949 stack_pointer_rtx,
5950 r0));
5952 /* We must not use an r0-based address for target-branch
5953 registers or for special registers without pre-dec
5954 memory addresses, since we store their values in r0
5955 first. */
5956 gcc_assert (!TARGET_REGISTER_P (reg)
5957 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
5958 || mem_rtx == pre_dec));
5960 addr_ok:
5961 orig_reg_rtx = reg_rtx;
5962 if (TARGET_REGISTER_P (reg)
5963 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5964 && mem_rtx != pre_dec))
5966 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
5968 emit_move_insn (tmp_reg, reg_rtx);
5970 if (REGNO (tmp_reg) == R0_REG)
5972 offset_in_r0 = -1;
5973 sp_in_r0 = 0;
5974 gcc_assert (!refers_to_regno_p
5975 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
5978 if (*++tmp_pnt <= 0)
5979 tmp_pnt = schedule.temps;
5981 reg_rtx = tmp_reg;
5984 rtx insn;
5986 /* Mark as interesting for the DWARF CFI generator. */
5987 insn = emit_move_insn (mem_rtx, reg_rtx);
5988 RTX_FRAME_RELATED_P (insn) = 1;
5989 /* If we use an intermediate register for the save, we can't
5990 describe this exactly in cfi as a copy of the to-be-saved
5991 register into the temporary register and then a store of the temporary
5992 register to the stack, because the temporary register can
5993 have a different natural size than the to-be-saved register.
5994 Thus, we gloss over the intermediate copy and pretend we do
5995 a direct save from the to-be-saved register. */
5996 if (REGNO (reg_rtx) != reg)
5998 rtx set, note_rtx;
6000 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
6001 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6002 REG_NOTES (insn));
6003 REG_NOTES (insn) = note_rtx;
6006 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
6008 rtx reg_rtx = gen_rtx_REG (mode, reg);
6009 rtx set, note_rtx;
6010 rtx mem_rtx = gen_frame_mem (mode,
6011 gen_rtx_PLUS (Pmode,
6012 stack_pointer_rtx,
6013 GEN_INT (offset)));
6015 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
6016 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6017 REG_NOTES (insn));
6018 REG_NOTES (insn) = note_rtx;
6023 gcc_assert (entry->offset == d_rounding);
6025 else
6026 push_regs (&live_regs_mask, current_function_interrupt);
6028 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
6030 rtx insn = get_last_insn ();
6031 rtx last = emit_insn (gen_GOTaddr2picreg ());
6033 /* Mark these insns as possibly dead. Sometimes, flow2 may
6034 delete all uses of the PIC register. In this case, let it
6035 delete the initialization too. */
6038 insn = NEXT_INSN (insn);
6040 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6041 const0_rtx,
6042 REG_NOTES (insn));
6044 while (insn != last);
6047 if (SHMEDIA_REGS_STACK_ADJUST ())
6049 /* This must NOT go through the PLT, otherwise mach and macl
6050 may be clobbered. */
6051 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6052 (TARGET_FPU_ANY
6053 ? "__GCC_push_shmedia_regs"
6054 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
6055 emit_insn (gen_shmedia_save_restore_regs_compact
6056 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
6059 if (target_flags != save_flags && ! current_function_interrupt)
6061 rtx insn = emit_insn (gen_toggle_sz ());
6063 /* If we're lucky, a mode switch in the function body will
6064 overwrite fpscr, turning this insn dead. Tell flow this
6065 insn is ok to delete. */
6066 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6067 const0_rtx,
6068 REG_NOTES (insn));
6071 target_flags = save_flags;
6073 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
6074 stack_pointer_rtx, 0, NULL);
6076 if (frame_pointer_needed)
6077 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
6079 if (TARGET_SHCOMPACT
6080 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6082 /* This must NOT go through the PLT, otherwise mach and macl
6083 may be clobbered. */
6084 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6085 "__GCC_shcompact_incoming_args", SFUNC_GOT);
6086 emit_insn (gen_shcompact_incoming_args ());
6090 void
6091 sh_expand_epilogue (bool sibcall_p)
6093 HARD_REG_SET live_regs_mask;
6094 int d, i;
6095 int d_rounding = 0;
6097 int save_flags = target_flags;
6098 int frame_size, save_size;
6099 int fpscr_deferred = 0;
6100 int e = sibcall_p ? -1 : 1;
6102 d = calc_live_regs (&live_regs_mask);
6104 save_size = d;
6105 frame_size = rounded_frame_size (d);
6107 if (TARGET_SH5)
6109 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6110 int total_size;
6111 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
6112 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6113 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
6115 total_size = d + tregs_space;
6116 total_size += rounded_frame_size (total_size);
6117 save_size = total_size - frame_size;
6119 /* If adjusting the stack in a single step costs nothing extra, do so.
6120 I.e. either if a single addi is enough, or we need a movi anyway,
6121 and we don't exceed the maximum offset range (the test for the
6122 latter is conservative for simplicity). */
6123 if (TARGET_SHMEDIA
6124 && ! frame_pointer_needed
6125 && (CONST_OK_FOR_I10 (total_size)
6126 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
6127 && total_size <= 2044)))
6128 d_rounding = frame_size;
6130 frame_size -= d_rounding;
6133 if (frame_pointer_needed)
6135 /* We must avoid scheduling the epilogue with previous basic blocks
6136 when exception handling is enabled. See PR/18032. */
6137 if (flag_exceptions)
6138 emit_insn (gen_blockage ());
6139 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
6140 &live_regs_mask);
6142 /* We must avoid moving the stack pointer adjustment past code
6143 which reads from the local frame, else an interrupt could
6144 occur after the SP adjustment and clobber data in the local
6145 frame. */
6146 emit_insn (gen_blockage ());
6147 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
6149 else if (frame_size)
6151 /* We must avoid moving the stack pointer adjustment past code
6152 which reads from the local frame, else an interrupt could
6153 occur after the SP adjustment and clobber data in the local
6154 frame. */
6155 emit_insn (gen_blockage ());
6156 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
6159 if (SHMEDIA_REGS_STACK_ADJUST ())
6161 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6162 (TARGET_FPU_ANY
6163 ? "__GCC_pop_shmedia_regs"
6164 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
6165 /* This must NOT go through the PLT, otherwise mach and macl
6166 may be clobbered. */
6167 emit_insn (gen_shmedia_save_restore_regs_compact
6168 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
6171 /* Pop all the registers. */
6173 if (target_flags != save_flags && ! current_function_interrupt)
6174 emit_insn (gen_toggle_sz ());
6175 if (TARGET_SH5)
6177 int offset_base, offset;
6178 int offset_in_r0 = -1;
6179 int sp_in_r0 = 0;
6180 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
6181 save_schedule schedule;
6182 save_entry *entry;
6183 int *tmp_pnt;
6185 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
6186 offset_base = -entry[1].offset + d_rounding;
6187 tmp_pnt = schedule.temps;
6188 for (; entry->mode != VOIDmode; entry--)
6190 enum machine_mode mode = entry->mode;
6191 int reg = entry->reg;
6192 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
6194 offset = offset_base + entry->offset;
6195 reg_rtx = gen_rtx_REG (mode, reg);
6197 mem_rtx = gen_frame_mem (mode,
6198 gen_rtx_PLUS (Pmode,
6199 stack_pointer_rtx,
6200 GEN_INT (offset)));
6202 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
6204 mem_rtx = NULL_RTX;
6206 try_post_inc:
6208 if (HAVE_POST_INCREMENT
6209 && (offset == offset_in_r0
6210 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
6211 && mem_rtx == NULL_RTX)
6212 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6214 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
6216 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
6217 post_inc_ok);
6219 post_inc = NULL_RTX;
6221 break;
6223 post_inc_ok:
6224 mem_rtx = NULL_RTX;
6226 while (0);
6228 if (mem_rtx != NULL_RTX)
6229 goto addr_ok;
6231 if (offset_in_r0 == -1)
6233 emit_move_insn (r0, GEN_INT (offset));
6234 offset_in_r0 = offset;
6236 else if (offset != offset_in_r0)
6238 emit_move_insn (r0,
6239 gen_rtx_PLUS
6240 (Pmode, r0,
6241 GEN_INT (offset - offset_in_r0)));
6242 offset_in_r0 += offset - offset_in_r0;
6245 if (post_inc != NULL_RTX)
6247 if (! sp_in_r0)
6249 emit_move_insn (r0,
6250 gen_rtx_PLUS
6251 (Pmode, r0, stack_pointer_rtx));
6252 sp_in_r0 = 1;
6255 mem_rtx = post_inc;
6257 offset_in_r0 += GET_MODE_SIZE (mode);
6259 else if (sp_in_r0)
6260 mem_rtx = gen_frame_mem (mode, r0);
6261 else
6262 mem_rtx = gen_frame_mem (mode,
6263 gen_rtx_PLUS (Pmode,
6264 stack_pointer_rtx,
6265 r0));
6267 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6268 || mem_rtx == post_inc);
6270 addr_ok:
6271 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6272 && mem_rtx != post_inc)
6274 insn = emit_move_insn (r0, mem_rtx);
6275 mem_rtx = r0;
6277 else if (TARGET_REGISTER_P (reg))
6279 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
6281 /* Give the scheduler a bit of freedom by using up to
6282 MAX_TEMPS registers in a round-robin fashion. */
6283 insn = emit_move_insn (tmp_reg, mem_rtx);
6284 mem_rtx = tmp_reg;
6285 if (*++tmp_pnt < 0)
6286 tmp_pnt = schedule.temps;
6289 insn = emit_move_insn (reg_rtx, mem_rtx);
6290 if (reg == PR_MEDIA_REG && sh_media_register_for_return () >= 0)
6291 /* This is dead, unless we return with a sibcall. */
6292 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6293 const0_rtx,
6294 REG_NOTES (insn));
6297 gcc_assert (entry->offset + offset_base == d + d_rounding);
6299 else /* ! TARGET_SH5 */
6301 save_size = 0;
6302 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6303 pop (PR_REG);
6304 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
6306 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
6308 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
6309 && hard_regs_intersect_p (&live_regs_mask,
6310 &reg_class_contents[DF_REGS]))
6311 fpscr_deferred = 1;
6312 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
6313 pop (j);
6314 if (j == FIRST_FP_REG && fpscr_deferred)
6315 pop (FPSCR_REG);
6319 if (target_flags != save_flags && ! current_function_interrupt)
6320 emit_insn (gen_toggle_sz ());
6321 target_flags = save_flags;
6323 output_stack_adjust (current_function_pretend_args_size
6324 + save_size + d_rounding
6325 + current_function_args_info.stack_regs * 8,
6326 stack_pointer_rtx, e, NULL);
6328 if (current_function_calls_eh_return)
6329 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
6330 EH_RETURN_STACKADJ_RTX));
6332 /* Switch back to the normal stack if necessary. */
6333 if (sp_switch)
6334 emit_insn (gen_sp_switch_2 ());
6336 /* Tell flow the insn that pops PR isn't dead. */
6337 /* PR_REG will never be live in SHmedia mode, and we don't need to
6338 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6339 by the return pattern. */
6340 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6341 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
6344 static int sh_need_epilogue_known = 0;
6347 sh_need_epilogue (void)
6349 if (! sh_need_epilogue_known)
6351 rtx epilogue;
6353 start_sequence ();
6354 sh_expand_epilogue (0);
6355 epilogue = get_insns ();
6356 end_sequence ();
6357 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
6359 return sh_need_epilogue_known > 0;
6362 /* Emit code to change the current function's return address to RA.
6363 TEMP is available as a scratch register, if needed. */
6365 void
6366 sh_set_return_address (rtx ra, rtx tmp)
6368 HARD_REG_SET live_regs_mask;
6369 int d;
6370 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6371 int pr_offset;
6373 d = calc_live_regs (&live_regs_mask);
6375 /* If pr_reg isn't live, we can set it (or the register given in
6376 sh_media_register_for_return) directly. */
6377 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6379 rtx rr;
6381 if (TARGET_SHMEDIA)
6383 int rr_regno = sh_media_register_for_return ();
6385 if (rr_regno < 0)
6386 rr_regno = pr_reg;
6388 rr = gen_rtx_REG (DImode, rr_regno);
6390 else
6391 rr = gen_rtx_REG (SImode, pr_reg);
6393 emit_insn (GEN_MOV (rr, ra));
6394 /* Tell flow the register for return isn't dead. */
6395 emit_insn (gen_rtx_USE (VOIDmode, rr));
6396 return;
6399 if (TARGET_SH5)
6401 int offset;
6402 save_schedule schedule;
6403 save_entry *entry;
6405 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
6406 offset = entry[1].offset;
6407 for (; entry->mode != VOIDmode; entry--)
6408 if (entry->reg == pr_reg)
6409 goto found;
6411 /* We couldn't find the PR register. */
6412 gcc_unreachable ();
6414 found:
6415 offset = entry->offset - offset;
6416 pr_offset = (rounded_frame_size (d) + offset
6417 + SHMEDIA_REGS_STACK_ADJUST ());
6419 else
6420 pr_offset = rounded_frame_size (d);
6422 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6423 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
6425 tmp = gen_frame_mem (Pmode, tmp);
6426 emit_insn (GEN_MOV (tmp, ra));
6429 /* Clear variables at function end. */
6431 static void
6432 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6433 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6435 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
6436 sh_need_epilogue_known = 0;
6437 sp_switch = NULL_RTX;
6440 static rtx
6441 sh_builtin_saveregs (void)
6443 /* First unnamed integer register. */
6444 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
6445 /* Number of integer registers we need to save. */
6446 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6447 /* First unnamed SFmode float reg */
6448 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6449 /* Number of SFmode float regs to save. */
6450 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6451 rtx regbuf, fpregs;
6452 int bufsize, regno;
6453 HOST_WIDE_INT alias_set;
6455 if (TARGET_SH5)
6457 if (n_intregs)
6459 int pushregs = n_intregs;
6461 while (pushregs < NPARM_REGS (SImode) - 1
6462 && (CALL_COOKIE_INT_REG_GET
6463 (current_function_args_info.call_cookie,
6464 NPARM_REGS (SImode) - pushregs)
6465 == 1))
6467 current_function_args_info.call_cookie
6468 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6469 - pushregs, 1);
6470 pushregs++;
6473 if (pushregs == NPARM_REGS (SImode))
6474 current_function_args_info.call_cookie
6475 |= (CALL_COOKIE_INT_REG (0, 1)
6476 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6477 else
6478 current_function_args_info.call_cookie
6479 |= CALL_COOKIE_STACKSEQ (pushregs);
6481 current_function_pretend_args_size += 8 * n_intregs;
6483 if (TARGET_SHCOMPACT)
6484 return const0_rtx;
6487 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
6489 error ("__builtin_saveregs not supported by this subtarget");
6490 return const0_rtx;
6493 if (TARGET_SHMEDIA)
6494 n_floatregs = 0;
6496 /* Allocate block of memory for the regs. */
6497 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6498 Or can assign_stack_local accept a 0 SIZE argument? */
6499 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
6501 if (TARGET_SHMEDIA)
6502 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
6503 else if (n_floatregs & 1)
6505 rtx addr;
6507 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6508 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
6509 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6510 regbuf = change_address (regbuf, BLKmode, addr);
6512 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
6514 rtx addr, mask;
6516 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6517 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
6518 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
6519 emit_insn (gen_andsi3 (addr, addr, mask));
6520 regbuf = change_address (regbuf, BLKmode, addr);
6522 else
6523 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
6524 alias_set = get_varargs_alias_set ();
6525 set_mem_alias_set (regbuf, alias_set);
6527 /* Save int args.
6528 This is optimized to only save the regs that are necessary. Explicitly
6529 named args need not be saved. */
6530 if (n_intregs > 0)
6531 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
6532 adjust_address (regbuf, BLKmode,
6533 n_floatregs * UNITS_PER_WORD),
6534 n_intregs);
6536 if (TARGET_SHMEDIA)
6537 /* Return the address of the regbuf. */
6538 return XEXP (regbuf, 0);
6540 /* Save float args.
6541 This is optimized to only save the regs that are necessary. Explicitly
6542 named args need not be saved.
6543 We explicitly build a pointer to the buffer because it halves the insn
6544 count when not optimizing (otherwise the pointer is built for each reg
6545 saved).
6546 We emit the moves in reverse order so that we can use predecrement. */
6548 fpregs = copy_to_mode_reg (Pmode,
6549 plus_constant (XEXP (regbuf, 0),
6550 n_floatregs * UNITS_PER_WORD));
6551 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
6553 rtx mem;
6554 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
6556 emit_insn (gen_addsi3 (fpregs, fpregs,
6557 GEN_INT (-2 * UNITS_PER_WORD)));
6558 mem = change_address (regbuf, DFmode, fpregs);
6559 emit_move_insn (mem,
6560 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
6562 regno = first_floatreg;
6563 if (regno & 1)
6565 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6566 mem = change_address (regbuf, SFmode, fpregs);
6567 emit_move_insn (mem,
6568 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
6569 - (TARGET_LITTLE_ENDIAN != 0)));
6572 else
6573 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
6575 rtx mem;
6577 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6578 mem = change_address (regbuf, SFmode, fpregs);
6579 emit_move_insn (mem,
6580 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
6583 /* Return the address of the regbuf. */
6584 return XEXP (regbuf, 0);
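/* Illustrative note (an assumption-flagged sketch, not part of the
   original source): for the non-SHmedia case the buffer allocated above
   is laid out as

     regbuf + 0                             unnamed FP args
                                            (n_floatregs words, filled
                                            downwards from the int block)
     regbuf + n_floatregs * UNITS_PER_WORD  unnamed integer args
                                            (n_intregs words)

   which is why sh_va_start below points __va_next_fp at the value
   returned by expand_builtin_saveregs and __va_next_o at the end of the
   FP block.  */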
6587 /* Define the `__builtin_va_list' type for the ABI. */
6589 static tree
6590 sh_build_builtin_va_list (void)
6592 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6593 tree record;
6595 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
6596 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6597 return ptr_type_node;
6599 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6601 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
6602 ptr_type_node);
6603 f_next_o_limit = build_decl (FIELD_DECL,
6604 get_identifier ("__va_next_o_limit"),
6605 ptr_type_node);
6606 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
6607 ptr_type_node);
6608 f_next_fp_limit = build_decl (FIELD_DECL,
6609 get_identifier ("__va_next_fp_limit"),
6610 ptr_type_node);
6611 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
6612 ptr_type_node);
6614 DECL_FIELD_CONTEXT (f_next_o) = record;
6615 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
6616 DECL_FIELD_CONTEXT (f_next_fp) = record;
6617 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
6618 DECL_FIELD_CONTEXT (f_next_stack) = record;
6620 TYPE_FIELDS (record) = f_next_o;
6621 TREE_CHAIN (f_next_o) = f_next_o_limit;
6622 TREE_CHAIN (f_next_o_limit) = f_next_fp;
6623 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
6624 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
6626 layout_type (record);
6628 return record;
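/* Sketch (not part of the original source): the record built above is
   roughly equivalent to the C declaration

     typedef struct
     {
       void *__va_next_o;         next unnamed integer argument
       void *__va_next_o_limit;   end of the saved integer arguments
       void *__va_next_fp;        next unnamed FP argument
       void *__va_next_fp_limit;  end of the saved FP arguments
       void *__va_next_stack;     next argument passed on the stack
     } __builtin_va_list;

   The field roles noted here follow the way sh_va_start and
   sh_gimplify_va_arg_expr use them.  */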
6631 /* Implement `va_start' for varargs and stdarg. */
6633 void
6634 sh_va_start (tree valist, rtx nextarg)
6636 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6637 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6638 tree t, u;
6639 int nfp, nint;
6641 if (TARGET_SH5)
6643 expand_builtin_saveregs ();
6644 std_expand_builtin_va_start (valist, nextarg);
6645 return;
6648 if ((! TARGET_SH2E && ! TARGET_SH4)
6649 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6651 std_expand_builtin_va_start (valist, nextarg);
6652 return;
6655 f_next_o = TYPE_FIELDS (va_list_type_node);
6656 f_next_o_limit = TREE_CHAIN (f_next_o);
6657 f_next_fp = TREE_CHAIN (f_next_o_limit);
6658 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6659 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6661 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6662 NULL_TREE);
6663 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6664 valist, f_next_o_limit, NULL_TREE);
6665 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
6666 NULL_TREE);
6667 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6668 valist, f_next_fp_limit, NULL_TREE);
6669 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6670 valist, f_next_stack, NULL_TREE);
6672 /* Call __builtin_saveregs. */
6673 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
6674 t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
6675 TREE_SIDE_EFFECTS (t) = 1;
6676 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6678 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
6679 if (nfp < 8)
6680 nfp = 8 - nfp;
6681 else
6682 nfp = 0;
6683 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6684 build_int_cst (NULL_TREE, UNITS_PER_WORD * nfp)));
6685 t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
6686 TREE_SIDE_EFFECTS (t) = 1;
6687 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6689 t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
6690 TREE_SIDE_EFFECTS (t) = 1;
6691 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6693 nint = current_function_args_info.arg_count[SH_ARG_INT];
6694 if (nint < 4)
6695 nint = 4 - nint;
6696 else
6697 nint = 0;
6698 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6699 build_int_cst (NULL_TREE, UNITS_PER_WORD * nint)));
6700 t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
6701 TREE_SIDE_EFFECTS (t) = 1;
6702 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6704 u = make_tree (ptr_type_node, nextarg);
6705 t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
6706 TREE_SIDE_EFFECTS (t) = 1;
6707 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
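/* Worked example (illustrative): if one FP argument register and one
   integer argument register are already taken by named arguments, the
   code above computes nfp = 8 - 1 = 7 and nint = 4 - 1 = 3, so with
   UNITS_PER_WORD == 4:

     __va_next_fp       = expand_builtin_saveregs ()
     __va_next_fp_limit = __va_next_fp + 7 * 4
     __va_next_o        = __va_next_fp_limit
     __va_next_o_limit  = __va_next_o + 3 * 4
     __va_next_stack    = nextarg  */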
6710 /* TYPE is a RECORD_TYPE. If there is only a single non-zero-sized
6711 member, return it. */
6712 static tree
6713 find_sole_member (tree type)
6715 tree field, member = NULL_TREE;
6717 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6719 if (TREE_CODE (field) != FIELD_DECL)
6720 continue;
6721 if (!DECL_SIZE (field))
6722 return NULL_TREE;
6723 if (integer_zerop (DECL_SIZE (field)))
6724 continue;
6725 if (member)
6726 return NULL_TREE;
6727 member = field;
6729 return member;
6731 /* Implement `va_arg'. */
6733 static tree
6734 sh_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p,
6735 tree *post_p ATTRIBUTE_UNUSED)
6737 HOST_WIDE_INT size, rsize;
6738 tree tmp, pptr_type_node;
6739 tree addr, lab_over = NULL, result = NULL;
6740 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
6742 if (pass_by_ref)
6743 type = build_pointer_type (type);
6745 size = int_size_in_bytes (type);
6746 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6747 pptr_type_node = build_pointer_type (ptr_type_node);
6749 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
6750 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
6752 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6753 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6754 int pass_as_float;
6755 tree lab_false;
6756 tree member;
6758 f_next_o = TYPE_FIELDS (va_list_type_node);
6759 f_next_o_limit = TREE_CHAIN (f_next_o);
6760 f_next_fp = TREE_CHAIN (f_next_o_limit);
6761 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6762 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6764 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6765 NULL_TREE);
6766 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6767 valist, f_next_o_limit, NULL_TREE);
6768 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
6769 valist, f_next_fp, NULL_TREE);
6770 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6771 valist, f_next_fp_limit, NULL_TREE);
6772 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6773 valist, f_next_stack, NULL_TREE);
6775 /* Structures with a single member with a distinct mode are passed
6776 like their member. This is relevant if the latter has a REAL_TYPE
6777 or COMPLEX_TYPE type. */
6778 while (TREE_CODE (type) == RECORD_TYPE
6779 && (member = find_sole_member (type))
6780 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
6781 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
6782 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
6784 tree field_type = TREE_TYPE (member);
6786 if (TYPE_MODE (type) == TYPE_MODE (field_type))
6787 type = field_type;
6788 else
6790 gcc_assert ((TYPE_ALIGN (type)
6791 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
6792 || (TYPE_ALIGN (type)
6793 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
6794 break;
6798 if (TARGET_SH4)
6800 pass_as_float = ((TREE_CODE (type) == REAL_TYPE && size <= 8)
6801 || (TREE_CODE (type) == COMPLEX_TYPE
6802 && TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
6803 && size <= 16));
6805 else
6807 pass_as_float = (TREE_CODE (type) == REAL_TYPE && size == 4);
6810 addr = create_tmp_var (pptr_type_node, NULL);
6811 lab_false = create_artificial_label ();
6812 lab_over = create_artificial_label ();
6814 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
6816 if (pass_as_float)
6818 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
6819 tree cmp;
6820 bool is_double = size == 8 && TREE_CODE (type) == REAL_TYPE;
6822 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
6823 tmp = build2 (MODIFY_EXPR, void_type_node, addr, tmp);
6824 gimplify_and_add (tmp, pre_p);
6826 tmp = build2 (MODIFY_EXPR, ptr_type_node, next_fp_tmp, valist);
6827 gimplify_and_add (tmp, pre_p);
6828 tmp = next_fp_limit;
6829 if (size > 4 && !is_double)
6830 tmp = build2 (PLUS_EXPR, TREE_TYPE (tmp), tmp,
6831 fold_convert (TREE_TYPE (tmp), size_int (4 - size)));
6832 tmp = build (GE_EXPR, boolean_type_node, next_fp_tmp, tmp);
6833 cmp = build (COND_EXPR, void_type_node, tmp,
6834 build (GOTO_EXPR, void_type_node, lab_false),
6835 NULL);
6836 if (!is_double)
6837 gimplify_and_add (cmp, pre_p);
6839 if (TYPE_ALIGN (type) > BITS_PER_WORD || (is_double || size == 16))
6841 tmp = fold_convert (ptr_type_node, size_int (UNITS_PER_WORD));
6842 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp_tmp, tmp);
6843 tmp = build (PLUS_EXPR, ptr_type_node, next_fp_tmp, tmp);
6844 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp_tmp, tmp);
6845 gimplify_and_add (tmp, pre_p);
6847 if (is_double)
6848 gimplify_and_add (cmp, pre_p);
6850 #ifdef FUNCTION_ARG_SCmode_WART
6851 if (TYPE_MODE (type) == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
6853 tree subtype = TREE_TYPE (type);
6854 tree real, imag;
6856 imag
6857 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
6858 imag = get_initialized_tmp_var (imag, pre_p, NULL);
6860 real
6861 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
6862 real = get_initialized_tmp_var (real, pre_p, NULL);
6864 result = build (COMPLEX_EXPR, type, real, imag);
6865 result = get_initialized_tmp_var (result, pre_p, NULL);
6867 #endif /* FUNCTION_ARG_SCmode_WART */
6869 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6870 gimplify_and_add (tmp, pre_p);
6872 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6873 gimplify_and_add (tmp, pre_p);
6875 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6876 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6877 gimplify_and_add (tmp, pre_p);
6878 tmp = build2 (MODIFY_EXPR, ptr_type_node, next_fp_tmp, valist);
6879 gimplify_and_add (tmp, pre_p);
6881 tmp = build2 (MODIFY_EXPR, ptr_type_node, valist, next_fp_tmp);
6882 gimplify_and_add (tmp, post_p);
6883 valist = next_fp_tmp;
6885 else
6887 tmp = fold_convert (ptr_type_node, size_int (rsize));
6888 tmp = build (PLUS_EXPR, ptr_type_node, next_o, tmp);
6889 tmp = build (GT_EXPR, boolean_type_node, tmp, next_o_limit);
6890 tmp = build (COND_EXPR, void_type_node, tmp,
6891 build (GOTO_EXPR, void_type_node, lab_false),
6892 NULL);
6893 gimplify_and_add (tmp, pre_p);
6895 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
6896 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6897 gimplify_and_add (tmp, pre_p);
6899 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6900 gimplify_and_add (tmp, pre_p);
6902 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6903 gimplify_and_add (tmp, pre_p);
6905 if (size > 4 && ! TARGET_SH4)
6907 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
6908 gimplify_and_add (tmp, pre_p);
6911 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6912 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6913 gimplify_and_add (tmp, pre_p);
6916 if (!result)
6918 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6919 gimplify_and_add (tmp, pre_p);
6923 /* ??? In va-sh.h, there had been code to make values larger than
6924 size 8 indirect. This does not match the FUNCTION_ARG macros. */
6926 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
6927 if (result)
6929 tmp = build (MODIFY_EXPR, void_type_node, result, tmp);
6930 gimplify_and_add (tmp, pre_p);
6932 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6933 gimplify_and_add (tmp, pre_p);
6935 else
6936 result = tmp;
6938 if (pass_by_ref)
6939 result = build_va_arg_indirect_ref (result);
6941 return result;
6944 bool
6945 sh_promote_prototypes (tree type)
6947 if (TARGET_HITACHI)
6948 return 0;
6949 if (! type)
6950 return 1;
6951 return ! sh_attr_renesas_p (type);
6954 /* Whether an argument must be passed by reference. On SHcompact, we
6955 pretend arguments wider than 32 bits that would have been passed in
6956 registers are passed by reference, so that an SHmedia trampoline
6957 loads them into the full 64-bit registers. */
6959 static int
6960 shcompact_byref (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6961 tree type, bool named)
6963 unsigned HOST_WIDE_INT size;
6965 if (type)
6966 size = int_size_in_bytes (type);
6967 else
6968 size = GET_MODE_SIZE (mode);
6970 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
6971 && (!named
6972 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
6973 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
6974 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
6975 && size > 4
6976 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
6977 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
6978 return size;
6979 else
6980 return 0;
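/* Example (illustrative): with -m5-compact, a DImode argument such as a
   long long that still fits in the integer argument registers passes the
   size > 4 test above, so, provided the remaining checks succeed, its
   size (8) is returned and sh_pass_by_reference below records it in
   cum->byref, making the argument pass by reference.  */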
6983 static bool
6984 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6985 tree type, bool named)
6987 if (targetm.calls.must_pass_in_stack (mode, type))
6988 return true;
6990 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
6991 wants to know about pass-by-reference semantics for incoming
6992 arguments. */
6993 if (! cum)
6994 return false;
6996 if (TARGET_SHCOMPACT)
6998 cum->byref = shcompact_byref (cum, mode, type, named);
6999 return cum->byref != 0;
7002 return false;
7005 static bool
7006 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7007 tree type, bool named ATTRIBUTE_UNUSED)
7009 /* ??? How can it possibly be correct to return true only on the
7010 caller side of the equation? Is there someplace else in the
7011 sh backend that's magically producing the copies? */
7012 return (cum->outgoing
7013 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
7014 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
7017 static int
7018 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7019 tree type, bool named ATTRIBUTE_UNUSED)
7021 int words = 0;
7023 if (!TARGET_SH5
7024 && PASS_IN_REG_P (*cum, mode, type)
7025 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
7026 && (ROUND_REG (*cum, mode)
7027 + (mode != BLKmode
7028 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7029 : ROUND_ADVANCE (int_size_in_bytes (type)))
7030 > NPARM_REGS (mode)))
7031 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
7033 else if (!TARGET_SHCOMPACT
7034 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7035 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
7037 return words * UNITS_PER_WORD;
7041 /* Define where to put the arguments to a function.
7042 Value is zero to push the argument on the stack,
7043 or a hard register in which to store the argument.
7045 MODE is the argument's machine mode.
7046 TYPE is the data type of the argument (as a tree).
7047 This is null for libcalls where that information may
7048 not be available.
7049 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7050 the preceding args and about the function being called.
7051 NAMED is nonzero if this argument is a named parameter
7052 (otherwise it is an extra parameter matching an ellipsis).
7054 On SH the first args are normally in registers
7055 and the rest are pushed. Any arg that starts within the first
7056 NPARM_REGS words is at least partially passed in a register unless
7057 its data type forbids. */
7061 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7062 tree type, int named)
7064 if (! TARGET_SH5 && mode == VOIDmode)
7065 return GEN_INT (ca->renesas_abi ? 1 : 0);
7067 if (! TARGET_SH5
7068 && PASS_IN_REG_P (*ca, mode, type)
7069 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
7071 int regno;
7073 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
7074 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
7076 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
7077 gen_rtx_REG (SFmode,
7078 BASE_ARG_REG (mode)
7079 + (ROUND_REG (*ca, mode) ^ 1)),
7080 const0_rtx);
7081 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
7082 gen_rtx_REG (SFmode,
7083 BASE_ARG_REG (mode)
7084 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
7085 GEN_INT (4));
7086 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
7089 /* If the alignment of a DF value causes an SF register to be
7090 skipped, we will use that skipped register for the next SF
7091 value. */
7092 if ((TARGET_HITACHI || ca->renesas_abi)
7093 && ca->free_single_fp_reg
7094 && mode == SFmode)
7095 return gen_rtx_REG (mode, ca->free_single_fp_reg);
7097 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
7098 ^ (mode == SFmode && TARGET_SH4
7099 && TARGET_LITTLE_ENDIAN != 0
7100 && ! TARGET_HITACHI && ! ca->renesas_abi);
7101 return gen_rtx_REG (mode, regno);
7105 if (TARGET_SH5)
7107 if (mode == VOIDmode && TARGET_SHCOMPACT)
7108 return GEN_INT (ca->call_cookie);
7110 /* The following test assumes unnamed arguments are promoted to
7111 DFmode. */
7112 if (mode == SFmode && ca->free_single_fp_reg)
7113 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
7115 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
7116 && (named || ! ca->prototype_p)
7117 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
7119 if (! ca->prototype_p && TARGET_SHMEDIA)
7120 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
7122 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
7123 FIRST_FP_PARM_REG
7124 + ca->arg_count[(int) SH_ARG_FLOAT]);
7127 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
7128 && (! TARGET_SHCOMPACT
7129 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
7130 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
7131 type, named))))
7133 return gen_rtx_REG (mode, (FIRST_PARM_REG
7134 + ca->arg_count[(int) SH_ARG_INT]));
7137 return 0;
7140 return 0;
7143 /* Update the data in CUM to advance over an argument
7144 of mode MODE and data type TYPE.
7145 (TYPE is null for libcalls where that information may not be
7146 available.) */
7148 void
7149 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7150 tree type, int named)
7152 if (ca->force_mem)
7153 ca->force_mem = 0;
7154 else if (TARGET_SH5)
7156 tree type2 = (ca->byref && type
7157 ? TREE_TYPE (type)
7158 : type);
7159 enum machine_mode mode2 = (ca->byref && type
7160 ? TYPE_MODE (type2)
7161 : mode);
7162 int dwords = ((ca->byref
7163 ? ca->byref
7164 : mode2 == BLKmode
7165 ? int_size_in_bytes (type2)
7166 : GET_MODE_SIZE (mode2)) + 7) / 8;
7167 int numregs = MIN (dwords, NPARM_REGS (SImode)
7168 - ca->arg_count[(int) SH_ARG_INT]);
7170 if (numregs)
7172 ca->arg_count[(int) SH_ARG_INT] += numregs;
7173 if (TARGET_SHCOMPACT
7174 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
7176 ca->call_cookie
7177 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7178 - numregs, 1);
7179 /* N.B. We want this also for outgoing. */
7180 ca->stack_regs += numregs;
7182 else if (ca->byref)
7184 if (! ca->outgoing)
7185 ca->stack_regs += numregs;
7186 ca->byref_regs += numregs;
7187 ca->byref = 0;
7189 ca->call_cookie
7190 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7191 - numregs, 2);
7192 while (--numregs);
7193 ca->call_cookie
7194 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7195 - 1, 1);
7197 else if (dwords > numregs)
7199 int pushregs = numregs;
7201 if (TARGET_SHCOMPACT)
7202 ca->stack_regs += numregs;
7203 while (pushregs < NPARM_REGS (SImode) - 1
7204 && (CALL_COOKIE_INT_REG_GET
7205 (ca->call_cookie,
7206 NPARM_REGS (SImode) - pushregs)
7207 == 1))
7209 ca->call_cookie
7210 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7211 - pushregs, 1);
7212 pushregs++;
7214 if (numregs == NPARM_REGS (SImode))
7215 ca->call_cookie
7216 |= CALL_COOKIE_INT_REG (0, 1)
7217 | CALL_COOKIE_STACKSEQ (numregs - 1);
7218 else
7219 ca->call_cookie
7220 |= CALL_COOKIE_STACKSEQ (numregs);
7223 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
7224 && (named || ! ca->prototype_p))
7226 if (mode2 == SFmode && ca->free_single_fp_reg)
7227 ca->free_single_fp_reg = 0;
7228 else if (ca->arg_count[(int) SH_ARG_FLOAT]
7229 < NPARM_REGS (SFmode))
7231 int numfpregs
7232 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
7233 NPARM_REGS (SFmode)
7234 - ca->arg_count[(int) SH_ARG_FLOAT]);
7236 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
7238 if (TARGET_SHCOMPACT && ! ca->prototype_p)
7240 if (ca->outgoing && numregs > 0)
7243 ca->call_cookie
7244 |= (CALL_COOKIE_INT_REG
7245 (ca->arg_count[(int) SH_ARG_INT]
7246 - numregs + ((numfpregs - 2) / 2),
7247 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
7248 - numfpregs) / 2));
7250 while (numfpregs -= 2);
7252 else if (mode2 == SFmode && (named)
7253 && (ca->arg_count[(int) SH_ARG_FLOAT]
7254 < NPARM_REGS (SFmode)))
7255 ca->free_single_fp_reg
7256 = FIRST_FP_PARM_REG - numfpregs
7257 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
7260 return;
7263 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
7265 /* Note that we've used the skipped register. */
7266 if (mode == SFmode && ca->free_single_fp_reg)
7268 ca->free_single_fp_reg = 0;
7269 return;
7271 /* When we have a DF after an SF, there's an SF register that gets
7272 skipped in order to align the DF value. We note this skipped
7273 register, because the next SF value will use it, and not the
7274 SF that follows the DF. */
7275 if (mode == DFmode
7276 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
7278 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
7279 + BASE_ARG_REG (mode));
7283 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
7284 || PASS_IN_REG_P (*ca, mode, type))
7285 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
7286 = (ROUND_REG (*ca, mode)
7287 + (mode == BLKmode
7288 ? ROUND_ADVANCE (int_size_in_bytes (type))
7289 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
7292 /* The Renesas calling convention doesn't quite fit into this scheme since
7293 the structure return address is passed like an invisible argument, but
7294 one that is always passed in memory. */
7295 static rtx
7296 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
7298 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7299 return 0;
7300 return gen_rtx_REG (Pmode, 2);
7303 /* Worker function for TARGET_RETURN_IN_MEMORY. */
7305 static bool
7306 sh_return_in_memory (tree type, tree fndecl)
7308 if (TARGET_SH5)
7310 if (TYPE_MODE (type) == BLKmode)
7311 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
7312 else
7313 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
7315 else
7317 return (TYPE_MODE (type) == BLKmode
7318 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7319 && TREE_CODE (type) == RECORD_TYPE));
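/* Example (illustrative): on SH5 a 12-byte structure is returned in
   memory (its size exceeds 8) while an 8-byte one comes back in
   registers; on the other targets any BLKmode value, and under the
   Renesas ABI any RECORD_TYPE, is returned in memory.  */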
7323 /* We actually emit the code in sh_expand_prologue. We used to use
7324 a static variable to flag that we need to emit this code, but that
7325 doesn't work when inlining, when functions are deferred and then emitted
7326 later. Fortunately, we already have two flags that are part of struct
7327 function that tell if a function uses varargs or stdarg. */
7328 static void
7329 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
7330 enum machine_mode mode,
7331 tree type,
7332 int *pretend_arg_size,
7333 int second_time ATTRIBUTE_UNUSED)
7335 gcc_assert (current_function_stdarg);
7336 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7338 int named_parm_regs, anon_parm_regs;
7340 named_parm_regs = (ROUND_REG (*ca, mode)
7341 + (mode == BLKmode
7342 ? ROUND_ADVANCE (int_size_in_bytes (type))
7343 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
7344 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
7345 if (anon_parm_regs > 0)
7346 *pretend_arg_size = anon_parm_regs * 4;
7350 static bool
7351 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
7353 return TARGET_SH5;
7356 static bool
7357 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
7359 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
7363 /* Define the offset between two registers, one to be eliminated, and
7364 the other its replacement, at the start of a routine. */
7367 initial_elimination_offset (int from, int to)
7369 int regs_saved;
7370 int regs_saved_rounding = 0;
7371 int total_saved_regs_space;
7372 int total_auto_space;
7373 int save_flags = target_flags;
7374 int copy_flags;
7375 HARD_REG_SET live_regs_mask;
7377 shmedia_space_reserved_for_target_registers = false;
7378 regs_saved = calc_live_regs (&live_regs_mask);
7379 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
7381 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
7383 shmedia_space_reserved_for_target_registers = true;
7384 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
7387 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
7388 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7389 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
7391 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
7392 copy_flags = target_flags;
7393 target_flags = save_flags;
7395 total_saved_regs_space = regs_saved + regs_saved_rounding;
7397 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7398 return total_saved_regs_space + total_auto_space
7399 + current_function_args_info.byref_regs * 8;
7401 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7402 return total_saved_regs_space + total_auto_space
7403 + current_function_args_info.byref_regs * 8;
7405 /* Initial gap between fp and sp is 0. */
7406 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7407 return 0;
7409 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7410 return rounded_frame_size (0);
7412 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7413 return rounded_frame_size (0);
7415 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
7416 && (to == HARD_FRAME_POINTER_REGNUM
7417 || to == STACK_POINTER_REGNUM));
7418 if (TARGET_SH5)
7420 int n = total_saved_regs_space;
7421 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7422 save_schedule schedule;
7423 save_entry *entry;
7425 n += total_auto_space;
7427 /* If it wasn't saved, there's not much we can do. */
7428 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7429 return n;
7431 target_flags = copy_flags;
7433 sh5_schedule_saves (&live_regs_mask, &schedule, n);
7434 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7435 if (entry->reg == pr_reg)
7437 target_flags = save_flags;
7438 return entry->offset;
7440 gcc_unreachable ();
7442 else
7443 return total_auto_space;
7446 /* Handle machine-specific pragmas to be semi-compatible with the Renesas
7447 compiler. */
7449 void
7450 sh_pr_interrupt (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7452 pragma_interrupt = 1;
7455 void
7456 sh_pr_trapa (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7458 pragma_interrupt = pragma_trapa = 1;
7461 void
7462 sh_pr_nosave_low_regs (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7464 pragma_nosave_low_regs = 1;
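/* Usage sketch (illustrative): a function declared after
   #pragma interrupt (or #pragma trapa, which also sets pragma_trapa)
   picks up the interrupt_handler attribute via sh_insert_attributes
   below, e.g.

     #pragma interrupt
     void isr (void);

   The pragma flags, like pragma_nosave_low_regs, stay in effect until
   sh_output_function_epilogue clears them.  */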
7467 /* Generate an 'interrupt_handler' attribute for decls. */
7469 static void
7470 sh_insert_attributes (tree node, tree *attributes)
7472 if (! pragma_interrupt
7473 || TREE_CODE (node) != FUNCTION_DECL)
7474 return;
7476 /* We are only interested in declarations. */
7477 if (!DECL_P (node))
7478 return;
7480 /* Add an 'interrupt_handler' attribute. */
7481 * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
7483 return;
7486 /* Supported attributes:
7488 interrupt_handler -- specifies this function is an interrupt handler.
7490 sp_switch -- specifies an alternate stack for an interrupt handler
7491 to run on.
7493 trap_exit -- use a trapa to exit an interrupt function instead of
7494 an rte instruction.
7496 renesas -- use Renesas calling/layout conventions (functions and
7497 structures).
7501 const struct attribute_spec sh_attribute_table[] =
7503 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
7504 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7505 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
7506 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
7507 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
7508 #ifdef SYMBIAN
7509 /* Symbian support adds two new attributes:
7510 dllexport - for exporting a function/variable that will live in a dll
7511 dllimport - for importing a function/variable from a dll
7513 Microsoft allows multiple declspecs in one __declspec, separating
7514 them with spaces. We do NOT support this. Instead, use __declspec
7515 multiple times. */
7516 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7517 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7518 #endif
7519 { NULL, 0, 0, false, false, false, NULL }
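/* Usage sketch (illustrative): the attributes above are attached to
   function declarations, e.g.

     void isr (void) __attribute__ ((interrupt_handler));
     int f (int x) __attribute__ ((renesas));

   sp_switch takes a string constant naming the alternate stack to run on
   and trap_exit an integer constant trap number; as the handlers below
   show, both are only accepted once #pragma interrupt is in effect.  */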
7522 /* Handle an "interrupt_handler" attribute; arguments as in
7523 struct attribute_spec.handler. */
7524 static tree
7525 sh_handle_interrupt_handler_attribute (tree *node, tree name,
7526 tree args ATTRIBUTE_UNUSED,
7527 int flags ATTRIBUTE_UNUSED,
7528 bool *no_add_attrs)
7530 if (TREE_CODE (*node) != FUNCTION_DECL)
7532 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7533 IDENTIFIER_POINTER (name));
7534 *no_add_attrs = true;
7536 else if (TARGET_SHCOMPACT)
7538 error ("attribute interrupt_handler is not compatible with -m5-compact");
7539 *no_add_attrs = true;
7542 return NULL_TREE;
7545 /* Handle an "sp_switch" attribute; arguments as in
7546 struct attribute_spec.handler. */
7547 static tree
7548 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
7549 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7551 if (TREE_CODE (*node) != FUNCTION_DECL)
7553 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7554 IDENTIFIER_POINTER (name));
7555 *no_add_attrs = true;
7557 else if (!pragma_interrupt)
7559 /* The sp_switch attribute only has meaning for interrupt functions. */
7560 warning (OPT_Wattributes, "%qs attribute only applies to "
7561 "interrupt functions", IDENTIFIER_POINTER (name));
7562 *no_add_attrs = true;
7564 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
7566 /* The argument must be a constant string. */
7567 warning (OPT_Wattributes, "%qs attribute argument not a string constant",
7568 IDENTIFIER_POINTER (name));
7569 *no_add_attrs = true;
7571 else
7573 const char *s = ggc_strdup (TREE_STRING_POINTER (TREE_VALUE (args)));
7574 sp_switch = gen_rtx_SYMBOL_REF (VOIDmode, s);
7577 return NULL_TREE;
7580 /* Handle a "trap_exit" attribute; arguments as in
7581 struct attribute_spec.handler. */
7582 static tree
7583 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
7584 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7586 if (TREE_CODE (*node) != FUNCTION_DECL)
7588 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7589 IDENTIFIER_POINTER (name));
7590 *no_add_attrs = true;
7592 else if (!pragma_interrupt)
7594 /* The trap_exit attribute only has meaning for interrupt functions. */
7595 warning (OPT_Wattributes, "%qs attribute only applies to "
7596 "interrupt functions", IDENTIFIER_POINTER (name));
7597 *no_add_attrs = true;
7599 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
7601 /* The argument must be a constant integer. */
7602 warning (OPT_Wattributes, "%qs attribute argument not an "
7603 "integer constant", IDENTIFIER_POINTER (name));
7604 *no_add_attrs = true;
7606 else
7608 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
7611 return NULL_TREE;
7614 static tree
7615 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
7616 tree name ATTRIBUTE_UNUSED,
7617 tree args ATTRIBUTE_UNUSED,
7618 int flags ATTRIBUTE_UNUSED,
7619 bool *no_add_attrs ATTRIBUTE_UNUSED)
7621 return NULL_TREE;
7624 /* True if __attribute__((renesas)) or -mrenesas. */
7626 sh_attr_renesas_p (tree td)
7628 if (TARGET_HITACHI)
7629 return 1;
7630 if (td == 0)
7631 return 0;
7632 if (DECL_P (td))
7633 td = TREE_TYPE (td);
7634 if (td == error_mark_node)
7635 return 0;
7636 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
7637 != NULL_TREE);
7640 /* True if __attribute__((renesas)) or -mrenesas, for the current
7641 function. */
7643 sh_cfun_attr_renesas_p (void)
7645 return sh_attr_renesas_p (current_function_decl);
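/* A usage sketch for the renesas attribute (the declaration is
   illustrative): it is a type attribute, so it can be attached to an
   individual function declaration to select the Renesas/Hitachi calling
   convention for calls to that function even when -mrenesas is not in
   effect, e.g.

     int lib_func (int, int) __attribute__ ((renesas));

   sh_attr_renesas_p above then returns nonzero for lib_func's type.  */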
7649 sh_cfun_interrupt_handler_p (void)
7651 return (lookup_attribute ("interrupt_handler",
7652 DECL_ATTRIBUTES (current_function_decl))
7653 != NULL_TREE);
7656 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
7658 static const char *
7659 sh_check_pch_target_flags (int old_flags)
7661 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
7662 | MASK_SH_E | MASK_HARD_SH4
7663 | MASK_FPU_SINGLE | MASK_SH4))
7664 return _("created and used with different architectures / ABIs");
7665 if ((old_flags ^ target_flags) & MASK_HITACHI)
7666 return _("created and used with different ABIs");
7667 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
7668 return _("created and used with different endianness");
7669 return NULL;
7672 /* Predicates used by the templates. */
7674 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
7675 Used only in general_movsrc_operand. */
7678 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7680 switch (REGNO (op))
7682 case PR_REG:
7683 case MACL_REG:
7684 case MACH_REG:
7685 return 1;
7687 return 0;
7690 /* Nonzero if OP is a floating point value with value 0.0. */
7693 fp_zero_operand (rtx op)
7695 REAL_VALUE_TYPE r;
7697 if (GET_MODE (op) != SFmode)
7698 return 0;
7700 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7701 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
7704 /* Nonzero if OP is a floating point value with value 1.0. */
7707 fp_one_operand (rtx op)
7709 REAL_VALUE_TYPE r;
7711 if (GET_MODE (op) != SFmode)
7712 return 0;
7714 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7715 return REAL_VALUES_EQUAL (r, dconst1);
7718 /* For -m4 and -m4-single-only, mode switching is used. If we are
7719 compiling without -mfmovd, movsf_ie isn't taken into account for
7720 mode switching. We could check in machine_dependent_reorg for
7721 cases where we know we are in single precision mode, but there is
7722 no interface to find that out during reload, so we must avoid
7723 choosing an fldi alternative during reload and thus failing to
7724 allocate a scratch register for the constant loading. */
7726 fldi_ok (void)
7728 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
7732 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7734 enum rtx_code code = GET_CODE (op);
7735 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
7738 /* Return the TLS type for TLS symbols, 0 otherwise. */
7740 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7742 if (GET_CODE (op) != SYMBOL_REF)
7743 return 0;
7744 return SYMBOL_REF_TLS_MODEL (op);
7747 /* Return the destination address of a branch. */
7749 static int
7750 branch_dest (rtx branch)
7752 rtx dest = SET_SRC (PATTERN (branch));
7753 int dest_uid;
7755 if (GET_CODE (dest) == IF_THEN_ELSE)
7756 dest = XEXP (dest, 1);
7757 dest = XEXP (dest, 0);
7758 dest_uid = INSN_UID (dest);
7759 return INSN_ADDRESSES (dest_uid);
7762 /* Return nonzero if REG is not used after INSN.
7763 We assume REG is a reload reg, and therefore does
7764 not live past labels. It may live past calls or jumps though. */
7766 reg_unused_after (rtx reg, rtx insn)
7768 enum rtx_code code;
7769 rtx set;
7771 /* If the reg is set by this instruction, then it is safe for our
7772 case. Disregard the case where this is a store to memory, since
7773 we are checking a register used in the store address. */
7774 set = single_set (insn);
7775 if (set && GET_CODE (SET_DEST (set)) != MEM
7776 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7777 return 1;
7779 while ((insn = NEXT_INSN (insn)))
7781 rtx set;
7782 if (!INSN_P (insn))
7783 continue;
7785 code = GET_CODE (insn);
7787 #if 0
7788 /* If this is a label that existed before reload, then the register
7789 is dead here. However, if this is a label added by reorg, then
7790 the register may still be live here. We can't tell the difference,
7791 so we just ignore labels completely. */
7792 if (code == CODE_LABEL)
7793 return 1;
7794 /* else */
7795 #endif
7797 if (code == JUMP_INSN)
7798 return 0;
7800 /* If this is a sequence, we must handle them all at once.
7801 We could have for instance a call that sets the target register,
7802 and an insn in a delay slot that uses the register. In this case,
7803 we must return 0. */
7804 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
7806 int i;
7807 int retval = 0;
7809 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
7811 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
7812 rtx set = single_set (this_insn);
7814 if (GET_CODE (this_insn) == CALL_INSN)
7815 code = CALL_INSN;
7816 else if (GET_CODE (this_insn) == JUMP_INSN)
7818 if (INSN_ANNULLED_BRANCH_P (this_insn))
7819 return 0;
7820 code = JUMP_INSN;
7823 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7824 return 0;
7825 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7827 if (GET_CODE (SET_DEST (set)) != MEM)
7828 retval = 1;
7829 else
7830 return 0;
7832 if (set == 0
7833 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
7834 return 0;
7836 if (retval == 1)
7837 return 1;
7838 else if (code == JUMP_INSN)
7839 return 0;
7842 set = single_set (insn);
7843 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7844 return 0;
7845 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7846 return GET_CODE (SET_DEST (set)) != MEM;
7847 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
7848 return 0;
7850 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
7851 return 1;
7853 return 1;
7856 #include "ggc.h"
7858 static GTY(()) rtx fpscr_rtx;
7860 get_fpscr_rtx (void)
7862 if (! fpscr_rtx)
7864 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
7865 REG_USERVAR_P (fpscr_rtx) = 1;
7866 mark_user_reg (fpscr_rtx);
7868 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
7869 mark_user_reg (fpscr_rtx);
7870 return fpscr_rtx;
7873 static GTY(()) tree fpscr_values;
7875 static void
7876 emit_fpu_switch (rtx scratch, int index)
7878 rtx dst, src;
7880 if (fpscr_values == NULL)
7882 tree t;
7884 t = build_index_type (integer_one_node);
7885 t = build_array_type (integer_type_node, t);
7886 t = build_decl (VAR_DECL, get_identifier ("__fpscr_values"), t);
7887 DECL_ARTIFICIAL (t) = 1;
7888 DECL_IGNORED_P (t) = 1;
7889 DECL_EXTERNAL (t) = 1;
7890 TREE_STATIC (t) = 1;
7891 TREE_PUBLIC (t) = 1;
7892 TREE_USED (t) = 1;
7894 fpscr_values = t;
7897 src = DECL_RTL (fpscr_values);
7898 if (no_new_pseudos)
7900 emit_move_insn (scratch, XEXP (src, 0));
7901 if (index != 0)
7902 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
7903 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
7905 else
7906 src = adjust_address (src, PSImode, index * 4);
7908 dst = get_fpscr_rtx ();
7909 emit_move_insn (dst, src);
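/* For reference, the declaration that emit_fpu_switch builds above
   corresponds roughly to this C-level external (a sketch; the array is
   expected to be provided by the SH runtime support, with its two entries
   holding the FPSCR settings for the two precision modes):

     extern int __fpscr_values[2];

   The function then loads FPSCR (in PSImode) from __fpscr_values[index],
   using SCRATCH to form the address when no new pseudos are allowed.  */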
7912 void
7913 emit_sf_insn (rtx pat)
7915 emit_insn (pat);
7918 void
7919 emit_df_insn (rtx pat)
7921 emit_insn (pat);
7924 void
7925 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
7927 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7930 void
7931 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
7933 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
7934 get_fpscr_rtx ()));
7937 void
7938 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
7940 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7943 void
7944 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
7946 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
7947 get_fpscr_rtx ()));
7950 /* ??? gcc does flow analysis strictly after common subexpression
7951 elimination. As a result, common subexpression elimination fails
7952 when there are some intervening statements setting the same register.
7953 If we did nothing about this, this would hurt the precision switching
7954 for SH4 badly. There is some cse after reload, but it is unable to
7955 undo the extra register pressure from the unused instructions, and
7956 it cannot remove auto-increment loads.
7958 A C code example that shows this flow/cse weakness for (at least) SH
7959 and sparc (as of gcc ss-970706) is this:
7961 double
7962 f(double a)
7964 double d;
7965 d = 0.1;
7966 a += d;
7967 d = 1.1;
7968 d = 0.1;
7969 a *= d;
7970 return a;
7973 So we add another pass before common subexpression elimination, to
7974 remove assignments that are dead due to a following assignment in the
7975 same basic block. */
7977 static void
7978 mark_use (rtx x, rtx *reg_set_block)
7980 enum rtx_code code;
7982 if (! x)
7983 return;
7984 code = GET_CODE (x);
7985 switch (code)
7987 case REG:
7989 int regno = REGNO (x);
7990 int nregs = (regno < FIRST_PSEUDO_REGISTER
7991 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
7992 : 1);
7995 reg_set_block[regno + nregs - 1] = 0;
7997 while (--nregs);
7998 break;
8000 case SET:
8002 rtx dest = SET_DEST (x);
8004 if (GET_CODE (dest) == SUBREG)
8005 dest = SUBREG_REG (dest);
8006 if (GET_CODE (dest) != REG)
8007 mark_use (dest, reg_set_block);
8008 mark_use (SET_SRC (x), reg_set_block);
8009 break;
8011 case CLOBBER:
8012 break;
8013 default:
8015 const char *fmt = GET_RTX_FORMAT (code);
8016 int i, j;
8017 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8019 if (fmt[i] == 'e')
8020 mark_use (XEXP (x, i), reg_set_block);
8021 else if (fmt[i] == 'E')
8022 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8023 mark_use (XVECEXP (x, i, j), reg_set_block);
8025 break;
8030 static rtx get_free_reg (HARD_REG_SET);
8032 /* This function returns a register to use for loading the address from
8033 which the fpscr is loaded. Currently it always returns r1 or r7, but when we are
8034 able to use pseudo registers after combine, or have a better mechanism
8035 for choosing a register, it should be done here. */
8036 /* REGS_LIVE is the liveness information for the point for which we
8037 need this allocation. In some bare-bones exit blocks, r1 is live at the
8038 start. We can even have all of r0..r3 being live:
8039 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8040 The INSN before which new insns are placed will clobber the register
8041 we return. If a basic block consists only of setting the return value
8042 register to a pseudo and using that register, the return value is not
8043 live before or after this block, yet we'll insert our insns right in
8044 the middle. */
8046 static rtx
8047 get_free_reg (HARD_REG_SET regs_live)
8049 if (! TEST_HARD_REG_BIT (regs_live, 1))
8050 return gen_rtx_REG (Pmode, 1);
8052 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8053 there shouldn't be anything but a jump before the function end. */
8054 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
8055 return gen_rtx_REG (Pmode, 7);
8058 /* This function will set the fpscr from memory.
8059 MODE is the mode we are setting it to. */
8060 void
8061 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8063 enum attr_fp_mode fp_mode = mode;
8064 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
8065 rtx addr_reg = get_free_reg (regs_live);
8067 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
8070 /* Is the given character a logical line separator for the assembler? */
8071 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8072 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
8073 #endif
8076 sh_insn_length_adjustment (rtx insn)
8078 /* Instructions with unfilled delay slots take up an extra two bytes for
8079 the nop in the delay slot. */
8080 if (((GET_CODE (insn) == INSN
8081 && GET_CODE (PATTERN (insn)) != USE
8082 && GET_CODE (PATTERN (insn)) != CLOBBER)
8083 || GET_CODE (insn) == CALL_INSN
8084 || (GET_CODE (insn) == JUMP_INSN
8085 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8086 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8087 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8088 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8089 return 2;
8091 /* SH2e has a bug that prevents the use of annulled branches, so if
8092 the delay slot is not filled, we'll have to put a NOP in it. */
8093 if (sh_cpu == CPU_SH2E
8094 && GET_CODE (insn) == JUMP_INSN
8095 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8096 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8097 && get_attr_type (insn) == TYPE_CBRANCH
8098 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8099 return 2;
8101 /* sh-dsp parallel processing insns take four bytes instead of two. */
8103 if (GET_CODE (insn) == INSN)
8105 int sum = 0;
8106 rtx body = PATTERN (insn);
8107 const char *template;
8108 char c;
8109 int maybe_label = 1;
8111 if (GET_CODE (body) == ASM_INPUT)
8112 template = XSTR (body, 0);
8113 else if (asm_noperands (body) >= 0)
8114 template
8115 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
8116 else
8117 return 0;
8120 int ppi_adjust = 0;
8123 c = *template++;
8124 while (c == ' ' || c == '\t');
8125 /* all sh-dsp parallel-processing insns start with p.
8126 The only non-ppi sh insn starting with p is pref.
8127 The only ppi starting with pr is prnd. */
8128 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
8129 ppi_adjust = 2;
8130 /* The repeat pseudo-insn expands to three insns, a total of
8131 six bytes in size. */
8132 else if ((c == 'r' || c == 'R')
8133 && ! strncasecmp ("epeat", template, 5))
8134 ppi_adjust = 4;
8135 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
8137 /* If this is a label, it is obviously not a ppi insn. */
8138 if (c == ':' && maybe_label)
8140 ppi_adjust = 0;
8141 break;
8143 else if (c == '\'' || c == '"')
8144 maybe_label = 0;
8145 c = *template++;
8147 sum += ppi_adjust;
8148 maybe_label = c != ':';
8150 while (c);
8151 return sum;
8153 return 0;
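/* An illustration of the template scan above (the asm bodies are made-up
   examples, not taken from any header): for

     asm ("padd x0,y0,a0");

   the first non-blank character is 'p' and the rest does not begin with
   "re", so ppi_adjust adds 2 bytes for the 32-bit DSP insn, whereas

     asm ("pref @r0");

   matches the "re" test and keeps the normal 2-byte length.  */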
8156 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8157 isn't protected by a PIC unspec. */
8159 nonpic_symbol_mentioned_p (rtx x)
8161 register const char *fmt;
8162 register int i;
8164 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8165 || GET_CODE (x) == PC)
8166 return 1;
8168 /* We don't want to look into the possible MEM location of a
8169 CONST_DOUBLE, since we're not going to use it, in general. */
8170 if (GET_CODE (x) == CONST_DOUBLE)
8171 return 0;
8173 if (GET_CODE (x) == UNSPEC
8174 && (XINT (x, 1) == UNSPEC_PIC
8175 || XINT (x, 1) == UNSPEC_GOT
8176 || XINT (x, 1) == UNSPEC_GOTOFF
8177 || XINT (x, 1) == UNSPEC_GOTPLT
8178 || XINT (x, 1) == UNSPEC_GOTTPOFF
8179 || XINT (x, 1) == UNSPEC_DTPOFF
8180 || XINT (x, 1) == UNSPEC_PLT))
8181 return 0;
8183 fmt = GET_RTX_FORMAT (GET_CODE (x));
8184 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8186 if (fmt[i] == 'E')
8188 register int j;
8190 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8191 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8192 return 1;
8194 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8195 return 1;
8198 return 0;
8201 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8202 @GOTOFF in `reg'. */
8204 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8205 rtx reg)
8207 if (tls_symbolic_operand (orig, Pmode))
8208 return orig;
8210 if (GET_CODE (orig) == LABEL_REF
8211 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8213 if (reg == 0)
8214 reg = gen_reg_rtx (Pmode);
8216 emit_insn (gen_symGOTOFF2reg (reg, orig));
8217 return reg;
8219 else if (GET_CODE (orig) == SYMBOL_REF)
8221 if (reg == 0)
8222 reg = gen_reg_rtx (Pmode);
8224 emit_insn (gen_symGOT2reg (reg, orig));
8225 return reg;
8227 return orig;
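/* A worked example of the transformation above (symbol names are
   illustrative): when compiling with -fpic,

     static int s;    is SYMBOL_REF_LOCAL_P, so its address is computed
                      GOT-relative via symGOTOFF2reg (@GOTOFF, no GOT
                      entry needed), while
     extern int g;    is not known to be local, so its address is loaded
                      from the GOT via symGOT2reg (@GOT).

   TLS symbols are returned unchanged and handled by the TLS expanders.  */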
8230 /* Mark the use of a constant in the literal table. If the constant
8231 has multiple labels, make it unique. */
8232 static rtx
8233 mark_constant_pool_use (rtx x)
8235 rtx insn, lab, pattern;
8237 if (x == NULL)
8238 return x;
8240 switch (GET_CODE (x))
8242 case LABEL_REF:
8243 x = XEXP (x, 0);
8244 case CODE_LABEL:
8245 break;
8246 default:
8247 return x;
8250 /* Get the first label in the list of labels for the same constant
8251 and delete the other labels in the list. */
8252 lab = x;
8253 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8255 if (GET_CODE (insn) != CODE_LABEL
8256 || LABEL_REFS (insn) != NEXT_INSN (insn))
8257 break;
8258 lab = insn;
8261 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8262 INSN_DELETED_P (insn) = 1;
8264 /* Mark constants in a window. */
8265 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8267 if (GET_CODE (insn) != INSN)
8268 continue;
8270 pattern = PATTERN (insn);
8271 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8272 continue;
8274 switch (XINT (pattern, 1))
8276 case UNSPECV_CONST2:
8277 case UNSPECV_CONST4:
8278 case UNSPECV_CONST8:
8279 XVECEXP (pattern, 0, 1) = const1_rtx;
8280 break;
8281 case UNSPECV_WINDOW_END:
8282 if (XVECEXP (pattern, 0, 0) == x)
8283 return lab;
8284 break;
8285 case UNSPECV_CONST_END:
8286 return lab;
8287 default:
8288 break;
8292 return lab;
8295 /* Return true if it's possible to redirect BRANCH1 to the destination
8296 of an unconditional jump BRANCH2. We only want to do this if the
8297 resulting branch will have a short displacement. */
8299 sh_can_redirect_branch (rtx branch1, rtx branch2)
8301 if (flag_expensive_optimizations && simplejump_p (branch2))
8303 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8304 rtx insn;
8305 int distance;
8307 for (distance = 0, insn = NEXT_INSN (branch1);
8308 insn && distance < 256;
8309 insn = PREV_INSN (insn))
8311 if (insn == dest)
8312 return 1;
8313 else
8314 distance += get_attr_length (insn);
8316 for (distance = 0, insn = NEXT_INSN (branch1);
8317 insn && distance < 256;
8318 insn = NEXT_INSN (insn))
8320 if (insn == dest)
8321 return 1;
8322 else
8323 distance += get_attr_length (insn);
8326 return 0;
8329 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8331 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8332 unsigned int new_reg)
8334 /* Interrupt functions can only use registers that have already been
8335 saved by the prologue, even if they would normally be
8336 call-clobbered. */
8338 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
8339 return 0;
8341 return 1;
8344 /* Function to update the integer COST
8345 based on the relationship between INSN that is dependent on
8346 DEP_INSN through the dependence LINK. The default is to make no
8347 adjustment to COST. This can be used for example to specify to
8348 the scheduler that an output- or anti-dependence does not incur
8349 the same cost as a data-dependence. The return value should be
8350 the new value for COST. */
8351 static int
8352 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8354 rtx reg, use_pat;
8356 if (TARGET_SHMEDIA)
8358 /* On SHmedia, if the dependence is an anti-dependence or
8359 output-dependence, there is no cost. */
8360 if (REG_NOTE_KIND (link) != 0)
8362 /* However, dependencies between target register loads and
8363 uses of the register in a subsequent block that are separated
8364 by a conditional branch are not modelled - we have to make do with
8365 the anti-dependency between the target register load and the
8366 conditional branch that ends the current block. */
8367 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8368 && GET_CODE (PATTERN (dep_insn)) == SET
8369 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
8370 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
8371 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
8373 int orig_cost = cost;
8374 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
8375 rtx target = ((! note
8376 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
8377 ? insn : JUMP_LABEL (insn));
8378 /* On the likely path, the branch costs 1, on the unlikely path,
8379 it costs 3. */
8380 cost--;
8382 target = next_active_insn (target);
8383 while (target && ! flow_dependent_p (target, dep_insn)
8384 && --cost > 0);
8385 /* If two branches are executed in immediate succession, with the
8386 first branch properly predicted, this causes a stall at the
8387 second branch, hence we won't need the target for the
8388 second branch for two cycles after the launch of the first
8389 branch. */
8390 if (cost > orig_cost - 2)
8391 cost = orig_cost - 2;
8393 else
8394 cost = 0;
8397 else if (get_attr_is_mac_media (insn)
8398 && get_attr_is_mac_media (dep_insn))
8399 cost = 1;
8401 else if (! reload_completed
8402 && GET_CODE (PATTERN (insn)) == SET
8403 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
8404 && GET_CODE (PATTERN (dep_insn)) == SET
8405 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
8406 && cost < 4)
8407 cost = 4;
8408 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
8409 that is needed at the target. */
8410 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
8411 && ! flow_dependent_p (insn, dep_insn))
8412 cost--;
8414 else if (REG_NOTE_KIND (link) == 0)
8416 enum attr_type dep_type, type;
8418 if (recog_memoized (insn) < 0
8419 || recog_memoized (dep_insn) < 0)
8420 return cost;
8422 dep_type = get_attr_type (dep_insn);
8423 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
8424 cost--;
8425 if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
8426 && (type = get_attr_type (insn)) != TYPE_CALL
8427 && type != TYPE_SFUNC)
8428 cost--;
8430 /* The only input for a call that is timing-critical is the
8431 function's address. */
8432 if (GET_CODE(insn) == CALL_INSN)
8434 rtx call = PATTERN (insn);
8436 if (GET_CODE (call) == PARALLEL)
8437 call = XVECEXP (call, 0 ,0);
8438 if (GET_CODE (call) == SET)
8439 call = SET_SRC (call);
8440 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
8441 /* sibcalli_thunk uses a symbol_ref in an unspec. */
8442 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
8443 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
8444 cost = 0;
8446 /* Likewise, the most timing-critical input for an sfunc call
8447 is the function address. However, sfuncs typically start
8448 using their arguments pretty quickly.
8449 Assume a four cycle delay before they are needed. */
8450 /* All sfunc calls are parallels with at least four components.
8451 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8452 else if (GET_CODE (PATTERN (insn)) == PARALLEL
8453 && XVECLEN (PATTERN (insn), 0) >= 4
8454 && (reg = sfunc_uses_reg (insn)))
8456 if (! reg_set_p (reg, dep_insn))
8457 cost -= 4;
8459 /* When the preceding instruction loads the shift amount of
8460 the following SHAD/SHLD, the latency of the load is increased
8461 by 1 cycle. */
8462 else if (TARGET_SH4
8463 && get_attr_type (insn) == TYPE_DYN_SHIFT
8464 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
8465 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
8466 XEXP (SET_SRC (single_set (insn)),
8467 1)))
8468 cost++;
8469 /* When an LS group instruction with a latency of less than
8470 3 cycles is followed by a double-precision floating-point
8471 instruction, FIPR, or FTRV, the latency of the first
8472 instruction is increased to 3 cycles. */
8473 else if (cost < 3
8474 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
8475 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
8476 cost = 3;
8477 /* The lsw register of a double-precision computation is ready one
8478 cycle earlier. */
8479 else if (reload_completed
8480 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
8481 && (use_pat = single_set (insn))
8482 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
8483 SET_SRC (use_pat)))
8484 cost -= 1;
8486 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
8487 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
8488 cost -= 1;
8490 /* An anti-dependence penalty of two applies if the first insn is a double
8491 precision fadd / fsub / fmul. */
8492 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8493 && recog_memoized (dep_insn) >= 0
8494 && get_attr_type (dep_insn) == TYPE_DFP_ARITH
8495 /* A lot of alleged anti-flow dependences are fake,
8496 so check this one is real. */
8497 && flow_dependent_p (dep_insn, insn))
8498 cost = 2;
8501 return cost;
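/* An example of the SH4 dynamic-shift rule above (register choice is
   arbitrary): in the sequence

     mov.l  @r4,r1
     shad   r1,r2

   the shift count of the SHAD is the value just loaded, so the effective
   load latency is one cycle longer; the code above models this by
   incrementing COST for that dependence.  */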
8504 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
8505 if DEP_INSN is anti-flow dependent on INSN. */
8506 static int
8507 flow_dependent_p (rtx insn, rtx dep_insn)
8509 rtx tmp = PATTERN (insn);
8511 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
8512 return tmp == NULL_RTX;
8515 /* A helper function for flow_dependent_p called through note_stores. */
8516 static void
8517 flow_dependent_p_1 (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
8519 rtx * pinsn = (rtx *) data;
8521 if (*pinsn && reg_referenced_p (x, *pinsn))
8522 *pinsn = NULL_RTX;
8525 /* For use by sh_allocate_initial_value. Note that sh.md contains some
8526 'special function' patterns (type sfunc) that clobber pr, but that
8527 do not look like function calls to leaf_function_p. Hence we must
8528 do this extra check. */
8529 static int
8530 sh_pr_n_sets (void)
8532 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
8535 /* Return where to allocate pseudo for a given hard register initial
8536 value. */
8537 static rtx
8538 sh_allocate_initial_value (rtx hard_reg)
8540 rtx x;
8542 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
8544 if (current_function_is_leaf
8545 && ! sh_pr_n_sets ()
8546 && ! (TARGET_SHCOMPACT
8547 && ((current_function_args_info.call_cookie
8548 & ~ CALL_COOKIE_RET_TRAMP (1))
8549 || current_function_has_nonlocal_label)))
8550 x = hard_reg;
8551 else
8552 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
8554 else
8555 x = NULL_RTX;
8557 return x;
8560 /* Return 2 (dual issue) for superscalar processors such as the SH4,
8561 and 1 otherwise. To be used by the DFA pipeline description. */
8562 static int
8563 sh_issue_rate (void)
8565 if (TARGET_SUPERSCALAR)
8566 return 2;
8567 else
8568 return 1;
8571 /* Functions for ready queue reordering for sched1. */
8573 /* Get weight for mode for a set x. */
8574 static short
8575 find_set_regmode_weight (rtx x, enum machine_mode mode)
8577 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
8578 return 1;
8579 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
8581 if (GET_CODE (SET_DEST (x)) == REG)
8583 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
8584 return 1;
8585 else
8586 return 0;
8588 return 1;
8590 return 0;
8593 /* Get regmode weight for insn. */
8594 static short
8595 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
8597 short reg_weight = 0;
8598 rtx x;
8600 /* Increment weight for each register born here. */
8601 x = PATTERN (insn);
8602 reg_weight += find_set_regmode_weight (x, mode);
8603 if (GET_CODE (x) == PARALLEL)
8605 int j;
8606 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
8608 x = XVECEXP (PATTERN (insn), 0, j);
8609 reg_weight += find_set_regmode_weight (x, mode);
8612 /* Decrement weight for each register that dies here. */
8613 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
8615 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
8617 rtx note = XEXP (x, 0);
8618 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
8619 reg_weight--;
8622 return reg_weight;
8625 /* Calculate regmode weights for all insns of a basic block. */
8626 static void
8627 find_regmode_weight (int b, enum machine_mode mode)
8629 rtx insn, next_tail, head, tail;
8631 get_block_head_tail (b, &head, &tail);
8632 next_tail = NEXT_INSN (tail);
8634 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
8636 /* Handle register life information. */
8637 if (!INSN_P (insn))
8638 continue;
8640 if (mode == SFmode)
8641 INSN_REGMODE_WEIGHT (insn, mode) =
8642 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
8643 else if (mode == SImode)
8644 INSN_REGMODE_WEIGHT (insn, mode) =
8645 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
8649 /* Comparison function for ready queue sorting. */
8650 static int
8651 rank_for_reorder (const void *x, const void *y)
8653 rtx tmp = *(const rtx *) y;
8654 rtx tmp2 = *(const rtx *) x;
8656 /* The insn in a schedule group should be issued first. */
8657 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
8658 return SCHED_GROUP_P (tmp2) ? 1 : -1;
8660 /* If insns are equally good, sort by INSN_LUID (original insn order); this
8661 minimizes instruction movement, thus minimizing sched's effect on
8662 register pressure. */
8663 return INSN_LUID (tmp) - INSN_LUID (tmp2);
8666 /* Resort the array A, in which only the element at index N may be out of order. */
8667 static void
8668 swap_reorder (rtx *a, int n)
8670 rtx insn = a[n - 1];
8671 int i = n - 2;
8673 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
8675 a[i + 1] = a[i];
8676 i -= 1;
8678 a[i + 1] = insn;
8681 #define SCHED_REORDER(READY, N_READY) \
8682 do \
8684 if ((N_READY) == 2) \
8685 swap_reorder (READY, N_READY); \
8686 else if ((N_READY) > 2) \
8687 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
8689 while (0)
8691 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
8692 macro. */
8693 static void
8694 ready_reorder (rtx *ready, int nready)
8696 SCHED_REORDER (ready, nready);
8699 /* Calculate regmode weights for all insns of all basic blocks. */
8700 static void
8701 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
8702 int verbose ATTRIBUTE_UNUSED,
8703 int old_max_uid)
8705 basic_block b;
8707 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
8708 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
8710 FOR_EACH_BB_REVERSE (b)
8712 find_regmode_weight (b->index, SImode);
8713 find_regmode_weight (b->index, SFmode);
8716 CURR_REGMODE_PRESSURE (SImode) = 0;
8717 CURR_REGMODE_PRESSURE (SFmode) = 0;
8721 /* Cleanup. */
8722 static void
8723 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
8724 int verbose ATTRIBUTE_UNUSED)
8726 if (regmode_weight[0])
8728 free (regmode_weight[0]);
8729 regmode_weight[0] = NULL;
8731 if (regmode_weight[1])
8733 free (regmode_weight[1]);
8734 regmode_weight[1] = NULL;
8738 /* Cache the can_issue_more so that we can return it from reorder2. Also,
8739 keep count of register pressures on SImode and SFmode. */
8740 static int
8741 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
8742 int sched_verbose ATTRIBUTE_UNUSED,
8743 rtx insn,
8744 int can_issue_more)
8746 if (GET_CODE (PATTERN (insn)) != USE
8747 && GET_CODE (PATTERN (insn)) != CLOBBER)
8748 cached_can_issue_more = can_issue_more - 1;
8749 else
8750 cached_can_issue_more = can_issue_more;
8752 if (reload_completed)
8753 return cached_can_issue_more;
8755 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
8756 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
8758 return cached_can_issue_more;
8761 static void
8762 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
8763 int verbose ATTRIBUTE_UNUSED,
8764 int veclen ATTRIBUTE_UNUSED)
8766 CURR_REGMODE_PRESSURE (SImode) = 0;
8767 CURR_REGMODE_PRESSURE (SFmode) = 0;
8770 /* Some magic numbers. */
8771 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8772 functions that already have high pressure on r0. */
8773 #define R0_MAX_LIFE_REGIONS 2
8774 #define R0_MAX_LIVE_LENGTH 12
8775 /* Register Pressure thresholds for SImode and SFmode registers. */
8776 #define SIMODE_MAX_WEIGHT 5
8777 #define SFMODE_MAX_WEIGHT 10
8779 /* Return true if the pressure is high for MODE. */
8780 static short
8781 high_pressure (enum machine_mode mode)
8783 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8784 functions that already have high pressure on r0. */
8785 if ((REG_N_SETS (0) - REG_N_DEATHS (0)) >= R0_MAX_LIFE_REGIONS
8786 && REG_LIVE_LENGTH (0) >= R0_MAX_LIVE_LENGTH)
8787 return 1;
8789 if (mode == SFmode)
8790 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
8791 else
8792 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
8795 /* Reorder ready queue if register pressure is high. */
8796 static int
8797 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
8798 int sched_verbose ATTRIBUTE_UNUSED,
8799 rtx *ready,
8800 int *n_readyp,
8801 int clock_var ATTRIBUTE_UNUSED)
8803 if (reload_completed)
8804 return sh_issue_rate ();
8806 if (high_pressure (SFmode) || high_pressure (SImode))
8808 ready_reorder (ready, *n_readyp);
8811 return sh_issue_rate ();
8814 /* Skip cycles if the current register pressure is high. */
8815 static int
8816 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
8817 int sched_verbose ATTRIBUTE_UNUSED,
8818 rtx *ready ATTRIBUTE_UNUSED,
8819 int *n_readyp ATTRIBUTE_UNUSED,
8820 int clock_var ATTRIBUTE_UNUSED)
8822 if (reload_completed)
8823 return cached_can_issue_more;
8825 if (high_pressure(SFmode) || high_pressure (SImode))
8826 skip_cycles = 1;
8828 return cached_can_issue_more;
8831 /* Skip cycles without sorting the ready queue. This will move insns from
8832 Q->R. If this is the last cycle we are skipping, allow sorting of the ready
8833 queue by sh_reorder. */
8835 /* Generally, skipping this many cycles is sufficient for all insns to move
8836 from Q -> R. */
8837 #define MAX_SKIPS 8
8839 static int
8840 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
8841 int sched_verbose ATTRIBUTE_UNUSED,
8842 rtx insn ATTRIBUTE_UNUSED,
8843 int last_clock_var,
8844 int clock_var,
8845 int *sort_p)
8847 if (reload_completed)
8848 return 0;
8850 if (skip_cycles)
8852 if ((clock_var - last_clock_var) < MAX_SKIPS)
8854 *sort_p = 0;
8855 return 1;
8857 /* If this is the last cycle we are skipping, allow reordering of R. */
8858 if ((clock_var - last_clock_var) == MAX_SKIPS)
8860 *sort_p = 1;
8861 return 1;
8865 skip_cycles = 0;
8867 return 0;
8870 /* SHmedia requires registers for branches, so we can't generate new
8871 branches past reload. */
8872 static bool
8873 sh_cannot_modify_jumps_p (void)
8875 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
8878 static int
8879 sh_target_reg_class (void)
8881 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
8884 static bool
8885 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
8887 HARD_REG_SET dummy;
8888 rtx insn;
8890 if (! shmedia_space_reserved_for_target_registers)
8891 return 0;
8892 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
8893 return 0;
8894 if (calc_live_regs (&dummy) >= 6 * 8)
8895 return 1;
8896 /* This is a borderline case. See if we have a nested loop, or a loop
8897 with a call, or with more than 4 labels inside. */
8898 for (insn = get_insns(); insn; insn = NEXT_INSN (insn))
8900 if (GET_CODE (insn) == NOTE
8901 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
8903 int labels = 0;
8907 insn = NEXT_INSN (insn);
8908 if ((GET_CODE (insn) == NOTE
8909 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
8910 || GET_CODE (insn) == CALL_INSN
8911 || (GET_CODE (insn) == CODE_LABEL && ++labels > 4))
8912 return 1;
8914 while (GET_CODE (insn) != NOTE
8915 || NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_END);
8918 return 0;
8921 static bool
8922 sh_ms_bitfield_layout_p (tree record_type ATTRIBUTE_UNUSED)
8924 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
8928 On the SH1..SH4, the trampoline looks like
8929 2 0002 D202 mov.l l2,r2
8930 1 0000 D301 mov.l l1,r3
8931 3 0004 422B jmp @r2
8932 4 0006 0009 nop
8933 5 0008 00000000 l1: .long area
8934 6 000c 00000000 l2: .long function
8936 SH5 (compact) uses r1 instead of r3 for the static chain. */
8939 /* Emit RTL insns to initialize the variable parts of a trampoline.
8940 FNADDR is an RTX for the address of the function's pure code.
8941 CXT is an RTX for the static chain value for the function. */
8943 void
8944 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
8946 rtx tramp_mem = gen_frame_mem (BLKmode, tramp);
8948 if (TARGET_SHMEDIA64)
8950 rtx tramp_templ;
8951 int fixed_len;
8953 rtx movi1 = GEN_INT (0xcc000010);
8954 rtx shori1 = GEN_INT (0xc8000010);
8955 rtx src, dst;
8957 /* The following trampoline works within a +- 128 KB range for cxt:
8958 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
8959 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
8960 gettr tr1,r1; blink tr0,r63 */
8961 /* Address rounding makes it hard to compute the exact bounds of the
8962 offset for this trampoline, but we have a rather generous offset
8963 range, so frame_offset should do fine as an upper bound. */
8964 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
8966 /* ??? could optimize this trampoline initialization
8967 by writing DImode words with two insns each. */
8968 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
8969 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
8970 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
8971 insn = gen_rtx_AND (DImode, insn, mask);
8972 /* Or in ptb/u .,tr1 pattern */
8973 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
8974 insn = force_operand (insn, NULL_RTX);
8975 insn = gen_lowpart (SImode, insn);
8976 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
8977 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
8978 insn = gen_rtx_AND (DImode, insn, mask);
8979 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
8980 insn = gen_lowpart (SImode, insn);
8981 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
8982 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
8983 insn = gen_rtx_AND (DImode, insn, mask);
8984 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
8985 insn = gen_lowpart (SImode, insn);
8986 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
8987 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
8988 insn = gen_rtx_AND (DImode, insn, mask);
8989 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
8990 insn = gen_lowpart (SImode, insn);
8991 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
8992 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
8993 insn = gen_rtx_AND (DImode, insn, mask);
8994 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
8995 insn = gen_lowpart (SImode, insn);
8996 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
8997 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
8998 GEN_INT (0x6bf10600));
8999 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
9000 GEN_INT (0x4415fc10));
9001 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
9002 GEN_INT (0x4401fff0));
9003 emit_insn (gen_ic_invalidate_line (tramp));
9004 return;
9006 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
9007 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
9009 tramp_templ = gen_datalabel_ref (tramp_templ);
9010 dst = tramp_mem;
9011 src = gen_const_mem (BLKmode, tramp_templ);
9012 set_mem_align (dst, 256);
9013 set_mem_align (src, 64);
9014 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
9016 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
9017 emit_move_insn (adjust_address (tramp_mem, Pmode,
9018 fixed_len + GET_MODE_SIZE (Pmode)),
9019 cxt);
9020 emit_insn (gen_ic_invalidate_line (tramp));
9021 return;
9023 else if (TARGET_SHMEDIA)
9025 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
9026 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
9027 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
9028 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
9029 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
9030 rotated 10 right, and the higher 16 bits of every 32 selected. */
9031 rtx movishori
9032 = force_reg (V2HImode, (simplify_gen_subreg
9033 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
9034 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
9035 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
9037 tramp = force_reg (Pmode, tramp);
9038 fnaddr = force_reg (SImode, fnaddr);
9039 cxt = force_reg (SImode, cxt);
9040 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
9041 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
9042 movishori));
9043 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
9044 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9045 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
9046 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
9047 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
9048 gen_rtx_SUBREG (V2HImode, cxt, 0),
9049 movishori));
9050 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
9051 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9052 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
9053 if (TARGET_LITTLE_ENDIAN)
9055 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
9056 emit_insn (gen_mextr4 (quad2, cxtload, blink));
9058 else
9060 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
9061 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
9063 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
9064 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
9065 emit_insn (gen_ic_invalidate_line (tramp));
9066 return;
9068 else if (TARGET_SHCOMPACT)
9070 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
9071 return;
9073 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
9074 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
9075 SImode));
9076 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
9077 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
9078 SImode));
9079 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
9080 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
9081 if (TARGET_HARVARD)
9083 if (TARGET_USERMODE)
9084 emit_library_call (function_symbol (NULL, "__ic_invalidate",
9085 FUNCTION_ORDINARY),
9086 0, VOIDmode, 1, tramp, SImode);
9087 else
9088 emit_insn (gen_ic_invalidate_line (tramp));
9092 /* FIXME: This is overly conservative. A SHcompact function that
9093 receives arguments ``by reference'' will have them stored in its
9094 own stack frame, so it must not pass pointers or references to
9095 these arguments to other functions by means of sibling calls. */
9096 /* If PIC, we cannot make sibling calls to global functions
9097 because the PLT requires r12 to be live. */
9098 static bool
9099 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9101 return (1
9102 && (! TARGET_SHCOMPACT
9103 || current_function_args_info.stack_regs == 0)
9104 && ! sh_cfun_interrupt_handler_p ()
9105 && (! flag_pic
9106 || (decl && ! TREE_PUBLIC (decl))
9107 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
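/* An illustration of the flag_pic restriction above (function names are
   made up): with -fPIC, a tail call to a public function such as

     extern void ext_fn (void);
     void f (void) { ext_fn (); }

   is not turned into a sibling call because the PLT call needs r12 live,
   whereas a call to a local (static or hidden-visibility) function may
   still become a sibcall.  */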
9110 /* Machine specific built-in functions. */
9112 struct builtin_description
9114 const enum insn_code icode;
9115 const char *const name;
9116 int signature;
9119 /* Describe the number and signedness of arguments; arg[0] == result
9120 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
9121 /* 9: 64 bit pointer, 10: 32 bit pointer */
9122 static const char signature_args[][4] =
9124 #define SH_BLTIN_V2SI2 0
9125 { 4, 4 },
9126 #define SH_BLTIN_V4HI2 1
9127 { 4, 4 },
9128 #define SH_BLTIN_V2SI3 2
9129 { 4, 4, 4 },
9130 #define SH_BLTIN_V4HI3 3
9131 { 4, 4, 4 },
9132 #define SH_BLTIN_V8QI3 4
9133 { 4, 4, 4 },
9134 #define SH_BLTIN_MAC_HISI 5
9135 { 1, 4, 4, 1 },
9136 #define SH_BLTIN_SH_HI 6
9137 { 4, 4, 1 },
9138 #define SH_BLTIN_SH_SI 7
9139 { 4, 4, 1 },
9140 #define SH_BLTIN_V4HI2V2SI 8
9141 { 4, 4, 4 },
9142 #define SH_BLTIN_V4HI2V8QI 9
9143 { 4, 4, 4 },
9144 #define SH_BLTIN_SISF 10
9145 { 4, 2 },
9146 #define SH_BLTIN_LDUA_L 11
9147 { 2, 10 },
9148 #define SH_BLTIN_LDUA_Q 12
9149 { 1, 10 },
9150 #define SH_BLTIN_STUA_L 13
9151 { 0, 10, 2 },
9152 #define SH_BLTIN_STUA_Q 14
9153 { 0, 10, 1 },
9154 #define SH_BLTIN_LDUA_L64 15
9155 { 2, 9 },
9156 #define SH_BLTIN_LDUA_Q64 16
9157 { 1, 9 },
9158 #define SH_BLTIN_STUA_L64 17
9159 { 0, 9, 2 },
9160 #define SH_BLTIN_STUA_Q64 18
9161 { 0, 9, 1 },
9162 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
9163 #define SH_BLTIN_2 19
9164 #define SH_BLTIN_SU 19
9165 { 1, 2 },
9166 #define SH_BLTIN_3 20
9167 #define SH_BLTIN_SUS 20
9168 { 2, 2, 1 },
9169 #define SH_BLTIN_PSSV 21
9170 { 0, 8, 2, 2 },
9171 #define SH_BLTIN_XXUU 22
9172 #define SH_BLTIN_UUUU 22
9173 { 1, 1, 1, 1 },
9174 #define SH_BLTIN_PV 23
9175 { 0, 8 },
9177 /* mcmv: operands considered unsigned. */
9178 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9179 /* mperm: control value considered unsigned int. */
9180 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9181 /* mshards_q: returns signed short. */
9182 /* nsb: takes long long arg, returns unsigned char. */
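/* A worked reading of the signature encoding above (entries taken from
   the table, no new builtins implied): SH_BLTIN_MAC_HISI is { 1, 4, 4, 1 },
   i.e. an unsigned result, two don't-care arguments and a final unsigned
   argument; SH_BLTIN_STUA_L is { 0, 10, 2 }, i.e. no result (void), a
   32-bit pointer argument and a signed argument.  */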
9183 static const struct builtin_description bdesc[] =
9185 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
9186 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
9187 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
9188 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
9189 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
9190 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
9191 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
9192 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9193 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
9194 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
9195 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
9196 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
9197 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
9198 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
9199 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
9200 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
9201 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
9202 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
9203 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3 },
9204 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3 },
9205 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3 },
9206 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3 },
9207 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3 },
9208 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3 },
9209 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3 },
9210 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
9211 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
9212 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
9213 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
9214 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
9215 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
9216 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
9217 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
9218 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
9219 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
9220 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
9221 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
9222 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
9223 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
9224 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
9225 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
9226 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
9227 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
9228 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
9229 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
9230 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
9231 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
9232 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
9233 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
9234 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
9235 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
9236 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
9237 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
9238 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
9239 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
9240 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
9241 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
9242 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
9243 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
9244 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
9245 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
9246 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3 },
9247 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2 },
9248 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2 },
9249 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
9250 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9251 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9252 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9253 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9254 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9255 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9256 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9257 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9258 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64 },
9259 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64 },
9260 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64 },
9261 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64 },
9262 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64 },
9263 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64 },
9264 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64 },
9265 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64 },
9266 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
9267 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
9268 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
9271 static void
9272 sh_media_init_builtins (void)
9274 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
9275 const struct builtin_description *d;
9277 memset (shared, 0, sizeof shared);
9278 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
9280 tree type, arg_type = 0;
9281 int signature = d->signature;
9282 int i;
9284 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
9285 type = shared[signature];
9286 else
9288 int has_result = signature_args[signature][0] != 0;
9290 if ((signature_args[signature][1] & 8)
9291 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
9292 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
9293 continue;
9294 if (! TARGET_FPU_ANY
9295 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
9296 continue;
9297 type = void_list_node;
9298 for (i = 3; ; i--)
9300 int arg = signature_args[signature][i];
9301 int opno = i - 1 + has_result;
9303 if (arg & 8)
9304 arg_type = ptr_type_node;
9305 else if (arg)
9306 arg_type = (*lang_hooks.types.type_for_mode)
9307 (insn_data[d->icode].operand[opno].mode,
9308 (arg & 1));
9309 else if (i)
9310 continue;
9311 else
9312 arg_type = void_type_node;
9313 if (i == 0)
9314 break;
9315 type = tree_cons (NULL_TREE, arg_type, type);
9317 type = build_function_type (arg_type, type);
9318 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
9319 shared[signature] = type;
9321 lang_hooks.builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
9322 NULL, NULL_TREE);
9326 /* Implements target hook vector_mode_supported_p. */
9327 bool
9328 sh_vector_mode_supported_p (enum machine_mode mode)
9330 if (TARGET_FPU_ANY
9331 && ((mode == V2SFmode)
9332 || (mode == V4SFmode)
9333 || (mode == V16SFmode)))
9334 return true;
9336 else if (TARGET_SHMEDIA
9337 && ((mode == V8QImode)
9338 || (mode == V2HImode)
9339 || (mode == V4HImode)
9340 || (mode == V2SImode)))
9341 return true;
9343 return false;
9346 /* Implements target hook dwarf_calling_convention. Return an enum
9347 dwarf_calling_convention value. */
9349 sh_dwarf_calling_convention (tree func)
9351 if (sh_attr_renesas_p (func))
9352 return DW_CC_GNU_renesas_sh;
9354 return DW_CC_normal;
9357 static void
9358 sh_init_builtins (void)
9360 if (TARGET_SHMEDIA)
9361 sh_media_init_builtins ();
9364 /* Expand an expression EXP that calls a built-in function,
9365 with result going to TARGET if that's convenient
9366 (and in mode MODE if that's convenient).
9367 SUBTARGET may be used as the target for computing one of EXP's operands.
9368 IGNORE is nonzero if the value is to be ignored. */
9370 static rtx
9371 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9372 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
9374 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
9375 tree arglist = TREE_OPERAND (exp, 1);
9376 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9377 const struct builtin_description *d = &bdesc[fcode];
9378 enum insn_code icode = d->icode;
9379 int signature = d->signature;
9380 enum machine_mode tmode = VOIDmode;
9381 int nop = 0, i;
9382 rtx op[4];
9383 rtx pat = 0;
9385 if (signature_args[signature][0])
9387 if (ignore)
9388 return 0;
9390 tmode = insn_data[icode].operand[0].mode;
9391 if (! target
9392 || GET_MODE (target) != tmode
9393 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9394 target = gen_reg_rtx (tmode);
9395 op[nop++] = target;
9397 else
9398 target = 0;
9400 for (i = 1; i <= 3; i++, nop++)
9402 tree arg;
9403 enum machine_mode opmode, argmode;
9404 tree optype;
9406 if (! signature_args[signature][i])
9407 break;
9408 arg = TREE_VALUE (arglist);
9409 if (arg == error_mark_node)
9410 return const0_rtx;
9411 arglist = TREE_CHAIN (arglist);
9412 if (signature_args[signature][i] & 8)
9414 opmode = ptr_mode;
9415 optype = ptr_type_node;
9417 else
9419 opmode = insn_data[icode].operand[nop].mode;
9420 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
9422 argmode = TYPE_MODE (TREE_TYPE (arg));
9423 if (argmode != opmode)
9424 arg = build1 (NOP_EXPR, optype, arg);
9425 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
9426 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
9427 op[nop] = copy_to_mode_reg (opmode, op[nop]);
9430 switch (nop)
9432 case 1:
9433 pat = (*insn_data[d->icode].genfun) (op[0]);
9434 break;
9435 case 2:
9436 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
9437 break;
9438 case 3:
9439 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
9440 break;
9441 case 4:
9442 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
9443 break;
9444 default:
9445 gcc_unreachable ();
9447 if (! pat)
9448 return 0;
9449 emit_insn (pat);
9450 return target;
9453 void
9454 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
9456 rtx sel0 = const0_rtx;
9457 rtx sel1 = const1_rtx;
9458 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
9459 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
9461 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
9462 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
9465 void
9466 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
9468 rtx sel0 = const0_rtx;
9469 rtx sel1 = const1_rtx;
9470 rtx (*fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
9471 = gen_binary_sf_op;
9472 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
9474 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
9475 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
9478 /* Return the class of registers for which a mode change from FROM to TO
9479 is invalid. */
9480 bool
9481 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9482 enum reg_class class)
9484 /* We want to enable the use of SUBREGs as a means to
9485 VEC_SELECT a single element of a vector. */
9486 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
9487 return (reg_classes_intersect_p (GENERAL_REGS, class));
9489 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
9491 if (TARGET_LITTLE_ENDIAN)
9493 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
9494 return reg_classes_intersect_p (DF_REGS, class);
9496 else
9498 if (GET_MODE_SIZE (from) < 8)
9499 return reg_classes_intersect_p (DF_HI_REGS, class);
9502 return 0;
9506 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
9507 that label is used. */
9509 void
9510 sh_mark_label (rtx address, int nuses)
9512 if (GOTOFF_P (address))
9514 /* Extract the label or symbol. */
9515 address = XEXP (address, 0);
9516 if (GET_CODE (address) == PLUS)
9517 address = XEXP (address, 0);
9518 address = XVECEXP (address, 0, 0);
9520 if (GET_CODE (address) == LABEL_REF
9521 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
9522 LABEL_NUSES (XEXP (address, 0)) += nuses;
9525 /* Compute extra cost of moving data between one register class
9526 and another. */
9528 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
9529 uses this information. Hence, the general register <-> floating point
9530 register information here is not used for SFmode. */
9532 int
9533 sh_register_move_cost (enum machine_mode mode,
9534 enum reg_class srcclass, enum reg_class dstclass)
9536 if (dstclass == T_REGS || dstclass == PR_REGS)
9537 return 10;
9539 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
9540 return 4;
9542 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
9543 && REGCLASS_HAS_FP_REG (srcclass)
9544 && REGCLASS_HAS_FP_REG (dstclass))
9545 return 4;
9547 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
9548 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
9550 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
9551 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
9552 return 9;
9554 if ((REGCLASS_HAS_FP_REG (dstclass)
9555 && REGCLASS_HAS_GENERAL_REG (srcclass))
9556 || (REGCLASS_HAS_GENERAL_REG (dstclass)
9557 && REGCLASS_HAS_FP_REG (srcclass)))
9558 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
9559 * ((GET_MODE_SIZE (mode) + 7) / 8U));
9561 if ((dstclass == FPUL_REGS
9562 && REGCLASS_HAS_GENERAL_REG (srcclass))
9563 || (srcclass == FPUL_REGS
9564 && REGCLASS_HAS_GENERAL_REG (dstclass)))
9565 return 5;
9567 if ((dstclass == FPUL_REGS
9568 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
9569 || (srcclass == FPUL_REGS
9570 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
9571 return 7;
9573 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9574 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9575 return 20;
9577 /* ??? ptabs faults on (value & 0x3) == 0x3 */
9578 if (TARGET_SHMEDIA
9579 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
9581 if (sh_gettrcost >= 0)
9582 return sh_gettrcost;
9583 else if (!TARGET_PT_FIXED)
9584 return 100;
9587 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9588 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9589 return 4;
9591 if (TARGET_SHMEDIA
9592 || (TARGET_FMOVD
9593 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
9594 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
9595 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
9597 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
9600 static rtx emit_load_ptr (rtx, rtx);
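/* Load the pointer-sized value at address ADDR into REG, sign-extending it
   to Pmode when Pmode is wider than ptr_mode.  */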
9602 static rtx
9603 emit_load_ptr (rtx reg, rtx addr)
9605 rtx mem = gen_const_mem (ptr_mode, addr);
9607 if (Pmode != ptr_mode)
9608 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
9609 return emit_move_insn (reg, mem);
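/* Emit RTL for a thunk: add the constant DELTA to the incoming `this'
   pointer and, if VCALL_OFFSET is nonzero, also add the value found at
   *(this + VCALL_OFFSET) after the DELTA adjustment; then tail-call
   FUNCTION.  */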
9612 static void
9613 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
9614 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
9615 tree function)
9617 CUMULATIVE_ARGS cum;
9618 int structure_value_byref = 0;
9619 rtx this, this_value, sibcall, insns, funexp;
9620 tree funtype = TREE_TYPE (function);
9621 int simple_add = CONST_OK_FOR_ADD (delta);
9622 int did_load = 0;
9623 rtx scratch0, scratch1, scratch2;
9624 unsigned i;
9626 reload_completed = 1;
9627 epilogue_completed = 1;
9628 no_new_pseudos = 1;
9629 current_function_uses_only_leaf_regs = 1;
9630 reset_block_changes ();
9632 emit_note (NOTE_INSN_PROLOGUE_END);
9634 /* Find the "this" pointer. We have such a wide range of ABIs for the
9635 SH that it's best to do this completely machine independently.
9636 "this" is passed as first argument, unless a structure return pointer
9637 comes first, in which case "this" comes second. */
9638 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
9639 #ifndef PCC_STATIC_STRUCT_RETURN
9640 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
9641 structure_value_byref = 1;
9642 #endif /* not PCC_STATIC_STRUCT_RETURN */
9643 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
9645 tree ptype = build_pointer_type (TREE_TYPE (funtype));
9647 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
9649 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
9651 /* For SHcompact, we only have r0 for a scratch register: r1 is the
9652 static chain pointer (even if you can't have nested virtual functions
9653 right now, someone might implement them sometime), and the rest of the
9654 registers are used for argument passing, are callee-saved, or reserved. */
9655 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
9656 -ffixed-reg has been used. */
9657 if (! call_used_regs[0] || fixed_regs[0])
9658 error ("r0 needs to be available as a call-clobbered register");
9659 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
9660 if (! TARGET_SH5)
9662 if (call_used_regs[1] && ! fixed_regs[1])
9663 scratch1 = gen_rtx_REG (ptr_mode, 1);
9664 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
9665 to the location where struct values are to be returned. */
9666 if (call_used_regs[3] && ! fixed_regs[3])
9667 scratch2 = gen_rtx_REG (Pmode, 3);
9669 else if (TARGET_SHMEDIA)
9671 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
9672 if (i != REGNO (scratch0) &&
9673 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
9675 scratch1 = gen_rtx_REG (ptr_mode, i);
9676 break;
9678 if (scratch1 == scratch0)
9679 error ("Need a second call-clobbered general purpose register");
9680 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
9681 if (call_used_regs[i] && ! fixed_regs[i])
9683 scratch2 = gen_rtx_REG (Pmode, i);
9684 break;
9686 if (scratch2 == scratch0)
9687 error ("Need a call-clobbered target register");
9690 this_value = plus_constant (this, delta);
9691 if (vcall_offset
9692 && (simple_add || scratch0 != scratch1)
9693 && strict_memory_address_p (ptr_mode, this_value))
9695 emit_load_ptr (scratch0, this_value);
9696 did_load = 1;
9699 if (!delta)
9700 ; /* Do nothing. */
9701 else if (simple_add)
9702 emit_move_insn (this, this_value);
9703 else
9705 emit_move_insn (scratch1, GEN_INT (delta));
9706 emit_insn (gen_add2_insn (this, scratch1));
9709 if (vcall_offset)
9711 rtx offset_addr;
9713 if (!did_load)
9714 emit_load_ptr (scratch0, this);
9716 offset_addr = plus_constant (scratch0, vcall_offset);
9717 if (strict_memory_address_p (ptr_mode, offset_addr))
9718 ; /* Do nothing. */
9719 else if (! TARGET_SH5 && scratch0 != scratch1)
9721 /* scratch0 != scratch1, and we have indexed loads. Get a better
9722 schedule by loading the offset into r1 and using an indexed
9723 load - then the load of r1 can issue before the load from
9724 (this + delta) finishes. */
9725 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9726 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
9728 else if (CONST_OK_FOR_ADD (vcall_offset))
9730 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
9731 offset_addr = scratch0;
9733 else if (scratch0 != scratch1)
9735 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9736 emit_insn (gen_add2_insn (scratch0, scratch1));
9737 offset_addr = scratch0;
9739 else
9740 gcc_unreachable (); /* FIXME */
9741 emit_load_ptr (scratch0, offset_addr);
9743 if (Pmode != ptr_mode)
9744 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
9745 emit_insn (gen_add2_insn (this, scratch0));
9748 /* Generate a tail call to the target function. */
9749 if (! TREE_USED (function))
9751 assemble_external (function);
9752 TREE_USED (function) = 1;
9754 funexp = XEXP (DECL_RTL (function), 0);
9755 /* If the function is overridden, so is the thunk, hence we don't
9756 need GOT addressing even if this is a public symbol. */
9757 #if 0
9758 if (TARGET_SH1 && ! flag_weak)
9759 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
9760 else
9761 #endif
9762 if (TARGET_SH2 && flag_pic)
9764 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
9765 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
9767 else
9769 if (TARGET_SHMEDIA && flag_pic)
9771 funexp = gen_sym2PIC (funexp);
9772 PUT_MODE (funexp, Pmode);
9774 emit_move_insn (scratch2, funexp);
9775 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
9776 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
9778 sibcall = emit_call_insn (sibcall);
9779 SIBLING_CALL_P (sibcall) = 1;
9780 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
9781 emit_barrier ();
9783 /* Run just enough of rest_of_compilation to do scheduling and get
9784 the insns emitted. Note that use_thunk calls
9785 assemble_start_function and assemble_end_function. */
9787 insn_locators_initialize ();
9788 insns = get_insns ();
9790 if (optimize > 0)
9792 /* Initialize the bitmap obstacks. */
9793 bitmap_obstack_initialize (NULL);
9794 bitmap_obstack_initialize (&reg_obstack);
9795 if (! cfun->cfg)
9796 init_flow ();
9797 rtl_register_cfg_hooks ();
9798 init_rtl_bb_info (ENTRY_BLOCK_PTR);
9799 init_rtl_bb_info (EXIT_BLOCK_PTR);
9800 ENTRY_BLOCK_PTR->flags |= BB_RTL;
9801 EXIT_BLOCK_PTR->flags |= BB_RTL;
9802 find_basic_blocks (insns);
9804 if (flag_schedule_insns_after_reload)
9806 life_analysis (dump_file, PROP_FINAL);
9808 split_all_insns (1);
9810 schedule_insns (dump_file);
9812 /* We must split the jmp insn in the PIC case. */
9813 else if (flag_pic)
9814 split_all_insns_noflow ();
9817 sh_reorg ();
9819 if (optimize > 0 && flag_delayed_branch)
9820 dbr_schedule (insns, dump_file);
9822 shorten_branches (insns);
9823 final_start_function (insns, file, 1);
9824 final (insns, file, 1);
9825 final_end_function ();
9827 if (optimize > 0)
9829 /* Release all memory allocated by flow. */
9830 free_basic_block_vars ();
9832 /* Release the bitmap obstacks. */
9833 bitmap_obstack_release (&reg_obstack);
9834 bitmap_obstack_release (NULL);
9837 reload_completed = 0;
9838 epilogue_completed = 0;
9839 no_new_pseudos = 0;
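/* Return an rtx referring to the function named NAME, of kind KIND.  When
   generating PIC code for the non-ordinary kinds, the symbol is loaded
   through the GOT (SFUNC_GOT) or as a GOTOFF value (SFUNC_STATIC); the
   result is placed in TARGET when TARGET is given.  */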
9842 rtx
9843 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
9845 rtx sym;
9847 /* If this is not an ordinary function, the name usually comes from a
9848 string literal or an sprintf buffer. Make sure we use the same
9849 string consistently, so that cse will be able to unify address loads. */
9850 if (kind != FUNCTION_ORDINARY)
9851 name = IDENTIFIER_POINTER (get_identifier (name));
9852 sym = gen_rtx_SYMBOL_REF (Pmode, name);
9853 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
9854 if (flag_pic)
9855 switch (kind)
9857 case FUNCTION_ORDINARY:
9858 break;
9859 case SFUNC_GOT:
9861 rtx reg = target ? target : gen_reg_rtx (Pmode);
9863 emit_insn (gen_symGOT2reg (reg, sym));
9864 sym = reg;
9865 break;
9867 case SFUNC_STATIC:
9869 /* ??? To allow cse to work, we use GOTOFF relocations.
9870 We could add combiner patterns to transform this into
9871 straight pc-relative calls with sym2PIC / bsrf when
9872 label load and function call are still 1:1 and in the
9873 same basic block during combine. */
9874 rtx reg = target ? target : gen_reg_rtx (Pmode);
9876 emit_insn (gen_symGOTOFF2reg (reg, sym));
9877 sym = reg;
9878 break;
9881 if (target && sym != target)
9883 emit_move_insn (target, sym);
9884 return target;
9886 return sym;
9889 /* Find the number of a general purpose register in S. */
9890 static int
9891 scavenge_reg (HARD_REG_SET *s)
9893 int r;
9894 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
9895 if (TEST_HARD_REG_BIT (*s, r))
9896 return r;
9897 return -1;
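/* Return an rtx representing the value the PR (return address) register
   held on entry to the current function.  */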
9900 rtx
9901 sh_get_pr_initial_val (void)
9903 rtx val;
9905 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
9906 PR register on SHcompact, because it might be clobbered by the prologue.
9907 We check first if that is known to be the case. */
9908 if (TARGET_SHCOMPACT
9909 && ((current_function_args_info.call_cookie
9910 & ~ CALL_COOKIE_RET_TRAMP (1))
9911 || current_function_has_nonlocal_label))
9912 return gen_frame_mem (SImode, return_address_pointer_rtx);
9914 /* If we haven't finished rtl generation, there might be a nonlocal label
9915 that we haven't seen yet.
9916 ??? get_hard_reg_initial_val fails if it is called while no_new_pseudos
9917 is set, unless it has been called before for the same register. And even
9918 then, we end up in trouble if we didn't use the register in the same
9919 basic block before. So call get_hard_reg_initial_val now and wrap it
9920 in an unspec if we might need to replace it. */
9921 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
9922 combine can put the pseudo returned by get_hard_reg_initial_val into
9923 instructions that need a general purpose register, which will fail to
9924 be recognized when the pseudo becomes allocated to PR. */
9925 val
9926 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
9927 if (TARGET_SH1)
9928 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
9929 return val;
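/* Try to expand an scc operation CODE comparing the T register
   (sh_compare_op0) against the constant sh_compare_op1, placing the result
   in TARGET.  Return nonzero on success, zero if the caller should fall
   back to the generic sequence.  */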
9932 int
9933 sh_expand_t_scc (enum rtx_code code, rtx target)
9935 rtx result = target;
9936 HOST_WIDE_INT val;
9938 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
9939 || GET_CODE (sh_compare_op1) != CONST_INT)
9940 return 0;
9941 if (GET_CODE (result) != REG)
9942 result = gen_reg_rtx (SImode);
9943 val = INTVAL (sh_compare_op1);
9944 if ((code == EQ && val == 1) || (code == NE && val == 0))
9945 emit_insn (gen_movt (result));
9946 else if ((code == EQ && val == 0) || (code == NE && val == 1))
9948 emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
9949 emit_insn (gen_subc (result, result, result));
9950 emit_insn (gen_addsi3 (result, result, const1_rtx));
9952 else if (code == EQ || code == NE)
9953 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
9954 else
9955 return 0;
9956 if (result != target)
9957 emit_move_insn (target, result);
9958 return 1;
9961 /* INSN is an sfunc; return the rtx that describes the address used. */
9962 static rtx
9963 extract_sfunc_addr (rtx insn)
9965 rtx pattern, part = NULL_RTX;
9966 int len, i;
9968 pattern = PATTERN (insn);
9969 len = XVECLEN (pattern, 0);
9970 for (i = 0; i < len; i++)
9972 part = XVECEXP (pattern, 0, i);
9973 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
9974 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
9975 return XEXP (part, 0);
9977 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
9978 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
9981 /* Verify that the register in use_sfunc_addr still agrees with the address
9982 used in the sfunc. This prevents fill_slots_from_thread from changing
9983 use_sfunc_addr.
9984 INSN is the use_sfunc_addr instruction, and REG is the register it
9985 guards. */
9986 int
9987 check_use_sfunc_addr (rtx insn, rtx reg)
9989 /* Search for the sfunc. It should really come right after INSN. */
9990 while ((insn = NEXT_INSN (insn)))
9992 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
9993 break;
9994 if (! INSN_P (insn))
9995 continue;
9997 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
9998 insn = XVECEXP (PATTERN (insn), 0, 0);
9999 if (GET_CODE (PATTERN (insn)) != PARALLEL
10000 || get_attr_type (insn) != TYPE_SFUNC)
10001 continue;
10002 return rtx_equal_p (extract_sfunc_addr (insn), reg);
10004 gcc_unreachable ();
10007 /* This function returns a constant rtx that represents 2**15 / pi in
10008 SFmode. It's used to scale SFmode angles, in radians, to a
10009 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
10010 maps to 0x10000). */
10012 static GTY(()) rtx sh_fsca_sf2int_rtx;
10014 rtx
10015 sh_fsca_sf2int (void)
10017 if (! sh_fsca_sf2int_rtx)
10019 REAL_VALUE_TYPE rv;
10021 real_from_string (&rv, "10430.378350470453");
10022 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
10025 return sh_fsca_sf2int_rtx;
10028 /* This function returns a constant rtx that represents 2**15 / pi in
10029 DFmode. It's used to scale DFmode angles, in radians, to a
10030 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
10031 maps to 0x10000). */
10033 static GTY(()) rtx sh_fsca_df2int_rtx;
10035 rtx
10036 sh_fsca_df2int (void)
10038 if (! sh_fsca_df2int_rtx)
10040 REAL_VALUE_TYPE rv;
10042 real_from_string (&rv, "10430.378350470453");
10043 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
10046 return sh_fsca_df2int_rtx;
10049 /* This function returns a constant rtx that represents pi / 2**15 in
10050 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
10051 of a full circle back to an SFmode value, i.e., 0x10000 maps to
10052 2*pi. */
10054 static GTY(()) rtx sh_fsca_int2sf_rtx;
10056 rtx
10057 sh_fsca_int2sf (void)
10059 if (! sh_fsca_int2sf_rtx)
10061 REAL_VALUE_TYPE rv;
10063 real_from_string (&rv, "9.587379924285257e-5");
10064 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
10067 return sh_fsca_int2sf_rtx;
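/* As an illustration of the scaling above: an angle of pi/2 radians
   multiplied by 2**15/pi (about 10430.378) gives 16384 = 0x4000, one
   quarter of the 0x10000 full circle; multiplying 0x4000 by pi/2**15
   (about 9.5874e-5) maps it back to pi/2.  */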
10070 /* Initialize the CUMULATIVE_ARGS structure. */
10072 void
10073 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
10074 tree fntype,
10075 rtx libname ATTRIBUTE_UNUSED,
10076 tree fndecl,
10077 signed int n_named_args,
10078 enum machine_mode mode)
10080 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
10081 pcum->free_single_fp_reg = 0;
10082 pcum->stack_regs = 0;
10083 pcum->byref_regs = 0;
10084 pcum->byref = 0;
10085 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
10087 /* XXX - Should we check TARGET_HITACHI here ??? */
10088 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
10090 if (fntype)
10092 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
10093 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
10094 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
10095 pcum->arg_count [(int) SH_ARG_INT]
10096 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
10098 pcum->call_cookie
10099 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10100 && pcum->arg_count [(int) SH_ARG_INT] == 0
10101 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
10102 ? int_size_in_bytes (TREE_TYPE (fntype))
10103 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
10104 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
10105 == FIRST_RET_REG));
10107 else
10109 pcum->arg_count [(int) SH_ARG_INT] = 0;
10110 pcum->prototype_p = FALSE;
10111 if (mode != VOIDmode)
10113 pcum->call_cookie =
10114 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10115 && GET_MODE_SIZE (mode) > 4
10116 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
10118 /* If the default ABI is the Renesas ABI then all library
10119 calls must assume that the library will be using the
10120 Renesas ABI. So if the function would return its result
10121 in memory then we must force the address of this memory
10122 block onto the stack. Ideally we would like to call
10123 targetm.calls.return_in_memory() here but we do not have
10124 the TYPE or the FNDECL available so we synthesize the
10125 contents of that function as best we can. */
10126 pcum->force_mem =
10127 (TARGET_DEFAULT & MASK_HITACHI)
10128 && (mode == BLKmode
10129 || (GET_MODE_SIZE (mode) > 4
10130 && !(mode == DFmode
10131 && TARGET_FPU_DOUBLE)));
10133 else
10135 pcum->call_cookie = 0;
10136 pcum->force_mem = FALSE;
10141 /* Determine if two hard register sets intersect.
10142 Return 1 if they do. */
10144 static int
10145 hard_regs_intersect_p (HARD_REG_SET *a, HARD_REG_SET *b)
10147 HARD_REG_SET c;
10148 COPY_HARD_REG_SET (c, *a);
10149 AND_HARD_REG_SET (c, *b);
10150 GO_IF_HARD_REG_SUBSET (c, reg_class_contents[(int) NO_REGS], lose);
10151 return 1;
10152 lose:
10153 return 0;
10156 #ifdef TARGET_ADJUST_UNROLL_MAX
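/* Target hook used to adjust MAX_UNROLLED_INSNS for LOOP: on SHmedia,
   throttle unrolling when the unrolled body would need more branch-target
   registers than the hardware provides.  */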
10157 static int
10158 sh_adjust_unroll_max (struct loop * loop, int insn_count,
10159 int max_unrolled_insns, int strength_reduce_p,
10160 int unroll_type)
10162 /* This doesn't work in 4.0 because the old unroller & loop.h are gone. */
10163 if (TARGET_ADJUST_UNROLL && TARGET_SHMEDIA)
10165 /* Throttle back loop unrolling so that the costs of using more
10166 targets than the eight target registers we have don't outweigh
10167 the benefits of unrolling. */
10168 rtx insn;
10169 int n_labels = 0, n_calls = 0, n_exit_dest = 0, n_inner_loops = -1;
10170 int n_barriers = 0;
10171 rtx dest;
10172 int i;
10173 rtx exit_dest[8];
10174 int threshold;
10175 int unroll_benefit = 0, mem_latency = 0;
10176 int base_cost, best_cost, cost;
10177 int factor, best_factor;
10178 int n_dest;
10179 unsigned max_iterations = 32767;
10180 int n_iterations;
10181 int need_precond = 0, precond = 0;
10182 basic_block * bbs = get_loop_body (loop);
10183 struct niter_desc *desc;
10185 /* Assume that all labels inside the loop are used from inside the
10186 loop. If the loop has multiple entry points, it is unlikely to
10187 be unrolled anyway.
10188 Also assume that all calls are to different functions. That is
10189 somewhat pessimistic, but if you have lots of calls, unrolling the
10190 loop is not likely to gain you much in the first place. */
10191 i = loop->num_nodes - 1;
10192 for (insn = BB_HEAD (bbs[i]); ; )
10194 if (GET_CODE (insn) == CODE_LABEL)
10195 n_labels++;
10196 else if (GET_CODE (insn) == CALL_INSN)
10197 n_calls++;
10198 else if (GET_CODE (insn) == NOTE
10199 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
10200 n_inner_loops++;
10201 else if (GET_CODE (insn) == BARRIER)
10202 n_barriers++;
10203 if (insn != BB_END (bbs[i]))
10204 insn = NEXT_INSN (insn);
10205 else if (--i >= 0)
10206 insn = BB_HEAD (bbs[i]);
10207 else
10208 break;
10210 free (bbs);
10211 /* One label for the loop top is normal, and it won't be duplicated by
10212 unrolling. */
10213 if (n_labels <= 1)
10214 return max_unrolled_insns;
10215 if (n_inner_loops > 0)
10216 return 0;
10217 for (dest = loop->exit_labels; dest && n_exit_dest < 8;
10218 dest = LABEL_NEXTREF (dest))
10220 for (i = n_exit_dest - 1;
10221 i >= 0 && XEXP (dest, 0) != XEXP (exit_dest[i], 0); i--);
10222 if (i < 0)
10223 exit_dest[n_exit_dest++] = dest;
10225 /* If the loop top and call and exit destinations are enough to fill up
10226 the target registers, we're unlikely to do any more damage by
10227 unrolling. */
10228 if (n_calls + n_exit_dest >= 7)
10229 return max_unrolled_insns;
10231 /* ??? In the new loop unroller, there is no longer any strength
10232 reduction information available. Thus, when it comes to unrolling,
10233 we know the cost of everything, but we know the value of nothing. */
10234 #if 0
10235 if (strength_reduce_p
10236 && (unroll_type == LPT_UNROLL_RUNTIME
10237 || unroll_type == LPT_UNROLL_CONSTANT
10238 || unroll_type == LPT_PEEL_COMPLETELY))
10240 struct loop_ivs *ivs = LOOP_IVS (loop);
10241 struct iv_class *bl;
10243 /* We'll save one compare-and-branch in each loop body copy
10244 but the last one. */
10245 unroll_benefit = 1;
10246 /* Assess the benefit of removing biv & giv updates. */
10247 for (bl = ivs->list; bl; bl = bl->next)
10249 rtx increment = biv_total_increment (bl);
10250 struct induction *v;
10252 if (increment && GET_CODE (increment) == CONST_INT)
10254 unroll_benefit++;
10255 for (v = bl->giv; v; v = v->next_iv)
10257 if (! v->ignore && v->same == 0
10258 && GET_CODE (v->mult_val) == CONST_INT)
10259 unroll_benefit++;
10260 /* If this giv uses an array, try to determine
10261 a maximum iteration count from the size of the
10262 array. This need not be correct all the time,
10263 but should not be too far off the mark too often. */
10264 while (v->giv_type == DEST_ADDR)
10266 rtx mem = PATTERN (v->insn);
10267 tree mem_expr, type, size_tree;
10269 if (GET_CODE (SET_SRC (mem)) == MEM)
10270 mem = SET_SRC (mem);
10271 else if (GET_CODE (SET_DEST (mem)) == MEM)
10272 mem = SET_DEST (mem);
10273 else
10274 break;
10275 mem_expr = MEM_EXPR (mem);
10276 if (! mem_expr)
10277 break;
10278 type = TREE_TYPE (mem_expr);
10279 if (TREE_CODE (type) != ARRAY_TYPE
10280 || ! TYPE_SIZE (type) || ! TYPE_SIZE_UNIT (type))
10281 break;
10282 size_tree = fold (build (TRUNC_DIV_EXPR,
10283 bitsizetype,
10284 TYPE_SIZE (type),
10285 TYPE_SIZE_UNIT (type)));
10286 if (TREE_CODE (size_tree) == INTEGER_CST
10287 && ! TREE_INT_CST_HIGH (size_tree)
10288 && TREE_INT_CST_LOW (size_tree) < max_iterations)
10289 max_iterations = TREE_INT_CST_LOW (size_tree);
10290 break;
10296 #else /* 0 */
10297 /* Assume there is at least some benefit. */
10298 unroll_benefit = 1;
10299 #endif /* 0 */
10301 desc = get_simple_loop_desc (loop);
10302 n_iterations = desc->const_iter ? desc->niter : 0;
10303 max_iterations
10304 = max_iterations < desc->niter_max ? max_iterations : desc->niter_max;
10306 if (! strength_reduce_p || ! n_iterations)
10307 need_precond = 1;
10308 if (! n_iterations)
10310 n_iterations
10311 = max_iterations < 3 ? max_iterations : max_iterations * 3 / 4;
10312 if (! n_iterations)
10313 return 0;
10315 #if 0 /* ??? See above - missing induction variable information. */
10316 while (unroll_benefit > 1) /* no loop */
10318 /* We include the benefit of biv/ giv updates. Check if some or
10319 all of these updates are likely to fit into a scheduling
10320 bubble of a load.
10321 We check for the following case:
10322 - All the insns leading to the first JUMP_INSN are in a strict
10323 dependency chain.
10324 - there is at least one memory reference in them.
10326 When we find such a pattern, we assume that we can hide as many
10327 updates as the total of the load latency is, if we have an
10328 unroll factor of at least two. We might or might not also do
10329 this without unrolling, so rather than considering this as an
10330 extra unroll benefit, discount it in the unroll benefits of unroll
10331 factors higher than two. */
10333 rtx set, last_set;
10335 insn = next_active_insn (loop->start);
10336 last_set = single_set (insn);
10337 if (! last_set)
10338 break;
10339 if (GET_CODE (SET_SRC (last_set)) == MEM)
10340 mem_latency += 2;
10341 for (insn = NEXT_INSN (insn); insn != end; insn = NEXT_INSN (insn))
10343 if (! INSN_P (insn))
10344 continue;
10345 if (GET_CODE (insn) == JUMP_INSN)
10346 break;
10347 if (! reg_referenced_p (SET_DEST (last_set), PATTERN (insn)))
10349 /* Check if this is a to-be-reduced giv insn. */
10350 struct loop_ivs *ivs = LOOP_IVS (loop);
10351 struct iv_class *bl;
10352 struct induction *v;
10353 for (bl = ivs->list; bl; bl = bl->next)
10355 if (bl->biv->insn == insn)
10356 goto is_biv;
10357 for (v = bl->giv; v; v = v->next_iv)
10358 if (v->insn == insn)
10359 goto is_giv;
10361 mem_latency--;
10362 is_biv:
10363 is_giv:
10364 continue;
10366 set = single_set (insn);
10367 if (! set)
10368 continue;
10369 if (GET_CODE (SET_SRC (set)) == MEM)
10370 mem_latency += 2;
10371 last_set = set;
10373 if (mem_latency < 0)
10374 mem_latency = 0;
10375 else if (mem_latency > unroll_benefit - 1)
10376 mem_latency = unroll_benefit - 1;
10377 break;
10379 #endif /* 0 */
10380 if (n_labels + (unroll_benefit + n_labels * 8) / n_iterations
10381 <= unroll_benefit)
10382 return max_unrolled_insns;
10384 n_dest = n_labels + n_calls + n_exit_dest;
10385 base_cost = n_dest <= 8 ? 0 : n_dest - 7;
10386 best_cost = 0;
10387 best_factor = 1;
10388 if (n_barriers * 2 > n_labels - 1)
10389 n_barriers = (n_labels - 1) / 2;
10390 for (factor = 2; factor <= 8; factor++)
10392 /* Bump up preconditioning cost for each power of two. */
10393 if (! (factor & (factor-1)))
10394 precond += 4;
10395 /* When preconditioning, only powers of two will be considered. */
10396 else if (need_precond)
10397 continue;
10398 n_dest = ((unroll_type != LPT_PEEL_COMPLETELY)
10399 + (n_labels - 1) * factor + n_calls + n_exit_dest
10400 - (n_barriers * factor >> 1)
10401 + need_precond);
10402 cost
10403 = ((n_dest <= 8 ? 0 : n_dest - 7)
10404 - base_cost * factor
10405 - ((factor > 2 ? unroll_benefit - mem_latency : unroll_benefit)
10406 * (factor - (unroll_type != LPT_PEEL_COMPLETELY)))
10407 + ((unroll_benefit + 1 + (n_labels - 1) * factor)
10408 / n_iterations));
10409 if (need_precond)
10410 cost += (precond + unroll_benefit * factor / 2) / n_iterations;
10411 if (cost < best_cost)
10413 best_cost = cost;
10414 best_factor = factor;
10417 threshold = best_factor * insn_count;
10418 if (max_unrolled_insns > threshold)
10419 max_unrolled_insns = threshold;
10421 return max_unrolled_insns;
10423 #endif /* TARGET_ADJUST_UNROLL_MAX */
10425 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
10426 not descend into CONST_DOUBLE expressions when replacing.
10428 Note that copying is not done so X must not be shared unless all copies
10429 are to be modified.
10431 This is like replace_rtx, except that we operate on N_REPLACEMENTS
10432 replacements simultaneously - FROM(n) is replacements[n*2] and to(n) is
10433 replacements[n*2+1] - and that we take mode changes into account.
10435 If a replacement is ambiguous, return NULL_RTX.
10437 If MODIFY is zero, don't modify any rtl in place,
10438 just return zero or nonzero for failure / success. */
10440 rtx
10441 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
10443 int i, j;
10444 const char *fmt;
10446 /* The following prevents infinite loops when we change a MEM inside a
10447 CONST_DOUBLE into the same CONST_DOUBLE. */
10448 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
10449 return x;
10451 for (i = n_replacements - 1; i >= 0 ; i--)
10452 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
10453 return replacements[i*2+1];
10455 /* Allow this function to make replacements in EXPR_LISTs. */
10456 if (x == 0)
10457 return 0;
10459 if (GET_CODE (x) == SUBREG)
10461 rtx new = replace_n_hard_rtx (SUBREG_REG (x), replacements,
10462 n_replacements, modify);
10464 if (GET_CODE (new) == CONST_INT)
10466 x = simplify_subreg (GET_MODE (x), new,
10467 GET_MODE (SUBREG_REG (x)),
10468 SUBREG_BYTE (x));
10469 if (! x)
10470 abort ();
10472 else if (modify)
10473 SUBREG_REG (x) = new;
10475 return x;
10477 else if (GET_CODE (x) == REG)
10479 unsigned regno = REGNO (x);
10480 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
10481 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
10482 rtx result = NULL_RTX;
10484 for (i = n_replacements - 1; i >= 0; i--)
10486 rtx from = replacements[i*2];
10487 rtx to = replacements[i*2+1];
10488 unsigned from_regno, from_nregs, to_regno, new_regno;
10490 if (GET_CODE (from) != REG)
10491 continue;
10492 from_regno = REGNO (from);
10493 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
10494 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
10495 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
10497 if (regno < from_regno
10498 || regno + nregs > from_regno + from_nregs
10499 || GET_CODE (to) != REG
10500 || result)
10501 return NULL_RTX;
10502 to_regno = REGNO (to);
10503 if (to_regno < FIRST_PSEUDO_REGISTER)
10505 new_regno = regno + to_regno - from_regno;
10506 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
10507 != nregs)
10508 return NULL_RTX;
10509 result = gen_rtx_REG (GET_MODE (x), new_regno);
10511 else if (GET_MODE (x) <= GET_MODE (to))
10512 result = gen_lowpart_common (GET_MODE (x), to);
10513 else
10514 result = gen_lowpart_SUBREG (GET_MODE (x), to);
10517 return result ? result : x;
10519 else if (GET_CODE (x) == ZERO_EXTEND)
10521 rtx new = replace_n_hard_rtx (XEXP (x, 0), replacements,
10522 n_replacements, modify);
10524 if (GET_CODE (new) == CONST_INT)
10526 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
10527 new, GET_MODE (XEXP (x, 0)));
10528 if (! x)
10529 abort ();
10531 else if (modify)
10532 XEXP (x, 0) = new;
10534 return x;
10537 fmt = GET_RTX_FORMAT (GET_CODE (x));
10538 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10540 rtx new;
10542 if (fmt[i] == 'e')
10544 new = replace_n_hard_rtx (XEXP (x, i), replacements,
10545 n_replacements, modify);
10546 if (!new)
10547 return NULL_RTX;
10548 if (modify)
10549 XEXP (x, i) = new;
10551 else if (fmt[i] == 'E')
10552 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10554 new = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
10555 n_replacements, modify);
10556 if (!new)
10557 return NULL_RTX;
10558 if (modify)
10559 XVECEXP (x, i, j) = new;
10563 return x;
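/* Return an rtx that converts X to MODE, normally via TRUNCATE.  If X is
   itself a zero or sign extension, strip or reuse the extension where
   possible; a sign extension is required when NEED_SIGN_EXT is nonzero.  */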
10566 rtx
10567 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
10569 enum rtx_code code = TRUNCATE;
10571 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
10573 rtx inner = XEXP (x, 0);
10574 enum machine_mode inner_mode = GET_MODE (inner);
10576 if (inner_mode == mode)
10577 return inner;
10578 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
10579 x = inner;
10580 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
10581 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
10583 code = GET_CODE (x);
10584 x = inner;
10587 return gen_rtx_fmt_e (code, mode, x);
10590 /* Called via for_each_rtx after reload, to clean up truncates of
10591 registers that span multiple actual hard registers. */
10592 int
10593 shmedia_cleanup_truncate (rtx *p, void *n_changes)
10595 rtx x = *p, reg;
10597 if (GET_CODE (x) != TRUNCATE)
10598 return 0;
10599 reg = XEXP (x, 0);
10600 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && GET_CODE (reg) == REG)
10602 enum machine_mode reg_mode = GET_MODE (reg);
10603 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
10604 subreg_lowpart_offset (DImode, reg_mode));
10605 *(int*) n_changes += 1;
10606 return -1;
10608 return 0;
10611 /* Load and store depend on the highpart of the address. However,
10612 set_attr_alternative does not give well-defined results before reload,
10613 so we must look at the rtl ourselves to see if any of the feeding
10614 registers is used in a memref. */
10616 /* Called by sh_contains_memref_p via for_each_rtx. */
10617 static int
10618 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
10620 return (GET_CODE (*loc) == MEM);
10623 /* Return nonzero iff INSN contains a MEM. */
10624 int
10625 sh_contains_memref_p (rtx insn)
10627 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
10630 /* FNADDR is the MEM expression from a call expander. Return an address
10631 to use in an SHmedia insn pattern. */
10632 rtx
10633 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
10635 int is_sym;
10637 fnaddr = XEXP (fnaddr, 0);
10638 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
10639 if (flag_pic && is_sym)
10641 if (! SYMBOL_REF_LOCAL_P (fnaddr))
10643 rtx reg = gen_reg_rtx (Pmode);
10645 /* We must not use GOTPLT for sibcalls, because PIC_REG
10646 must be restored before the PLT code gets to run. */
10647 if (is_sibcall)
10648 emit_insn (gen_symGOT2reg (reg, fnaddr));
10649 else
10650 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
10651 fnaddr = reg;
10653 else
10655 fnaddr = gen_sym2PIC (fnaddr);
10656 PUT_MODE (fnaddr, Pmode);
10659 /* If ptabs might trap, make this visible to the rest of the compiler.
10660 We generally assume that symbols pertain to valid locations, but
10661 it is possible to generate invalid symbols with asm or linker tricks.
10662 In a list of functions where each returns its successor, an invalid
10663 symbol might denote an empty list. */
10664 if (!TARGET_PT_FIXED
10665 && (!is_sym || TARGET_INVALID_SYMBOLS)
10666 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
10668 rtx tr = gen_reg_rtx (PDImode);
10670 emit_insn (gen_ptabs (tr, fnaddr));
10671 fnaddr = tr;
10673 else if (! target_reg_operand (fnaddr, Pmode))
10674 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
10675 return fnaddr;
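/* Return the register class needed as an intermediate when copying X into
   (IN_P nonzero) or out of a register of CLASS in MODE; NO_REGS means no
   secondary register is required.  SRI->icode may instead be set to a
   dedicated reload pattern.  This appears to implement the secondary_reload
   target hook.  */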
10678 enum reg_class
10679 sh_secondary_reload (bool in_p, rtx x, enum reg_class class,
10680 enum machine_mode mode, secondary_reload_info *sri)
10682 if (in_p)
10684 if (REGCLASS_HAS_FP_REG (class)
10685 && ! TARGET_SHMEDIA
10686 && immediate_operand ((x), mode)
10687 && ! ((fp_zero_operand (x) || fp_one_operand (x))
10688 && mode == SFmode && fldi_ok ()))
10689 switch (mode)
10691 case SFmode:
10692 sri->icode = CODE_FOR_reload_insf__frn;
10693 return NO_REGS;
10694 case DFmode:
10695 sri->icode = CODE_FOR_reload_indf__frn;
10696 return NO_REGS;
10697 case SImode:
10698 /* ??? If we knew that we are in the appropriate mode -
10699 single precision - we could use a reload pattern directly. */
10700 return FPUL_REGS;
10701 default:
10702 abort ();
10704 if (class == FPUL_REGS
10705 && ((GET_CODE (x) == REG
10706 && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
10707 || REGNO (x) == T_REG))
10708 || GET_CODE (x) == PLUS))
10709 return GENERAL_REGS;
10710 if (class == FPUL_REGS && immediate_operand (x, mode))
10712 if (GET_CODE (x) == CONST_INT && CONST_OK_FOR_I08 (INTVAL (x)))
10713 return GENERAL_REGS;
10714 sri->icode = CODE_FOR_reload_insi__i_fpul;
10715 return NO_REGS;
10717 if (class == FPSCR_REGS
10718 && ((GET_CODE (x) == REG && REGNO (x) >= FIRST_PSEUDO_REGISTER)
10719 || (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == PLUS)))
10720 return GENERAL_REGS;
10721 if (REGCLASS_HAS_FP_REG (class)
10722 && TARGET_SHMEDIA
10723 && immediate_operand (x, mode)
10724 && x != CONST0_RTX (GET_MODE (x))
10725 && GET_MODE (x) != V4SFmode)
10726 return GENERAL_REGS;
10727 if ((mode == QImode || mode == HImode)
10728 && TARGET_SHMEDIA && inqhi_operand (x, mode))
10730 sri->icode = ((mode == QImode)
10731 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
10732 return NO_REGS;
10734 if (TARGET_SHMEDIA && class == GENERAL_REGS
10735 && (GET_CODE (x) == LABEL_REF || PIC_DIRECT_ADDR_P (x)))
10736 return TARGET_REGS;
10737 } /* end of input-only processing. */
10739 if (((REGCLASS_HAS_FP_REG (class)
10740 && (GET_CODE (x) == REG
10741 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
10742 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
10743 && TARGET_FMOVD))))
10744 || (REGCLASS_HAS_GENERAL_REG (class)
10745 && GET_CODE (x) == REG
10746 && FP_REGISTER_P (REGNO (x))))
10747 && ! TARGET_SHMEDIA
10748 && (mode == SFmode || mode == SImode))
10749 return FPUL_REGS;
10750 if ((class == FPUL_REGS
10751 || (REGCLASS_HAS_FP_REG (class)
10752 && ! TARGET_SHMEDIA && mode == SImode))
10753 && (GET_CODE (x) == MEM
10754 || (GET_CODE (x) == REG
10755 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
10756 || REGNO (x) == T_REG
10757 || system_reg_operand (x, VOIDmode)))))
10759 if (class == FPUL_REGS)
10760 return GENERAL_REGS;
10761 return FPUL_REGS;
10763 if ((class == TARGET_REGS
10764 || (TARGET_SHMEDIA && class == SIBCALL_REGS))
10765 && !EXTRA_CONSTRAINT_Csy (x)
10766 && (GET_CODE (x) != REG || ! GENERAL_REGISTER_P (REGNO (x))))
10767 return GENERAL_REGS;
10768 if ((class == MAC_REGS || class == PR_REGS)
10769 && GET_CODE (x) == REG && ! GENERAL_REGISTER_P (REGNO (x))
10770 && class != REGNO_REG_CLASS (REGNO (x)))
10771 return GENERAL_REGS;
10772 if (class != GENERAL_REGS && GET_CODE (x) == REG
10773 && TARGET_REGISTER_P (REGNO (x)))
10774 return GENERAL_REGS;
10775 return NO_REGS;
10778 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
10780 /* This defines the storage for the variable part of a -mboard= option.
10781 It is only required when using the sh-superh-elf target. */
10782 #ifdef _SUPERH_H
10783 const char * boardtype = "7750p2";
10784 const char * osruntime = "bare";
10785 #endif
10787 #include "gt-sh.h"