PR 23706
[official-gcc.git] / gcc / config / sh / sh.c
blob f1ac27c617da7e0e094d7cdc5b47ec8587a2a1e2
1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
22 Boston, MA 02110-1301, USA. */
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "insn-config.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "flags.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "function.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "output.h"
38 #include "insn-attr.h"
39 #include "toplev.h"
40 #include "recog.h"
41 #include "c-pragma.h"
42 #include "integrate.h"
43 #include "dwarf2.h"
44 #include "tm_p.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "real.h"
48 #include "langhooks.h"
49 #include "basic-block.h"
50 #include "cfglayout.h"
51 #include "intl.h"
52 #include "sched-int.h"
53 #include "ggc.h"
54 #include "tree-gimple.h"
55 #include "cfgloop.h"
58 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
60 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
61 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
63 /* These are some macros to abstract register modes. */
64 #define CONST_OK_FOR_ADD(size) \
65 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
66 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
67 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
68 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
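/* Editor's illustrative sketch (not part of the original sh.c): the macros
   above let stack and address arithmetic be written once for both 32-bit and
   64-bit pointer targets.  A minimal stack-adjustment helper, loosely modeled
   on what output_stack_adjust does later in this file, might look like the
   fragment below; `delta' is a hypothetical byte count.  */
#if 0 /* example only */
static void
example_adjust_stack (HOST_WIDE_INT delta)
{
  if (CONST_OK_FOR_ADD (delta))
    /* Small offsets fit directly in an add-immediate.  */
    emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
			 GEN_INT (delta)));
  else
    {
      /* Larger offsets must go through a scratch register.  */
      rtx tmp = gen_reg_rtx (Pmode);
      emit_insn (GEN_MOV (tmp, GEN_INT (delta)));
      emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx, tmp));
    }
}
#endif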
70 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
71 int current_function_interrupt;
73 /* ??? The pragma interrupt support will not work for SH3. */
74 /* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
75 output code for the next function appropriate for an interrupt handler. */
76 int pragma_interrupt;
78 /* This is set by the trap_exit attribute for functions. It specifies
79 a trap number to be used in a trapa instruction at function exit
80 (instead of an rte instruction). */
81 int trap_exit;
83 /* This is used by the sp_switch attribute for functions. It specifies
84 a variable holding the address of the stack the interrupt function
85 should switch to/from at entry/exit. */
86 rtx sp_switch;
88 /* This is set by #pragma trapa, and is similar to the above, except that
89 the compiler doesn't emit code to preserve all registers. */
90 static int pragma_trapa;
92 /* This is set by #pragma nosave_low_regs. This is useful on the SH3,
93 which has a separate set of low regs for User and Supervisor modes.
94 This should only be used for the lowest level of interrupts. Higher levels
95 of interrupts must save the registers in case they themselves are
96 interrupted. */
97 int pragma_nosave_low_regs;
99 /* This is used for communication between TARGET_SETUP_INCOMING_VARARGS and
100 sh_expand_prologue. */
101 int current_function_anonymous_args;
103 /* Global variables for machine-dependent things. */
105 /* Which cpu are we scheduling for. */
106 enum processor_type sh_cpu;
108 /* Definitions used in ready queue reordering for first scheduling pass. */
110 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
111 static short *regmode_weight[2];
113 /* Total SFmode and SImode weights of scheduled insns. */
114 static int curr_regmode_pressure[2];
116 /* If true, skip cycles for Q -> R movement. */
117 static int skip_cycles = 0;
119 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
120 and returned from sh_reorder2. */
121 static short cached_can_issue_more;
123 /* Saved operands from the last compare to use when we generate an scc
124 or bcc insn. */
126 rtx sh_compare_op0;
127 rtx sh_compare_op1;
129 /* Provides the class number of the smallest class containing
130 a given register number. */
132 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
134 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
137 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
138 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
139 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
140 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
141 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
142 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
143 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
144 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
145 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
146 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
147 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
148 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
149 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
150 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
153 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
154 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
155 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
156 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
157 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
158 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
159 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
160 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
161 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
162 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
163 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
164 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
165 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
166 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
167 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
168 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
169 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
170 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
171 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
172 GENERAL_REGS, GENERAL_REGS,
175 char sh_register_names[FIRST_PSEUDO_REGISTER] \
176 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
178 char sh_additional_register_names[ADDREGNAMES_SIZE] \
179 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
180 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
182 /* Provide reg_class from a letter such as appears in the machine
183 description. *: target independently reserved letter.
184 reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */
186 enum reg_class reg_class_from_letter[] =
188 /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
189 /* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
190 /* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
191 /* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
192 /* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
193 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
194 /* y */ FPUL_REGS, /* z */ R0_REGS
197 int assembler_dialect;
199 static bool shmedia_space_reserved_for_target_registers;
201 static bool sh_handle_option (size_t, const char *, int);
202 static void split_branches (rtx);
203 static int branch_dest (rtx);
204 static void force_into (rtx, rtx);
205 static void print_slot (rtx);
206 static rtx add_constant (rtx, enum machine_mode, rtx);
207 static void dump_table (rtx, rtx);
208 static int hi_const (rtx);
209 static int broken_move (rtx);
210 static int mova_p (rtx);
211 static rtx find_barrier (int, rtx, rtx);
212 static int noncall_uses_reg (rtx, rtx, rtx *);
213 static rtx gen_block_redirect (rtx, int, int);
214 static void sh_reorg (void);
215 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
216 static rtx frame_insn (rtx);
217 static rtx push (int);
218 static void pop (int);
219 static void push_regs (HARD_REG_SET *, int);
220 static int calc_live_regs (HARD_REG_SET *);
221 static void mark_use (rtx, rtx *);
222 static HOST_WIDE_INT rounded_frame_size (int);
223 static rtx mark_constant_pool_use (rtx);
224 const struct attribute_spec sh_attribute_table[];
225 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
226 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
227 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
228 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
229 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
230 static void sh_insert_attributes (tree, tree *);
231 static const char *sh_check_pch_target_flags (int);
232 static int sh_adjust_cost (rtx, rtx, rtx, int);
233 static int sh_issue_rate (void);
234 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
235 static short find_set_regmode_weight (rtx, enum machine_mode);
236 static short find_insn_regmode_weight (rtx, enum machine_mode);
237 static void find_regmode_weight (int, enum machine_mode);
238 static void sh_md_init_global (FILE *, int, int);
239 static void sh_md_finish_global (FILE *, int);
240 static int rank_for_reorder (const void *, const void *);
241 static void swap_reorder (rtx *, int);
242 static void ready_reorder (rtx *, int);
243 static short high_pressure (enum machine_mode);
244 static int sh_reorder (FILE *, int, rtx *, int *, int);
245 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
246 static void sh_md_init (FILE *, int, int);
247 static int sh_variable_issue (FILE *, int, rtx, int);
249 static bool sh_function_ok_for_sibcall (tree, tree);
251 static bool sh_cannot_modify_jumps_p (void);
252 static int sh_target_reg_class (void);
253 static bool sh_optimize_target_register_callee_saved (bool);
254 static bool sh_ms_bitfield_layout_p (tree);
256 static void sh_init_builtins (void);
257 static void sh_media_init_builtins (void);
258 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
259 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
260 static void sh_file_start (void);
261 static int flow_dependent_p (rtx, rtx);
262 static void flow_dependent_p_1 (rtx, rtx, void *);
263 static int shiftcosts (rtx);
264 static int andcosts (rtx);
265 static int addsubcosts (rtx);
266 static int multcosts (rtx);
267 static bool unspec_caller_rtx_p (rtx);
268 static bool sh_cannot_copy_insn_p (rtx);
269 static bool sh_rtx_costs (rtx, int, int, int *);
270 static int sh_address_cost (rtx);
271 #ifdef TARGET_ADJUST_UNROLL_MAX
272 static int sh_adjust_unroll_max (struct loop *, int, int, int, int);
273 #endif
274 static int sh_pr_n_sets (void);
275 static rtx sh_allocate_initial_value (rtx);
276 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
277 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
278 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
279 static int scavenge_reg (HARD_REG_SET *s);
280 struct save_schedule_s;
281 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
282 struct save_schedule_s *, int);
284 static rtx sh_struct_value_rtx (tree, int);
285 static bool sh_return_in_memory (tree, tree);
286 static rtx sh_builtin_saveregs (void);
287 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
288 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
289 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
290 static tree sh_build_builtin_va_list (void);
291 static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
292 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
293 tree, bool);
294 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
295 tree, bool);
296 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
297 tree, bool);
298 static int sh_dwarf_calling_convention (tree);
299 static int hard_regs_intersect_p (HARD_REG_SET *, HARD_REG_SET *);
302 /* Initialize the GCC target structure. */
303 #undef TARGET_ATTRIBUTE_TABLE
304 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
306 /* The next two are used for debug info when compiling with -gdwarf. */
307 #undef TARGET_ASM_UNALIGNED_HI_OP
308 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
309 #undef TARGET_ASM_UNALIGNED_SI_OP
310 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
312 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
313 #undef TARGET_ASM_UNALIGNED_DI_OP
314 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
315 #undef TARGET_ASM_ALIGNED_DI_OP
316 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
318 #undef TARGET_ASM_FUNCTION_EPILOGUE
319 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
321 #undef TARGET_ASM_OUTPUT_MI_THUNK
322 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
324 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
325 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
327 #undef TARGET_ASM_FILE_START
328 #define TARGET_ASM_FILE_START sh_file_start
329 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
330 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
332 #undef TARGET_DEFAULT_TARGET_FLAGS
333 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
334 #undef TARGET_HANDLE_OPTION
335 #define TARGET_HANDLE_OPTION sh_handle_option
337 #undef TARGET_INSERT_ATTRIBUTES
338 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
340 #undef TARGET_SCHED_ADJUST_COST
341 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
343 #undef TARGET_SCHED_ISSUE_RATE
344 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
346 /* The next 5 hooks have been implemented for re-enabling sched1. With the
347 help of these macros we limit the movement of insns in sched1 to
348 reduce register pressure. The overall idea is to keep count of the SImode
349 and SFmode regs required by already scheduled insns. When these counts
350 cross certain threshold values, we give priority to insns that free registers.
351 The insn that frees registers is most likely to be the insn with the lowest
352 LUID (original insn order), but such an insn might be sitting in the stalled
353 queue (Q) instead of the ready queue (R). To solve this, we skip cycles,
354 up to a maximum of 8, so that such insns may move from Q -> R.
356 The hooks are described below:
358 TARGET_SCHED_INIT_GLOBAL: A new target hook in the generic
359 scheduler; it is called inside sched_init, just after the
360 call to find_insn_reg_weights. It is used to calculate the SImode
361 and SFmode weights of the insns of basic blocks, much like what
362 find_insn_reg_weights does.
363 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
365 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
366 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
367 (Q)->(R).
369 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
370 high, reorder the ready queue so that the insn with the lowest LUID is
371 issued next.
373 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
374 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
376 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
377 can be returned from TARGET_SCHED_REORDER2.
379 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
381 #undef TARGET_SCHED_DFA_NEW_CYCLE
382 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
384 #undef TARGET_SCHED_INIT_GLOBAL
385 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
387 #undef TARGET_SCHED_FINISH_GLOBAL
388 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
390 #undef TARGET_SCHED_VARIABLE_ISSUE
391 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
393 #undef TARGET_SCHED_REORDER
394 #define TARGET_SCHED_REORDER sh_reorder
396 #undef TARGET_SCHED_REORDER2
397 #define TARGET_SCHED_REORDER2 sh_reorder2
399 #undef TARGET_SCHED_INIT
400 #define TARGET_SCHED_INIT sh_md_init
402 #undef TARGET_CANNOT_MODIFY_JUMPS_P
403 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
404 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
405 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
406 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
407 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
408 sh_optimize_target_register_callee_saved
410 #undef TARGET_MS_BITFIELD_LAYOUT_P
411 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
413 #undef TARGET_INIT_BUILTINS
414 #define TARGET_INIT_BUILTINS sh_init_builtins
415 #undef TARGET_EXPAND_BUILTIN
416 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
418 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
419 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
421 #undef TARGET_CANNOT_COPY_INSN_P
422 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
423 #undef TARGET_RTX_COSTS
424 #define TARGET_RTX_COSTS sh_rtx_costs
425 #undef TARGET_ADDRESS_COST
426 #define TARGET_ADDRESS_COST sh_address_cost
427 #undef TARGET_ALLOCATE_INITIAL_VALUE
428 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
430 #undef TARGET_MACHINE_DEPENDENT_REORG
431 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
433 #ifdef HAVE_AS_TLS
434 #undef TARGET_HAVE_TLS
435 #define TARGET_HAVE_TLS true
436 #endif
438 #undef TARGET_PROMOTE_PROTOTYPES
439 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
440 #undef TARGET_PROMOTE_FUNCTION_ARGS
441 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
442 #undef TARGET_PROMOTE_FUNCTION_RETURN
443 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
445 #undef TARGET_STRUCT_VALUE_RTX
446 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
447 #undef TARGET_RETURN_IN_MEMORY
448 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
450 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
451 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
452 #undef TARGET_SETUP_INCOMING_VARARGS
453 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
454 #undef TARGET_STRICT_ARGUMENT_NAMING
455 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
456 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
457 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
458 #undef TARGET_MUST_PASS_IN_STACK
459 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
460 #undef TARGET_PASS_BY_REFERENCE
461 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
462 #undef TARGET_CALLEE_COPIES
463 #define TARGET_CALLEE_COPIES sh_callee_copies
464 #undef TARGET_ARG_PARTIAL_BYTES
465 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
467 #undef TARGET_BUILD_BUILTIN_VA_LIST
468 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
469 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
470 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
472 #undef TARGET_VECTOR_MODE_SUPPORTED_P
473 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
475 #undef TARGET_CHECK_PCH_TARGET_FLAGS
476 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
478 #undef TARGET_DWARF_CALLING_CONVENTION
479 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
481 /* Return regmode weight for insn. */
482 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
484 /* Return current register pressure for regmode. */
485 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
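/* Editor's illustrative sketch (not part of the original sh.c): the
   "high pressure" test the scheduling hooks above rely on is simply a
   comparison of these counters against per-mode thresholds.  The constants
   below are hypothetical; the real test lives in high_pressure ().  */
#if 0 /* example only */
static short
example_high_pressure (enum machine_mode mode)
{
  const int example_simode_max = 5;   /* hypothetical threshold */
  const int example_sfmode_max = 10;  /* hypothetical threshold */

  if (mode == SFmode)
    return CURR_REGMODE_PRESSURE (SFmode) > example_sfmode_max;
  return CURR_REGMODE_PRESSURE (SImode) > example_simode_max;
}
#endif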
487 #ifdef SYMBIAN
489 #undef TARGET_ENCODE_SECTION_INFO
490 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
491 #undef TARGET_STRIP_NAME_ENCODING
492 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
493 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
494 #define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class
496 #endif /* SYMBIAN */
498 #ifdef TARGET_ADJUST_UNROLL_MAX
499 #undef TARGET_ADJUST_UNROLL_MAX
500 #define TARGET_ADJUST_UNROLL_MAX sh_adjust_unroll_max
501 #endif
503 struct gcc_target targetm = TARGET_INITIALIZER;
505 /* Implement TARGET_HANDLE_OPTION. */
507 static bool
508 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
509 int value ATTRIBUTE_UNUSED)
511 switch (code)
513 case OPT_m1:
514 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
515 return true;
517 case OPT_m2:
518 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
519 return true;
521 case OPT_m2a:
522 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
523 return true;
525 case OPT_m2a_nofpu:
526 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
527 return true;
529 case OPT_m2a_single:
530 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
531 return true;
533 case OPT_m2a_single_only:
534 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
535 return true;
537 case OPT_m2e:
538 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
539 return true;
541 case OPT_m3:
542 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
543 return true;
545 case OPT_m3e:
546 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
547 return true;
549 case OPT_m4:
550 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
551 return true;
553 case OPT_m4_nofpu:
554 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
555 return true;
557 case OPT_m4_single:
558 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
559 return true;
561 case OPT_m4_single_only:
562 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
563 return true;
565 case OPT_m4a:
566 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
567 return true;
569 case OPT_m4a_nofpu:
570 case OPT_m4al:
571 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
572 return true;
574 case OPT_m4a_single:
575 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
576 return true;
578 case OPT_m4a_single_only:
579 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
580 return true;
582 case OPT_m5_32media:
583 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
584 return true;
586 case OPT_m5_32media_nofpu:
587 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
588 return true;
590 case OPT_m5_64media:
591 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
592 return true;
594 case OPT_m5_64media_nofpu:
595 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
596 return true;
598 case OPT_m5_compact:
599 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
600 return true;
602 case OPT_m5_compact_nofpu:
603 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
604 return true;
606 default:
607 return true;
611 /* Print the operand address in x to the stream. */
613 void
614 print_operand_address (FILE *stream, rtx x)
616 switch (GET_CODE (x))
618 case REG:
619 case SUBREG:
620 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
621 break;
623 case PLUS:
625 rtx base = XEXP (x, 0);
626 rtx index = XEXP (x, 1);
628 switch (GET_CODE (index))
630 case CONST_INT:
631 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
632 reg_names[true_regnum (base)]);
633 break;
635 case REG:
636 case SUBREG:
638 int base_num = true_regnum (base);
639 int index_num = true_regnum (index);
641 fprintf (stream, "@(r0,%s)",
642 reg_names[MAX (base_num, index_num)]);
643 break;
646 default:
647 gcc_unreachable ();
650 break;
652 case PRE_DEC:
653 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
654 break;
656 case POST_INC:
657 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
658 break;
660 default:
661 x = mark_constant_pool_use (x);
662 output_addr_const (stream, x);
663 break;
667 /* Print operand x (an rtx) in assembler syntax to file stream
668 according to modifier code.
670 '.' print a .s if insn needs delay slot
671 ',' print LOCAL_LABEL_PREFIX
672 '@' print trapa, rte or rts depending on the function's trap_exit / interrupt-handler state
673 '#' output a nop if there is nothing to put in the delay slot
674 ''' print likelihood suffix (/u for unlikely).
675 '>' print branch target if -fverbose-asm
676 'O' print a constant without the #
677 'R' print the LSW of a dp value - changes if in little endian
678 'S' print the MSW of a dp value - changes if in little endian
679 'T' print the next word of a dp value - same as 'R' in big endian mode.
680 'M' print an `x' if `m' will print `base,index'.
681 'N' print 'r63' if the operand is (const_int 0).
682 'd' print a V2SF reg as dN instead of fpN.
683 'm' print a pair `base,offset' or `base,index', for LD and ST.
684 'U' Likewise for {LD,ST}{HI,LO}.
685 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
686 'o' output an operator. */
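/* Editor's note (illustrative example, not part of the original sh.c): with
   operand 1 a DImode value in the register pair r4/r5 and operand 0 the pair
   r6/r7, a template such as "mov.l %S1,%S0\n\tmov.l %R1,%R0" copies the most
   significant word first, then the least significant one.  On a big-endian
   target %S1 prints "r4" and %R1 prints "r5"; on a little-endian target the
   two are swapped, so the same template works for either byte order.  */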
688 void
689 print_operand (FILE *stream, rtx x, int code)
691 int regno;
692 enum machine_mode mode;
694 switch (code)
696 case '.':
697 if (final_sequence
698 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
699 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
700 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
701 break;
702 case ',':
703 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
704 break;
705 case '@':
706 if (trap_exit)
707 fprintf (stream, "trapa #%d", trap_exit);
708 else if (sh_cfun_interrupt_handler_p ())
709 fprintf (stream, "rte");
710 else
711 fprintf (stream, "rts");
712 break;
713 case '#':
714 /* Output a nop if there's nothing in the delay slot. */
715 if (dbr_sequence_length () == 0)
716 fprintf (stream, "\n\tnop");
717 break;
718 case '\'':
720 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
722 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
723 fputs ("/u", stream);
724 break;
726 case '>':
727 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
729 fputs ("\t! target: ", stream);
730 output_addr_const (stream, JUMP_LABEL (current_output_insn));
732 break;
733 case 'O':
734 x = mark_constant_pool_use (x);
735 output_addr_const (stream, x);
736 break;
737 /* N.B.: %R / %S / %T adjust memory addresses by four.
738 For SHMEDIA, that means they can be used to access the first and
739 second 32 bit part of a 64 bit (or larger) value that
740 might be held in floating point registers or memory.
741 While they can be used to access 64 bit parts of a larger value
742 held in general purpose registers, that won't work with memory, nor
743 with fp registers, since the frxx names are used. */
744 case 'R':
745 if (REG_P (x) || GET_CODE (x) == SUBREG)
747 regno = true_regnum (x);
748 regno += FP_REGISTER_P (regno) ? 1 : LSW;
749 fputs (reg_names[regno], (stream));
751 else if (MEM_P (x))
753 x = adjust_address (x, SImode, 4 * LSW);
754 print_operand_address (stream, XEXP (x, 0));
756 else
758 rtx sub = NULL_RTX;
760 mode = GET_MODE (x);
761 if (mode == VOIDmode)
762 mode = DImode;
763 if (GET_MODE_SIZE (mode) >= 8)
764 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
765 if (sub)
766 print_operand (stream, sub, 0);
767 else
768 output_operand_lossage ("invalid operand to %%R");
770 break;
771 case 'S':
772 if (REG_P (x) || GET_CODE (x) == SUBREG)
774 regno = true_regnum (x);
775 regno += FP_REGISTER_P (regno) ? 0 : MSW;
776 fputs (reg_names[regno], (stream));
778 else if (MEM_P (x))
780 x = adjust_address (x, SImode, 4 * MSW);
781 print_operand_address (stream, XEXP (x, 0));
783 else
785 rtx sub = NULL_RTX;
787 mode = GET_MODE (x);
788 if (mode == VOIDmode)
789 mode = DImode;
790 if (GET_MODE_SIZE (mode) >= 8)
791 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
792 if (sub)
793 print_operand (stream, sub, 0);
794 else
795 output_operand_lossage ("invalid operand to %%S");
797 break;
798 case 'T':
799 /* Next word of a double. */
800 switch (GET_CODE (x))
802 case REG:
803 fputs (reg_names[REGNO (x) + 1], (stream));
804 break;
805 case MEM:
806 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
807 && GET_CODE (XEXP (x, 0)) != POST_INC)
808 x = adjust_address (x, SImode, 4);
809 print_operand_address (stream, XEXP (x, 0));
810 break;
811 default:
812 break;
814 break;
815 case 'o':
816 switch (GET_CODE (x))
818 case PLUS: fputs ("add", stream); break;
819 case MINUS: fputs ("sub", stream); break;
820 case MULT: fputs ("mul", stream); break;
821 case DIV: fputs ("div", stream); break;
822 case EQ: fputs ("eq", stream); break;
823 case NE: fputs ("ne", stream); break;
824 case GT: case LT: fputs ("gt", stream); break;
825 case GE: case LE: fputs ("ge", stream); break;
826 case GTU: case LTU: fputs ("gtu", stream); break;
827 case GEU: case LEU: fputs ("geu", stream); break;
828 default:
829 break;
831 break;
832 case 'M':
833 if (GET_CODE (x) == MEM
834 && GET_CODE (XEXP (x, 0)) == PLUS
835 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
836 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
837 fputc ('x', stream);
838 break;
840 case 'm':
841 gcc_assert (GET_CODE (x) == MEM);
842 x = XEXP (x, 0);
843 /* Fall through. */
844 case 'U':
845 switch (GET_CODE (x))
847 case REG:
848 case SUBREG:
849 print_operand (stream, x, 0);
850 fputs (", 0", stream);
851 break;
853 case PLUS:
854 print_operand (stream, XEXP (x, 0), 0);
855 fputs (", ", stream);
856 print_operand (stream, XEXP (x, 1), 0);
857 break;
859 default:
860 gcc_unreachable ();
862 break;
864 case 'd':
865 gcc_assert (GET_CODE (x) == REG && GET_MODE (x) == V2SFmode);
867 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
868 break;
870 case 'N':
871 if (x == CONST0_RTX (GET_MODE (x)))
873 fprintf ((stream), "r63");
874 break;
876 goto default_output;
877 case 'u':
878 if (GET_CODE (x) == CONST_INT)
880 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
881 break;
883 /* Fall through. */
885 default_output:
886 default:
887 regno = 0;
888 mode = GET_MODE (x);
890 switch (GET_CODE (x))
892 case TRUNCATE:
894 rtx inner = XEXP (x, 0);
895 int offset = 0;
896 enum machine_mode inner_mode;
898 /* We might see SUBREGs with vector mode registers inside. */
899 if (GET_CODE (inner) == SUBREG
900 && (GET_MODE_SIZE (GET_MODE (inner))
901 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
902 && subreg_lowpart_p (inner))
903 inner = SUBREG_REG (inner);
904 if (GET_CODE (inner) == CONST_INT)
906 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
907 goto default_output;
909 inner_mode = GET_MODE (inner);
910 if (GET_CODE (inner) == SUBREG
911 && (GET_MODE_SIZE (GET_MODE (inner))
912 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
913 && GET_CODE (SUBREG_REG (inner)) == REG)
915 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
916 GET_MODE (SUBREG_REG (inner)),
917 SUBREG_BYTE (inner),
918 GET_MODE (inner));
919 inner = SUBREG_REG (inner);
921 if (GET_CODE (inner) != REG || GET_MODE_SIZE (inner_mode) > 8)
922 abort ();
923 /* Floating point register pairs are always big endian;
924 general purpose registers are 64 bit wide. */
925 regno = REGNO (inner);
926 regno = (HARD_REGNO_NREGS (regno, inner_mode)
927 - HARD_REGNO_NREGS (regno, mode))
928 + offset;
929 x = inner;
930 goto reg;
932 case SIGN_EXTEND:
933 x = XEXP (x, 0);
934 goto reg;
935 /* FIXME: We need this on SHmedia32 because reload generates
936 some sign-extended HI or QI loads into DImode registers
937 but, because Pmode is SImode, the address ends up with a
938 subreg:SI of the DImode register. Maybe reload should be
939 fixed so as to apply alter_subreg to such loads? */
940 case IF_THEN_ELSE:
941 gcc_assert (trapping_target_operand (x, VOIDmode));
942 x = XEXP (XEXP (x, 2), 0);
943 goto default_output;
944 case SUBREG:
945 gcc_assert (SUBREG_BYTE (x) == 0
946 && GET_CODE (SUBREG_REG (x)) == REG);
948 x = SUBREG_REG (x);
949 /* Fall through. */
951 reg:
952 case REG:
953 regno += REGNO (x);
954 if (FP_REGISTER_P (regno)
955 && mode == V16SFmode)
956 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
957 else if (FP_REGISTER_P (REGNO (x))
958 && mode == V4SFmode)
959 fprintf ((stream), "fv%s", reg_names[regno] + 2);
960 else if (GET_CODE (x) == REG
961 && mode == V2SFmode)
962 fprintf ((stream), "fp%s", reg_names[regno] + 2);
963 else if (FP_REGISTER_P (REGNO (x))
964 && GET_MODE_SIZE (mode) > 4)
965 fprintf ((stream), "d%s", reg_names[regno] + 1);
966 else
967 fputs (reg_names[regno], (stream));
968 break;
970 case MEM:
971 output_address (XEXP (x, 0));
972 break;
974 case CONST:
975 if (TARGET_SHMEDIA
976 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
977 && (GET_MODE (XEXP (x, 0)) == DImode
978 || GET_MODE (XEXP (x, 0)) == SImode)
979 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
980 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
982 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
984 fputc ('(', stream);
985 if (GET_CODE (val) == ASHIFTRT)
987 fputc ('(', stream);
988 if (GET_CODE (XEXP (val, 0)) == CONST)
989 fputc ('(', stream);
990 output_addr_const (stream, XEXP (val, 0));
991 if (GET_CODE (XEXP (val, 0)) == CONST)
992 fputc (')', stream);
993 fputs (" >> ", stream);
994 output_addr_const (stream, XEXP (val, 1));
995 fputc (')', stream);
997 else
999 if (GET_CODE (val) == CONST)
1000 fputc ('(', stream);
1001 output_addr_const (stream, val);
1002 if (GET_CODE (val) == CONST)
1003 fputc (')', stream);
1005 fputs (" & 65535)", stream);
1006 break;
1009 /* Fall through. */
1010 default:
1011 if (TARGET_SH1)
1012 fputc ('#', stream);
1013 output_addr_const (stream, x);
1014 break;
1016 break;
1020 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1021 static void
1022 force_into (rtx value, rtx target)
1024 value = force_operand (value, target);
1025 if (! rtx_equal_p (value, target))
1026 emit_insn (gen_move_insn (target, value));
1029 /* Emit code to perform a block move. Choose the best method.
1031 OPERANDS[0] is the destination.
1032 OPERANDS[1] is the source.
1033 OPERANDS[2] is the size.
1034 OPERANDS[3] is the alignment safe to use. */
1036 int
1037 expand_block_move (rtx *operands)
1039 int align = INTVAL (operands[3]);
1040 int constp = (GET_CODE (operands[2]) == CONST_INT);
1041 int bytes = (constp ? INTVAL (operands[2]) : 0);
1043 if (! constp)
1044 return 0;
1046 /* If we could use mov.l to move words and dest is word-aligned, we
1047 can use movua.l for loads and still generate a relatively short
1048 and efficient sequence. */
1049 if (TARGET_SH4A_ARCH && align < 4
1050 && MEM_ALIGN (operands[0]) >= 32
1051 && can_move_by_pieces (bytes, 32))
1053 rtx dest = copy_rtx (operands[0]);
1054 rtx src = copy_rtx (operands[1]);
1055 /* We could use different pseudos for each copied word, but
1056 since movua can only load into r0, it's kind of
1057 pointless. */
1058 rtx temp = gen_reg_rtx (SImode);
1059 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1060 int copied = 0;
1062 while (copied + 4 <= bytes)
1064 rtx to = adjust_address (dest, SImode, copied);
1065 rtx from = adjust_automodify_address (src, SImode, src_addr, copied);
1067 emit_insn (gen_movua (temp, from));
1068 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1069 emit_move_insn (to, temp);
1070 copied += 4;
1073 if (copied < bytes)
1074 move_by_pieces (adjust_address (dest, BLKmode, copied),
1075 adjust_automodify_address (src, BLKmode,
1076 src_addr, copied),
1077 bytes - copied, align, 0);
1079 return 1;
1082 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1083 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1084 if (align < 4 || (bytes % 4 != 0))
1085 return 0;
1087 if (TARGET_HARD_SH4)
1089 if (bytes < 12)
1090 return 0;
1091 else if (bytes == 12)
1093 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1094 rtx r4 = gen_rtx_REG (SImode, 4);
1095 rtx r5 = gen_rtx_REG (SImode, 5);
1097 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1098 force_into (XEXP (operands[0], 0), r4);
1099 force_into (XEXP (operands[1], 0), r5);
1100 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1101 return 1;
1103 else if (! TARGET_SMALLCODE)
1105 const char *entry_name;
1106 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1107 int dwords;
1108 rtx r4 = gen_rtx_REG (SImode, 4);
1109 rtx r5 = gen_rtx_REG (SImode, 5);
1110 rtx r6 = gen_rtx_REG (SImode, 6);
1112 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1113 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1114 force_into (XEXP (operands[0], 0), r4);
1115 force_into (XEXP (operands[1], 0), r5);
1117 dwords = bytes >> 3;
1118 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1119 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1120 return 1;
1122 else
1123 return 0;
1125 if (bytes < 64)
1127 char entry[30];
1128 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1129 rtx r4 = gen_rtx_REG (SImode, 4);
1130 rtx r5 = gen_rtx_REG (SImode, 5);
1132 sprintf (entry, "__movmemSI%d", bytes);
1133 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1134 force_into (XEXP (operands[0], 0), r4);
1135 force_into (XEXP (operands[1], 0), r5);
1136 emit_insn (gen_block_move_real (func_addr_rtx));
1137 return 1;
1140 /* This is the same number of bytes as a memcpy call, but to a different,
1141 less common function name, so this will occasionally use more space. */
1142 if (! TARGET_SMALLCODE)
1144 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1145 int final_switch, while_loop;
1146 rtx r4 = gen_rtx_REG (SImode, 4);
1147 rtx r5 = gen_rtx_REG (SImode, 5);
1148 rtx r6 = gen_rtx_REG (SImode, 6);
1150 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1151 force_into (XEXP (operands[0], 0), r4);
1152 force_into (XEXP (operands[1], 0), r5);
1154 /* r6 controls the size of the move. 16 is decremented from it
1155 for each 64 bytes moved. Then the negative bit left over is used
1156 as an index into a list of move instructions. e.g., a 72 byte move
1157 would be set up with size(r6) = 14, for one iteration through the
1158 big while loop, and a switch of -2 for the last part. */
1160 final_switch = 16 - ((bytes / 4) % 16);
1161 while_loop = ((bytes / 4) / 16 - 1) * 16;
1162 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1163 emit_insn (gen_block_lump_real (func_addr_rtx));
1164 return 1;
1167 return 0;
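/* Editor's note (illustrative summary, not part of the original sh.c): for a
   constant, 4-byte-aligned copy the function above dispatches to one of the
   library helpers, for example
     12 bytes, TARGET_HARD_SH4            -> __movmemSI12_i4
     other sizes, TARGET_HARD_SH4         -> __movmem_i4_even / __movmem_i4_odd
     < 64 bytes otherwise                 -> __movmemSI<bytes>
     >= 64 bytes otherwise                -> __movmem, with r6 = while_loop + final_switch
   (the latter two only when not optimizing for size) and returns 1; anything
   non-constant, misaligned or otherwise unhandled returns 0 so the caller
   falls back to a generic block move.  */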
1170 /* Prepare operands for a move define_expand; specifically, one of the
1171 operands must be in a register. */
1173 int
1174 prepare_move_operands (rtx operands[], enum machine_mode mode)
1176 if ((mode == SImode || mode == DImode)
1177 && flag_pic
1178 && ! ((mode == Pmode || mode == ptr_mode)
1179 && tls_symbolic_operand (operands[1], Pmode) != 0))
1181 rtx temp;
1182 if (SYMBOLIC_CONST_P (operands[1]))
1184 if (GET_CODE (operands[0]) == MEM)
1185 operands[1] = force_reg (Pmode, operands[1]);
1186 else if (TARGET_SHMEDIA
1187 && GET_CODE (operands[1]) == LABEL_REF
1188 && target_reg_operand (operands[0], mode))
1189 /* It's ok. */;
1190 else
1192 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
1193 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1196 else if (GET_CODE (operands[1]) == CONST
1197 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1198 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1200 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
1201 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1202 mode, temp);
1203 operands[1] = expand_binop (mode, add_optab, temp,
1204 XEXP (XEXP (operands[1], 0), 1),
1205 no_new_pseudos ? temp
1206 : gen_reg_rtx (Pmode),
1207 0, OPTAB_LIB_WIDEN);
1211 if (! reload_in_progress && ! reload_completed)
1213 /* Copy the source to a register if neither operand is a register. */
1214 if (! register_operand (operands[0], mode)
1215 && ! sh_register_operand (operands[1], mode))
1216 operands[1] = copy_to_mode_reg (mode, operands[1]);
1218 if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
1220 /* This is like change_address_1 (operands[0], mode, 0, 1),
1221 except that we can't use that function because it is static. */
1222 rtx new = change_address (operands[0], mode, 0);
1223 MEM_COPY_ATTRIBUTES (new, operands[0]);
1224 operands[0] = new;
1227 /* This case can happen while generating code to move the result
1228 of a library call to the target. Reject `st r0,@(rX,rY)' because
1229 reload will fail to find a spill register for rX, since r0 is already
1230 being used for the source. */
1231 else if (TARGET_SH1
1232 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1233 && GET_CODE (operands[0]) == MEM
1234 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1235 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
1236 operands[1] = copy_to_mode_reg (mode, operands[1]);
1239 if (mode == Pmode || mode == ptr_mode)
1241 rtx op0, op1;
1242 enum tls_model tls_kind;
1244 op0 = operands[0];
1245 op1 = operands[1];
1246 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
1248 rtx tga_op1, tga_ret, tmp, tmp2;
1250 switch (tls_kind)
1252 case TLS_MODEL_GLOBAL_DYNAMIC:
1253 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1254 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1255 op1 = tga_ret;
1256 break;
1258 case TLS_MODEL_LOCAL_DYNAMIC:
1259 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1260 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1262 tmp = gen_reg_rtx (Pmode);
1263 emit_move_insn (tmp, tga_ret);
1265 if (register_operand (op0, Pmode))
1266 tmp2 = op0;
1267 else
1268 tmp2 = gen_reg_rtx (Pmode);
1270 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1271 op1 = tmp2;
1272 break;
1274 case TLS_MODEL_INITIAL_EXEC:
1275 if (! flag_pic)
1277 /* Don't schedule insns for getting the GOT address when
1278 the first scheduling pass is enabled, to avoid spill
1279 failures for R0. */
1280 if (flag_schedule_insns)
1281 emit_insn (gen_blockage ());
1282 emit_insn (gen_GOTaddr2picreg ());
1283 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode,
1284 PIC_REG)));
1285 if (flag_schedule_insns)
1286 emit_insn (gen_blockage ());
1288 tga_op1 = no_new_pseudos ? op0 : gen_reg_rtx (Pmode);
1289 tmp = gen_sym2GOTTPOFF (op1);
1290 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1291 op1 = tga_op1;
1292 break;
1294 case TLS_MODEL_LOCAL_EXEC:
1295 tmp2 = gen_reg_rtx (Pmode);
1296 emit_insn (gen_load_gbr (tmp2));
1297 tmp = gen_reg_rtx (Pmode);
1298 emit_insn (gen_symTPOFF2reg (tmp, op1));
1300 if (register_operand (op0, Pmode))
1301 op1 = op0;
1302 else
1303 op1 = gen_reg_rtx (Pmode);
1305 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1306 break;
1308 default:
1309 gcc_unreachable ();
1311 operands[1] = op1;
1315 return 0;
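/* Editor's illustrative sketch (not part of the original sh.c): the mov
   expanders in sh.md are expected to call this function before emitting the
   move, roughly as in the fragment below; this is shown only to document the
   calling convention, the exact expander text lives in sh.md.  */
#if 0 /* example only */
  if (prepare_move_operands (operands, SImode))
    DONE;
#endif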
1318 /* Prepare the operands for an scc instruction; make sure that the
1319 compare has been done. */
1320 rtx
1321 prepare_scc_operands (enum rtx_code code)
1323 rtx t_reg = gen_rtx_REG (SImode, T_REG);
1324 enum rtx_code oldcode = code;
1325 enum machine_mode mode;
1327 /* First need a compare insn. */
1328 switch (code)
1330 case NE:
1331 /* It isn't possible to handle this case. */
1332 gcc_unreachable ();
1333 case LT:
1334 code = GT;
1335 break;
1336 case LE:
1337 code = GE;
1338 break;
1339 case LTU:
1340 code = GTU;
1341 break;
1342 case LEU:
1343 code = GEU;
1344 break;
1345 default:
1346 break;
1348 if (code != oldcode)
1350 rtx tmp = sh_compare_op0;
1351 sh_compare_op0 = sh_compare_op1;
1352 sh_compare_op1 = tmp;
1355 mode = GET_MODE (sh_compare_op0);
1356 if (mode == VOIDmode)
1357 mode = GET_MODE (sh_compare_op1);
1359 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1360 if ((code != EQ && code != NE
1361 && (sh_compare_op1 != const0_rtx
1362 || code == GTU || code == GEU || code == LTU || code == LEU))
1363 || (mode == DImode && sh_compare_op1 != const0_rtx)
1364 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1365 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1367 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1368 (mode == SFmode ? emit_sf_insn : emit_df_insn)
1369 (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
1370 gen_rtx_SET (VOIDmode, t_reg,
1371 gen_rtx_fmt_ee (code, SImode,
1372 sh_compare_op0, sh_compare_op1)),
1373 gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
1374 else
1375 emit_insn (gen_rtx_SET (VOIDmode, t_reg,
1376 gen_rtx_fmt_ee (code, SImode,
1377 sh_compare_op0, sh_compare_op1)));
1379 return t_reg;
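/* Editor's note (worked example, not part of the original sh.c): the SH has
   no "less than" compares, so an SImode test r4 < r5 is canonicalized above
   from LT to GT with the operands swapped.  The insn emitted is then
   (set (reg T) (gt r5 r4)), i.e. the assembly "cmp/gt r4,r5", which sets T
   exactly when r5 > r4; the caller materializes the result from T, typically
   with a movt.  */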
1382 /* Called from the md file, set up the operands of a compare instruction. */
1384 void
1385 from_compare (rtx *operands, int code)
1387 enum machine_mode mode = GET_MODE (sh_compare_op0);
1388 rtx insn;
1389 if (mode == VOIDmode)
1390 mode = GET_MODE (sh_compare_op1);
1391 if (code != EQ
1392 || mode == DImode
1393 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1395 /* Force args into regs, since we can't use constants here. */
1396 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1397 if (sh_compare_op1 != const0_rtx
1398 || code == GTU || code == GEU
1399 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1400 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1402 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
1404 from_compare (operands, GT);
1405 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
1407 else
1408 insn = gen_rtx_SET (VOIDmode,
1409 gen_rtx_REG (SImode, T_REG),
1410 gen_rtx_fmt_ee (code, SImode,
1411 sh_compare_op0, sh_compare_op1));
1412 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1414 insn = gen_rtx_PARALLEL (VOIDmode,
1415 gen_rtvec (2, insn,
1416 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
1417 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
1419 else
1420 emit_insn (insn);
1423 /* Functions to output assembly code. */
1425 /* Return a sequence of instructions to perform a DI or DF move.
1427 Since the SH cannot move a DI or DF in one instruction, we have
1428 to take care when we see overlapping source and dest registers. */
1430 const char *
1431 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
1432 enum machine_mode mode)
1434 rtx dst = operands[0];
1435 rtx src = operands[1];
1437 if (GET_CODE (dst) == MEM
1438 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
1439 return "mov.l %T1,%0\n\tmov.l %1,%0";
1441 if (register_operand (dst, mode)
1442 && register_operand (src, mode))
1444 if (REGNO (src) == MACH_REG)
1445 return "sts mach,%S0\n\tsts macl,%R0";
1447 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1448 when mov.d r1,r0 do r1->r0 then r2->r1. */
1450 if (REGNO (src) + 1 == REGNO (dst))
1451 return "mov %T1,%T0\n\tmov %1,%0";
1452 else
1453 return "mov %1,%0\n\tmov %T1,%T0";
1455 else if (GET_CODE (src) == CONST_INT)
1457 if (INTVAL (src) < 0)
1458 output_asm_insn ("mov #-1,%S0", operands);
1459 else
1460 output_asm_insn ("mov #0,%S0", operands);
1462 return "mov %1,%R0";
1464 else if (GET_CODE (src) == MEM)
1466 int ptrreg = -1;
1467 int dreg = REGNO (dst);
1468 rtx inside = XEXP (src, 0);
1470 switch (GET_CODE (inside))
1472 case REG:
1473 ptrreg = REGNO (inside);
1474 break;
1476 case SUBREG:
1477 ptrreg = subreg_regno (inside);
1478 break;
1480 case PLUS:
1481 ptrreg = REGNO (XEXP (inside, 0));
1482 /* ??? A r0+REG address shouldn't be possible here, because it isn't
1483 an offsettable address. Unfortunately, offsettable addresses use
1484 QImode to check the offset, and a QImode offsettable address
1485 requires r0 for the other operand, which is not currently
1486 supported, so we can't use the 'o' constraint.
1487 Thus we must check for and handle r0+REG addresses here.
1488 We punt for now, since this is likely very rare. */
1489 gcc_assert (GET_CODE (XEXP (inside, 1)) != REG);
1490 break;
1492 case LABEL_REF:
1493 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1494 case POST_INC:
1495 return "mov.l %1,%0\n\tmov.l %1,%T0";
1496 default:
1497 gcc_unreachable ();
1500 /* Work out the safe way to copy. Copy into the second half first. */
1501 if (dreg == ptrreg)
1502 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1505 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1508 /* Print an instruction which would have gone into a delay slot after
1509 another instruction, but couldn't because the other instruction expanded
1510 into a sequence where putting the slot insn at the end wouldn't work. */
1512 static void
1513 print_slot (rtx insn)
1515 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
1517 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
1520 const char *
1521 output_far_jump (rtx insn, rtx op)
1523 struct { rtx lab, reg, op; } this;
1524 rtx braf_base_lab = NULL_RTX;
1525 const char *jump;
1526 int far;
1527 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1528 rtx prev;
1530 this.lab = gen_label_rtx ();
1532 if (TARGET_SH2
1533 && offset >= -32764
1534 && offset - get_attr_length (insn) <= 32766)
1536 far = 0;
1537 jump = "mov.w %O0,%1; braf %1";
1539 else
1541 far = 1;
1542 if (flag_pic)
1544 if (TARGET_SH2)
1545 jump = "mov.l %O0,%1; braf %1";
1546 else
1547 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1549 else
1550 jump = "mov.l %O0,%1; jmp @%1";
1552 /* If we have a scratch register available, use it. */
1553 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1554 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1556 this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1557 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1558 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1559 output_asm_insn (jump, &this.lab);
1560 if (dbr_sequence_length ())
1561 print_slot (final_sequence);
1562 else
1563 output_asm_insn ("nop", 0);
1565 else
1567 /* Output the delay slot insn first if any. */
1568 if (dbr_sequence_length ())
1569 print_slot (final_sequence);
1571 this.reg = gen_rtx_REG (SImode, 13);
1572 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1573 Fortunately, MACL is fixed and call-clobbered, and we never
1574 need its value across jumps, so save r13 in it instead of on
1575 the stack. */
1576 if (TARGET_SH5)
1577 output_asm_insn ("lds r13, macl", 0);
1578 else
1579 output_asm_insn ("mov.l r13,@-r15", 0);
1580 output_asm_insn (jump, &this.lab);
1581 if (TARGET_SH5)
1582 output_asm_insn ("sts macl, r13", 0);
1583 else
1584 output_asm_insn ("mov.l @r15+,r13", 0);
1586 if (far && flag_pic && TARGET_SH2)
1588 braf_base_lab = gen_label_rtx ();
1589 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1590 CODE_LABEL_NUMBER (braf_base_lab));
1592 if (far)
1593 output_asm_insn (".align 2", 0);
1594 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1595 this.op = op;
1596 if (far && flag_pic)
1598 if (TARGET_SH2)
1599 this.lab = braf_base_lab;
1600 output_asm_insn (".long %O2-%O0", &this.lab);
1602 else
1603 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
1604 return "";
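/* Editor's note (schematic example, not part of the original sh.c): with no
   scratch register available, no PIC, and a non-SH5 target, the code above
   expands a far branch to roughly

	mov.l	r13,@-r15	! spill r13
	mov.l	.LX,r13		! PC-relative load of the target address
	jmp	@r13
	mov.l	@r15+,r13	! delay slot: restore r13; the jump target
				! has already been read, so this is safe
	.align	2
   .LX:	.long	<branch target>

   where .LX stands for the internal label generated for this.lab.  */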
1607 /* Local label counter, used for constants in the pool and inside
1608 pattern branches. */
1610 static int lf = 100;
1612 /* Output code for ordinary branches. */
1614 const char *
1615 output_branch (int logic, rtx insn, rtx *operands)
1617 switch (get_attr_length (insn))
1619 case 6:
1620 /* This can happen if filling the delay slot has caused a forward
1621 branch to exceed its range (we could reverse it, but only
1622 when we know we won't overextend other branches; this should
1623 best be handled by relaxation).
1624 It can also happen when other condbranches hoist delay slot insns
1625 from their destinations, thus increasing code size.
1626 But the branch will still be in the range -4092..+4098 bytes. */
1628 if (! TARGET_RELAX)
1630 int label = lf++;
1631 /* The call to print_slot will clobber the operands. */
1632 rtx op0 = operands[0];
1634 /* If the instruction in the delay slot is annulled (true), then
1635 there is no delay slot where we can put it now. The only safe
1636 place for it is after the label. final will do that by default. */
1638 if (final_sequence
1639 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1640 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1642 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
1643 ASSEMBLER_DIALECT ? "/" : ".", label);
1644 print_slot (final_sequence);
1646 else
1647 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
1649 output_asm_insn ("bra\t%l0", &op0);
1650 fprintf (asm_out_file, "\tnop\n");
1651 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1653 return "";
1655 /* When relaxing, handle this like a short branch. The linker
1656 will fix it up if it still doesn't fit after relaxation. */
1657 case 2:
1658 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
1660 /* These are for SH2e, in which we have to account for the
1661 extra nop because of the hardware bug in annulled branches. */
1662 case 8:
1663 if (! TARGET_RELAX)
1665 int label = lf++;
1667 gcc_assert (!final_sequence
1668 || !(INSN_ANNULLED_BRANCH_P
1669 (XVECEXP (final_sequence, 0, 0))));
1670 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
1671 logic ? "f" : "t",
1672 ASSEMBLER_DIALECT ? "/" : ".", label);
1673 fprintf (asm_out_file, "\tnop\n");
1674 output_asm_insn ("bra\t%l0", operands);
1675 fprintf (asm_out_file, "\tnop\n");
1676 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1678 return "";
1680 /* When relaxing, fall through. */
1681 case 4:
1683 char buffer[10];
1685 sprintf (buffer, "b%s%ss\t%%l0",
1686 logic ? "t" : "f",
1687 ASSEMBLER_DIALECT ? "/" : ".");
1688 output_asm_insn (buffer, &operands[0]);
1689 return "nop";
1692 default:
1693 /* There should be no branches longer than this now; that would
1694 indicate that something has destroyed the branches set
1695 up in machine_dependent_reorg. */
1696 gcc_unreachable ();
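/* Editor's note (schematic example, not part of the original sh.c): the
   6-byte case above turns an out-of-range conditional branch into its
   inverse branching around an unconditional one, e.g. for a "branch if
   true" that no longer reaches its target:

	bf	.LF100		! inverted condition skips the long jump
	bra	.Ltarget
	nop
   .LF100:

   with .LF100 generated from the local label counter lf below.  */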
1700 const char *
1701 output_branchy_insn (enum rtx_code code, const char *template,
1702 rtx insn, rtx *operands)
1704 rtx next_insn = NEXT_INSN (insn);
1706 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
1708 rtx src = SET_SRC (PATTERN (next_insn));
1709 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
1711 /* Following branch not taken */
1712 operands[9] = gen_label_rtx ();
1713 emit_label_after (operands[9], next_insn);
1714 INSN_ADDRESSES_NEW (operands[9],
1715 INSN_ADDRESSES (INSN_UID (next_insn))
1716 + get_attr_length (next_insn));
1717 return template;
1719 else
1721 int offset = (branch_dest (next_insn)
1722 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
1723 if (offset >= -252 && offset <= 258)
1725 if (GET_CODE (src) == IF_THEN_ELSE)
1726 /* branch_true */
1727 src = XEXP (src, 1);
1728 operands[9] = src;
1729 return template;
1733 operands[9] = gen_label_rtx ();
1734 emit_label_after (operands[9], insn);
1735 INSN_ADDRESSES_NEW (operands[9],
1736 INSN_ADDRESSES (INSN_UID (insn))
1737 + get_attr_length (insn));
1738 return template;
1741 const char *
1742 output_ieee_ccmpeq (rtx insn, rtx *operands)
1744 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
1745 insn, operands);
1748 /* Output the start of the assembler file. */
1750 static void
1751 sh_file_start (void)
1753 default_file_start ();
1755 #ifdef SYMBIAN
1756 /* Declare the .directive section before it is used. */
1757 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
1758 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
1759 #endif
1761 if (TARGET_ELF)
1762 /* We need to show the text section with the proper
1763 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
1764 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
1765 will complain. We can teach GAS specifically about the
1766 default attributes for our choice of text section, but
1767 then we would have to change GAS again if/when we change
1768 the text section name. */
1769 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
1770 else
1771 /* Switch to the data section so that the coffsem symbol
1772 isn't in the text section. */
1773 data_section ();
1775 if (TARGET_LITTLE_ENDIAN)
1776 fputs ("\t.little\n", asm_out_file);
1778 if (!TARGET_ELF)
1780 if (TARGET_SHCOMPACT)
1781 fputs ("\t.mode\tSHcompact\n", asm_out_file);
1782 else if (TARGET_SHMEDIA)
1783 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1784 TARGET_SHMEDIA64 ? 64 : 32);
1788 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
1790 static bool
1791 unspec_caller_rtx_p (rtx pat)
1793 switch (GET_CODE (pat))
1795 case CONST:
1796 return unspec_caller_rtx_p (XEXP (pat, 0));
1797 case PLUS:
1798 case MINUS:
1799 if (unspec_caller_rtx_p (XEXP (pat, 0)))
1800 return true;
1801 return unspec_caller_rtx_p (XEXP (pat, 1));
1802 case UNSPEC:
1803 if (XINT (pat, 1) == UNSPEC_CALLER)
1804 return true;
1805 default:
1806 break;
1809 return false;
1812 /* Indicate that INSN cannot be duplicated. This is true for an insn
1813 that generates a unique label. */
1815 static bool
1816 sh_cannot_copy_insn_p (rtx insn)
1818 rtx pat;
1820 if (!reload_completed || !flag_pic)
1821 return false;
1823 if (GET_CODE (insn) != INSN)
1824 return false;
1825 if (asm_noperands (insn) >= 0)
1826 return false;
1828 pat = PATTERN (insn);
1829 if (GET_CODE (pat) != SET)
1830 return false;
1831 pat = SET_SRC (pat);
1833 if (unspec_caller_rtx_p (pat))
1834 return true;
1836 return false;
1839 /* Actual number of instructions used to make a shift by N. */
1840 static const char ashiftrt_insns[] =
1841 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
1843 /* Left shift and logical right shift are the same. */
1844 static const char shift_insns[] =
1845 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1847 /* Individual shift amounts needed to get the above length sequences.
1848 One bit right shifts clobber the T bit, so when possible, put one bit
1849 shifts in the middle of the sequence, so the ends are eligible for
1850 branch delay slots. */
1851 static const short shift_amounts[32][5] = {
1852 {0}, {1}, {2}, {2, 1},
1853 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
1854 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1855 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
1856 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1857 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1858 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1859 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1861 /* Likewise, but for shift amounts < 16, up to three highmost bits
1862 might be clobbered. This is typically used when combined with some
1863 kind of sign or zero extension. */
1865 static const char ext_shift_insns[] =
1866 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1868 static const short ext_shift_amounts[32][4] = {
1869 {0}, {1}, {2}, {2, 1},
1870 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
1871 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1872 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
1873 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1874 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1875 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1876 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1878 /* Assuming we have a value that has been sign-extended by at least one bit,
1879 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
1880 to shift it by N without data loss, and quicker than by other means? */
1881 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
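/* Worked example of how these tables drive gen_shifty_op below, assuming
   the usual SH shift insns (shll, shll2, shll8, shll16 and their
   right-shift counterparts):

     x << 6  -> shift_insns[6]  == 3, shift_amounts[6]  == {2, 2, 2},
                i.e. shll2; shll2; shll2
     x << 30 -> shift_insns[30] == 3, shift_amounts[30] == {16, -2, 16},
                i.e. shll16; shlr2; shll16  (a negative entry flips the
                shift direction, see gen_ashift below).

   ext_shift_amounts trades the three highmost bits for shorter sequences,
   e.g. amount 6 becomes {8, -2}: shll8; shlr2.  */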
1883 /* This is used in length attributes in sh.md to help compute the length
1884 of arbitrary constant shift instructions. */
1887 shift_insns_rtx (rtx insn)
1889 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1890 int shift_count = INTVAL (XEXP (set_src, 1));
1891 enum rtx_code shift_code = GET_CODE (set_src);
1893 switch (shift_code)
1895 case ASHIFTRT:
1896 return ashiftrt_insns[shift_count];
1897 case LSHIFTRT:
1898 case ASHIFT:
1899 return shift_insns[shift_count];
1900 default:
1901 gcc_unreachable ();
1905 /* Return the cost of a shift. */
1907 static inline int
1908 shiftcosts (rtx x)
1910 int value;
1912 if (TARGET_SHMEDIA)
1913 return 1;
1915 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
1917 if (GET_MODE (x) == DImode
1918 && GET_CODE (XEXP (x, 1)) == CONST_INT
1919 && INTVAL (XEXP (x, 1)) == 1)
1920 return 2;
1922 /* Everything else is invalid, because there is no pattern for it. */
1923 return 10000;
1925 /* If the shift is by a non-constant, then this will be expensive. */
1926 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1927 return SH_DYNAMIC_SHIFT_COST;
1929 value = INTVAL (XEXP (x, 1));
1931 /* Otherwise, return the true cost in instructions. */
1932 if (GET_CODE (x) == ASHIFTRT)
1934 int cost = ashiftrt_insns[value];
1935 /* If SH3, then we put the constant in a reg and use shad. */
1936 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1937 cost = 1 + SH_DYNAMIC_SHIFT_COST;
1938 return cost;
1940 else
1941 return shift_insns[value];
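/* E.g. an arithmetic right shift by 20 would need ashiftrt_insns[20] == 8
   insns when done with constant shifts; the cost is capped at
   1 + SH_DYNAMIC_SHIFT_COST to reflect the SH3 alternative of loading the
   negated count into a register and using shad.  */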
1944 /* Return the cost of an AND operation. */
1946 static inline int
1947 andcosts (rtx x)
1949 int i;
1951 /* ANDing with a register is a single-cycle and instruction. */
1952 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1953 return 1;
1955 i = INTVAL (XEXP (x, 1));
1957 if (TARGET_SHMEDIA)
1959 if ((GET_CODE (XEXP (x, 1)) == CONST_INT
1960 && CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1961 || EXTRA_CONSTRAINT_C16 (XEXP (x, 1)))
1962 return 1;
1963 else
1964 return 2;
1967 /* These constants are single cycle extu.[bw] instructions. */
1968 if (i == 0xff || i == 0xffff)
1969 return 1;
1970 /* Constants that can be used in an and immediate instruction in a single
1971 cycle, but this requires r0, so make it a little more expensive. */
1972 if (CONST_OK_FOR_K08 (i))
1973 return 2;
1974 /* Constants that can be loaded with a mov immediate and an and.
1975 This case is probably unnecessary. */
1976 if (CONST_OK_FOR_I08 (i))
1977 return 2;
1978 /* Any other constant requires a 2 cycle pc-relative load plus an and.
1979 This case is probably unnecessary. */
1980 return 3;
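/* Examples of the resulting costs, assuming the usual SH insns:
     x & 0xff    -> 1  (extu.b)
     x & 0x7f    -> 2  (and #imm, needs r0)
     x & 0x12345 -> 3  (pc-relative load of the mask, then and)  */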
1983 /* Return the cost of an addition or a subtraction. */
1985 static inline int
1986 addsubcosts (rtx x)
1988 /* Adding a register is a single cycle insn. */
1989 if (GET_CODE (XEXP (x, 1)) == REG
1990 || GET_CODE (XEXP (x, 1)) == SUBREG)
1991 return 1;
1993 /* Likewise for small constants. */
1994 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1995 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
1996 return 1;
1998 if (TARGET_SHMEDIA)
1999 switch (GET_CODE (XEXP (x, 1)))
2001 case CONST:
2002 case LABEL_REF:
2003 case SYMBOL_REF:
2004 return TARGET_SHMEDIA64 ? 5 : 3;
2006 case CONST_INT:
2007 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2008 return 2;
2009 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2010 return 3;
2011 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2012 return 4;
2014 /* Fall through. */
2015 default:
2016 return 5;
2019 /* Any other constant requires a 2 cycle pc-relative load plus an
2020 addition. */
2021 return 3;
2024 /* Return the cost of a multiply. */
2025 static inline int
2026 multcosts (rtx x ATTRIBUTE_UNUSED)
2028 if (sh_multcost >= 0)
2029 return sh_multcost;
2030 if (TARGET_SHMEDIA)
2031 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2032 accept constants. Ideally, we would use a cost of one or two and
2033 add the cost of the operand, but disregard the latter when inside loops
2034 and loop invariant code motion is still to follow.
2035 Using a multiply first and splitting it later if it's a loss
2036 doesn't work because of different sign / zero extension semantics
2037 of multiplies vs. shifts. */
2038 return TARGET_SMALLCODE ? 2 : 3;
2040 if (TARGET_SH2)
2042 /* We have a mul insn, so we can never take more than the mul and the
2043 read of the mac reg, but count more because of the latency and extra
2044 reg usage. */
2045 if (TARGET_SMALLCODE)
2046 return 2;
2047 return 3;
2050 /* If we're aiming at small code, then just count the number of
2051 insns in a multiply call sequence. */
2052 if (TARGET_SMALLCODE)
2053 return 5;
2055 /* Otherwise count all the insns in the routine we'd be calling too. */
2056 return 20;
2059 /* Compute a (partial) cost for rtx X. Return true if the complete
2060 cost has been computed, and false if subexpressions should be
2061 scanned. In either case, *TOTAL contains the cost result. */
2063 static bool
2064 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
2066 switch (code)
2068 case CONST_INT:
2069 if (TARGET_SHMEDIA)
2071 if (INTVAL (x) == 0)
2072 *total = 0;
2073 else if (outer_code == AND && and_operand ((x), DImode))
2074 *total = 0;
2075 else if ((outer_code == IOR || outer_code == XOR
2076 || outer_code == PLUS)
2077 && CONST_OK_FOR_I10 (INTVAL (x)))
2078 *total = 0;
2079 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2080 *total = COSTS_N_INSNS (outer_code != SET);
2081 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2082 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2083 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2084 *total = COSTS_N_INSNS (3);
2085 else
2086 *total = COSTS_N_INSNS (4);
2087 return true;
2089 if (CONST_OK_FOR_I08 (INTVAL (x)))
2090 *total = 0;
2091 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2092 && CONST_OK_FOR_K08 (INTVAL (x)))
2093 *total = 1;
2094 else
2095 *total = 8;
2096 return true;
2098 case CONST:
2099 case LABEL_REF:
2100 case SYMBOL_REF:
2101 if (TARGET_SHMEDIA64)
2102 *total = COSTS_N_INSNS (4);
2103 else if (TARGET_SHMEDIA32)
2104 *total = COSTS_N_INSNS (2);
2105 else
2106 *total = 5;
2107 return true;
2109 case CONST_DOUBLE:
2110 if (TARGET_SHMEDIA)
2111 *total = COSTS_N_INSNS (4);
2112 else
2113 *total = 10;
2114 return true;
2115 case CONST_VECTOR:
2116 if (x == CONST0_RTX (GET_MODE (x)))
2117 *total = 0;
2118 else if (sh_1el_vec (x, VOIDmode))
2119 *total = outer_code != SET;
2120 if (sh_rep_vec (x, VOIDmode))
2121 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2122 + (outer_code != SET));
2123 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2124 return true;
2126 case PLUS:
2127 case MINUS:
2128 *total = COSTS_N_INSNS (addsubcosts (x));
2129 return true;
2131 case AND:
2132 *total = COSTS_N_INSNS (andcosts (x));
2133 return true;
2135 case MULT:
2136 *total = COSTS_N_INSNS (multcosts (x));
2137 return true;
2139 case ASHIFT:
2140 case ASHIFTRT:
2141 case LSHIFTRT:
2142 *total = COSTS_N_INSNS (shiftcosts (x));
2143 return true;
2145 case DIV:
2146 case UDIV:
2147 case MOD:
2148 case UMOD:
2149 *total = COSTS_N_INSNS (20);
2150 return true;
2152 case PARALLEL:
2153 if (sh_1el_vec (x, VOIDmode))
2154 *total = outer_code != SET;
2155 if (sh_rep_vec (x, VOIDmode))
2156 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2157 + (outer_code != SET));
2158 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2159 return true;
2161 case FLOAT:
2162 case FIX:
2163 *total = 100;
2164 return true;
2166 default:
2167 return false;
2171 /* Compute the cost of an address. For the SH, all valid addresses are
2172 the same cost. Use a slightly higher cost for reg + reg addressing,
2173 since it increases pressure on r0. */
2175 static int
2176 sh_address_cost (rtx X)
2178 return (GET_CODE (X) == PLUS
2179 && ! CONSTANT_P (XEXP (X, 1))
2180 && ! TARGET_SHMEDIA ? 1 : 0);
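/* E.g. @rn and @(disp,rn) addresses cost 0, while an @(r0,rn) style
   reg + reg address costs 1 on non-SHmedia targets.  */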
2183 /* Code to expand a shift. */
2185 void
2186 gen_ashift (int type, int n, rtx reg)
2188 /* Negative values here come from the shift_amounts array. */
2189 if (n < 0)
2191 if (type == ASHIFT)
2192 type = LSHIFTRT;
2193 else
2194 type = ASHIFT;
2195 n = -n;
2198 switch (type)
2200 case ASHIFTRT:
2201 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
2202 break;
2203 case LSHIFTRT:
2204 if (n == 1)
2205 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
2206 else
2207 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
2208 break;
2209 case ASHIFT:
2210 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
2211 break;
2215 /* Same for HImode */
2217 void
2218 gen_ashift_hi (int type, int n, rtx reg)
2220 /* Negative values here come from the shift_amounts array. */
2221 if (n < 0)
2223 if (type == ASHIFT)
2224 type = LSHIFTRT;
2225 else
2226 type = ASHIFT;
2227 n = -n;
2230 switch (type)
2232 case ASHIFTRT:
2233 case LSHIFTRT:
2234 /* We don't have HImode right shift operations because using the
2235 ordinary 32 bit shift instructions for that doesn't generate proper
2236 zero/sign extension.
2237 gen_ashift_hi is only called in contexts where we know that the
2238 sign extension works out correctly. */
2240 int offset = 0;
2241 if (GET_CODE (reg) == SUBREG)
2243 offset = SUBREG_BYTE (reg);
2244 reg = SUBREG_REG (reg);
2246 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
2247 break;
2249 case ASHIFT:
2250 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
2251 break;
2255 /* Output RTL to split a constant shift into its component SH constant
2256 shift instructions. */
2258 void
2259 gen_shifty_op (int code, rtx *operands)
2261 int value = INTVAL (operands[2]);
2262 int max, i;
2264 /* Truncate the shift count in case it is out of bounds. */
2265 value = value & 0x1f;
2267 if (value == 31)
2269 if (code == LSHIFTRT)
2271 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
2272 emit_insn (gen_movt (operands[0]));
2273 return;
2275 else if (code == ASHIFT)
2277 /* There is a two instruction sequence for 31 bit left shifts,
2278 but it requires r0. */
2279 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
2281 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
2282 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
2283 return;
2287 else if (value == 0)
2289 /* This can happen even when optimizing, if there were subregs before
2290 reload. Don't output a nop here, as this is never optimized away;
2291 use a no-op move instead. */
2292 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
2293 return;
2296 max = shift_insns[value];
2297 for (i = 0; i < max; i++)
2298 gen_ashift (code, shift_amounts[value][i], operands[0]);
2301 /* Same as above, but optimized for values where the topmost bits don't
2302 matter. */
2304 void
2305 gen_shifty_hi_op (int code, rtx *operands)
2307 int value = INTVAL (operands[2]);
2308 int max, i;
2309 void (*gen_fun) (int, int, rtx);
2311 /* This operation is used by and_shl for SImode values with a few
2312 high bits known to be cleared. */
2313 value &= 31;
2314 if (value == 0)
2316 emit_insn (gen_nop ());
2317 return;
2320 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2321 if (code == ASHIFT)
2323 max = ext_shift_insns[value];
2324 for (i = 0; i < max; i++)
2325 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2327 else
2328 /* When shifting right, emit the shifts in reverse order, so that
2329 solitary negative values come first. */
2330 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2331 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2334 /* Output RTL for an arithmetic right shift. */
2336 /* ??? Rewrite to use super-optimizer sequences. */
2339 expand_ashiftrt (rtx *operands)
2341 rtx wrk;
2342 char func[18];
2343 int value;
2345 if (TARGET_SH3)
2347 if (GET_CODE (operands[2]) != CONST_INT)
2349 rtx count = copy_to_mode_reg (SImode, operands[2]);
2350 emit_insn (gen_negsi2 (count, count));
2351 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2352 return 1;
2354 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2355 > 1 + SH_DYNAMIC_SHIFT_COST)
2357 rtx count
2358 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2359 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2360 return 1;
2363 if (GET_CODE (operands[2]) != CONST_INT)
2364 return 0;
2366 value = INTVAL (operands[2]) & 31;
2368 if (value == 31)
2370 /* If we are called from abs expansion, arrange things so that we
2371 can use a single MT instruction that doesn't clobber the source,
2372 if LICM can hoist out the load of the constant zero. */
2373 if (currently_expanding_to_rtl)
2375 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
2376 operands[1]));
2377 emit_insn (gen_mov_neg_si_t (operands[0]));
2378 return 1;
2380 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2381 return 1;
2383 else if (value >= 16 && value <= 19)
2385 wrk = gen_reg_rtx (SImode);
2386 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2387 value -= 16;
2388 while (value--)
2389 gen_ashift (ASHIFTRT, 1, wrk);
2390 emit_move_insn (operands[0], wrk);
2391 return 1;
2393 /* Expand a short sequence inline; for longer ones, call a magic routine. */
2394 else if (value <= 5)
2396 wrk = gen_reg_rtx (SImode);
2397 emit_move_insn (wrk, operands[1]);
2398 while (value--)
2399 gen_ashift (ASHIFTRT, 1, wrk);
2400 emit_move_insn (operands[0], wrk);
2401 return 1;
2404 wrk = gen_reg_rtx (Pmode);
2406 /* Load the value into an arg reg and call a helper. */
2407 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2408 sprintf (func, "__ashiftrt_r4_%d", value);
2409 function_symbol (wrk, func, SFUNC_STATIC);
2410 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2411 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
2412 return 1;
2416 sh_dynamicalize_shift_p (rtx count)
2418 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
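/* E.g. if SH_DYNAMIC_SHIFT_COST is 1, a constant shift by 13
   (shift_insns[13] == 4) is turned into a dynamic shift, while a shift
   by 9 (shift_insns[9] == 2) is not.  */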
2421 /* Try to find a good way to implement the combiner pattern
2422 [(set (match_operand:SI 0 "register_operand" "r")
2423 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2424 (match_operand:SI 2 "const_int_operand" "n"))
2425 (match_operand:SI 3 "const_int_operand" "n"))) .
2426 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2427 return 0 for simple right / left or left/right shift combination.
2428 return 1 for a combination of shifts with zero_extend.
2429 return 2 for a combination of shifts with an AND that needs r0.
2430 return 3 for a combination of shifts with an AND that needs an extra
2431 scratch register, when the three highmost bits of the AND mask are clear.
2432 return 4 for a combination of shifts with an AND that needs an extra
2433 scratch register, when any of the three highmost bits of the AND mask
2434 is set.
2435 If ATTRP is set, store an initial right shift width in ATTRP[0],
2436 and the instruction length in ATTRP[1]. These values are not valid
2437 when returning 0.
2438 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2439 shift_amounts for the last shift value that is to be used before the
2440 sign extend. */
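/* An illustrative (not exhaustive) case: LEFT_RTX == 1 and MASK_RTX == 0x1fe
   describe (x << 1) & 0x1fe, i.e. the low byte of x shifted left by one.
   The zero-extend alternative should win here - extu.b followed by shll -
   so the function returns 1 with a length of 2.  */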
2442 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2444 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2445 int left = INTVAL (left_rtx), right;
2446 int best = 0;
2447 int cost, best_cost = 10000;
2448 int best_right = 0, best_len = 0;
2449 int i;
2450 int can_ext;
2452 if (left < 0 || left > 31)
2453 return 0;
2454 if (GET_CODE (mask_rtx) == CONST_INT)
2455 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2456 else
2457 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2458 /* Can this be expressed as a right shift / left shift pair? */
2459 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
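/* mask ^ (mask - 1) sets all bits up to and including the lowest set bit
   of mask, so the line above leaves lsb equal to that lowest set bit
   (equivalently, mask & -mask).  */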
2460 right = exact_log2 (lsb);
2461 mask2 = ~(mask + lsb - 1);
2462 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2463 /* mask has no zeroes other than trailing zeroes <==> ! mask2 */
2464 if (! mask2)
2465 best_cost = shift_insns[right] + shift_insns[right + left];
2466 /* mask has no trailing zeroes <==> ! right */
2467 else if (! right && mask2 == ~(lsb2 - 1))
2469 int late_right = exact_log2 (lsb2);
2470 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2472 /* Try to use zero extend. */
2473 if (mask2 == ~(lsb2 - 1))
2475 int width, first;
2477 for (width = 8; width <= 16; width += 8)
2479 /* Can we zero-extend right away? */
2480 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2482 cost
2483 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2484 if (cost < best_cost)
2486 best = 1;
2487 best_cost = cost;
2488 best_right = right;
2489 best_len = cost;
2490 if (attrp)
2491 attrp[2] = -1;
2493 continue;
2495 /* ??? Could try to put zero extend into initial right shift,
2496 or even shift a bit left before the right shift. */
2497 /* Determine value of first part of left shift, to get to the
2498 zero extend cut-off point. */
2499 first = width - exact_log2 (lsb2) + right;
2500 if (first >= 0 && right + left - first >= 0)
2502 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2503 + ext_shift_insns[right + left - first];
2504 if (cost < best_cost)
2506 best = 1;
2507 best_cost = cost;
2508 best_right = right;
2509 best_len = cost;
2510 if (attrp)
2511 attrp[2] = first;
2516 /* Try to use r0 AND pattern */
2517 for (i = 0; i <= 2; i++)
2519 if (i > right)
2520 break;
2521 if (! CONST_OK_FOR_K08 (mask >> i))
2522 continue;
2523 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2524 if (cost < best_cost)
2526 best = 2;
2527 best_cost = cost;
2528 best_right = i;
2529 best_len = cost - 1;
2532 /* Try to use a scratch register to hold the AND operand. */
2533 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2534 for (i = 0; i <= 2; i++)
2536 if (i > right)
2537 break;
2538 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2539 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2540 if (cost < best_cost)
2542 best = 4 - can_ext;
2543 best_cost = cost;
2544 best_right = i;
2545 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2549 if (attrp)
2551 attrp[0] = best_right;
2552 attrp[1] = best_len;
2554 return best;
2557 /* This is used in length attributes of the unnamed instructions
2558 corresponding to shl_and_kind return values of 1 and 2. */
2560 shl_and_length (rtx insn)
2562 rtx set_src, left_rtx, mask_rtx;
2563 int attributes[3];
2565 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2566 left_rtx = XEXP (XEXP (set_src, 0), 1);
2567 mask_rtx = XEXP (set_src, 1);
2568 shl_and_kind (left_rtx, mask_rtx, attributes);
2569 return attributes[1];
2572 /* This is used in length attribute of the and_shl_scratch instruction. */
2575 shl_and_scr_length (rtx insn)
2577 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2578 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2579 rtx op = XEXP (set_src, 0);
2580 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2581 op = XEXP (XEXP (op, 0), 0);
2582 return len + shift_insns[INTVAL (XEXP (op, 1))];
2585 /* Generate rtl for instructions for which shl_and_kind advised a particular
2586 method of generating them, i.e. returned nonzero. */
2589 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2591 int attributes[3];
2592 unsigned HOST_WIDE_INT mask;
2593 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2594 int right, total_shift;
2595 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2597 right = attributes[0];
2598 total_shift = INTVAL (left_rtx) + right;
2599 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2600 switch (kind)
2602 default:
2603 return -1;
2604 case 1:
2606 int first = attributes[2];
2607 rtx operands[3];
2609 if (first < 0)
2611 emit_insn ((mask << right) <= 0xff
2612 ? gen_zero_extendqisi2 (dest,
2613 gen_lowpart (QImode, source))
2614 : gen_zero_extendhisi2 (dest,
2615 gen_lowpart (HImode, source)));
2616 source = dest;
2618 if (source != dest)
2619 emit_insn (gen_movsi (dest, source));
2620 operands[0] = dest;
2621 if (right)
2623 operands[2] = GEN_INT (right);
2624 gen_shifty_hi_op (LSHIFTRT, operands);
2626 if (first > 0)
2628 operands[2] = GEN_INT (first);
2629 gen_shifty_hi_op (ASHIFT, operands);
2630 total_shift -= first;
2631 mask <<= first;
2633 if (first >= 0)
2634 emit_insn (mask <= 0xff
2635 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
2636 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2637 if (total_shift > 0)
2639 operands[2] = GEN_INT (total_shift);
2640 gen_shifty_hi_op (ASHIFT, operands);
2642 break;
2644 case 4:
2645 shift_gen_fun = gen_shifty_op;
2646 case 3:
2647 /* If the topmost bit that matters is set, set the topmost bits
2648 that don't matter. This way, we might be able to get a shorter
2649 signed constant. */
2650 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
2651 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
2652 case 2:
2653 /* Don't expand fine-grained when combining, because that will
2654 make the pattern fail. */
2655 if (currently_expanding_to_rtl
2656 || reload_in_progress || reload_completed)
2658 rtx operands[3];
2660 /* Cases 3 and 4 should be handled by this split
2661 only while combining */
2662 gcc_assert (kind <= 2);
2663 if (right)
2665 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2666 source = dest;
2668 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
2669 if (total_shift)
2671 operands[0] = dest;
2672 operands[1] = dest;
2673 operands[2] = GEN_INT (total_shift);
2674 shift_gen_fun (ASHIFT, operands);
2676 break;
2678 else
2680 int neg = 0;
2681 if (kind != 4 && total_shift < 16)
2683 neg = -ext_shift_amounts[total_shift][1];
2684 if (neg > 0)
2685 neg -= ext_shift_amounts[total_shift][2];
2686 else
2687 neg = 0;
2689 emit_insn (gen_and_shl_scratch (dest, source,
2690 GEN_INT (right),
2691 GEN_INT (mask),
2692 GEN_INT (total_shift + neg),
2693 GEN_INT (neg)));
2694 emit_insn (gen_movsi (dest, dest));
2695 break;
2698 return 0;
2701 /* Try to find a good way to implement the combiner pattern
2702 [(set (match_operand:SI 0 "register_operand" "=r")
2703 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2704 (match_operand:SI 2 "const_int_operand" "n")
2705 (match_operand:SI 3 "const_int_operand" "n")
2706 (const_int 0)))
2707 (clobber (reg:SI T_REG))]
2708 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2709 return 0 for simple left / right shift combination.
2710 return 1 for left shift / 8 bit sign extend / left shift.
2711 return 2 for left shift / 16 bit sign extend / left shift.
2712 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2713 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2714 return 5 for left shift / 16 bit sign extend / right shift
2715 return 6 for < 8 bit sign extend / left shift.
2716 return 7 for < 8 bit sign extend / left shift / single right shift.
2717 If COSTP is nonzero, assign the calculated cost to *COSTP. */
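/* Illustration: LEFT_RTX == 16 and SIZE_RTX == 24 give insize == 8, i.e.
   sign-extract the low byte of the operand and place it at bit 16.
   Alternative 1 (8 bit sign extend / left shift) should win here with a
   cost of 2: exts.b followed by shll16.  */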
2720 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
2722 int left, size, insize, ext;
2723 int cost = 0, best_cost;
2724 int kind;
2726 left = INTVAL (left_rtx);
2727 size = INTVAL (size_rtx);
2728 insize = size - left;
2729 gcc_assert (insize > 0);
2730 /* Default to left / right shift. */
2731 kind = 0;
2732 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
2733 if (size <= 16)
2735 /* 16 bit shift / sign extend / 16 bit shift */
2736 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2737 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2738 below, by alternative 3 or something even better. */
2739 if (cost < best_cost)
2741 kind = 5;
2742 best_cost = cost;
2745 /* Try a plain sign extend between two shifts. */
2746 for (ext = 16; ext >= insize; ext -= 8)
2748 if (ext <= size)
2750 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2751 if (cost < best_cost)
2753 kind = ext / (unsigned) 8;
2754 best_cost = cost;
2757 /* Check if we can do a sloppy shift with a final signed shift
2758 restoring the sign. */
2759 if (EXT_SHIFT_SIGNED (size - ext))
2760 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2761 /* If not, maybe it's still cheaper to do the second shift sloppy,
2762 and do a final sign extend? */
2763 else if (size <= 16)
2764 cost = ext_shift_insns[ext - insize] + 1
2765 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2766 else
2767 continue;
2768 if (cost < best_cost)
2770 kind = ext / (unsigned) 8 + 2;
2771 best_cost = cost;
2774 /* Check if we can sign extend in r0 */
2775 if (insize < 8)
2777 cost = 3 + shift_insns[left];
2778 if (cost < best_cost)
2780 kind = 6;
2781 best_cost = cost;
2783 /* Try the same with a final signed shift. */
2784 if (left < 31)
2786 cost = 3 + ext_shift_insns[left + 1] + 1;
2787 if (cost < best_cost)
2789 kind = 7;
2790 best_cost = cost;
2794 if (TARGET_SH3)
2796 /* Try to use a dynamic shift. */
2797 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2798 if (cost < best_cost)
2800 kind = 0;
2801 best_cost = cost;
2804 if (costp)
2805 *costp = cost;
2806 return kind;
2809 /* Function to be used in the length attribute of the instructions
2810 implementing this pattern. */
2813 shl_sext_length (rtx insn)
2815 rtx set_src, left_rtx, size_rtx;
2816 int cost;
2818 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2819 left_rtx = XEXP (XEXP (set_src, 0), 1);
2820 size_rtx = XEXP (set_src, 1);
2821 shl_sext_kind (left_rtx, size_rtx, &cost);
2822 return cost;
2825 /* Generate rtl for this pattern */
2828 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
2830 int kind;
2831 int left, size, insize, cost;
2832 rtx operands[3];
2834 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2835 left = INTVAL (left_rtx);
2836 size = INTVAL (size_rtx);
2837 insize = size - left;
2838 switch (kind)
2840 case 1:
2841 case 2:
2842 case 3:
2843 case 4:
2845 int ext = kind & 1 ? 8 : 16;
2846 int shift2 = size - ext;
2848 /* Don't expand fine-grained when combining, because that will
2849 make the pattern fail. */
2850 if (! currently_expanding_to_rtl
2851 && ! reload_in_progress && ! reload_completed)
2853 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2854 emit_insn (gen_movsi (dest, source));
2855 break;
2857 if (dest != source)
2858 emit_insn (gen_movsi (dest, source));
2859 operands[0] = dest;
2860 if (ext - insize)
2862 operands[2] = GEN_INT (ext - insize);
2863 gen_shifty_hi_op (ASHIFT, operands);
2865 emit_insn (kind & 1
2866 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2867 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2868 if (kind <= 2)
2870 if (shift2)
2872 operands[2] = GEN_INT (shift2);
2873 gen_shifty_op (ASHIFT, operands);
2876 else
2878 if (shift2 > 0)
2880 if (EXT_SHIFT_SIGNED (shift2))
2882 operands[2] = GEN_INT (shift2 + 1);
2883 gen_shifty_op (ASHIFT, operands);
2884 operands[2] = const1_rtx;
2885 gen_shifty_op (ASHIFTRT, operands);
2886 break;
2888 operands[2] = GEN_INT (shift2);
2889 gen_shifty_hi_op (ASHIFT, operands);
2891 else if (shift2)
2893 operands[2] = GEN_INT (-shift2);
2894 gen_shifty_hi_op (LSHIFTRT, operands);
2896 emit_insn (size <= 8
2897 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2898 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2900 break;
2902 case 5:
2904 int i = 16 - size;
2905 if (! currently_expanding_to_rtl
2906 && ! reload_in_progress && ! reload_completed)
2907 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2908 else
2910 operands[0] = dest;
2911 operands[2] = GEN_INT (16 - insize);
2912 gen_shifty_hi_op (ASHIFT, operands);
2913 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2915 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2916 while (--i >= 0)
2917 gen_ashift (ASHIFTRT, 1, dest);
2918 break;
2920 case 6:
2921 case 7:
2922 /* Don't expand fine-grained when combining, because that will
2923 make the pattern fail. */
2924 if (! currently_expanding_to_rtl
2925 && ! reload_in_progress && ! reload_completed)
2927 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2928 emit_insn (gen_movsi (dest, source));
2929 break;
2931 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2932 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2933 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2934 operands[0] = dest;
2935 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2936 gen_shifty_op (ASHIFT, operands);
2937 if (kind == 7)
2938 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
2939 break;
2940 default:
2941 return -1;
2943 return 0;
2946 /* Prefix a symbol_ref name with "datalabel". */
2949 gen_datalabel_ref (rtx sym)
2951 const char *str;
2953 if (GET_CODE (sym) == LABEL_REF)
2954 return gen_rtx_CONST (GET_MODE (sym),
2955 gen_rtx_UNSPEC (GET_MODE (sym),
2956 gen_rtvec (1, sym),
2957 UNSPEC_DATALABEL));
2959 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
2961 str = XSTR (sym, 0);
2962 /* Share all SYMBOL_REF strings with the same value - that is important
2963 for cse. */
2964 str = IDENTIFIER_POINTER (get_identifier (str));
2965 XSTR (sym, 0) = str;
2967 return sym;
2971 /* The SH cannot load a large constant into a register; constants have to
2972 come from a pc relative load. The reference of a pc relative load
2973 instruction must be less than 1k in front of the instruction. This
2974 means that we often have to dump a constant inside a function, and
2975 generate code to branch around it.
2977 It is important to minimize this, since the branches will slow things
2978 down and make things bigger.
2980 Worst case code looks like:
2982 mov.l L1,rn
2983 bra L2
2985 align
2986 L1: .long value
2990 mov.l L3,rn
2991 bra L4
2993 align
2994 L3: .long value
2998 We fix this by performing a scan before scheduling, which notices which
2999 instructions need to have their operands fetched from the constant table
3000 and builds the table.
3002 The algorithm is:
3004 scan, find an instruction which needs a pcrel move. Look forward, find the
3005 last barrier which is within MAX_COUNT bytes of the requirement.
3006 If there isn't one, make one. Process all the instructions between
3007 the insn we found and the barrier.
3009 In the above example, we can tell that L3 is within 1k of L1, so
3010 the first move can be shrunk from the 3 insn+constant sequence into
3011 just 1 insn, and the constant moved to L3 to make:
3013 mov.l L1,rn
3015 mov.l L3,rn
3016 bra L4
3018 align
3019 L3:.long value
3020 L4:.long value
3022 Then the second move becomes the target for the shortening process. */
3024 typedef struct
3026 rtx value; /* Value in table. */
3027 rtx label; /* Label of value. */
3028 rtx wend; /* End of window. */
3029 enum machine_mode mode; /* Mode of value. */
3031 /* True if this constant is accessed as part of a post-increment
3032 sequence. Note that HImode constants are never accessed in this way. */
3033 bool part_of_sequence_p;
3034 } pool_node;
3036 /* The maximum number of constants that can fit into one pool, since
3037 constants in the range 0..510 are at least 2 bytes long, and in the
3038 range from there to 1018 at least 4 bytes. */
3040 #define MAX_POOL_SIZE 372
3041 static pool_node pool_vector[MAX_POOL_SIZE];
3042 static int pool_size;
3043 static rtx pool_window_label;
3044 static int pool_window_last;
3046 /* ??? If we need a constant in HImode which is the truncated value of a
3047 constant we need in SImode, we could combine the two entries thus saving
3048 two bytes. Is this common enough to be worth the effort of implementing
3049 it? */
3051 /* ??? This stuff should be done at the same time that we shorten branches.
3052 As it is now, we must assume that all branches are the maximum size, and
3053 this causes us to almost always output constant pools sooner than
3054 necessary. */
3056 /* Add a constant to the pool and return its label. */
3058 static rtx
3059 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3061 int i;
3062 rtx lab, new, ref, newref;
3064 /* First see if we've already got it. */
3065 for (i = 0; i < pool_size; i++)
3067 if (x->code == pool_vector[i].value->code
3068 && mode == pool_vector[i].mode)
3070 if (x->code == CODE_LABEL)
3072 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3073 continue;
3075 if (rtx_equal_p (x, pool_vector[i].value))
3077 lab = new = 0;
3078 if (! last_value
3079 || ! i
3080 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3082 new = gen_label_rtx ();
3083 LABEL_REFS (new) = pool_vector[i].label;
3084 pool_vector[i].label = lab = new;
3086 if (lab && pool_window_label)
3088 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
3089 ref = pool_vector[pool_window_last].wend;
3090 LABEL_NEXTREF (newref) = ref;
3091 pool_vector[pool_window_last].wend = newref;
3093 if (new)
3094 pool_window_label = new;
3095 pool_window_last = i;
3096 return lab;
3101 /* Need a new one. */
3102 pool_vector[pool_size].value = x;
3103 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
3105 lab = 0;
3106 pool_vector[pool_size - 1].part_of_sequence_p = true;
3108 else
3109 lab = gen_label_rtx ();
3110 pool_vector[pool_size].mode = mode;
3111 pool_vector[pool_size].label = lab;
3112 pool_vector[pool_size].wend = NULL_RTX;
3113 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
3114 if (lab && pool_window_label)
3116 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
3117 ref = pool_vector[pool_window_last].wend;
3118 LABEL_NEXTREF (newref) = ref;
3119 pool_vector[pool_window_last].wend = newref;
3121 if (lab)
3122 pool_window_label = lab;
3123 pool_window_last = pool_size;
3124 pool_size++;
3125 return lab;
3128 /* Output the literal table. START, if nonzero, is the first instruction
3129 this table is needed for, and also indicates that there is at least one
3130 casesi_worker_2 instruction; we have to emit the operand3 labels from
3131 these insns at a 4-byte aligned position. BARRIER is the barrier
3132 after which we are to place the table. */
3134 static void
3135 dump_table (rtx start, rtx barrier)
3137 rtx scan = barrier;
3138 int i;
3139 int need_align = 1;
3140 rtx lab, ref;
3141 int have_df = 0;
3143 /* Do two passes, first time dump out the HI sized constants. */
3145 for (i = 0; i < pool_size; i++)
3147 pool_node *p = &pool_vector[i];
3149 if (p->mode == HImode)
3151 if (need_align)
3153 scan = emit_insn_after (gen_align_2 (), scan);
3154 need_align = 0;
3156 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3157 scan = emit_label_after (lab, scan);
3158 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
3159 scan);
3160 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3162 lab = XEXP (ref, 0);
3163 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3166 else if (p->mode == DFmode)
3167 have_df = 1;
3170 need_align = 1;
3172 if (start)
3174 scan = emit_insn_after (gen_align_4 (), scan);
3175 need_align = 0;
3176 for (; start != barrier; start = NEXT_INSN (start))
3177 if (GET_CODE (start) == INSN
3178 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
3180 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
3181 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
3183 scan = emit_label_after (lab, scan);
3186 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
3188 rtx align_insn = NULL_RTX;
3190 scan = emit_label_after (gen_label_rtx (), scan);
3191 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3192 need_align = 0;
3194 for (i = 0; i < pool_size; i++)
3196 pool_node *p = &pool_vector[i];
3198 switch (p->mode)
3200 case HImode:
3201 break;
3202 case SImode:
3203 case SFmode:
3204 if (align_insn && !p->part_of_sequence_p)
3206 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3207 emit_label_before (lab, align_insn);
3208 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
3209 align_insn);
3210 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3212 lab = XEXP (ref, 0);
3213 emit_insn_before (gen_consttable_window_end (lab),
3214 align_insn);
3216 delete_insn (align_insn);
3217 align_insn = NULL_RTX;
3218 continue;
3220 else
3222 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3223 scan = emit_label_after (lab, scan);
3224 scan = emit_insn_after (gen_consttable_4 (p->value,
3225 const0_rtx), scan);
3226 need_align = ! need_align;
3228 break;
3229 case DFmode:
3230 if (need_align)
3232 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3233 align_insn = scan;
3234 need_align = 0;
3236 case DImode:
3237 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3238 scan = emit_label_after (lab, scan);
3239 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3240 scan);
3241 break;
3242 default:
3243 gcc_unreachable ();
3246 if (p->mode != HImode)
3248 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3250 lab = XEXP (ref, 0);
3251 scan = emit_insn_after (gen_consttable_window_end (lab),
3252 scan);
3257 pool_size = 0;
3260 for (i = 0; i < pool_size; i++)
3262 pool_node *p = &pool_vector[i];
3264 switch (p->mode)
3266 case HImode:
3267 break;
3268 case SImode:
3269 case SFmode:
3270 if (need_align)
3272 need_align = 0;
3273 scan = emit_label_after (gen_label_rtx (), scan);
3274 scan = emit_insn_after (gen_align_4 (), scan);
3276 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3277 scan = emit_label_after (lab, scan);
3278 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
3279 scan);
3280 break;
3281 case DFmode:
3282 case DImode:
3283 if (need_align)
3285 need_align = 0;
3286 scan = emit_label_after (gen_label_rtx (), scan);
3287 scan = emit_insn_after (gen_align_4 (), scan);
3289 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3290 scan = emit_label_after (lab, scan);
3291 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3292 scan);
3293 break;
3294 default:
3295 gcc_unreachable ();
3298 if (p->mode != HImode)
3300 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3302 lab = XEXP (ref, 0);
3303 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3308 scan = emit_insn_after (gen_consttable_end (), scan);
3309 scan = emit_barrier_after (scan);
3310 pool_size = 0;
3311 pool_window_label = NULL_RTX;
3312 pool_window_last = 0;
3315 /* Return nonzero if the constant would be an ok source for a
3316 mov.w instead of a mov.l. */
3318 static int
3319 hi_const (rtx src)
3321 return (GET_CODE (src) == CONST_INT
3322 && INTVAL (src) >= -32768
3323 && INTVAL (src) <= 32767);
3326 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3328 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
3329 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
3330 need to fix it if the input value is CONST_OK_FOR_I08. */
3332 static int
3333 broken_move (rtx insn)
3335 if (GET_CODE (insn) == INSN)
3337 rtx pat = PATTERN (insn);
3338 if (GET_CODE (pat) == PARALLEL)
3339 pat = XVECEXP (pat, 0, 0);
3340 if (GET_CODE (pat) == SET
3341 /* We can load any 8 bit value if we don't care what the high
3342 order bits end up as. */
3343 && GET_MODE (SET_DEST (pat)) != QImode
3344 && (CONSTANT_P (SET_SRC (pat))
3345 /* Match mova_const. */
3346 || (GET_CODE (SET_SRC (pat)) == UNSPEC
3347 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3348 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
3349 && ! (TARGET_SH2E
3350 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3351 && (fp_zero_operand (SET_SRC (pat))
3352 || fp_one_operand (SET_SRC (pat)))
3353 /* ??? If this is a -m4 or -m4-single compilation, in general
3354 we don't know the current setting of fpscr, so disable fldi.
3355 There is an exception if this was a register-register move
3356 before reload - and hence it was ascertained that we have
3357 single precision setting - and in a post-reload optimization
3358 we changed this to do a constant load. In that case
3359 we don't have an r0 clobber, hence we must use fldi. */
3360 && (! TARGET_SH4 || TARGET_FMOVD
3361 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3362 == SCRATCH))
3363 && GET_CODE (SET_DEST (pat)) == REG
3364 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3365 && ! (TARGET_SH2A
3366 && GET_MODE (SET_DEST (pat)) == SImode
3367 && GET_CODE (SET_SRC (pat)) == CONST_INT
3368 && CONST_OK_FOR_I20 (INTVAL (SET_SRC (pat))))
3369 && (GET_CODE (SET_SRC (pat)) != CONST_INT
3370 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
3371 return 1;
3374 return 0;
3377 static int
3378 mova_p (rtx insn)
3380 return (GET_CODE (insn) == INSN
3381 && GET_CODE (PATTERN (insn)) == SET
3382 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3383 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3384 /* Don't match mova_const. */
3385 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
3388 /* Fix up a mova from a switch that went out of range. */
3389 static void
3390 fixup_mova (rtx mova)
3392 if (! flag_pic)
3394 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3395 INSN_CODE (mova) = -1;
3397 else
3399 rtx worker = mova;
3400 rtx lab = gen_label_rtx ();
3401 rtx wpat, wpat0, wpat1, wsrc, diff;
3405 worker = NEXT_INSN (worker);
3406 gcc_assert (worker
3407 && GET_CODE (worker) != CODE_LABEL
3408 && GET_CODE (worker) != JUMP_INSN);
3409 } while (recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3410 wpat = PATTERN (worker);
3411 wpat0 = XVECEXP (wpat, 0, 0);
3412 wpat1 = XVECEXP (wpat, 0, 1);
3413 wsrc = SET_SRC (wpat0);
3414 PATTERN (worker) = (gen_casesi_worker_2
3415 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3416 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3417 XEXP (wpat1, 0)));
3418 INSN_CODE (worker) = -1;
3419 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3420 gen_rtx_LABEL_REF (Pmode, lab));
3421 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3422 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3423 INSN_CODE (mova) = -1;
3427 /* Find the last barrier from insn FROM which is close enough to hold the
3428 constant pool. If we can't find one, then create one near the end of
3429 the range. */
3431 static rtx
3432 find_barrier (int num_mova, rtx mova, rtx from)
3434 int count_si = 0;
3435 int count_hi = 0;
3436 int found_hi = 0;
3437 int found_si = 0;
3438 int found_di = 0;
3439 int hi_align = 2;
3440 int si_align = 2;
3441 int leading_mova = num_mova;
3442 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3443 int si_limit;
3444 int hi_limit;
3446 /* For HImode: range is 510, add 4 because pc counts from address of
3447 second instruction after this one, subtract 2 for the jump instruction
3448 that we may need to emit before the table, subtract 2 for the instruction
3449 that fills the jump delay slot (in very rare cases, reorg will take an
3450 instruction from after the constant pool or will leave the delay slot
3451 empty). This gives 510.
3452 For SImode: range is 1020, add 4 because pc counts from address of
3453 second instruction after this one, subtract 2 in case pc is 2 byte
3454 aligned, subtract 2 for the jump instruction that we may need to emit
3455 before the table, subtract 2 for the instruction that fills the jump
3456 delay slot. This gives 1018. */
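/* That is, 510 + 4 - 2 - 2 == 510 and 1020 + 4 - 2 - 2 - 2 == 1018.  */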
3458 /* The branch will always be shortened now that the reference address for
3459 forward branches is the successor address, thus we no longer need to make
3460 adjustments to the [sh]i_limit for -O0. */
3462 si_limit = 1018;
3463 hi_limit = 510;
3465 while (from && count_si < si_limit && count_hi < hi_limit)
3467 int inc = get_attr_length (from);
3468 int new_align = 1;
3470 if (GET_CODE (from) == CODE_LABEL)
3472 if (optimize)
3473 new_align = 1 << label_to_alignment (from);
3474 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3475 new_align = 1 << barrier_align (from);
3476 else
3477 new_align = 1;
3478 inc = 0;
3481 if (GET_CODE (from) == BARRIER)
3484 found_barrier = from;
3486 /* If we are at the end of the function, or in front of an alignment
3487 instruction, we need not insert an extra alignment. We prefer
3488 this kind of barrier. */
3489 if (barrier_align (from) > 2)
3490 good_barrier = from;
3493 if (broken_move (from))
3495 rtx pat, src, dst;
3496 enum machine_mode mode;
3498 pat = PATTERN (from);
3499 if (GET_CODE (pat) == PARALLEL)
3500 pat = XVECEXP (pat, 0, 0);
3501 src = SET_SRC (pat);
3502 dst = SET_DEST (pat);
3503 mode = GET_MODE (dst);
3505 /* We must explicitly check the mode, because sometimes the
3506 front end will generate code to load unsigned constants into
3507 HImode targets without properly sign extending them. */
3508 if (mode == HImode
3509 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3511 found_hi += 2;
3512 /* We put the short constants before the long constants, so
3513 we must count the length of short constants in the range
3514 for the long constants. */
3515 /* ??? This isn't optimal, but is easy to do. */
3516 si_limit -= 2;
3518 else
3520 /* We dump DF/DI constants before SF/SI ones, because
3521 the limit is the same, but the alignment requirements
3522 are higher. We may waste up to 4 additional bytes
3523 for alignment, and the DF/DI constant may have
3524 another SF/SI constant placed before it. */
3525 if (TARGET_SHCOMPACT
3526 && ! found_di
3527 && (mode == DFmode || mode == DImode))
3529 found_di = 1;
3530 si_limit -= 8;
3532 while (si_align > 2 && found_si + si_align - 2 > count_si)
3533 si_align >>= 1;
3534 if (found_si > count_si)
3535 count_si = found_si;
3536 found_si += GET_MODE_SIZE (mode);
3537 if (num_mova)
3538 si_limit -= GET_MODE_SIZE (mode);
3542 if (mova_p (from))
3544 if (! num_mova++)
3546 leading_mova = 0;
3547 mova = from;
3548 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
3550 if (found_si > count_si)
3551 count_si = found_si;
3553 else if (GET_CODE (from) == JUMP_INSN
3554 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3555 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3557 if (num_mova)
3558 num_mova--;
3559 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3561 /* We have just passed the barrier in front of the
3562 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3563 the ADDR_DIFF_VEC is accessed as data, just like our pool
3564 constants, this is a good opportunity to accommodate what
3565 we have gathered so far.
3566 If we waited any longer, we could end up at a barrier in
3567 front of code, which gives worse cache usage for separated
3568 instruction / data caches. */
3569 good_barrier = found_barrier;
3570 break;
3572 else
3574 rtx body = PATTERN (from);
3575 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
3578 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3579 else if (GET_CODE (from) == JUMP_INSN
3580 && ! TARGET_SH2
3581 && ! TARGET_SMALLCODE)
3582 new_align = 4;
3584 if (found_si)
3586 count_si += inc;
3587 if (new_align > si_align)
3589 si_limit -= (count_si - 1) & (new_align - si_align);
3590 si_align = new_align;
3592 count_si = (count_si + new_align - 1) & -new_align;
3594 if (found_hi)
3596 count_hi += inc;
3597 if (new_align > hi_align)
3599 hi_limit -= (count_hi - 1) & (new_align - hi_align);
3600 hi_align = new_align;
3602 count_hi = (count_hi + new_align - 1) & -new_align;
3604 from = NEXT_INSN (from);
3607 if (num_mova)
3609 if (leading_mova)
3611 /* Try as we might, the leading mova is out of range. Change
3612 it into a load (which will become a pcload) and retry. */
3613 fixup_mova (mova);
3614 return find_barrier (0, 0, mova);
3616 else
3618 /* Insert the constant pool table before the mova instruction,
3619 to prevent the mova label reference from going out of range. */
3620 from = mova;
3621 good_barrier = found_barrier = barrier_before_mova;
3625 if (found_barrier)
3627 if (good_barrier && next_real_insn (found_barrier))
3628 found_barrier = good_barrier;
3630 else
3632 /* We didn't find a barrier in time to dump our stuff,
3633 so we'll make one. */
3634 rtx label = gen_label_rtx ();
3636 /* If we exceeded the range, then we must back up over the last
3637 instruction we looked at. Otherwise, we just need to undo the
3638 NEXT_INSN at the end of the loop. */
3639 if (count_hi > hi_limit || count_si > si_limit)
3640 from = PREV_INSN (PREV_INSN (from));
3641 else
3642 from = PREV_INSN (from);
3644 /* Walk back to be just before any jump or label.
3645 Putting it before a label reduces the number of times the branch
3646 around the constant pool table will be hit. Putting it before
3647 a jump makes it more likely that the bra delay slot will be
3648 filled. */
3649 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
3650 || GET_CODE (from) == CODE_LABEL)
3651 from = PREV_INSN (from);
3653 from = emit_jump_insn_after (gen_jump (label), from);
3654 JUMP_LABEL (from) = label;
3655 LABEL_NUSES (label) = 1;
3656 found_barrier = emit_barrier_after (from);
3657 emit_label_after (label, found_barrier);
3660 return found_barrier;
3663 /* If the instruction INSN is implemented by a special function, and we can
3664 positively find the register that is used to call the sfunc, and this
3665 register is not used anywhere else in this instruction - except as the
3666 destination of a set, return this register; else, return 0. */
3668 sfunc_uses_reg (rtx insn)
3670 int i;
3671 rtx pattern, part, reg_part, reg;
3673 if (GET_CODE (insn) != INSN)
3674 return 0;
3675 pattern = PATTERN (insn);
3676 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
3677 return 0;
3679 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3681 part = XVECEXP (pattern, 0, i);
3682 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
3683 reg_part = part;
3685 if (! reg_part)
3686 return 0;
3687 reg = XEXP (reg_part, 0);
3688 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
3690 part = XVECEXP (pattern, 0, i);
3691 if (part == reg_part || GET_CODE (part) == CLOBBER)
3692 continue;
3693 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
3694 && GET_CODE (SET_DEST (part)) == REG)
3695 ? SET_SRC (part) : part)))
3696 return 0;
3698 return reg;
3701 /* See if the only way in which INSN uses REG is by calling it, or by
3702 setting it while calling it. Set *SET to a SET rtx if the register
3703 is set by INSN. */
3705 static int
3706 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
3708 rtx pattern, reg2;
3710 *set = NULL_RTX;
3712 reg2 = sfunc_uses_reg (insn);
3713 if (reg2 && REGNO (reg2) == REGNO (reg))
3715 pattern = single_set (insn);
3716 if (pattern
3717 && GET_CODE (SET_DEST (pattern)) == REG
3718 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3719 *set = pattern;
3720 return 0;
3722 if (GET_CODE (insn) != CALL_INSN)
3724 /* We don't use rtx_equal_p because we don't care if the mode is
3725 different. */
3726 pattern = single_set (insn);
3727 if (pattern
3728 && GET_CODE (SET_DEST (pattern)) == REG
3729 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3731 rtx par, part;
3732 int i;
3734 *set = pattern;
3735 par = PATTERN (insn);
3736 if (GET_CODE (par) == PARALLEL)
3737 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
3739 part = XVECEXP (par, 0, i);
3740 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
3741 return 1;
3743 return reg_mentioned_p (reg, SET_SRC (pattern));
3746 return 1;
3749 pattern = PATTERN (insn);
3751 if (GET_CODE (pattern) == PARALLEL)
3753 int i;
3755 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3756 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
3757 return 1;
3758 pattern = XVECEXP (pattern, 0, 0);
3761 if (GET_CODE (pattern) == SET)
3763 if (reg_mentioned_p (reg, SET_DEST (pattern)))
3765 /* We don't use rtx_equal_p, because we don't care if the
3766 mode is different. */
3767 if (GET_CODE (SET_DEST (pattern)) != REG
3768 || REGNO (reg) != REGNO (SET_DEST (pattern)))
3769 return 1;
3771 *set = pattern;
3774 pattern = SET_SRC (pattern);
3777 if (GET_CODE (pattern) != CALL
3778 || GET_CODE (XEXP (pattern, 0)) != MEM
3779 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
3780 return 1;
3782 return 0;
3785 /* Given X, a pattern of an insn or a part of it, return a mask of used
3786 general registers. Bits 0..15 mean that the respective registers
3787 are used as inputs in the instruction. Bits 16..31 mean that the
3788 registers 0..15, respectively, are used as outputs, or are clobbered.
3789 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
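/* For example, for (set (reg:SI 1) (reg:SI 2)) this returns
   (1 << 2) | (1 << 17), i.e. r2 used as input and r1 as output, assuming
   SImode occupies a single hard register.  */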
3791 regs_used (rtx x, int is_dest)
3793 enum rtx_code code;
3794 const char *fmt;
3795 int i, used = 0;
3797 if (! x)
3798 return used;
3799 code = GET_CODE (x);
3800 switch (code)
3802 case REG:
3803 if (REGNO (x) < 16)
3804 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3805 << (REGNO (x) + is_dest));
3806 return 0;
3807 case SUBREG:
3809 rtx y = SUBREG_REG (x);
3811 if (GET_CODE (y) != REG)
3812 break;
3813 if (REGNO (y) < 16)
3814 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3815 << (REGNO (y) +
3816 subreg_regno_offset (REGNO (y),
3817 GET_MODE (y),
3818 SUBREG_BYTE (x),
3819 GET_MODE (x)) + is_dest));
3820 return 0;
3822 case SET:
3823 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
3824 case RETURN:
3825 /* If there was a return value, it must have been indicated with USE. */
3826 return 0x00ffff00;
3827 case CLOBBER:
3828 is_dest = 1;
3829 break;
3830 case MEM:
3831 is_dest = 0;
3832 break;
3833 case CALL:
3834 used |= 0x00ff00f0;
3835 break;
3836 default:
3837 break;
3840 fmt = GET_RTX_FORMAT (code);
3842 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3844 if (fmt[i] == 'E')
3846 register int j;
3847 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3848 used |= regs_used (XVECEXP (x, i, j), is_dest);
3850 else if (fmt[i] == 'e')
3851 used |= regs_used (XEXP (x, i), is_dest);
3853 return used;
3856 /* Create an instruction that prevents redirection of a conditional branch
3857 to the destination of the JUMP with address ADDR.
3858 If the branch needs to be implemented as an indirect jump, try to find
3859 a scratch register for it.
3860 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3861 If any preceding insn that doesn't fit into a delay slot is good enough,
3862 pass 1. Pass 2 if a definite blocking insn is needed.
3863 -1 is used internally to avoid deep recursion.
3864 If a blocking instruction is made or recognized, return it. */
3866 static rtx
3867 gen_block_redirect (rtx jump, int addr, int need_block)
3869 int dead = 0;
3870 rtx prev = prev_nonnote_insn (jump);
3871 rtx dest;
3873 /* First, check if we already have an instruction that satisfies our need. */
3874 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3876 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3877 return prev;
3878 if (GET_CODE (PATTERN (prev)) == USE
3879 || GET_CODE (PATTERN (prev)) == CLOBBER
3880 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3881 prev = jump;
3882 else if ((need_block &= ~1) < 0)
3883 return prev;
3884 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
3885 need_block = 0;
3887 if (GET_CODE (PATTERN (jump)) == RETURN)
3889 if (! need_block)
3890 return prev;
3891 /* Reorg even does nasty things with return insns that cause branches
3892 to go out of range - see find_end_label and callers. */
3893 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
3895 /* We can't use JUMP_LABEL here because it might be undefined
3896 when not optimizing. */
3897 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
3898 /* If the branch is out of range, try to find a scratch register for it. */
3899 if (optimize
3900 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3901 > 4092 + 4098))
3903 rtx scan;
3904 /* Don't look for the stack pointer as a scratch register,
3905 it would cause trouble if an interrupt occurred. */
3906 unsigned try = 0x7fff, used;
3907 int jump_left = flag_expensive_optimizations + 1;
3909 /* It is likely that the most recent eligible instruction is wanted for
3910 the delay slot. Therefore, find out which registers it uses, and
3911 try to avoid using them. */
3913 for (scan = jump; (scan = PREV_INSN (scan)); )
3915 enum rtx_code code;
3917 if (INSN_DELETED_P (scan))
3918 continue;
3919 code = GET_CODE (scan);
3920 if (code == CODE_LABEL || code == JUMP_INSN)
3921 break;
3922 if (code == INSN
3923 && GET_CODE (PATTERN (scan)) != USE
3924 && GET_CODE (PATTERN (scan)) != CLOBBER
3925 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
3927 try &= ~regs_used (PATTERN (scan), 0);
3928 break;
3931 for (used = dead = 0, scan = JUMP_LABEL (jump);
3932 (scan = NEXT_INSN (scan)); )
3934 enum rtx_code code;
3936 if (INSN_DELETED_P (scan))
3937 continue;
3938 code = GET_CODE (scan);
3939 if (INSN_P (scan))
3941 used |= regs_used (PATTERN (scan), 0);
3942 if (code == CALL_INSN)
3943 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
3944 dead |= (used >> 16) & ~used;
3945 if (dead & try)
3947 dead &= try;
3948 break;
3950 if (code == JUMP_INSN)
3952 if (jump_left-- && simplejump_p (scan))
3953 scan = JUMP_LABEL (scan);
3954 else
3955 break;
3959 /* Mask out the stack pointer again, in case it was
3960 the only 'free' register we have found. */
3961 dead &= 0x7fff;
3963 /* If the immediate destination is still in range, check for possible
3964 threading with a jump beyond the delay slot insn.
3965 Don't check if we are called recursively; the jump has been or will be
3966 checked in a different invocation in that case. */
3968 else if (optimize && need_block >= 0)
3970 rtx next = next_active_insn (next_active_insn (dest));
3971 if (next && GET_CODE (next) == JUMP_INSN
3972 && GET_CODE (PATTERN (next)) == SET
3973 && recog_memoized (next) == CODE_FOR_jump_compact)
3975 dest = JUMP_LABEL (next);
3976 if (dest
3977 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3978 > 4092 + 4098))
3979 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
3983 if (dead)
3985 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
3987 /* It would be nice if we could convert the jump into an indirect
3988 jump / far branch right now, thus exposing all constituent
3989 instructions to further optimization. However, reorg uses
3990 simplejump_p to determine if there is an unconditional jump where
3991 it should try to schedule instructions from the target of the
3992 branch; simplejump_p fails for indirect jumps even if they have
3993 a JUMP_LABEL. */
3994 rtx insn = emit_insn_before (gen_indirect_jump_scratch
3995 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
3996 , jump);
3997 /* ??? We would like this to have the scope of the jump, but that
3998 scope will change when a delay slot insn of an inner scope is added.
3999 Hence, after delay slot scheduling, we'll have to expect
4000 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
4001 the jump. */
4003 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
4004 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
4005 return insn;
4007 else if (need_block)
4008 /* We can't use JUMP_LABEL here because it might be undefined
4009 when not optimizing. */
4010 return emit_insn_before (gen_block_branch_redirect
4011 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
4012 , jump);
4013 return prev;
4016 #define CONDJUMP_MIN -252
4017 #define CONDJUMP_MAX 262
4018 struct far_branch
4020 /* A label (to be placed) in front of the jump
4021 that jumps to our ultimate destination. */
4022 rtx near_label;
4023 /* Where we are going to insert it if we cannot move the jump any farther,
4024 or the jump itself if we have picked up an existing jump. */
4025 rtx insert_place;
4026 /* The ultimate destination. */
4027 rtx far_label;
4028 struct far_branch *prev;
4029 /* If the branch has already been created, its address;
4030 else the address of its first prospective user. */
4031 int address;
4034 static void gen_far_branch (struct far_branch *);
4035 enum mdep_reorg_phase_e mdep_reorg_phase;
4036 static void
4037 gen_far_branch (struct far_branch *bp)
4039 rtx insn = bp->insert_place;
4040 rtx jump;
4041 rtx label = gen_label_rtx ();
4042 int ok;
4044 emit_label_after (label, insn);
4045 if (bp->far_label)
4047 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
4048 LABEL_NUSES (bp->far_label)++;
4050 else
4051 jump = emit_jump_insn_after (gen_return (), insn);
4052 /* Emit a barrier so that reorg knows that any following instructions
4053 are not reachable via a fall-through path.
4054 But don't do this when not optimizing, since we wouldn't suppress the
4055 alignment for the barrier then, and could end up with out-of-range
4056 pc-relative loads. */
4057 if (optimize)
4058 emit_barrier_after (jump);
4059 emit_label_after (bp->near_label, insn);
4060 JUMP_LABEL (jump) = bp->far_label;
4061 ok = invert_jump (insn, label, 1);
4062 gcc_assert (ok);
4064 /* If we are branching around a jump (rather than a return), prevent
4065 reorg from using an insn from the jump target as the delay slot insn -
4066 when reorg did this, it pessimized code (we'd rather hide the delay slot)
4067 and it could cause branches to go out of range. */
4068 if (bp->far_label)
4069 (emit_insn_after
4070 (gen_stuff_delay_slot
4071 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
4072 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
4073 insn));
4074 /* Prevent reorg from undoing our splits. */
4075 gen_block_redirect (jump, bp->address += 2, 2);
4078 /* Fix up ADDR_DIFF_VECs. */
4079 void
4080 fixup_addr_diff_vecs (rtx first)
4082 rtx insn;
4084 for (insn = first; insn; insn = NEXT_INSN (insn))
4086 rtx vec_lab, pat, prev, prevpat, x, braf_label;
4088 if (GET_CODE (insn) != JUMP_INSN
4089 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
4090 continue;
4091 pat = PATTERN (insn);
4092 vec_lab = XEXP (XEXP (pat, 0), 0);
4094 /* Search the matching casesi_jump_2. */
4095 for (prev = vec_lab; ; prev = PREV_INSN (prev))
4097 if (GET_CODE (prev) != JUMP_INSN)
4098 continue;
4099 prevpat = PATTERN (prev);
4100 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
4101 continue;
4102 x = XVECEXP (prevpat, 0, 1);
4103 if (GET_CODE (x) != USE)
4104 continue;
4105 x = XEXP (x, 0);
4106 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
4107 break;
4109 /* FIXME: This is a bug in the optimizer, but it seems harmless
4110 to just avoid panicking. */
4111 if (!prev)
4112 continue;
4114 /* Emit the reference label of the braf where it belongs, right after
4115 the casesi_jump_2 (i.e. braf). */
4116 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
4117 emit_label_after (braf_label, prev);
4119 /* Fix up the ADDR_DIFF_VEC to be relative
4120 to the reference address of the braf. */
4121 XEXP (XEXP (pat, 0), 0) = braf_label;
4125 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
4126 a barrier. Return the base 2 logarithm of the desired alignment. */
4128 barrier_align (rtx barrier_or_label)
4130 rtx next = next_real_insn (barrier_or_label), pat, prev;
4131 int slot, credit, jump_to_next = 0;
4133 if (! next)
4134 return 0;
4136 pat = PATTERN (next);
4138 if (GET_CODE (pat) == ADDR_DIFF_VEC)
4139 return 2;
4141 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
4142 /* This is a barrier in front of a constant table. */
4143 return 0;
4145 prev = prev_real_insn (barrier_or_label);
4146 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
4148 pat = PATTERN (prev);
4149 /* If this is a very small table, we want to keep the alignment after
4150 the table to the minimum for proper code alignment. */
4151 return ((TARGET_SMALLCODE
4152 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
4153 <= (unsigned) 1 << (CACHE_LOG - 2)))
4154 ? 1 << TARGET_SHMEDIA : align_jumps_log);
4157 if (TARGET_SMALLCODE)
4158 return 0;
4160 if (! TARGET_SH2 || ! optimize)
4161 return align_jumps_log;
4163 /* When fixing up pcloads, a constant table might be inserted just before
4164 the basic block that ends with the barrier. Thus, we can't trust the
4165 instruction lengths before that. */
4166 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
4168 /* Check if there is an immediately preceding branch to the insn beyond
4169 the barrier. We must weigh the cost of discarding useful information
4170 from the current cache line when executing this branch and there is
4171 an alignment, against that of fetching unneeded insns in front of the
4172 branch target when there is no alignment. */
4174 /* There are two delay_slot cases to consider. One is the simple case
4175 where the preceding branch is to the insn beyond the barrier (simple
4176 delay slot filling), and the other is where the preceding branch has
4177 a delay slot that is a duplicate of the insn after the barrier
4178 (fill_eager_delay_slots) and the branch is to the insn after the insn
4179 after the barrier. */
4181 /* PREV is presumed to be the JUMP_INSN for the barrier under
4182 investigation. Skip to the insn before it. */
4183 prev = prev_real_insn (prev);
4185 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
4186 credit >= 0 && prev && GET_CODE (prev) == INSN;
4187 prev = prev_real_insn (prev))
4189 jump_to_next = 0;
4190 if (GET_CODE (PATTERN (prev)) == USE
4191 || GET_CODE (PATTERN (prev)) == CLOBBER)
4192 continue;
4193 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
4195 prev = XVECEXP (PATTERN (prev), 0, 1);
4196 if (INSN_UID (prev) == INSN_UID (next))
4198 /* Delay slot was filled with insn at jump target. */
4199 jump_to_next = 1;
4200 continue;
4204 if (slot &&
4205 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4206 slot = 0;
4207 credit -= get_attr_length (prev);
4209 if (prev
4210 && GET_CODE (prev) == JUMP_INSN
4211 && JUMP_LABEL (prev))
4213 rtx x;
4214 if (jump_to_next
4215 || next_real_insn (JUMP_LABEL (prev)) == next
4216 /* If relax_delay_slots() decides NEXT was redundant
4217 with some previous instruction, it will have
4218 redirected PREV's jump to the following insn. */
4219 || JUMP_LABEL (prev) == next_nonnote_insn (next)
4220 /* There is no upper bound on redundant instructions
4221 that might have been skipped, but we must not put an
4222 alignment where none had been before. */
4223 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
4224 (INSN_P (x)
4225 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
4226 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
4227 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
4229 rtx pat = PATTERN (prev);
4230 if (GET_CODE (pat) == PARALLEL)
4231 pat = XVECEXP (pat, 0, 0);
4232 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
4233 return 0;
4238 return align_jumps_log;
4241 /* If we are inside a phony loop, almost any kind of label can turn up as the
4242 first one in the loop. Aligning a braf label causes incorrect switch
4243 destination addresses; we can detect braf labels because they are
4244 followed by a BARRIER.
4245 Applying loop alignment to small constant or switch tables is a waste
4246 of space, so we suppress this too. */
4248 sh_loop_align (rtx label)
4250 rtx next = label;
4253 next = next_nonnote_insn (next);
4254 while (next && GET_CODE (next) == CODE_LABEL);
4256 if (! next
4257 || ! INSN_P (next)
4258 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
4259 || recog_memoized (next) == CODE_FOR_consttable_2)
4260 return 0;
4262 return align_loops_log;
4265 /* Do a final pass over the function, just before delayed branch
4266 scheduling. */
4268 static void
4269 sh_reorg (void)
4271 rtx first, insn, mova = NULL_RTX;
4272 int num_mova;
4273 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
4274 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
4276 first = get_insns ();
4278 /* We must split call insns before introducing `mova's. If we're
4279 optimizing, they'll have already been split. Otherwise, make
4280 sure we don't split them too late. */
4281 if (! optimize)
4282 split_all_insns_noflow ();
4284 if (TARGET_SHMEDIA)
4285 return;
4287 /* If relaxing, generate pseudo-ops to associate function calls with
4288 the symbols they call. It does no harm to not generate these
4289 pseudo-ops. However, when we can generate them, it enables the
4290 linker to potentially relax the jsr to a bsr, and eliminate the
4291 register load and, possibly, the constant pool entry. */
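/* A rough sketch of the relaxation this enables (illustrative only, not
actual compiler output):
     mov.l  .Lfoo,r1   ! load the address of foo from the constant pool
     jsr    @r1        ! call through r1
     ...
   .Lfoo: .long foo
With a .uses pseudo-op tying the call to the load, the linker may turn the
jsr into a bsr to foo when the target is close enough, leaving the register
load and, possibly, the pool entry dead.  */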
4293 mdep_reorg_phase = SH_INSERT_USES_LABELS;
4294 if (TARGET_RELAX)
4296 /* Remove all REG_LABEL notes. We want to use them for our own
4297 purposes. This works because none of the remaining passes
4298 need to look at them.
4300 ??? But it may break in the future. We should use a machine
4301 dependent REG_NOTE, or some other approach entirely. */
4302 for (insn = first; insn; insn = NEXT_INSN (insn))
4304 if (INSN_P (insn))
4306 rtx note;
4308 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
4309 remove_note (insn, note);
4313 for (insn = first; insn; insn = NEXT_INSN (insn))
4315 rtx pattern, reg, link, set, scan, dies, label;
4316 int rescan = 0, foundinsn = 0;
4318 if (GET_CODE (insn) == CALL_INSN)
4320 pattern = PATTERN (insn);
4322 if (GET_CODE (pattern) == PARALLEL)
4323 pattern = XVECEXP (pattern, 0, 0);
4324 if (GET_CODE (pattern) == SET)
4325 pattern = SET_SRC (pattern);
4327 if (GET_CODE (pattern) != CALL
4328 || GET_CODE (XEXP (pattern, 0)) != MEM)
4329 continue;
4331 reg = XEXP (XEXP (pattern, 0), 0);
4333 else
4335 reg = sfunc_uses_reg (insn);
4336 if (! reg)
4337 continue;
4340 if (GET_CODE (reg) != REG)
4341 continue;
4343 /* This is a function call via REG. If the only uses of REG
4344 between the time that it is set and the time that it dies
4345 are in function calls, then we can associate all the
4346 function calls with the setting of REG. */
4348 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
4350 if (REG_NOTE_KIND (link) != 0)
4351 continue;
4352 set = single_set (XEXP (link, 0));
4353 if (set && rtx_equal_p (reg, SET_DEST (set)))
4355 link = XEXP (link, 0);
4356 break;
4360 if (! link)
4362 /* ??? Sometimes global register allocation will have
4363 deleted the insn pointed to by LOG_LINKS. Try
4364 scanning backward to find where the register is set. */
4365 for (scan = PREV_INSN (insn);
4366 scan && GET_CODE (scan) != CODE_LABEL;
4367 scan = PREV_INSN (scan))
4369 if (! INSN_P (scan))
4370 continue;
4372 if (! reg_mentioned_p (reg, scan))
4373 continue;
4375 if (noncall_uses_reg (reg, scan, &set))
4376 break;
4378 if (set)
4380 link = scan;
4381 break;
4386 if (! link)
4387 continue;
4389 /* The register is set at LINK. */
4391 /* We can only optimize the function call if the register is
4392 being set to a symbol. In theory, we could sometimes
4393 optimize calls to a constant location, but the assembler
4394 and linker do not support that at present. */
4395 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4396 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4397 continue;
4399 /* Scan forward from LINK to the place where REG dies, and
4400 make sure that the only insns which use REG are
4401 themselves function calls. */
4403 /* ??? This doesn't work for call targets that were allocated
4404 by reload, since there may not be a REG_DEAD note for the
4405 register. */
4407 dies = NULL_RTX;
4408 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4410 rtx scanset;
4412 /* Don't try to trace forward past a CODE_LABEL if we haven't
4413 seen INSN yet. Ordinarily, we will only find the setting insn
4414 in LOG_LINKS if it is in the same basic block. However,
4415 cross-jumping can insert code labels in between the load and
4416 the call, and can result in situations where a single call
4417 insn may have two targets depending on where we came from. */
4419 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4420 break;
4422 if (! INSN_P (scan))
4423 continue;
4425 /* Don't try to trace forward past a JUMP. To optimize
4426 safely, we would have to check that all the
4427 instructions at the jump destination did not use REG. */
4429 if (GET_CODE (scan) == JUMP_INSN)
4430 break;
4432 if (! reg_mentioned_p (reg, scan))
4433 continue;
4435 if (noncall_uses_reg (reg, scan, &scanset))
4436 break;
4438 if (scan == insn)
4439 foundinsn = 1;
4441 if (scan != insn
4442 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4444 /* There is a function call to this register other
4445 than the one we are checking. If we optimize
4446 this call, we need to rescan again below. */
4447 rescan = 1;
4450 /* ??? We shouldn't have to worry about SCANSET here.
4451 We should just be able to check for a REG_DEAD note
4452 on a function call. However, the REG_DEAD notes are
4453 apparently not dependable around libcalls; c-torture
4454 execute/920501-2 is a test case. If SCANSET is set,
4455 then this insn sets the register, so it must have
4456 died earlier. Unfortunately, this will only handle
4457 the cases in which the register is, in fact, set in a
4458 later insn. */
4460 /* ??? We shouldn't have to use FOUNDINSN here.
4461 However, the LOG_LINKS fields are apparently not
4462 entirely reliable around libcalls;
4463 newlib/libm/math/e_pow.c is a test case. Sometimes
4464 an insn will appear in LOG_LINKS even though it is
4465 not the most recent insn which sets the register. */
4467 if (foundinsn
4468 && (scanset
4469 || find_reg_note (scan, REG_DEAD, reg)))
4471 dies = scan;
4472 break;
4476 if (! dies)
4478 /* Either there was a branch, or some insn used REG
4479 other than as a function call address. */
4480 continue;
4483 /* Create a code label, and put it in a REG_LABEL note on
4484 the insn which sets the register, and on each call insn
4485 which uses the register. In final_prescan_insn we look
4486 for the REG_LABEL notes, and output the appropriate label
4487 or pseudo-op. */
4489 label = gen_label_rtx ();
4490 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
4491 REG_NOTES (link));
4492 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
4493 REG_NOTES (insn));
4494 if (rescan)
4496 scan = link;
4499 rtx reg2;
4501 scan = NEXT_INSN (scan);
4502 if (scan != insn
4503 && ((GET_CODE (scan) == CALL_INSN
4504 && reg_mentioned_p (reg, scan))
4505 || ((reg2 = sfunc_uses_reg (scan))
4506 && REGNO (reg2) == REGNO (reg))))
4507 REG_NOTES (scan)
4508 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
4510 while (scan != dies);
4515 if (TARGET_SH2)
4516 fixup_addr_diff_vecs (first);
4518 if (optimize)
4520 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4521 shorten_branches (first);
4523 /* Scan the function looking for move instructions which have to be
4524 changed to pc-relative loads and insert the literal tables. */
4526 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4527 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4529 if (mova_p (insn))
4531 /* ??? basic block reordering can move a switch table dispatch
4532 below the switch table. Check if that has happened.
4533 We only have the addresses available when optimizing; but then,
4534 this check shouldn't be needed when not optimizing. */
4535 rtx label_ref = XVECEXP (SET_SRC (PATTERN (insn)), 0, 0);
4536 if (optimize
4537 && (INSN_ADDRESSES (INSN_UID (insn))
4538 > INSN_ADDRESSES (INSN_UID (XEXP (label_ref, 0)))))
4540 /* Change the mova into a load.
4541 broken_move will then return true for it. */
4542 fixup_mova (insn);
4544 else if (! num_mova++)
4545 mova = insn;
4547 else if (GET_CODE (insn) == JUMP_INSN
4548 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4549 && num_mova)
4551 rtx scan;
4552 int total;
4554 num_mova--;
4556 /* Some code might have been inserted between the mova and
4557 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4558 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4559 total += get_attr_length (scan);
4561 /* The range of mova is 1020; add 4 because pc counts from the address of
4562 the second instruction after this one, and subtract 2 in case pc is 2
4563 byte aligned, giving the 1022 limit below. Possible alignment needed for
4564 the ADDR_DIFF_VEC cancels out with alignment effects of the mova itself. */
4565 if (total > 1022)
4567 /* Change the mova into a load, and restart scanning
4568 there. broken_move will then return true for mova. */
4569 fixup_mova (mova);
4570 insn = mova;
4573 if (broken_move (insn)
4574 || (GET_CODE (insn) == INSN
4575 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
4577 rtx scan;
4578 /* Scan ahead looking for a barrier to stick the constant table
4579 behind. */
4580 rtx barrier = find_barrier (num_mova, mova, insn);
4581 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4582 int need_aligned_label = 0;
4584 if (num_mova && ! mova_p (mova))
4586 /* find_barrier had to change the first mova into a
4587 pcload; thus, we have to start with this new pcload. */
4588 insn = mova;
4589 num_mova = 0;
4591 /* Now find all the moves between the points and modify them. */
4592 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
4594 if (GET_CODE (scan) == CODE_LABEL)
4595 last_float = 0;
4596 if (GET_CODE (scan) == INSN
4597 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
4598 need_aligned_label = 1;
4599 if (broken_move (scan))
4601 rtx *patp = &PATTERN (scan), pat = *patp;
4602 rtx src, dst;
4603 rtx lab;
4604 rtx newsrc;
4605 enum machine_mode mode;
4607 if (GET_CODE (pat) == PARALLEL)
4608 patp = &XVECEXP (pat, 0, 0), pat = *patp;
4609 src = SET_SRC (pat);
4610 dst = SET_DEST (pat);
4611 mode = GET_MODE (dst);
4613 if (mode == SImode && hi_const (src)
4614 && REGNO (dst) != FPUL_REG)
4616 int offset = 0;
4618 mode = HImode;
4619 while (GET_CODE (dst) == SUBREG)
4621 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
4622 GET_MODE (SUBREG_REG (dst)),
4623 SUBREG_BYTE (dst),
4624 GET_MODE (dst));
4625 dst = SUBREG_REG (dst);
4627 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
4629 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
4631 /* This must be an insn that clobbers r0. */
4632 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
4633 XVECLEN (PATTERN (scan), 0)
4634 - 1);
4635 rtx clobber = *clobberp;
4637 gcc_assert (GET_CODE (clobber) == CLOBBER
4638 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
4640 if (last_float
4641 && reg_set_between_p (r0_rtx, last_float_move, scan))
4642 last_float = 0;
4643 if (last_float
4644 && TARGET_SHCOMPACT
4645 && GET_MODE_SIZE (mode) != 4
4646 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
4647 last_float = 0;
4648 lab = add_constant (src, mode, last_float);
4649 if (lab)
4650 emit_insn_before (gen_mova (lab), scan);
4651 else
4653 /* There will be a REG_UNUSED note for r0 on
4654 LAST_FLOAT_MOVE; we have to change it to REG_INC;
4655 otherwise reorg's mark_target_live_regs will not
4656 consider r0 to be used, and we end up with a delay
4657 slot insn in front of SCAN that clobbers r0. */
4658 rtx note
4659 = find_regno_note (last_float_move, REG_UNUSED, 0);
4661 /* If we are not optimizing, then there may not be
4662 a note. */
4663 if (note)
4664 PUT_MODE (note, REG_INC);
4666 *last_float_addr = r0_inc_rtx;
4668 last_float_move = scan;
4669 last_float = src;
4670 newsrc = gen_const_mem (mode,
4671 (((TARGET_SH4 && ! TARGET_FMOVD)
4672 || REGNO (dst) == FPUL_REG)
4673 ? r0_inc_rtx
4674 : r0_rtx));
4675 last_float_addr = &XEXP (newsrc, 0);
4677 /* Remove the clobber of r0. */
4678 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
4679 gen_rtx_SCRATCH (Pmode));
4681 /* This is a mova needing a label. Create it. */
4682 else if (GET_CODE (src) == UNSPEC
4683 && XINT (src, 1) == UNSPEC_MOVA
4684 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
4686 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
4687 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4688 newsrc = gen_rtx_UNSPEC (SImode,
4689 gen_rtvec (1, newsrc),
4690 UNSPEC_MOVA);
4692 else
4694 lab = add_constant (src, mode, 0);
4695 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4696 newsrc = gen_const_mem (mode, newsrc);
4698 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
4699 INSN_CODE (scan) = -1;
4702 dump_table (need_aligned_label ? insn : 0, barrier);
4703 insn = barrier;
4707 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
4708 INSN_ADDRESSES_FREE ();
4709 split_branches (first);
4711 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4712 also has an effect on the register that holds the address of the sfunc.
4713 Insert an extra dummy insn in front of each sfunc that pretends to
4714 use this register. */
4715 if (flag_delayed_branch)
4717 for (insn = first; insn; insn = NEXT_INSN (insn))
4719 rtx reg = sfunc_uses_reg (insn);
4721 if (! reg)
4722 continue;
4723 emit_insn_before (gen_use_sfunc_addr (reg), insn);
4726 #if 0
4727 /* fpscr is not actually a user variable, but we pretend it is for the
4728 sake of the previous optimization passes, since we want it handled like
4729 one. However, we don't have any debugging information for it, so turn
4730 it into a non-user variable now. */
4731 if (TARGET_SH4)
4732 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4733 #endif
4734 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
4738 get_dest_uid (rtx label, int max_uid)
4740 rtx dest = next_real_insn (label);
4741 int dest_uid;
4742 if (! dest)
4743 /* This can happen for an undefined label. */
4744 return 0;
4745 dest_uid = INSN_UID (dest);
4746 /* If this is a newly created branch redirection blocking instruction,
4747 we cannot index the branch_uid or insn_addresses arrays with its
4748 uid. But then, we won't need to, because the actual destination is
4749 the following branch. */
4750 while (dest_uid >= max_uid)
4752 dest = NEXT_INSN (dest);
4753 dest_uid = INSN_UID (dest);
4755 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
4756 return 0;
4757 return dest_uid;
4760 /* Split condbranches that are out of range. Also add clobbers for
4761 scratch registers that are needed in far jumps.
4762 We do this before delay slot scheduling, so that it can take our
4763 newly created instructions into account. It also allows us to
4764 find branches with common targets more easily. */
4766 static void
4767 split_branches (rtx first)
4769 rtx insn;
4770 struct far_branch **uid_branch, *far_branch_list = 0;
4771 int max_uid = get_max_uid ();
4772 int ok;
4774 /* Find out which branches are out of range. */
4775 shorten_branches (first);
4777 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
4778 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
4780 for (insn = first; insn; insn = NEXT_INSN (insn))
4781 if (! INSN_P (insn))
4782 continue;
4783 else if (INSN_DELETED_P (insn))
4785 /* Shorten_branches would split this instruction again,
4786 so transform it into a note. */
4787 PUT_CODE (insn, NOTE);
4788 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4789 NOTE_SOURCE_FILE (insn) = 0;
4791 else if (GET_CODE (insn) == JUMP_INSN
4792 /* Don't mess with ADDR_DIFF_VEC */
4793 && (GET_CODE (PATTERN (insn)) == SET
4794 || GET_CODE (PATTERN (insn)) == RETURN))
4796 enum attr_type type = get_attr_type (insn);
4797 if (type == TYPE_CBRANCH)
4799 rtx next, beyond;
4801 if (get_attr_length (insn) > 4)
4803 rtx src = SET_SRC (PATTERN (insn));
4804 rtx olabel = XEXP (XEXP (src, 1), 0);
4805 int addr = INSN_ADDRESSES (INSN_UID (insn));
4806 rtx label = 0;
4807 int dest_uid = get_dest_uid (olabel, max_uid);
4808 struct far_branch *bp = uid_branch[dest_uid];
4810 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4811 the label if the LABEL_NUSES count drops to zero. There is
4812 always a jump_optimize pass that sets these values, but it
4813 proceeds to delete unreferenced code, and then if not
4814 optimizing, to un-delete the deleted instructions, thus
4815 leaving labels with too low use counts. */
4816 if (! optimize)
4818 JUMP_LABEL (insn) = olabel;
4819 LABEL_NUSES (olabel)++;
4821 if (! bp)
4823 bp = (struct far_branch *) alloca (sizeof *bp);
4824 uid_branch[dest_uid] = bp;
4825 bp->prev = far_branch_list;
4826 far_branch_list = bp;
4827 bp->far_label
4828 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
4829 LABEL_NUSES (bp->far_label)++;
4831 else
4833 label = bp->near_label;
4834 if (! label && bp->address - addr >= CONDJUMP_MIN)
4836 rtx block = bp->insert_place;
4838 if (GET_CODE (PATTERN (block)) == RETURN)
4839 block = PREV_INSN (block);
4840 else
4841 block = gen_block_redirect (block,
4842 bp->address, 2);
4843 label = emit_label_after (gen_label_rtx (),
4844 PREV_INSN (block));
4845 bp->near_label = label;
4847 else if (label && ! NEXT_INSN (label))
4849 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4850 bp->insert_place = insn;
4851 else
4852 gen_far_branch (bp);
4855 if (! label
4856 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4858 bp->near_label = label = gen_label_rtx ();
4859 bp->insert_place = insn;
4860 bp->address = addr;
4862 ok = redirect_jump (insn, label, 1);
4863 gcc_assert (ok);
4865 else
4867 /* get_attr_length (insn) == 2 */
4868 /* Check if we have a pattern where reorg wants to redirect
4869 the branch to a label from an unconditional branch that
4870 is too far away. */
4871 /* We can't use JUMP_LABEL here because it might be undefined
4872 when not optimizing. */
4873 /* A syntax error might cause beyond to be NULL_RTX. */
4874 beyond
4875 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4876 0));
4878 if (beyond
4879 && (GET_CODE (beyond) == JUMP_INSN
4880 || ((beyond = next_active_insn (beyond))
4881 && GET_CODE (beyond) == JUMP_INSN))
4882 && GET_CODE (PATTERN (beyond)) == SET
4883 && recog_memoized (beyond) == CODE_FOR_jump_compact
4884 && ((INSN_ADDRESSES
4885 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4886 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4887 > 252 + 258 + 2))
4888 gen_block_redirect (beyond,
4889 INSN_ADDRESSES (INSN_UID (beyond)), 1);
4892 next = next_active_insn (insn);
4894 if ((GET_CODE (next) == JUMP_INSN
4895 || ((next = next_active_insn (next))
4896 && GET_CODE (next) == JUMP_INSN))
4897 && GET_CODE (PATTERN (next)) == SET
4898 && recog_memoized (next) == CODE_FOR_jump_compact
4899 && ((INSN_ADDRESSES
4900 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
4901 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4902 > 252 + 258 + 2))
4903 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
4905 else if (type == TYPE_JUMP || type == TYPE_RETURN)
4907 int addr = INSN_ADDRESSES (INSN_UID (insn));
4908 rtx far_label = 0;
4909 int dest_uid = 0;
4910 struct far_branch *bp;
4912 if (type == TYPE_JUMP)
4914 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
4915 dest_uid = get_dest_uid (far_label, max_uid);
4916 if (! dest_uid)
4918 /* Parse errors can lead to labels outside
4919 the insn stream. */
4920 if (! NEXT_INSN (far_label))
4921 continue;
4923 if (! optimize)
4925 JUMP_LABEL (insn) = far_label;
4926 LABEL_NUSES (far_label)++;
4928 redirect_jump (insn, NULL_RTX, 1);
4929 far_label = 0;
4932 bp = uid_branch[dest_uid];
4933 if (! bp)
4935 bp = (struct far_branch *) alloca (sizeof *bp);
4936 uid_branch[dest_uid] = bp;
4937 bp->prev = far_branch_list;
4938 far_branch_list = bp;
4939 bp->near_label = 0;
4940 bp->far_label = far_label;
4941 if (far_label)
4942 LABEL_NUSES (far_label)++;
4944 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
4945 if (addr - bp->address <= CONDJUMP_MAX)
4946 emit_label_after (bp->near_label, PREV_INSN (insn));
4947 else
4949 gen_far_branch (bp);
4950 bp->near_label = 0;
4952 else
4953 bp->near_label = 0;
4954 bp->address = addr;
4955 bp->insert_place = insn;
4956 if (! far_label)
4957 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
4958 else
4959 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
4962 /* Generate all pending far branches,
4963 and free our references to the far labels. */
4964 while (far_branch_list)
4966 if (far_branch_list->near_label
4967 && ! NEXT_INSN (far_branch_list->near_label))
4968 gen_far_branch (far_branch_list);
4969 if (optimize
4970 && far_branch_list->far_label
4971 && ! --LABEL_NUSES (far_branch_list->far_label))
4972 delete_insn (far_branch_list->far_label);
4973 far_branch_list = far_branch_list->prev;
4976 /* Instruction length information is no longer valid due to the new
4977 instructions that have been generated. */
4978 init_insn_lengths ();
4981 /* Dump out instruction addresses, which is useful for debugging the
4982 constant pool table stuff.
4984 If relaxing, output the label and pseudo-ops used to link together
4985 calls and the instruction which set the registers. */
4987 /* ??? The addresses printed by this routine for insns are nonsense for
4988 insns which are inside of a sequence where none of the inner insns have
4989 variable length. This is because the second pass of shorten_branches
4990 does not bother to update them. */
4992 void
4993 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
4994 int noperands ATTRIBUTE_UNUSED)
4996 if (TARGET_DUMPISIZE)
4997 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
4999 if (TARGET_RELAX)
5001 rtx note;
5003 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
5004 if (note)
5006 rtx pattern;
5008 pattern = PATTERN (insn);
5009 if (GET_CODE (pattern) == PARALLEL)
5010 pattern = XVECEXP (pattern, 0, 0);
5011 switch (GET_CODE (pattern))
5013 case SET:
5014 if (GET_CODE (SET_SRC (pattern)) != CALL
5015 && get_attr_type (insn) != TYPE_SFUNC)
5017 targetm.asm_out.internal_label
5018 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
5019 break;
5021 /* else FALLTHROUGH */
5022 case CALL:
5023 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
5024 CODE_LABEL_NUMBER (XEXP (note, 0)));
5025 break;
5027 default:
5028 gcc_unreachable ();
5034 /* Dump out any constants accumulated in the final pass. These will
5035 only be labels. */
5037 const char *
5038 output_jump_label_table (void)
5040 int i;
5042 if (pool_size)
5044 fprintf (asm_out_file, "\t.align 2\n");
5045 for (i = 0; i < pool_size; i++)
5047 pool_node *p = &pool_vector[i];
5049 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5050 CODE_LABEL_NUMBER (p->label));
5051 output_asm_insn (".long %O0", &p->value);
5053 pool_size = 0;
5056 return "";
5059 /* A full frame looks like:
5061 arg-5
5062 arg-4
5063 [ if current_function_anonymous_args
5064 arg-3
5065 arg-2
5066 arg-1
5067 arg-0 ]
5068 saved-fp
5069 saved-r10
5070 saved-r11
5071 saved-r12
5072 saved-pr
5073 local-n
5075 local-1
5076 local-0 <- fp points here. */
5078 /* Number of bytes pushed for anonymous args, used to pass information
5079 between expand_prologue and expand_epilogue. */
5081 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
5082 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
5083 for an epilogue and a negative value means that it's for a sibcall
5084 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
5085 all the registers that are about to be restored, and hence dead. */
5087 static void
5088 output_stack_adjust (int size, rtx reg, int epilogue_p,
5089 HARD_REG_SET *live_regs_mask)
5091 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
5092 if (size)
5094 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5096 /* This test is bogus, as output_stack_adjust is used to re-align the
5097 stack. */
5098 #if 0
5099 gcc_assert (!(size % align));
5100 #endif
5102 if (CONST_OK_FOR_ADD (size))
5103 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
5104 /* Try to do it with two partial adjustments; however, we must make
5105 sure that the stack is properly aligned at all times, in case
5106 an interrupt occurs between the two partial adjustments. */
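/* For example (a sketch, assuming CONST_OK_FOR_ADD accepts signed 8-bit
constants on non-SHmedia targets and ALIGN is 4): a 160-byte adjustment is
too big for a single add, so it is emitted as 160 / 2 & -4 = 80 followed by
160 - 80 = 80, and the stack stays 4-byte aligned after each step.  */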
5107 else if (CONST_OK_FOR_ADD (size / 2 & -align)
5108 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
5110 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
5111 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
5113 else
5115 rtx const_reg;
5116 rtx insn;
5117 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
5118 int i;
5120 /* If TEMP is invalid, we could temporarily save a general
5121 register to MACL. However, there is currently no need
5122 to handle this case, so just die when we see it. */
5123 if (epilogue_p < 0
5124 || current_function_interrupt
5125 || ! call_really_used_regs[temp] || fixed_regs[temp])
5126 temp = -1;
5127 if (temp < 0 && ! current_function_interrupt
5128 && (TARGET_SHMEDIA || epilogue_p >= 0))
5130 HARD_REG_SET temps;
5131 COPY_HARD_REG_SET (temps, call_used_reg_set);
5132 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
5133 if (epilogue_p > 0)
5135 int nreg = 0;
5136 if (current_function_return_rtx)
5138 enum machine_mode mode;
5139 mode = GET_MODE (current_function_return_rtx);
5140 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
5141 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
5143 for (i = 0; i < nreg; i++)
5144 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
5145 if (current_function_calls_eh_return)
5147 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
5148 for (i = 0; i <= 3; i++)
5149 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
5152 if (TARGET_SHMEDIA && epilogue_p < 0)
5153 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
5154 CLEAR_HARD_REG_BIT (temps, i);
5155 if (epilogue_p <= 0)
5157 for (i = FIRST_PARM_REG;
5158 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
5159 CLEAR_HARD_REG_BIT (temps, i);
5160 if (cfun->static_chain_decl != NULL)
5161 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
5163 temp = scavenge_reg (&temps);
5165 if (temp < 0 && live_regs_mask)
5166 temp = scavenge_reg (live_regs_mask);
5167 if (temp < 0)
5169 rtx adj_reg, tmp_reg, mem;
5171 /* If we reached here, the most likely case is the (sibcall)
5172 epilogue for non-SHmedia. Use a special push/pop sequence
5173 for such a case as a last resort. This looks lengthy, but
5174 it should not be a problem because it seems to be very
5175 rare. */
5177 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
5180 /* ??? There is still the slight possibility that r4 or
5181 r5 have been reserved as fixed registers or assigned
5182 as global registers, and they change during an
5183 interrupt. There are possible ways to handle this:
5185 - If we are adjusting the frame pointer (r14), we can do
5186 with a single temp register and an ordinary push / pop
5187 on the stack.
5188 - Grab any call-used or call-saved registers (i.e. not
5189 fixed or globals) for the temps we need. We might
5190 also grab r14 if we are adjusting the stack pointer.
5191 If we can't find enough available registers, issue
5192 a diagnostic and die - the user must have reserved
5193 way too many registers.
5194 But since all this is rather unlikely to happen and
5195 would require extra testing, we just die if r4 / r5
5196 are not available. */
5197 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
5198 && !global_regs[4] && !global_regs[5]);
5200 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
5201 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
5202 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
5203 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
5204 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
5205 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5206 emit_move_insn (mem, tmp_reg);
5207 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
5208 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5209 emit_move_insn (mem, tmp_reg);
5210 emit_move_insn (reg, adj_reg);
5211 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5212 emit_move_insn (adj_reg, mem);
5213 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5214 emit_move_insn (tmp_reg, mem);
5215 return;
5217 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
5219 /* If SIZE is negative, subtract the positive value.
5220 This sometimes allows a constant pool entry to be shared
5221 between prologue and epilogue code. */
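/* E.g. (illustrative): for a 280-byte frame the prologue is asked for -280
and the epilogue for +280.  Loading the constant 280 in both and using sub
in the prologue and add in the epilogue lets a single pool entry for 280
serve both, instead of needing entries for -280 and 280.  */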
5222 if (size < 0)
5224 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
5225 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
5227 else
5229 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
5230 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
5232 if (! epilogue_p)
5233 REG_NOTES (insn)
5234 = (gen_rtx_EXPR_LIST
5235 (REG_FRAME_RELATED_EXPR,
5236 gen_rtx_SET (VOIDmode, reg,
5237 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
5238 REG_NOTES (insn)));
5243 static rtx
5244 frame_insn (rtx x)
5246 x = emit_insn (x);
5247 RTX_FRAME_RELATED_P (x) = 1;
5248 return x;
5251 /* Output RTL to push register RN onto the stack. */
5253 static rtx
5254 push (int rn)
5256 rtx x;
5257 if (rn == FPUL_REG)
5258 x = gen_push_fpul ();
5259 else if (rn == FPSCR_REG)
5260 x = gen_push_fpscr ();
5261 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5262 && FP_OR_XD_REGISTER_P (rn))
5264 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5265 return NULL_RTX;
5266 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
5268 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5269 x = gen_push_e (gen_rtx_REG (SFmode, rn));
5270 else
5271 x = gen_push (gen_rtx_REG (SImode, rn));
5273 x = frame_insn (x);
5274 REG_NOTES (x)
5275 = gen_rtx_EXPR_LIST (REG_INC,
5276 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5277 return x;
5280 /* Output RTL to pop register RN from the stack. */
5282 static void
5283 pop (int rn)
5285 rtx x;
5286 if (rn == FPUL_REG)
5287 x = gen_pop_fpul ();
5288 else if (rn == FPSCR_REG)
5289 x = gen_pop_fpscr ();
5290 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5291 && FP_OR_XD_REGISTER_P (rn))
5293 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5294 return;
5295 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
5297 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5298 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
5299 else
5300 x = gen_pop (gen_rtx_REG (SImode, rn));
5302 x = emit_insn (x);
5303 REG_NOTES (x)
5304 = gen_rtx_EXPR_LIST (REG_INC,
5305 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5308 /* Generate code to push the regs specified in the mask. */
5310 static void
5311 push_regs (HARD_REG_SET *mask, int interrupt_handler)
5313 int i;
5314 int skip_fpscr = 0;
5316 /* Push PR last; this gives better latencies after the prologue, and
5317 candidates for the return delay slot when there are no general
5318 registers pushed. */
5319 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5321 /* If this is an interrupt handler, and the SZ bit varies,
5322 and we have to push any floating point register, we need
5323 to switch to the correct precision first. */
5324 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
5325 && hard_regs_intersect_p (mask, &reg_class_contents[DF_REGS]))
5327 HARD_REG_SET unsaved;
5329 push (FPSCR_REG);
5330 COMPL_HARD_REG_SET (unsaved, *mask);
5331 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
5332 skip_fpscr = 1;
5334 if (i != PR_REG
5335 && (i != FPSCR_REG || ! skip_fpscr)
5336 && TEST_HARD_REG_BIT (*mask, i))
5337 push (i);
5339 if (TEST_HARD_REG_BIT (*mask, PR_REG))
5340 push (PR_REG);
5343 /* Calculate how much extra space is needed to save all callee-saved
5344 target registers.
5345 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5347 static int
5348 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
5350 int reg;
5351 int stack_space = 0;
5352 int interrupt_handler = sh_cfun_interrupt_handler_p ();
5354 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5355 if ((! call_really_used_regs[reg] || interrupt_handler)
5356 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5357 /* Leave space to save this target register on the stack,
5358 in case target register allocation wants to use it. */
5359 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5360 return stack_space;
5363 /* Decide whether we should reserve space for callee-save target registers,
5364 in case target register allocation wants to use them. REGS_SAVED is
5365 the space, in bytes, that is already required for register saves.
5366 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5368 static int
5369 shmedia_reserve_space_for_target_registers_p (int regs_saved,
5370 HARD_REG_SET *live_regs_mask)
5372 if (optimize_size)
5373 return 0;
5374 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
5377 /* Decide how much space to reserve for callee-save target registers
5378 in case target register allocation wants to use them.
5379 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5381 static int
5382 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5384 if (shmedia_space_reserved_for_target_registers)
5385 return shmedia_target_regs_stack_space (live_regs_mask);
5386 else
5387 return 0;
5390 /* Work out the registers which need to be saved, both as a mask and a
5391 count of saved words. Return the count.
5393 If doing a pragma interrupt function, then push all regs used by the
5394 function, and if we call another function (we can tell by looking at PR),
5395 make sure that all the regs it clobbers are safe too. */
5397 static int
5398 calc_live_regs (HARD_REG_SET *live_regs_mask)
5400 unsigned int reg;
5401 int count;
5402 int interrupt_handler;
5403 int pr_live, has_call;
5405 interrupt_handler = sh_cfun_interrupt_handler_p ();
5407 CLEAR_HARD_REG_SET (*live_regs_mask);
5408 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
5409 && regs_ever_live[FPSCR_REG])
5410 target_flags &= ~MASK_FPU_SINGLE;
5411 /* If switching to double mode saves us a lot of register saves, do that. */
5412 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
5413 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5414 if (regs_ever_live[reg] && regs_ever_live[reg+1]
5415 && (! call_really_used_regs[reg]
5416 || (interrupt_handler && ! pragma_trapa))
5417 && ++count > 2)
5419 target_flags &= ~MASK_FPU_SINGLE;
5420 break;
5422 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5423 knows how to use it. That means the pseudo originally allocated for
5424 the initial value can become the PR_MEDIA_REG hard register, as seen for
5425 execute/20010122-1.c:test9. */
5426 if (TARGET_SHMEDIA)
5427 /* ??? this function is called from initial_elimination_offset, hence we
5428 can't use the result of sh_media_register_for_return here. */
5429 pr_live = sh_pr_n_sets ();
5430 else
5432 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5433 pr_live = (pr_initial
5434 ? (GET_CODE (pr_initial) != REG
5435 || REGNO (pr_initial) != (PR_REG))
5436 : regs_ever_live[PR_REG]);
5437 /* For SHcompact, if not optimizing, we end up with a memory reference
5438 using the return address pointer for __builtin_return_address even
5439 though there is no actual need to put the PR register on the stack. */
5440 pr_live |= regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM];
5442 /* Force PR to be live if the prologue has to call the SHmedia
5443 argument decoder or register saver. */
5444 if (TARGET_SHCOMPACT
5445 && ((current_function_args_info.call_cookie
5446 & ~ CALL_COOKIE_RET_TRAMP (1))
5447 || current_function_has_nonlocal_label))
5448 pr_live = 1;
5449 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
5450 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
5452 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5453 ? pr_live
5454 : (interrupt_handler && ! pragma_trapa)
5455 ? (/* Need to save all the regs ever live. */
5456 (regs_ever_live[reg]
5457 || (call_really_used_regs[reg]
5458 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
5459 || reg == PIC_OFFSET_TABLE_REGNUM)
5460 && has_call)
5461 || (has_call && REGISTER_NATURAL_MODE (reg) == SImode
5462 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5463 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5464 && reg != RETURN_ADDRESS_POINTER_REGNUM
5465 && reg != T_REG && reg != GBR_REG
5466 /* Push fpscr only on targets which have FPU */
5467 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5468 : (/* Only push those regs which are used and need to be saved. */
5469 (TARGET_SHCOMPACT
5470 && flag_pic
5471 && current_function_args_info.call_cookie
5472 && reg == PIC_OFFSET_TABLE_REGNUM)
5473 || (regs_ever_live[reg] && ! call_really_used_regs[reg])
5474 || (current_function_calls_eh_return
5475 && (reg == EH_RETURN_DATA_REGNO (0)
5476 || reg == EH_RETURN_DATA_REGNO (1)
5477 || reg == EH_RETURN_DATA_REGNO (2)
5478 || reg == EH_RETURN_DATA_REGNO (3)))
5479 || ((reg == MACL_REG || reg == MACH_REG)
5480 && regs_ever_live[reg]
5481 && sh_cfun_attr_renesas_p ())
5484 SET_HARD_REG_BIT (*live_regs_mask, reg);
5485 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5487 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
5488 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
5490 if (FP_REGISTER_P (reg))
5492 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
5494 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5495 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
5498 else if (XD_REGISTER_P (reg))
5500 /* Must switch to double mode to access these registers. */
5501 target_flags &= ~MASK_FPU_SINGLE;
5506 /* If we have a target register optimization pass after prologue / epilogue
5507 threading, we need to assume all target registers will be live even if
5508 they aren't now. */
5509 if (flag_branch_target_load_optimize2
5510 && TARGET_SAVE_ALL_TARGET_REGS
5511 && shmedia_space_reserved_for_target_registers)
5512 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5513 if ((! call_really_used_regs[reg] || interrupt_handler)
5514 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5516 SET_HARD_REG_BIT (*live_regs_mask, reg);
5517 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5519 /* If this is an interrupt handler, we don't have any call-clobbered
5520 registers we can conveniently use for target register save/restore.
5521 Make sure we save at least one general purpose register when we need
5522 to save target registers. */
5523 if (interrupt_handler
5524 && hard_regs_intersect_p (live_regs_mask,
5525 &reg_class_contents[TARGET_REGS])
5526 && ! hard_regs_intersect_p (live_regs_mask,
5527 &reg_class_contents[GENERAL_REGS]))
5529 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
5530 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
5533 return count;
5536 /* Code to generate prologue and epilogue sequences */
5538 /* PUSHED is the number of bytes that are being pushed on the
5539 stack for register saves. Return the frame size, padded
5540 appropriately so that the stack stays properly aligned. */
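/* A worked example (sketch, with a 4-byte STACK_BOUNDARY unit): for a
10-byte frame with 4 bytes already pushed, ((10 + 4 + 3) & -4) - 4 = 12,
so pushed bytes plus frame total the aligned 16.  */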
5541 static HOST_WIDE_INT
5542 rounded_frame_size (int pushed)
5544 HOST_WIDE_INT size = get_frame_size ();
5545 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5547 return ((size + pushed + align - 1) & -align) - pushed;
5550 /* Choose a call-clobbered target-branch register that remains
5551 unchanged along the whole function. We set it up as the return
5552 value in the prologue. */
5554 sh_media_register_for_return (void)
5556 int regno;
5557 int tr0_used;
5559 if (! current_function_is_leaf)
5560 return -1;
5561 if (lookup_attribute ("interrupt_handler",
5562 DECL_ATTRIBUTES (current_function_decl)))
5563 return -1;
5564 if (sh_cfun_interrupt_handler_p ())
5565 return -1;
5567 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
5569 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
5570 if (call_really_used_regs[regno] && ! regs_ever_live[regno])
5571 return regno;
5573 return -1;
5576 /* The maximum registers we need to save are:
5577 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
5578 - 32 floating point registers (for each pair, we save none,
5579 one single precision value, or a double precision value).
5580 - 8 target registers
5581 - add 1 entry for a delimiter. */
5582 #define MAX_SAVED_REGS (62+32+8)
5584 typedef struct save_entry_s
5586 unsigned char reg;
5587 unsigned char mode;
5588 short offset;
5589 } save_entry;
5591 #define MAX_TEMPS 4
5593 /* There will be a delimiter entry with VOIDmode both at the start and the
5594 end of a filled in schedule. The end delimiter has the offset of the
5595 save with the smallest (i.e. most negative) offset. */
5596 typedef struct save_schedule_s
5598 save_entry entries[MAX_SAVED_REGS + 2];
5599 int temps[MAX_TEMPS+1];
5600 } save_schedule;
5602 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
5603 use reverse order. Returns the last entry written to (not counting
5604 the delimiter). OFFSET_BASE is a number to be added to all offset
5605 entries. */
5607 static save_entry *
5608 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
5609 int offset_base)
5611 int align, i;
5612 save_entry *entry = schedule->entries;
5613 int tmpx = 0;
5614 int offset;
5616 if (! current_function_interrupt)
5617 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
5618 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
5619 && ! FUNCTION_ARG_REGNO_P (i)
5620 && i != FIRST_RET_REG
5621 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
5622 && ! (current_function_calls_eh_return
5623 && (i == EH_RETURN_STACKADJ_REGNO
5624 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
5625 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
5626 schedule->temps[tmpx++] = i;
5627 entry->reg = -1;
5628 entry->mode = VOIDmode;
5629 entry->offset = offset_base;
5630 entry++;
5631 /* We loop twice: first, we save 8-byte aligned registers in the
5632 higher addresses, that are known to be aligned. Then, we
5633 proceed to saving 32-bit registers that don't need 8-byte
5634 alignment.
5635 If this is an interrupt function, all registers that need saving
5636 need to be saved in full. Moreover, we need to postpone saving
5637 target registers till we have saved some general purpose registers
5638 we can then use as scratch registers. */
5639 offset = offset_base;
5640 for (align = 1; align >= 0; align--)
5642 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
5643 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5645 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5646 int reg = i;
5648 if (current_function_interrupt)
5650 if (TARGET_REGISTER_P (i))
5651 continue;
5652 if (GENERAL_REGISTER_P (i))
5653 mode = DImode;
5655 if (mode == SFmode && (i % 2) == 1
5656 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5657 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
5659 mode = DFmode;
5660 i--;
5661 reg--;
5664 /* If we're doing the aligned pass and this is not aligned,
5665 or we're doing the unaligned pass and this is aligned,
5666 skip it. */
5667 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
5668 != align)
5669 continue;
5671 if (current_function_interrupt
5672 && GENERAL_REGISTER_P (i)
5673 && tmpx < MAX_TEMPS)
5674 schedule->temps[tmpx++] = i;
5676 offset -= GET_MODE_SIZE (mode);
5677 entry->reg = i;
5678 entry->mode = mode;
5679 entry->offset = offset;
5680 entry++;
5682 if (align && current_function_interrupt)
5683 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
5684 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5686 offset -= GET_MODE_SIZE (DImode);
5687 entry->reg = i;
5688 entry->mode = DImode;
5689 entry->offset = offset;
5690 entry++;
5693 entry->reg = -1;
5694 entry->mode = VOIDmode;
5695 entry->offset = offset;
5696 schedule->temps[tmpx] = -1;
5697 return entry - 1;
5700 void
5701 sh_expand_prologue (void)
5703 HARD_REG_SET live_regs_mask;
5704 int d, i;
5705 int d_rounding = 0;
5706 int save_flags = target_flags;
5707 int pretend_args;
5709 current_function_interrupt = sh_cfun_interrupt_handler_p ();
5711 /* We have pretend args if we had an object sent partially in registers
5712 and partially on the stack, e.g. a large structure. */
5713 pretend_args = current_function_pretend_args_size;
5714 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
5715 && (NPARM_REGS(SImode)
5716 > current_function_args_info.arg_count[(int) SH_ARG_INT]))
5717 pretend_args = 0;
5718 output_stack_adjust (-pretend_args
5719 - current_function_args_info.stack_regs * 8,
5720 stack_pointer_rtx, 0, NULL);
5722 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
5723 /* We're going to use the PIC register to load the address of the
5724 incoming-argument decoder and/or of the return trampoline from
5725 the GOT, so make sure the PIC register is preserved and
5726 initialized. */
5727 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5729 if (TARGET_SHCOMPACT
5730 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5732 int reg;
5734 /* First, make all registers with incoming arguments that will
5735 be pushed onto the stack live, so that register renaming
5736 doesn't overwrite them. */
5737 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
5738 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
5739 >= NPARM_REGS (SImode) - reg)
5740 for (; reg < NPARM_REGS (SImode); reg++)
5741 emit_insn (gen_shcompact_preserve_incoming_args
5742 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5743 else if (CALL_COOKIE_INT_REG_GET
5744 (current_function_args_info.call_cookie, reg) == 1)
5745 emit_insn (gen_shcompact_preserve_incoming_args
5746 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5748 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
5749 stack_pointer_rtx);
5750 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
5751 GEN_INT (current_function_args_info.call_cookie));
5752 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
5753 gen_rtx_REG (SImode, R0_REG));
5755 else if (TARGET_SHMEDIA)
5757 int tr = sh_media_register_for_return ();
5759 if (tr >= 0)
5761 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
5762 gen_rtx_REG (DImode, PR_MEDIA_REG));
5764 /* ??? We should suppress saving pr when we don't need it, but this
5765 is tricky because of builtin_return_address. */
5767 /* If this function only exits with sibcalls, this copy
5768 will be flagged as dead. */
5769 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5770 const0_rtx,
5771 REG_NOTES (insn));
5775 /* Emit the code for SETUP_VARARGS. */
5776 if (current_function_stdarg)
5778 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
5780    /* Push arg regs as if they'd been provided by the caller on the stack.  */
5781 for (i = 0; i < NPARM_REGS(SImode); i++)
5783 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
5784 rtx insn;
5786 if (i >= (NPARM_REGS(SImode)
5787 - current_function_args_info.arg_count[(int) SH_ARG_INT]
5789 break;
5790 insn = push (rn);
5791 RTX_FRAME_RELATED_P (insn) = 0;
5796 /* If we're supposed to switch stacks at function entry, do so now. */
5797 if (sp_switch)
5798 emit_insn (gen_sp_switch_1 ());
5800 d = calc_live_regs (&live_regs_mask);
5801 /* ??? Maybe we could save some switching if we can move a mode switch
5802 that already happens to be at the function start into the prologue. */
5803 if (target_flags != save_flags && ! current_function_interrupt)
5804 emit_insn (gen_toggle_sz ());
5806 if (TARGET_SH5)
5808 int offset_base, offset;
5809 rtx r0 = NULL_RTX;
5810 int offset_in_r0 = -1;
5811 int sp_in_r0 = 0;
5812 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5813 int total_size, save_size;
5814 save_schedule schedule;
5815 save_entry *entry;
5816 int *tmp_pnt;
5818 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
5819 && ! current_function_interrupt)
5820 r0 = gen_rtx_REG (Pmode, R0_REG);
5822 /* D is the actual number of bytes that we need for saving registers,
5823 however, in initial_elimination_offset we have committed to using
5824 an additional TREGS_SPACE amount of bytes - in order to keep both
5825 addresses to arguments supplied by the caller and local variables
5826 valid, we must keep this gap. Place it between the incoming
5827 arguments and the actually saved registers in a bid to optimize
5828 locality of reference. */
5829 total_size = d + tregs_space;
5830 total_size += rounded_frame_size (total_size);
5831 save_size = total_size - rounded_frame_size (d);
5832 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
5833 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5834 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
5836 /* If adjusting the stack in a single step costs nothing extra, do so.
5837 I.e. either if a single addi is enough, or we need a movi anyway,
5838 and we don't exceed the maximum offset range (the test for the
5839 latter is conservative for simplicity). */
5840 if (TARGET_SHMEDIA
5841 && (CONST_OK_FOR_I10 (-total_size)
5842 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
5843 && total_size <= 2044)))
5844 d_rounding = total_size - save_size;
5846 offset_base = d + d_rounding;
5848 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
5849 0, NULL);
5851 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
5852 tmp_pnt = schedule.temps;
5853 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
5855 enum machine_mode mode = entry->mode;
5856 unsigned int reg = entry->reg;
5857 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
5858 rtx orig_reg_rtx;
5860 offset = entry->offset;
5862 reg_rtx = gen_rtx_REG (mode, reg);
5864 mem_rtx = gen_frame_mem (mode,
5865 gen_rtx_PLUS (Pmode,
5866 stack_pointer_rtx,
5867 GEN_INT (offset)));
5869 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
5871 gcc_assert (r0);
5872 mem_rtx = NULL_RTX;
5874 try_pre_dec:
5876 if (HAVE_PRE_DECREMENT
5877 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
5878 || mem_rtx == NULL_RTX
5879 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5881 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
5883 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
5884 pre_dec_ok);
5886 pre_dec = NULL_RTX;
5888 break;
5890 pre_dec_ok:
5891 mem_rtx = NULL_RTX;
5892 offset += GET_MODE_SIZE (mode);
5894 while (0);
5896 if (mem_rtx != NULL_RTX)
5897 goto addr_ok;
5899 if (offset_in_r0 == -1)
5901 emit_move_insn (r0, GEN_INT (offset));
5902 offset_in_r0 = offset;
5904 else if (offset != offset_in_r0)
5906 emit_move_insn (r0,
5907 gen_rtx_PLUS
5908 (Pmode, r0,
5909 GEN_INT (offset - offset_in_r0)));
5910 offset_in_r0 += offset - offset_in_r0;
5913 if (pre_dec != NULL_RTX)
5915 if (! sp_in_r0)
5917 emit_move_insn (r0,
5918 gen_rtx_PLUS
5919 (Pmode, r0, stack_pointer_rtx));
5920 sp_in_r0 = 1;
5923 offset -= GET_MODE_SIZE (mode);
5924 offset_in_r0 -= GET_MODE_SIZE (mode);
5926 mem_rtx = pre_dec;
5928 else if (sp_in_r0)
5929 mem_rtx = gen_frame_mem (mode, r0);
5930 else
5931 mem_rtx = gen_frame_mem (mode,
5932 gen_rtx_PLUS (Pmode,
5933 stack_pointer_rtx,
5934 r0));
5936 /* We must not use an r0-based address for target-branch
5937 registers or for special registers without pre-dec
5938 memory addresses, since we store their values in r0
5939 first. */
5940 gcc_assert (!TARGET_REGISTER_P (reg)
5941 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
5942 || mem_rtx == pre_dec));
5944 addr_ok:
5945 orig_reg_rtx = reg_rtx;
5946 if (TARGET_REGISTER_P (reg)
5947 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5948 && mem_rtx != pre_dec))
5950 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
5952 emit_move_insn (tmp_reg, reg_rtx);
5954 if (REGNO (tmp_reg) == R0_REG)
5956 offset_in_r0 = -1;
5957 sp_in_r0 = 0;
5958 gcc_assert (!refers_to_regno_p
5959 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
5962 if (*++tmp_pnt <= 0)
5963 tmp_pnt = schedule.temps;
5965 reg_rtx = tmp_reg;
5968 rtx insn;
5970      /* Mark as interesting for the dwarf cfi generator.  */
5971 insn = emit_move_insn (mem_rtx, reg_rtx);
5972 RTX_FRAME_RELATED_P (insn) = 1;
5973 /* If we use an intermediate register for the save, we can't
5974 describe this exactly in cfi as a copy of the to-be-saved
5975 register into the temporary register and then the temporary
5976 register on the stack, because the temporary register can
5977 have a different natural size than the to-be-saved register.
5978 Thus, we gloss over the intermediate copy and pretend we do
5979 a direct save from the to-be-saved register. */
5980 if (REGNO (reg_rtx) != reg)
5982 rtx set, note_rtx;
5984 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
5985 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
5986 REG_NOTES (insn));
5987 REG_NOTES (insn) = note_rtx;
5990 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
5992 rtx reg_rtx = gen_rtx_REG (mode, reg);
5993 rtx set, note_rtx;
5994 rtx mem_rtx = gen_frame_mem (mode,
5995 gen_rtx_PLUS (Pmode,
5996 stack_pointer_rtx,
5997 GEN_INT (offset)));
5999 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
6000 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6001 REG_NOTES (insn));
6002 REG_NOTES (insn) = note_rtx;
6007 gcc_assert (entry->offset == d_rounding);
6009 else
6010 push_regs (&live_regs_mask, current_function_interrupt);
6012 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
6014 rtx insn = get_last_insn ();
6015 rtx last = emit_insn (gen_GOTaddr2picreg ());
6017 /* Mark these insns as possibly dead. Sometimes, flow2 may
6018 delete all uses of the PIC register. In this case, let it
6019 delete the initialization too. */
6022 insn = NEXT_INSN (insn);
6024 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6025 const0_rtx,
6026 REG_NOTES (insn));
6028 while (insn != last);
6031 if (SHMEDIA_REGS_STACK_ADJUST ())
6033 /* This must NOT go through the PLT, otherwise mach and macl
6034 may be clobbered. */
6035 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6036 (TARGET_FPU_ANY
6037 ? "__GCC_push_shmedia_regs"
6038 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
6039 emit_insn (gen_shmedia_save_restore_regs_compact
6040 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
6043 if (target_flags != save_flags && ! current_function_interrupt)
6045 rtx insn = emit_insn (gen_toggle_sz ());
6047 /* If we're lucky, a mode switch in the function body will
6048 overwrite fpscr, turning this insn dead. Tell flow this
6049 insn is ok to delete. */
6050 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6051 const0_rtx,
6052 REG_NOTES (insn));
6055 target_flags = save_flags;
6057 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
6058 stack_pointer_rtx, 0, NULL);
6060 if (frame_pointer_needed)
6061 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
6063 if (TARGET_SHCOMPACT
6064 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6066 /* This must NOT go through the PLT, otherwise mach and macl
6067 may be clobbered. */
6068 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6069 "__GCC_shcompact_incoming_args", SFUNC_GOT);
6070 emit_insn (gen_shcompact_incoming_args ());
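/* Expand code for the epilogue of the current function.  SIBCALL_P is
   true when the epilogue is emitted for a sibling call rather than a
   normal return.  */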
6074 void
6075 sh_expand_epilogue (bool sibcall_p)
6077 HARD_REG_SET live_regs_mask;
6078 int d, i;
6079 int d_rounding = 0;
6081 int save_flags = target_flags;
6082 int frame_size, save_size;
6083 int fpscr_deferred = 0;
6084 int e = sibcall_p ? -1 : 1;
6086 d = calc_live_regs (&live_regs_mask);
6088 save_size = d;
6089 frame_size = rounded_frame_size (d);
6091 if (TARGET_SH5)
6093 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6094 int total_size;
6095 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
6096 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6097 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
6099 total_size = d + tregs_space;
6100 total_size += rounded_frame_size (total_size);
6101 save_size = total_size - frame_size;
6103 /* If adjusting the stack in a single step costs nothing extra, do so.
6104 I.e. either if a single addi is enough, or we need a movi anyway,
6105 and we don't exceed the maximum offset range (the test for the
6106 latter is conservative for simplicity). */
6107 if (TARGET_SHMEDIA
6108 && ! frame_pointer_needed
6109 && (CONST_OK_FOR_I10 (total_size)
6110 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
6111 && total_size <= 2044)))
6112 d_rounding = frame_size;
6114 frame_size -= d_rounding;
6117 if (frame_pointer_needed)
6119 /* We must avoid scheduling the epilogue with previous basic blocks
6120 when exception handling is enabled. See PR/18032. */
6121 if (flag_exceptions)
6122 emit_insn (gen_blockage ());
6123 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
6124 &live_regs_mask);
6126 /* We must avoid moving the stack pointer adjustment past code
6127 which reads from the local frame, else an interrupt could
6128 occur after the SP adjustment and clobber data in the local
6129 frame. */
6130 emit_insn (gen_blockage ());
6131 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
6133 else if (frame_size)
6135 /* We must avoid moving the stack pointer adjustment past code
6136 which reads from the local frame, else an interrupt could
6137 occur after the SP adjustment and clobber data in the local
6138 frame. */
6139 emit_insn (gen_blockage ());
6140 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
6143 if (SHMEDIA_REGS_STACK_ADJUST ())
6145 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6146 (TARGET_FPU_ANY
6147 ? "__GCC_pop_shmedia_regs"
6148 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
6149 /* This must NOT go through the PLT, otherwise mach and macl
6150 may be clobbered. */
6151 emit_insn (gen_shmedia_save_restore_regs_compact
6152 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
6155 /* Pop all the registers. */
6157 if (target_flags != save_flags && ! current_function_interrupt)
6158 emit_insn (gen_toggle_sz ());
6159 if (TARGET_SH5)
6161 int offset_base, offset;
6162 int offset_in_r0 = -1;
6163 int sp_in_r0 = 0;
6164 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
6165 save_schedule schedule;
6166 save_entry *entry;
6167 int *tmp_pnt;
6169 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
6170 offset_base = -entry[1].offset + d_rounding;
6171 tmp_pnt = schedule.temps;
6172 for (; entry->mode != VOIDmode; entry--)
6174 enum machine_mode mode = entry->mode;
6175 int reg = entry->reg;
6176 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
6178 offset = offset_base + entry->offset;
6179 reg_rtx = gen_rtx_REG (mode, reg);
6181 mem_rtx = gen_frame_mem (mode,
6182 gen_rtx_PLUS (Pmode,
6183 stack_pointer_rtx,
6184 GEN_INT (offset)));
6186 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
6188 mem_rtx = NULL_RTX;
6190 try_post_inc:
6192 if (HAVE_POST_INCREMENT
6193 && (offset == offset_in_r0
6194 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
6195 && mem_rtx == NULL_RTX)
6196 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6198 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
6200 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
6201 post_inc_ok);
6203 post_inc = NULL_RTX;
6205 break;
6207 post_inc_ok:
6208 mem_rtx = NULL_RTX;
6210 while (0);
6212 if (mem_rtx != NULL_RTX)
6213 goto addr_ok;
6215 if (offset_in_r0 == -1)
6217 emit_move_insn (r0, GEN_INT (offset));
6218 offset_in_r0 = offset;
6220 else if (offset != offset_in_r0)
6222 emit_move_insn (r0,
6223 gen_rtx_PLUS
6224 (Pmode, r0,
6225 GEN_INT (offset - offset_in_r0)));
6226 offset_in_r0 += offset - offset_in_r0;
6229 if (post_inc != NULL_RTX)
6231 if (! sp_in_r0)
6233 emit_move_insn (r0,
6234 gen_rtx_PLUS
6235 (Pmode, r0, stack_pointer_rtx));
6236 sp_in_r0 = 1;
6239 mem_rtx = post_inc;
6241 offset_in_r0 += GET_MODE_SIZE (mode);
6243 else if (sp_in_r0)
6244 mem_rtx = gen_frame_mem (mode, r0);
6245 else
6246 mem_rtx = gen_frame_mem (mode,
6247 gen_rtx_PLUS (Pmode,
6248 stack_pointer_rtx,
6249 r0));
6251 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6252 || mem_rtx == post_inc);
6254 addr_ok:
6255 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6256 && mem_rtx != post_inc)
6258 insn = emit_move_insn (r0, mem_rtx);
6259 mem_rtx = r0;
6261 else if (TARGET_REGISTER_P (reg))
6263 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
6265 /* Give the scheduler a bit of freedom by using up to
6266 MAX_TEMPS registers in a round-robin fashion. */
6267 insn = emit_move_insn (tmp_reg, mem_rtx);
6268 mem_rtx = tmp_reg;
6269 if (*++tmp_pnt < 0)
6270 tmp_pnt = schedule.temps;
6273 insn = emit_move_insn (reg_rtx, mem_rtx);
6274 if (reg == PR_MEDIA_REG && sh_media_register_for_return () >= 0)
6275 /* This is dead, unless we return with a sibcall. */
6276 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6277 const0_rtx,
6278 REG_NOTES (insn));
6281 gcc_assert (entry->offset + offset_base == d + d_rounding);
6283 else /* ! TARGET_SH5 */
6285 save_size = 0;
6286 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6287 pop (PR_REG);
6288 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
6290 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
6292 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
6293 && hard_regs_intersect_p (&live_regs_mask,
6294 &reg_class_contents[DF_REGS]))
6295 fpscr_deferred = 1;
6296 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
6297 pop (j);
6298 if (j == FIRST_FP_REG && fpscr_deferred)
6299 pop (FPSCR_REG);
6303 if (target_flags != save_flags && ! current_function_interrupt)
6304 emit_insn (gen_toggle_sz ());
6305 target_flags = save_flags;
6307 output_stack_adjust (current_function_pretend_args_size
6308 + save_size + d_rounding
6309 + current_function_args_info.stack_regs * 8,
6310 stack_pointer_rtx, e, NULL);
6312 if (current_function_calls_eh_return)
6313 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
6314 EH_RETURN_STACKADJ_RTX));
6316 /* Switch back to the normal stack if necessary. */
6317 if (sp_switch)
6318 emit_insn (gen_sp_switch_2 ());
6320 /* Tell flow the insn that pops PR isn't dead. */
6321 /* PR_REG will never be live in SHmedia mode, and we don't need to
6322 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6323 by the return pattern. */
6324 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6325 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
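/* Cached result for sh_need_epilogue: 0 means not yet computed, 1 means
   an epilogue is needed, -1 means it is not.  */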
6328 static int sh_need_epilogue_known = 0;
6331 sh_need_epilogue (void)
6333 if (! sh_need_epilogue_known)
6335 rtx epilogue;
6337 start_sequence ();
6338 sh_expand_epilogue (0);
6339 epilogue = get_insns ();
6340 end_sequence ();
6341 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
6343 return sh_need_epilogue_known > 0;
6346 /* Emit code to change the current function's return address to RA.
6347 TEMP is available as a scratch register, if needed. */
6349 void
6350 sh_set_return_address (rtx ra, rtx tmp)
6352 HARD_REG_SET live_regs_mask;
6353 int d;
6354 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6355 int pr_offset;
6357 d = calc_live_regs (&live_regs_mask);
6359   /* If pr_reg isn't live, we can set it (or the register given in
6360 sh_media_register_for_return) directly. */
6361 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6363 rtx rr;
6365 if (TARGET_SHMEDIA)
6367 int rr_regno = sh_media_register_for_return ();
6369 if (rr_regno < 0)
6370 rr_regno = pr_reg;
6372 rr = gen_rtx_REG (DImode, rr_regno);
6374 else
6375 rr = gen_rtx_REG (SImode, pr_reg);
6377 emit_insn (GEN_MOV (rr, ra));
6378 /* Tell flow the register for return isn't dead. */
6379 emit_insn (gen_rtx_USE (VOIDmode, rr));
6380 return;
6383 if (TARGET_SH5)
6385 int offset;
6386 save_schedule schedule;
6387 save_entry *entry;
6389 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
6390 offset = entry[1].offset;
6391 for (; entry->mode != VOIDmode; entry--)
6392 if (entry->reg == pr_reg)
6393 goto found;
6395       /* We can't find the pr register.  */
6396 gcc_unreachable ();
6398 found:
6399 offset = entry->offset - offset;
6400 pr_offset = (rounded_frame_size (d) + offset
6401 + SHMEDIA_REGS_STACK_ADJUST ());
6403 else
6404 pr_offset = rounded_frame_size (d);
6406 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6407 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
6409 tmp = gen_frame_mem (Pmode, tmp);
6410 emit_insn (GEN_MOV (tmp, ra));
6413 /* Clear variables at function end. */
6415 static void
6416 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6417 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6419 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
6420 sh_need_epilogue_known = 0;
6421 sp_switch = NULL_RTX;
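/* Expand __builtin_saveregs for the current function.  */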
6424 static rtx
6425 sh_builtin_saveregs (void)
6427 /* First unnamed integer register. */
6428 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
6429 /* Number of integer registers we need to save. */
6430 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6431   /* First unnamed SFmode float reg.  */
6432 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6433 /* Number of SFmode float regs to save. */
6434 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6435 rtx regbuf, fpregs;
6436 int bufsize, regno;
6437 HOST_WIDE_INT alias_set;
6439 if (TARGET_SH5)
6441 if (n_intregs)
6443 int pushregs = n_intregs;
6445 while (pushregs < NPARM_REGS (SImode) - 1
6446 && (CALL_COOKIE_INT_REG_GET
6447 (current_function_args_info.call_cookie,
6448 NPARM_REGS (SImode) - pushregs)
6449 == 1))
6451 current_function_args_info.call_cookie
6452 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6453 - pushregs, 1);
6454 pushregs++;
6457 if (pushregs == NPARM_REGS (SImode))
6458 current_function_args_info.call_cookie
6459 |= (CALL_COOKIE_INT_REG (0, 1)
6460 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6461 else
6462 current_function_args_info.call_cookie
6463 |= CALL_COOKIE_STACKSEQ (pushregs);
6465 current_function_pretend_args_size += 8 * n_intregs;
6467 if (TARGET_SHCOMPACT)
6468 return const0_rtx;
6471 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
6473 error ("__builtin_saveregs not supported by this subtarget");
6474 return const0_rtx;
6477 if (TARGET_SHMEDIA)
6478 n_floatregs = 0;
6480 /* Allocate block of memory for the regs. */
6481 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6482 Or can assign_stack_local accept a 0 SIZE argument? */
6483 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
6485 if (TARGET_SHMEDIA)
6486 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
6487 else if (n_floatregs & 1)
6489 rtx addr;
6491 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6492 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
6493 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6494 regbuf = change_address (regbuf, BLKmode, addr);
6496 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
6498 rtx addr, mask;
6500 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6501 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
6502 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
6503 emit_insn (gen_andsi3 (addr, addr, mask));
6504 regbuf = change_address (regbuf, BLKmode, addr);
6506 else
6507 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
6508 alias_set = get_varargs_alias_set ();
6509 set_mem_alias_set (regbuf, alias_set);
6511 /* Save int args.
6512 This is optimized to only save the regs that are necessary. Explicitly
6513 named args need not be saved. */
6514 if (n_intregs > 0)
6515 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
6516 adjust_address (regbuf, BLKmode,
6517 n_floatregs * UNITS_PER_WORD),
6518 n_intregs);
6520 if (TARGET_SHMEDIA)
6521 /* Return the address of the regbuf. */
6522 return XEXP (regbuf, 0);
6524 /* Save float args.
6525 This is optimized to only save the regs that are necessary. Explicitly
6526 named args need not be saved.
6527 We explicitly build a pointer to the buffer because it halves the insn
6528 count when not optimizing (otherwise the pointer is built for each reg
6529 saved).
6530 We emit the moves in reverse order so that we can use predecrement. */
6532 fpregs = copy_to_mode_reg (Pmode,
6533 plus_constant (XEXP (regbuf, 0),
6534 n_floatregs * UNITS_PER_WORD));
6535 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
6537 rtx mem;
6538 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
6540 emit_insn (gen_addsi3 (fpregs, fpregs,
6541 GEN_INT (-2 * UNITS_PER_WORD)));
6542 mem = change_address (regbuf, DFmode, fpregs);
6543 emit_move_insn (mem,
6544 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
6546 regno = first_floatreg;
6547 if (regno & 1)
6549 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6550 mem = change_address (regbuf, SFmode, fpregs);
6551 emit_move_insn (mem,
6552 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
6553 - (TARGET_LITTLE_ENDIAN != 0)));
6556 else
6557 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
6559 rtx mem;
6561 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6562 mem = change_address (regbuf, SFmode, fpregs);
6563 emit_move_insn (mem,
6564 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
6567 /* Return the address of the regbuf. */
6568 return XEXP (regbuf, 0);
6571 /* Define the `__builtin_va_list' type for the ABI. */
6573 static tree
6574 sh_build_builtin_va_list (void)
6576 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6577 tree record;
6579 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
6580 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6581 return ptr_type_node;
6583 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6585 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
6586 ptr_type_node);
6587 f_next_o_limit = build_decl (FIELD_DECL,
6588 get_identifier ("__va_next_o_limit"),
6589 ptr_type_node);
6590 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
6591 ptr_type_node);
6592 f_next_fp_limit = build_decl (FIELD_DECL,
6593 get_identifier ("__va_next_fp_limit"),
6594 ptr_type_node);
6595 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
6596 ptr_type_node);
6598 DECL_FIELD_CONTEXT (f_next_o) = record;
6599 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
6600 DECL_FIELD_CONTEXT (f_next_fp) = record;
6601 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
6602 DECL_FIELD_CONTEXT (f_next_stack) = record;
6604 TYPE_FIELDS (record) = f_next_o;
6605 TREE_CHAIN (f_next_o) = f_next_o_limit;
6606 TREE_CHAIN (f_next_o_limit) = f_next_fp;
6607 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
6608 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
6610 layout_type (record);
6612 return record;
6615 /* Implement `va_start' for varargs and stdarg. */
6617 void
6618 sh_va_start (tree valist, rtx nextarg)
6620 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6621 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6622 tree t, u;
6623 int nfp, nint;
6625 if (TARGET_SH5)
6627 expand_builtin_saveregs ();
6628 std_expand_builtin_va_start (valist, nextarg);
6629 return;
6632 if ((! TARGET_SH2E && ! TARGET_SH4)
6633 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6635 std_expand_builtin_va_start (valist, nextarg);
6636 return;
6639 f_next_o = TYPE_FIELDS (va_list_type_node);
6640 f_next_o_limit = TREE_CHAIN (f_next_o);
6641 f_next_fp = TREE_CHAIN (f_next_o_limit);
6642 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6643 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6645 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6646 NULL_TREE);
6647 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6648 valist, f_next_o_limit, NULL_TREE);
6649 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
6650 NULL_TREE);
6651 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6652 valist, f_next_fp_limit, NULL_TREE);
6653 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6654 valist, f_next_stack, NULL_TREE);
6656 /* Call __builtin_saveregs. */
6657 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
6658 t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
6659 TREE_SIDE_EFFECTS (t) = 1;
6660 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6662 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
6663 if (nfp < 8)
6664 nfp = 8 - nfp;
6665 else
6666 nfp = 0;
6667 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6668 build_int_cst (NULL_TREE, UNITS_PER_WORD * nfp)));
6669 t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
6670 TREE_SIDE_EFFECTS (t) = 1;
6671 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6673 t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
6674 TREE_SIDE_EFFECTS (t) = 1;
6675 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6677 nint = current_function_args_info.arg_count[SH_ARG_INT];
6678 if (nint < 4)
6679 nint = 4 - nint;
6680 else
6681 nint = 0;
6682 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6683 build_int_cst (NULL_TREE, UNITS_PER_WORD * nint)));
6684 t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
6685 TREE_SIDE_EFFECTS (t) = 1;
6686 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6688 u = make_tree (ptr_type_node, nextarg);
6689 t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
6690 TREE_SIDE_EFFECTS (t) = 1;
6691 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6694 /* TYPE is a RECORD_TYPE. If there is only a single non-zero-sized
6695 member, return it. */
6696 static tree
6697 find_sole_member (tree type)
6699 tree field, member = NULL_TREE;
6701 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6703 if (TREE_CODE (field) != FIELD_DECL)
6704 continue;
6705 if (!DECL_SIZE (field))
6706 return NULL_TREE;
6707 if (integer_zerop (DECL_SIZE (field)))
6708 continue;
6709 if (member)
6710 return NULL_TREE;
6711 member = field;
6713 return member;
6715 /* Implement `va_arg'. */
6717 static tree
6718 sh_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p,
6719 tree *post_p ATTRIBUTE_UNUSED)
6721 HOST_WIDE_INT size, rsize;
6722 tree tmp, pptr_type_node;
6723 tree addr, lab_over = NULL, result = NULL;
6724 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
6726 if (pass_by_ref)
6727 type = build_pointer_type (type);
6729 size = int_size_in_bytes (type);
6730 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6731 pptr_type_node = build_pointer_type (ptr_type_node);
6733 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
6734 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
6736 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6737 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6738 int pass_as_float;
6739 tree lab_false;
6740 tree member;
6742 f_next_o = TYPE_FIELDS (va_list_type_node);
6743 f_next_o_limit = TREE_CHAIN (f_next_o);
6744 f_next_fp = TREE_CHAIN (f_next_o_limit);
6745 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6746 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6748 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6749 NULL_TREE);
6750 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6751 valist, f_next_o_limit, NULL_TREE);
6752 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
6753 valist, f_next_fp, NULL_TREE);
6754 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6755 valist, f_next_fp_limit, NULL_TREE);
6756 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6757 valist, f_next_stack, NULL_TREE);
6759 /* Structures with a single member with a distinct mode are passed
6760 like their member. This is relevant if the latter has a REAL_TYPE
6761 or COMPLEX_TYPE type. */
6762 while (TREE_CODE (type) == RECORD_TYPE
6763 && (member = find_sole_member (type))
6764 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
6765 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
6766 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
6768 tree field_type = TREE_TYPE (member);
6770 if (TYPE_MODE (type) == TYPE_MODE (field_type))
6771 type = field_type;
6772 else
6774 gcc_assert ((TYPE_ALIGN (type)
6775 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
6776 || (TYPE_ALIGN (type)
6777 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
6778 break;
6782 if (TARGET_SH4)
6784 pass_as_float = ((TREE_CODE (type) == REAL_TYPE && size <= 8)
6785 || (TREE_CODE (type) == COMPLEX_TYPE
6786 && TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
6787 && size <= 16));
6789 else
6791 pass_as_float = (TREE_CODE (type) == REAL_TYPE && size == 4);
6794 addr = create_tmp_var (pptr_type_node, NULL);
6795 lab_false = create_artificial_label ();
6796 lab_over = create_artificial_label ();
6798 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
6800 if (pass_as_float)
6802 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
6803 tree cmp;
6804 bool is_double = size == 8 && TREE_CODE (type) == REAL_TYPE;
6806 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
6807 tmp = build2 (MODIFY_EXPR, void_type_node, addr, tmp);
6808 gimplify_and_add (tmp, pre_p);
6810 tmp = build2 (MODIFY_EXPR, ptr_type_node, next_fp_tmp, valist);
6811 gimplify_and_add (tmp, pre_p);
6812 tmp = next_fp_limit;
6813 if (size > 4 && !is_double)
6814 tmp = build2 (PLUS_EXPR, TREE_TYPE (tmp), tmp,
6815 fold_convert (TREE_TYPE (tmp), size_int (4 - size)));
6816 tmp = build (GE_EXPR, boolean_type_node, next_fp_tmp, tmp);
6817 cmp = build (COND_EXPR, void_type_node, tmp,
6818 build (GOTO_EXPR, void_type_node, lab_false),
6819 NULL);
6820 if (!is_double)
6821 gimplify_and_add (cmp, pre_p);
6823 if (TYPE_ALIGN (type) > BITS_PER_WORD || (is_double || size == 16))
6825 tmp = fold_convert (ptr_type_node, size_int (UNITS_PER_WORD));
6826 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp_tmp, tmp);
6827 tmp = build (PLUS_EXPR, ptr_type_node, next_fp_tmp, tmp);
6828 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp_tmp, tmp);
6829 gimplify_and_add (tmp, pre_p);
6831 if (is_double)
6832 gimplify_and_add (cmp, pre_p);
6834 #ifdef FUNCTION_ARG_SCmode_WART
6835 if (TYPE_MODE (type) == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
6837 tree subtype = TREE_TYPE (type);
6838 tree real, imag;
6840 imag
6841 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
6842 imag = get_initialized_tmp_var (imag, pre_p, NULL);
6844 real
6845 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
6846 real = get_initialized_tmp_var (real, pre_p, NULL);
6848 result = build (COMPLEX_EXPR, type, real, imag);
6849 result = get_initialized_tmp_var (result, pre_p, NULL);
6851 #endif /* FUNCTION_ARG_SCmode_WART */
6853 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6854 gimplify_and_add (tmp, pre_p);
6856 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6857 gimplify_and_add (tmp, pre_p);
6859 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6860 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6861 gimplify_and_add (tmp, pre_p);
6862 tmp = build2 (MODIFY_EXPR, ptr_type_node, next_fp_tmp, valist);
6863 gimplify_and_add (tmp, pre_p);
6865 tmp = build2 (MODIFY_EXPR, ptr_type_node, valist, next_fp_tmp);
6866 gimplify_and_add (tmp, post_p);
6867 valist = next_fp_tmp;
6869 else
6871 tmp = fold_convert (ptr_type_node, size_int (rsize));
6872 tmp = build (PLUS_EXPR, ptr_type_node, next_o, tmp);
6873 tmp = build (GT_EXPR, boolean_type_node, tmp, next_o_limit);
6874 tmp = build (COND_EXPR, void_type_node, tmp,
6875 build (GOTO_EXPR, void_type_node, lab_false),
6876 NULL);
6877 gimplify_and_add (tmp, pre_p);
6879 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
6880 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6881 gimplify_and_add (tmp, pre_p);
6883 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6884 gimplify_and_add (tmp, pre_p);
6886 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6887 gimplify_and_add (tmp, pre_p);
6889 if (size > 4 && ! TARGET_SH4)
6891 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
6892 gimplify_and_add (tmp, pre_p);
6895 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6896 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6897 gimplify_and_add (tmp, pre_p);
6900 if (!result)
6902 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6903 gimplify_and_add (tmp, pre_p);
6907 /* ??? In va-sh.h, there had been code to make values larger than
6908 size 8 indirect. This does not match the FUNCTION_ARG macros. */
6910 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
6911 if (result)
6913 tmp = build (MODIFY_EXPR, void_type_node, result, tmp);
6914 gimplify_and_add (tmp, pre_p);
6916 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6917 gimplify_and_add (tmp, pre_p);
6919 else
6920 result = tmp;
6922 if (pass_by_ref)
6923 result = build_va_arg_indirect_ref (result);
6925 return result;
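/* Return nonzero if arguments to a function of type TYPE should be
   promoted to full words; the Hitachi / Renesas conventions do not
   promote.  */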
6928 bool
6929 sh_promote_prototypes (tree type)
6931 if (TARGET_HITACHI)
6932 return 0;
6933 if (! type)
6934 return 1;
6935 return ! sh_attr_renesas_p (type);
6938 /* Whether an argument must be passed by reference. On SHcompact, we
6939 pretend arguments wider than 32-bits that would have been passed in
6940 registers are passed by reference, so that an SHmedia trampoline
6941    loads them into the full 64-bit registers.  */
6943 static int
6944 shcompact_byref (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6945 tree type, bool named)
6947 unsigned HOST_WIDE_INT size;
6949 if (type)
6950 size = int_size_in_bytes (type);
6951 else
6952 size = GET_MODE_SIZE (mode);
6954 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
6955 && (!named
6956 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
6957 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
6958 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
6959 && size > 4
6960 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
6961 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
6962 return size;
6963 else
6964 return 0;
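/* Return true if an argument of mode MODE and type TYPE must be passed
   by reference.  */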
6967 static bool
6968 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6969 tree type, bool named)
6971 if (targetm.calls.must_pass_in_stack (mode, type))
6972 return true;
6974 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
6975 wants to know about pass-by-reference semantics for incoming
6976 arguments. */
6977 if (! cum)
6978 return false;
6980 if (TARGET_SHCOMPACT)
6982 cum->byref = shcompact_byref (cum, mode, type, named);
6983 return cum->byref != 0;
6986 return false;
6989 static bool
6990 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6991 tree type, bool named ATTRIBUTE_UNUSED)
6993 /* ??? How can it possibly be correct to return true only on the
6994 caller side of the equation? Is there someplace else in the
6995 sh backend that's magically producing the copies? */
6996 return (cum->outgoing
6997 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
6998 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
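/* Return the number of bytes of an argument that are passed in
   registers when the remainder of the argument goes on the stack.  */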
7001 static int
7002 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7003 tree type, bool named ATTRIBUTE_UNUSED)
7005 int words = 0;
7007 if (!TARGET_SH5
7008 && PASS_IN_REG_P (*cum, mode, type)
7009 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
7010 && (ROUND_REG (*cum, mode)
7011 + (mode != BLKmode
7012 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7013 : ROUND_ADVANCE (int_size_in_bytes (type)))
7014 > NPARM_REGS (mode)))
7015 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
7017 else if (!TARGET_SHCOMPACT
7018 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7019 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
7021 return words * UNITS_PER_WORD;
7025 /* Define where to put the arguments to a function.
7026 Value is zero to push the argument on the stack,
7027 or a hard register in which to store the argument.
7029 MODE is the argument's machine mode.
7030 TYPE is the data type of the argument (as a tree).
7031 This is null for libcalls where that information may
7032 not be available.
7033 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7034 the preceding args and about the function being called.
7035 NAMED is nonzero if this argument is a named parameter
7036 (otherwise it is an extra parameter matching an ellipsis).
7038 On SH the first args are normally in registers
7039 and the rest are pushed. Any arg that starts within the first
7040 NPARM_REGS words is at least partially passed in a register unless
7041 its data type forbids. */
7045 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7046 tree type, int named)
7048 if (! TARGET_SH5 && mode == VOIDmode)
7049 return GEN_INT (ca->renesas_abi ? 1 : 0);
7051 if (! TARGET_SH5
7052 && PASS_IN_REG_P (*ca, mode, type)
7053 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
7055 int regno;
7057 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
7058 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
7060 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
7061 gen_rtx_REG (SFmode,
7062 BASE_ARG_REG (mode)
7063 + (ROUND_REG (*ca, mode) ^ 1)),
7064 const0_rtx);
7065 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
7066 gen_rtx_REG (SFmode,
7067 BASE_ARG_REG (mode)
7068 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
7069 GEN_INT (4));
7070 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
7073 /* If the alignment of a DF value causes an SF register to be
7074 skipped, we will use that skipped register for the next SF
7075 value. */
7076 if ((TARGET_HITACHI || ca->renesas_abi)
7077 && ca->free_single_fp_reg
7078 && mode == SFmode)
7079 return gen_rtx_REG (mode, ca->free_single_fp_reg);
7081 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
7082 ^ (mode == SFmode && TARGET_SH4
7083 && TARGET_LITTLE_ENDIAN != 0
7084 && ! TARGET_HITACHI && ! ca->renesas_abi);
7085 return gen_rtx_REG (mode, regno);
7089 if (TARGET_SH5)
7091 if (mode == VOIDmode && TARGET_SHCOMPACT)
7092 return GEN_INT (ca->call_cookie);
7094 /* The following test assumes unnamed arguments are promoted to
7095 DFmode. */
7096 if (mode == SFmode && ca->free_single_fp_reg)
7097 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
7099 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
7100 && (named || ! ca->prototype_p)
7101 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
7103 if (! ca->prototype_p && TARGET_SHMEDIA)
7104 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
7106 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
7107 FIRST_FP_PARM_REG
7108 + ca->arg_count[(int) SH_ARG_FLOAT]);
7111 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
7112 && (! TARGET_SHCOMPACT
7113 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
7114 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
7115 type, named))))
7117 return gen_rtx_REG (mode, (FIRST_PARM_REG
7118 + ca->arg_count[(int) SH_ARG_INT]));
7121 return 0;
7124 return 0;
7127 /* Update the data in CUM to advance over an argument
7128 of mode MODE and data type TYPE.
7129 (TYPE is null for libcalls where that information may not be
7130 available.) */
7132 void
7133 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7134 tree type, int named)
7136 if (ca->force_mem)
7137 ca->force_mem = 0;
7138 else if (TARGET_SH5)
7140 tree type2 = (ca->byref && type
7141 ? TREE_TYPE (type)
7142 : type);
7143 enum machine_mode mode2 = (ca->byref && type
7144 ? TYPE_MODE (type2)
7145 : mode);
7146 int dwords = ((ca->byref
7147 ? ca->byref
7148 : mode2 == BLKmode
7149 ? int_size_in_bytes (type2)
7150 : GET_MODE_SIZE (mode2)) + 7) / 8;
7151 int numregs = MIN (dwords, NPARM_REGS (SImode)
7152 - ca->arg_count[(int) SH_ARG_INT]);
7154 if (numregs)
7156 ca->arg_count[(int) SH_ARG_INT] += numregs;
7157 if (TARGET_SHCOMPACT
7158 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
7160 ca->call_cookie
7161 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7162 - numregs, 1);
7163 /* N.B. We want this also for outgoing. */
7164 ca->stack_regs += numregs;
7166 else if (ca->byref)
7168 if (! ca->outgoing)
7169 ca->stack_regs += numregs;
7170 ca->byref_regs += numregs;
7171 ca->byref = 0;
7173 ca->call_cookie
7174 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7175 - numregs, 2);
7176 while (--numregs);
7177 ca->call_cookie
7178 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7179 - 1, 1);
7181 else if (dwords > numregs)
7183 int pushregs = numregs;
7185 if (TARGET_SHCOMPACT)
7186 ca->stack_regs += numregs;
7187 while (pushregs < NPARM_REGS (SImode) - 1
7188 && (CALL_COOKIE_INT_REG_GET
7189 (ca->call_cookie,
7190 NPARM_REGS (SImode) - pushregs)
7191 == 1))
7193 ca->call_cookie
7194 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7195 - pushregs, 1);
7196 pushregs++;
7198 if (numregs == NPARM_REGS (SImode))
7199 ca->call_cookie
7200 |= CALL_COOKIE_INT_REG (0, 1)
7201 | CALL_COOKIE_STACKSEQ (numregs - 1);
7202 else
7203 ca->call_cookie
7204 |= CALL_COOKIE_STACKSEQ (numregs);
7207 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
7208 && (named || ! ca->prototype_p))
7210 if (mode2 == SFmode && ca->free_single_fp_reg)
7211 ca->free_single_fp_reg = 0;
7212 else if (ca->arg_count[(int) SH_ARG_FLOAT]
7213 < NPARM_REGS (SFmode))
7215 int numfpregs
7216 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
7217 NPARM_REGS (SFmode)
7218 - ca->arg_count[(int) SH_ARG_FLOAT]);
7220 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
7222 if (TARGET_SHCOMPACT && ! ca->prototype_p)
7224 if (ca->outgoing && numregs > 0)
7227 ca->call_cookie
7228 |= (CALL_COOKIE_INT_REG
7229 (ca->arg_count[(int) SH_ARG_INT]
7230 - numregs + ((numfpregs - 2) / 2),
7231 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
7232 - numfpregs) / 2));
7234 while (numfpregs -= 2);
7236 else if (mode2 == SFmode && (named)
7237 && (ca->arg_count[(int) SH_ARG_FLOAT]
7238 < NPARM_REGS (SFmode)))
7239 ca->free_single_fp_reg
7240 = FIRST_FP_PARM_REG - numfpregs
7241 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
7244 return;
7247 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
7249 /* Note that we've used the skipped register. */
7250 if (mode == SFmode && ca->free_single_fp_reg)
7252 ca->free_single_fp_reg = 0;
7253 return;
7255      /* When we have a DF after an SF, there's an SF register that gets
7256 skipped in order to align the DF value. We note this skipped
7257 register, because the next SF value will use it, and not the
7258 SF that follows the DF. */
7259 if (mode == DFmode
7260 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
7262 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
7263 + BASE_ARG_REG (mode));
7267 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
7268 || PASS_IN_REG_P (*ca, mode, type))
7269 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
7270 = (ROUND_REG (*ca, mode)
7271 + (mode == BLKmode
7272 ? ROUND_ADVANCE (int_size_in_bytes (type))
7273 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
7276 /* The Renesas calling convention doesn't quite fit into this scheme since
7277 the address is passed like an invisible argument, but one that is always
7278 passed in memory. */
7279 static rtx
7280 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
7282 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7283 return 0;
7284 return gen_rtx_REG (Pmode, 2);
7287 /* Worker function for TARGET_RETURN_IN_MEMORY. */
7289 static bool
7290 sh_return_in_memory (tree type, tree fndecl)
7292 if (TARGET_SH5)
7294 if (TYPE_MODE (type) == BLKmode)
7295 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
7296 else
7297 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
7299 else
7301 return (TYPE_MODE (type) == BLKmode
7302 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7303 && TREE_CODE (type) == RECORD_TYPE));
7307 /* We actually emit the code in sh_expand_prologue. We used to use
7308 a static variable to flag that we need to emit this code, but that
7309    doesn't work when inlining, when functions are deferred and then emitted
7310 later. Fortunately, we already have two flags that are part of struct
7311 function that tell if a function uses varargs or stdarg. */
7312 static void
7313 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
7314 enum machine_mode mode,
7315 tree type,
7316 int *pretend_arg_size,
7317 int second_time ATTRIBUTE_UNUSED)
7319 gcc_assert (current_function_stdarg);
7320 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7322 int named_parm_regs, anon_parm_regs;
7324 named_parm_regs = (ROUND_REG (*ca, mode)
7325 + (mode == BLKmode
7326 ? ROUND_ADVANCE (int_size_in_bytes (type))
7327 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
7328 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
7329 if (anon_parm_regs > 0)
7330 *pretend_arg_size = anon_parm_regs * 4;
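/* Unnamed arguments are passed differently from named ones on SH5, so
   use strict argument naming there.  */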
7334 static bool
7335 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
7337 return TARGET_SH5;
7340 static bool
7341 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
7343 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
7347 /* Define the offset between two registers, one to be eliminated, and
7348 the other its replacement, at the start of a routine. */
7351 initial_elimination_offset (int from, int to)
7353 int regs_saved;
7354 int regs_saved_rounding = 0;
7355 int total_saved_regs_space;
7356 int total_auto_space;
7357 int save_flags = target_flags;
7358 int copy_flags;
7359 HARD_REG_SET live_regs_mask;
7361 shmedia_space_reserved_for_target_registers = false;
7362 regs_saved = calc_live_regs (&live_regs_mask);
7363 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
7365 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
7367 shmedia_space_reserved_for_target_registers = true;
7368 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
7371 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
7372 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7373 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
7375 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
7376 copy_flags = target_flags;
7377 target_flags = save_flags;
7379 total_saved_regs_space = regs_saved + regs_saved_rounding;
7381 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7382 return total_saved_regs_space + total_auto_space
7383 + current_function_args_info.byref_regs * 8;
7385 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7386 return total_saved_regs_space + total_auto_space
7387 + current_function_args_info.byref_regs * 8;
7389 /* Initial gap between fp and sp is 0. */
7390 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7391 return 0;
7393 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7394 return rounded_frame_size (0);
7396 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7397 return rounded_frame_size (0);
7399 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
7400 && (to == HARD_FRAME_POINTER_REGNUM
7401 || to == STACK_POINTER_REGNUM));
7402 if (TARGET_SH5)
7404 int n = total_saved_regs_space;
7405 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7406 save_schedule schedule;
7407 save_entry *entry;
7409 n += total_auto_space;
7411 /* If it wasn't saved, there's not much we can do. */
7412 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7413 return n;
7415 target_flags = copy_flags;
7417 sh5_schedule_saves (&live_regs_mask, &schedule, n);
7418 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7419 if (entry->reg == pr_reg)
7421 target_flags = save_flags;
7422 return entry->offset;
7424 gcc_unreachable ();
7426 else
7427 return total_auto_space;
7430 /* Handle machine-specific pragmas to be semi-compatible with the Renesas
7431    compiler.  */
7433 void
7434 sh_pr_interrupt (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7436 pragma_interrupt = 1;
7439 void
7440 sh_pr_trapa (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7442 pragma_interrupt = pragma_trapa = 1;
7445 void
7446 sh_pr_nosave_low_regs (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7448 pragma_nosave_low_regs = 1;
7451 /* Generate an 'interrupt_handler' attribute for decls.  */
7453 static void
7454 sh_insert_attributes (tree node, tree *attributes)
7456 if (! pragma_interrupt
7457 || TREE_CODE (node) != FUNCTION_DECL)
7458 return;
7460   /* We are only interested in declarations.  */
7461 if (!DECL_P (node))
7462 return;
7464   /* Add an 'interrupt_handler' attribute.  */
7465 * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
7467 return;
7470 /* Supported attributes:
7472 interrupt_handler -- specifies this function is an interrupt handler.
7474 sp_switch -- specifies an alternate stack for an interrupt handler
7475 to run on.
7477 trap_exit -- use a trapa to exit an interrupt function instead of
7478 an rte instruction.
7480 renesas -- use Renesas calling/layout conventions (functions and
7481 structures).
7485 const struct attribute_spec sh_attribute_table[] =
7487 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
7488 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7489 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
7490 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
7491 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
7492 #ifdef SYMBIAN
7493 /* Symbian support adds three new attributes:
7494 dllexport - for exporting a function/variable that will live in a dll
7495 dllimport - for importing a function/variable from a dll
7497 Microsoft allows multiple declspecs in one __declspec, separating
7498 them with spaces. We do NOT support this. Instead, use __declspec
7499 multiple times. */
7500 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7501 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7502 #endif
7503 { NULL, 0, 0, false, false, false, NULL }
7506 /* Handle an "interrupt_handler" attribute; arguments as in
7507 struct attribute_spec.handler. */
7508 static tree
7509 sh_handle_interrupt_handler_attribute (tree *node, tree name,
7510 tree args ATTRIBUTE_UNUSED,
7511 int flags ATTRIBUTE_UNUSED,
7512 bool *no_add_attrs)
7514 if (TREE_CODE (*node) != FUNCTION_DECL)
7516 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7517 IDENTIFIER_POINTER (name));
7518 *no_add_attrs = true;
7520 else if (TARGET_SHCOMPACT)
7522 error ("attribute interrupt_handler is not compatible with -m5-compact");
7523 *no_add_attrs = true;
7526 return NULL_TREE;
7529 /* Handle an "sp_switch" attribute; arguments as in
7530 struct attribute_spec.handler. */
7531 static tree
7532 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
7533 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7535 if (TREE_CODE (*node) != FUNCTION_DECL)
7537 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7538 IDENTIFIER_POINTER (name));
7539 *no_add_attrs = true;
7541 else if (!pragma_interrupt)
7543 /* The sp_switch attribute only has meaning for interrupt functions. */
7544 warning (OPT_Wattributes, "%qs attribute only applies to "
7545 "interrupt functions", IDENTIFIER_POINTER (name));
7546 *no_add_attrs = true;
7548 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
7550 /* The argument must be a constant string. */
7551 warning (OPT_Wattributes, "%qs attribute argument not a string constant",
7552 IDENTIFIER_POINTER (name));
7553 *no_add_attrs = true;
7555 else
7557 const char *s = ggc_strdup (TREE_STRING_POINTER (TREE_VALUE (args)));
7558 sp_switch = gen_rtx_SYMBOL_REF (VOIDmode, s);
7561 return NULL_TREE;
7564 /* Handle a "trap_exit" attribute; arguments as in
7565 struct attribute_spec.handler. */
7566 static tree
7567 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
7568 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7570 if (TREE_CODE (*node) != FUNCTION_DECL)
7572 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7573 IDENTIFIER_POINTER (name));
7574 *no_add_attrs = true;
7576 else if (!pragma_interrupt)
7578 /* The trap_exit attribute only has meaning for interrupt functions. */
7579 warning (OPT_Wattributes, "%qs attribute only applies to "
7580 "interrupt functions", IDENTIFIER_POINTER (name));
7581 *no_add_attrs = true;
7583 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
7585 /* The argument must be a constant integer. */
7586 warning (OPT_Wattributes, "%qs attribute argument not an "
7587 "integer constant", IDENTIFIER_POINTER (name));
7588 *no_add_attrs = true;
7590 else
7592 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
7595 return NULL_TREE;
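/* Handle a "renesas" attribute; arguments as in
   struct attribute_spec.handler.  */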
7598 static tree
7599 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
7600 tree name ATTRIBUTE_UNUSED,
7601 tree args ATTRIBUTE_UNUSED,
7602 int flags ATTRIBUTE_UNUSED,
7603 bool *no_add_attrs ATTRIBUTE_UNUSED)
7605 return NULL_TREE;
7608 /* True if __attribute__((renesas)) or -mrenesas. */
7610 sh_attr_renesas_p (tree td)
7612 if (TARGET_HITACHI)
7613 return 1;
7614 if (td == 0)
7615 return 0;
7616 if (DECL_P (td))
7617 td = TREE_TYPE (td);
7618 if (td == error_mark_node)
7619 return 0;
7620 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
7621 != NULL_TREE);
7624 /* True if __attribute__((renesas)) or -mrenesas, for the current
7625 function. */
7627 sh_cfun_attr_renesas_p (void)
7629 return sh_attr_renesas_p (current_function_decl);
7633 sh_cfun_interrupt_handler_p (void)
7635 return (lookup_attribute ("interrupt_handler",
7636 DECL_ATTRIBUTES (current_function_decl))
7637 != NULL_TREE);
7640 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
7642 static const char *
7643 sh_check_pch_target_flags (int old_flags)
7645 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
7646 | MASK_SH_E | MASK_HARD_SH4
7647 | MASK_FPU_SINGLE | MASK_SH4))
7648 return _("created and used with different architectures / ABIs");
7649 if ((old_flags ^ target_flags) & MASK_HITACHI)
7650 return _("created and used with different ABIs");
7651 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
7652 return _("created and used with different endianness");
7653 return NULL;
7656 /* Predicates used by the templates. */
7658 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
7659 Used only in general_movsrc_operand. */
7662 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7664 switch (REGNO (op))
7666 case PR_REG:
7667 case MACL_REG:
7668 case MACH_REG:
7669 return 1;
7671 return 0;
7674 /* Nonzero if OP is a floating point value with value 0.0. */
7677 fp_zero_operand (rtx op)
7679 REAL_VALUE_TYPE r;
7681 if (GET_MODE (op) != SFmode)
7682 return 0;
7684 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7685 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
7688 /* Nonzero if OP is a floating point value with value 1.0. */
7691 fp_one_operand (rtx op)
7693 REAL_VALUE_TYPE r;
7695 if (GET_MODE (op) != SFmode)
7696 return 0;
7698 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7699 return REAL_VALUES_EQUAL (r, dconst1);
7702 /* For -m4 and -m4-single-only, mode switching is used. If we are
7703 compiling without -mfmovd, movsf_ie isn't taken into account for
7704 mode switching. We could check in machine_dependent_reorg for
7705 cases where we know we are in single precision mode, but there is
7706 no interface to find that out during reload, so we must avoid
7707 choosing an fldi alternative during reload and thus failing to
7708 allocate a scratch register for the constant loading. */
7710 fldi_ok (void)
7712 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
7716 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7718 enum rtx_code code = GET_CODE (op);
7719 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
7722 /* Return the TLS type for TLS symbols, 0 for otherwise. */
7724 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7726 if (GET_CODE (op) != SYMBOL_REF)
7727 return 0;
7728 return SYMBOL_REF_TLS_MODEL (op);
7731 /* Return the destination address of a branch. */
7733 static int
7734 branch_dest (rtx branch)
7736 rtx dest = SET_SRC (PATTERN (branch));
7737 int dest_uid;
7739 if (GET_CODE (dest) == IF_THEN_ELSE)
7740 dest = XEXP (dest, 1);
7741 dest = XEXP (dest, 0);
7742 dest_uid = INSN_UID (dest);
7743 return INSN_ADDRESSES (dest_uid);
7746 /* Return nonzero if REG is not used after INSN.
7747 We assume REG is a reload reg, and therefore does
7748 not live past labels. It may live past calls or jumps though. */
7750 reg_unused_after (rtx reg, rtx insn)
7752 enum rtx_code code;
7753 rtx set;
7755 /* If the reg is set by this instruction, then it is safe for our
7756 case. Disregard the case where this is a store to memory, since
7757 we are checking a register used in the store address. */
7758 set = single_set (insn);
7759 if (set && GET_CODE (SET_DEST (set)) != MEM
7760 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7761 return 1;
7763 while ((insn = NEXT_INSN (insn)))
7765 rtx set;
7766 if (!INSN_P (insn))
7767 continue;
7769 code = GET_CODE (insn);
7771 #if 0
7772 /* If this is a label that existed before reload, then the register
7773 is dead here. However, if this is a label added by reorg, then
7774 the register may still be live here. We can't tell the difference,
7775 so we just ignore labels completely. */
7776 if (code == CODE_LABEL)
7777 return 1;
7778 /* else */
7779 #endif
7781 if (code == JUMP_INSN)
7782 return 0;
7784 /* If this is a sequence, we must handle them all at once.
7785 We could have for instance a call that sets the target register,
7786 and an insn in a delay slot that uses the register. In this case,
7787 we must return 0. */
7788 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
7790 int i;
7791 int retval = 0;
7793 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
7795 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
7796 rtx set = single_set (this_insn);
7798 if (GET_CODE (this_insn) == CALL_INSN)
7799 code = CALL_INSN;
7800 else if (GET_CODE (this_insn) == JUMP_INSN)
7802 if (INSN_ANNULLED_BRANCH_P (this_insn))
7803 return 0;
7804 code = JUMP_INSN;
7807 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7808 return 0;
7809 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7811 if (GET_CODE (SET_DEST (set)) != MEM)
7812 retval = 1;
7813 else
7814 return 0;
7816 if (set == 0
7817 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
7818 return 0;
7820 if (retval == 1)
7821 return 1;
7822 else if (code == JUMP_INSN)
7823 return 0;
7826 set = single_set (insn);
7827 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7828 return 0;
7829 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7830 return GET_CODE (SET_DEST (set)) != MEM;
7831 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
7832 return 0;
7834 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
7835 return 1;
7837 return 1;
7840 #include "ggc.h"
7842 static GTY(()) rtx fpscr_rtx;
7844 get_fpscr_rtx (void)
7846 if (! fpscr_rtx)
7848 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
7849 REG_USERVAR_P (fpscr_rtx) = 1;
7850 mark_user_reg (fpscr_rtx);
7852 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
7853 mark_user_reg (fpscr_rtx);
7854 return fpscr_rtx;
7857 static GTY(()) tree fpscr_values;
7859 static void
7860 emit_fpu_switch (rtx scratch, int index)
7862 rtx dst, src;
7864 if (fpscr_values == NULL)
7866 tree t;
7868 t = build_index_type (integer_one_node);
7869 t = build_array_type (integer_type_node, t);
7870 t = build_decl (VAR_DECL, get_identifier ("__fpscr_values"), t);
7871 DECL_ARTIFICIAL (t) = 1;
7872 DECL_IGNORED_P (t) = 1;
7873 DECL_EXTERNAL (t) = 1;
7874 TREE_STATIC (t) = 1;
7875 TREE_USED (t) = 1;
7877 fpscr_values = t;
7880 src = DECL_RTL (fpscr_values);
7881 if (no_new_pseudos)
7883 emit_move_insn (scratch, XEXP (src, 0));
7884 if (index != 0)
7885 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
7886 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
7888 else
7889 src = adjust_address (src, PSImode, index * 4);
7891 dst = get_fpscr_rtx ();
7892 emit_move_insn (dst, src);
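/* A hedged sketch of the external table referenced above (its definition
   is assumed to come from the runtime support code, not from this file):

     extern int __fpscr_values[2];

   The two elements are assumed to hold the FPSCR settings for the two FP
   modes; emit_fpu_switch loads element INDEX into the fpscr register,
   going through SCRATCH to form the address when no new pseudos may be
   created.  */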
7895 void
7896 emit_sf_insn (rtx pat)
7898 emit_insn (pat);
7901 void
7902 emit_df_insn (rtx pat)
7904 emit_insn (pat);
7907 void
7908 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
7910 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7913 void
7914 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
7916 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
7917 get_fpscr_rtx ()));
7920 void
7921 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
7923 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7926 void
7927 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
7929 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
7930 get_fpscr_rtx ()));
7933 /* ??? gcc does flow analysis strictly after common subexpression
7934 elimination. As a result, common subexpression elimination fails
7935 when there are some intervening statements setting the same register.
7936 If we did nothing about this, this would hurt the precision switching
7937 for SH4 badly. There is some cse after reload, but it is unable to
7938 undo the extra register pressure from the unused instructions, and
7939 it cannot remove auto-increment loads.
7941 A C code example that shows this flow/cse weakness for (at least) SH
7942 and sparc (as of gcc ss-970706) is this:
7944 double
7945 f(double a)
7947 double d;
7948 d = 0.1;
7949 a += d;
7950 d = 1.1;
7951 d = 0.1;
7952 a *= d;
7953 return a;
7956 So we add another pass before common subexpression elimination, to
7957 remove assignments that are dead due to a following assignment in the
7958 same basic block. */
7960 static void
7961 mark_use (rtx x, rtx *reg_set_block)
7963 enum rtx_code code;
7965 if (! x)
7966 return;
7967 code = GET_CODE (x);
7968 switch (code)
7970 case REG:
7972 int regno = REGNO (x);
7973 int nregs = (regno < FIRST_PSEUDO_REGISTER
7974 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
7975 : 1);
7978 reg_set_block[regno + nregs - 1] = 0;
7980 while (--nregs);
7981 break;
7983 case SET:
7985 rtx dest = SET_DEST (x);
7987 if (GET_CODE (dest) == SUBREG)
7988 dest = SUBREG_REG (dest);
7989 if (GET_CODE (dest) != REG)
7990 mark_use (dest, reg_set_block);
7991 mark_use (SET_SRC (x), reg_set_block);
7992 break;
7994 case CLOBBER:
7995 break;
7996 default:
7998 const char *fmt = GET_RTX_FORMAT (code);
7999 int i, j;
8000 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8002 if (fmt[i] == 'e')
8003 mark_use (XEXP (x, i), reg_set_block);
8004 else if (fmt[i] == 'E')
8005 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8006 mark_use (XVECEXP (x, i, j), reg_set_block);
8008 break;
8013 static rtx get_free_reg (HARD_REG_SET);
8015 /* This function returns a register to use for loading the address from
8016 which the fpscr is loaded. Currently it always returns r1 or r7, but when we are
8017 able to use pseudo registers after combine, or have a better mechanism
8018 for choosing a register, it should be done here. */
8019 /* REGS_LIVE is the liveness information for the point for which we
8020 need this allocation. In some bare-bones exit blocks, r1 is live at the
8021 start. We can even have all of r0..r3 being live:
8022 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8023 The insn before which new insns are placed will clobber the register
8024 we return. If a basic block consists only of setting the return value
8025 register to a pseudo and using that register, the return value is not
8026 live before or after this block, yet we'll insert our insns right in
8027 the middle. */
8029 static rtx
8030 get_free_reg (HARD_REG_SET regs_live)
8032 if (! TEST_HARD_REG_BIT (regs_live, 1))
8033 return gen_rtx_REG (Pmode, 1);
8035 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8036 there shouldn't be anything but a jump before the function end. */
8037 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
8038 return gen_rtx_REG (Pmode, 7);
8041 /* This function will set the fpscr from memory.
8042 MODE is the mode we are setting it to. */
8043 void
8044 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8046 enum attr_fp_mode fp_mode = mode;
8047 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
8048 rtx addr_reg = get_free_reg (regs_live);
8050 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
8053 /* Is the given character a logical line separator for the assembler? */
8054 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8055 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
8056 #endif
8059 sh_insn_length_adjustment (rtx insn)
8061 /* Instructions with unfilled delay slots take up an extra two bytes for
8062 the nop in the delay slot. */
8063 if (((GET_CODE (insn) == INSN
8064 && GET_CODE (PATTERN (insn)) != USE
8065 && GET_CODE (PATTERN (insn)) != CLOBBER)
8066 || GET_CODE (insn) == CALL_INSN
8067 || (GET_CODE (insn) == JUMP_INSN
8068 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8069 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8070 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8071 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8072 return 2;
8074 /* SH2e has a bug that prevents the use of annulled branches, so if
8075 the delay slot is not filled, we'll have to put a NOP in it. */
8076 if (sh_cpu == CPU_SH2E
8077 && GET_CODE (insn) == JUMP_INSN
8078 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8079 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8080 && get_attr_type (insn) == TYPE_CBRANCH
8081 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8082 return 2;
8084 /* sh-dsp parallel processing insns take four bytes instead of two. */
8086 if (GET_CODE (insn) == INSN)
8088 int sum = 0;
8089 rtx body = PATTERN (insn);
8090 const char *template;
8091 char c;
8092 int maybe_label = 1;
8094 if (GET_CODE (body) == ASM_INPUT)
8095 template = XSTR (body, 0);
8096 else if (asm_noperands (body) >= 0)
8097 template
8098 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
8099 else
8100 return 0;
8103 int ppi_adjust = 0;
8106 c = *template++;
8107 while (c == ' ' || c == '\t');
8108 /* all sh-dsp parallel-processing insns start with p.
8109 The only non-ppi sh insn starting with p is pref.
8110 The only ppi starting with pr is prnd. */
8111 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
8112 ppi_adjust = 2;
8113 /* The repeat pseudo-insn expands to three insns, a total of
8114 six bytes in size. */
8115 else if ((c == 'r' || c == 'R')
8116 && ! strncasecmp ("epeat", template, 5))
8117 ppi_adjust = 4;
8118 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
8120 /* If this is a label, it is obviously not a ppi insn. */
8121 if (c == ':' && maybe_label)
8123 ppi_adjust = 0;
8124 break;
8126 else if (c == '\'' || c == '"')
8127 maybe_label = 0;
8128 c = *template++;
8130 sum += ppi_adjust;
8131 maybe_label = c != ':';
8133 while (c);
8134 return sum;
8136 return 0;
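/* A worked illustration of the adjustment above (the asm text is a
   hypothetical sh-dsp example, not taken from this file): for

     asm ("padd x0,y0,a0");

   the scan sees a leading 'p' that is not followed by "re" (so it is not
   a pref), and adds two bytes for the 32-bit parallel-processing
   encoding; a "repeat" pseudo-insn adds four bytes instead.  */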
8139 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8140 isn't protected by a PIC unspec. */
8142 nonpic_symbol_mentioned_p (rtx x)
8144 register const char *fmt;
8145 register int i;
8147 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8148 || GET_CODE (x) == PC)
8149 return 1;
8151 /* We don't want to look into the possible MEM location of a
8152 CONST_DOUBLE, since we're not going to use it, in general. */
8153 if (GET_CODE (x) == CONST_DOUBLE)
8154 return 0;
8156 if (GET_CODE (x) == UNSPEC
8157 && (XINT (x, 1) == UNSPEC_PIC
8158 || XINT (x, 1) == UNSPEC_GOT
8159 || XINT (x, 1) == UNSPEC_GOTOFF
8160 || XINT (x, 1) == UNSPEC_GOTPLT
8161 || XINT (x, 1) == UNSPEC_GOTTPOFF
8162 || XINT (x, 1) == UNSPEC_DTPOFF
8163 || XINT (x, 1) == UNSPEC_PLT))
8164 return 0;
8166 fmt = GET_RTX_FORMAT (GET_CODE (x));
8167 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8169 if (fmt[i] == 'E')
8171 register int j;
8173 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8174 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8175 return 1;
8177 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8178 return 1;
8181 return 0;
8184 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8185 @GOTOFF in `reg'. */
8187 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8188 rtx reg)
8190 if (tls_symbolic_operand (orig, Pmode))
8191 return orig;
8193 if (GET_CODE (orig) == LABEL_REF
8194 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8196 if (reg == 0)
8197 reg = gen_reg_rtx (Pmode);
8199 emit_insn (gen_symGOTOFF2reg (reg, orig));
8200 return reg;
8202 else if (GET_CODE (orig) == SYMBOL_REF)
8204 if (reg == 0)
8205 reg = gen_reg_rtx (Pmode);
8207 emit_insn (gen_symGOT2reg (reg, orig));
8208 return reg;
8210 return orig;
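/* For illustration (a hedged example, not from this file): with -fPIC,
   given

     static int x;  int *lf (void) { return &x; }
     extern int y;  int *gf (void) { return &y; }

   the address of x is local (SYMBOL_REF_LOCAL_P), so the code above emits
   a symGOTOFF2reg load using a @GOTOFF reference, while the address of y
   goes through symGOT2reg and a @GOT slot.  */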
8213 /* Mark the use of a constant in the literal table. If the constant
8214 has multiple labels, make it unique. */
8215 static rtx
8216 mark_constant_pool_use (rtx x)
8218 rtx insn, lab, pattern;
8220 if (x == NULL)
8221 return x;
8223 switch (GET_CODE (x))
8225 case LABEL_REF:
8226 x = XEXP (x, 0);
8227 case CODE_LABEL:
8228 break;
8229 default:
8230 return x;
8233 /* Get the first label in the list of labels for the same constant
8234 and delete the other labels in the list. */
8235 lab = x;
8236 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8238 if (GET_CODE (insn) != CODE_LABEL
8239 || LABEL_REFS (insn) != NEXT_INSN (insn))
8240 break;
8241 lab = insn;
8244 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8245 INSN_DELETED_P (insn) = 1;
8247 /* Mark constants in a window. */
8248 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8250 if (GET_CODE (insn) != INSN)
8251 continue;
8253 pattern = PATTERN (insn);
8254 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8255 continue;
8257 switch (XINT (pattern, 1))
8259 case UNSPECV_CONST2:
8260 case UNSPECV_CONST4:
8261 case UNSPECV_CONST8:
8262 XVECEXP (pattern, 0, 1) = const1_rtx;
8263 break;
8264 case UNSPECV_WINDOW_END:
8265 if (XVECEXP (pattern, 0, 0) == x)
8266 return lab;
8267 break;
8268 case UNSPECV_CONST_END:
8269 return lab;
8270 default:
8271 break;
8275 return lab;
8278 /* Return true if it's possible to redirect BRANCH1 to the destination
8279 of an unconditional jump BRANCH2. We only want to do this if the
8280 resulting branch will have a short displacement. */
8282 sh_can_redirect_branch (rtx branch1, rtx branch2)
8284 if (flag_expensive_optimizations && simplejump_p (branch2))
8286 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8287 rtx insn;
8288 int distance;
8290 for (distance = 0, insn = NEXT_INSN (branch1);
8291 insn && distance < 256;
8292 insn = PREV_INSN (insn))
8294 if (insn == dest)
8295 return 1;
8296 else
8297 distance += get_attr_length (insn);
8299 for (distance = 0, insn = NEXT_INSN (branch1);
8300 insn && distance < 256;
8301 insn = NEXT_INSN (insn))
8303 if (insn == dest)
8304 return 1;
8305 else
8306 distance += get_attr_length (insn);
8309 return 0;
8312 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8314 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8315 unsigned int new_reg)
8317 /* Interrupt functions can only use registers that have already been
8318 saved by the prologue, even if they would normally be
8319 call-clobbered. */
8321 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
8322 return 0;
8324 return 1;
8327 /* Function to update the integer COST
8328 based on the relationship between INSN that is dependent on
8329 DEP_INSN through the dependence LINK. The default is to make no
8330 adjustment to COST. This can be used for example to specify to
8331 the scheduler that an output- or anti-dependence does not incur
8332 the same cost as a data-dependence. The return value should be
8333 the new value for COST. */
8334 static int
8335 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8337 rtx reg, use_pat;
8339 if (TARGET_SHMEDIA)
8341 /* On SHmedia, if the dependence is an anti-dependence or
8342 output-dependence, there is no cost. */
8343 if (REG_NOTE_KIND (link) != 0)
8345 /* However, dependencies between target register loads and
8346 uses of the register in a subsequent block that are separated
8347 by a conditional branch are not modelled - we have to make do with
8348 the anti-dependency between the target register load and the
8349 conditional branch that ends the current block. */
8350 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8351 && GET_CODE (PATTERN (dep_insn)) == SET
8352 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
8353 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
8354 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
8356 int orig_cost = cost;
8357 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
8358 rtx target = ((! note
8359 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
8360 ? insn : JUMP_LABEL (insn));
8361 /* On the likely path, the branch costs 1, on the unlikely path,
8362 it costs 3. */
8363 cost--;
8365 target = next_active_insn (target);
8366 while (target && ! flow_dependent_p (target, dep_insn)
8367 && --cost > 0);
8368 /* If two branches are executed in immediate succession, with the
8369 first branch properly predicted, this causes a stall at the
8370 second branch, hence we won't need the target for the
8371 second branch for two cycles after the launch of the first
8372 branch. */
8373 if (cost > orig_cost - 2)
8374 cost = orig_cost - 2;
8376 else
8377 cost = 0;
8380 else if (get_attr_is_mac_media (insn)
8381 && get_attr_is_mac_media (dep_insn))
8382 cost = 1;
8384 else if (! reload_completed
8385 && GET_CODE (PATTERN (insn)) == SET
8386 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
8387 && GET_CODE (PATTERN (dep_insn)) == SET
8388 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
8389 && cost < 4)
8390 cost = 4;
8391 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
8392 that is needed at the target. */
8393 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
8394 && ! flow_dependent_p (insn, dep_insn))
8395 cost--;
8397 else if (REG_NOTE_KIND (link) == 0)
8399 enum attr_type dep_type, type;
8401 if (recog_memoized (insn) < 0
8402 || recog_memoized (dep_insn) < 0)
8403 return cost;
8405 dep_type = get_attr_type (dep_insn);
8406 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
8407 cost--;
8408 if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
8409 && (type = get_attr_type (insn)) != TYPE_CALL
8410 && type != TYPE_SFUNC)
8411 cost--;
8413 /* The only input for a call that is timing-critical is the
8414 function's address. */
8415 if (GET_CODE(insn) == CALL_INSN)
8417 rtx call = PATTERN (insn);
8419 if (GET_CODE (call) == PARALLEL)
8420 call = XVECEXP (call, 0 ,0);
8421 if (GET_CODE (call) == SET)
8422 call = SET_SRC (call);
8423 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
8424 /* sibcalli_thunk uses a symbol_ref in an unspec. */
8425 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
8426 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
8427 cost = 0;
8429 /* Likewise, the most timing-critical input for an sfunc call
8430 is the function address. However, sfuncs typically start
8431 using their arguments pretty quickly.
8432 Assume a four cycle delay before they are needed. */
8433 /* All sfunc calls are parallels with at least four components.
8434 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8435 else if (GET_CODE (PATTERN (insn)) == PARALLEL
8436 && XVECLEN (PATTERN (insn), 0) >= 4
8437 && (reg = sfunc_uses_reg (insn)))
8439 if (! reg_set_p (reg, dep_insn))
8440 cost -= 4;
8442 /* When the preceding instruction loads the shift amount of
8443 the following SHAD/SHLD, the latency of the load is increased
8444 by 1 cycle. */
8445 else if (TARGET_SH4
8446 && get_attr_type (insn) == TYPE_DYN_SHIFT
8447 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
8448 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
8449 XEXP (SET_SRC (single_set (insn)),
8450 1)))
8451 cost++;
8452 /* When an LS group instruction with a latency of less than
8453 3 cycles is followed by a double-precision floating-point
8454 instruction, FIPR, or FTRV, the latency of the first
8455 instruction is increased to 3 cycles. */
8456 else if (cost < 3
8457 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
8458 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
8459 cost = 3;
8460 /* The lsw register of a double-precision computation is ready one
8461 cycle earlier. */
8462 else if (reload_completed
8463 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
8464 && (use_pat = single_set (insn))
8465 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
8466 SET_SRC (use_pat)))
8467 cost -= 1;
8469 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
8470 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
8471 cost -= 1;
8473 /* An anti-dependence penalty of two applies if the first insn is a double
8474 precision fadd / fsub / fmul. */
8475 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8476 && recog_memoized (dep_insn) >= 0
8477 && get_attr_type (dep_insn) == TYPE_DFP_ARITH
8478 /* A lot of alleged anti-flow dependences are fake,
8479 so check this one is real. */
8480 && flow_dependent_p (dep_insn, insn))
8481 cost = 2;
8484 return cost;
8487 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
8488 if DEP_INSN is anti-flow dependent on INSN. */
8489 static int
8490 flow_dependent_p (rtx insn, rtx dep_insn)
8492 rtx tmp = PATTERN (insn);
8494 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
8495 return tmp == NULL_RTX;
8498 /* A helper function for flow_dependent_p called through note_stores. */
8499 static void
8500 flow_dependent_p_1 (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
8502 rtx * pinsn = (rtx *) data;
8504 if (*pinsn && reg_referenced_p (x, *pinsn))
8505 *pinsn = NULL_RTX;
8508 /* For use by sh_allocate_initial_value. Note that sh.md contains some
8509 'special function' patterns (type sfunc) that clobber pr, but that
8510 do not look like function calls to leaf_function_p. Hence we must
8511 do this extra check. */
8512 static int
8513 sh_pr_n_sets (void)
8515 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
8518 /* Return where to allocate pseudo for a given hard register initial
8519 value. */
8520 static rtx
8521 sh_allocate_initial_value (rtx hard_reg)
8523 rtx x;
8525 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
8527 if (current_function_is_leaf
8528 && ! sh_pr_n_sets ()
8529 && ! (TARGET_SHCOMPACT
8530 && ((current_function_args_info.call_cookie
8531 & ~ CALL_COOKIE_RET_TRAMP (1))
8532 || current_function_has_nonlocal_label)))
8533 x = hard_reg;
8534 else
8535 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
8537 else
8538 x = NULL_RTX;
8540 return x;
8543 /* This function returns "2" to indicate dual issue for the SH4
8544 processor. To be used by the DFA pipeline description. */
8545 static int
8546 sh_issue_rate (void)
8548 if (TARGET_SUPERSCALAR)
8549 return 2;
8550 else
8551 return 1;
8554 /* Functions for ready queue reordering for sched1. */
8556 /* Get weight for mode for a set x. */
8557 static short
8558 find_set_regmode_weight (rtx x, enum machine_mode mode)
8560 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
8561 return 1;
8562 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
8564 if (GET_CODE (SET_DEST (x)) == REG)
8566 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
8567 return 1;
8568 else
8569 return 0;
8571 return 1;
8573 return 0;
8576 /* Get regmode weight for insn. */
8577 static short
8578 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
8580 short reg_weight = 0;
8581 rtx x;
8583 /* Increment weight for each register born here. */
8584 x = PATTERN (insn);
8585 reg_weight += find_set_regmode_weight (x, mode);
8586 if (GET_CODE (x) == PARALLEL)
8588 int j;
8589 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
8591 x = XVECEXP (PATTERN (insn), 0, j);
8592 reg_weight += find_set_regmode_weight (x, mode);
8595 /* Decrement weight for each register that dies here. */
8596 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
8598 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
8600 rtx note = XEXP (x, 0);
8601 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
8602 reg_weight--;
8605 return reg_weight;
8608 /* Calculate regmode weights for all insns of a basic block. */
8609 static void
8610 find_regmode_weight (int b, enum machine_mode mode)
8612 rtx insn, next_tail, head, tail;
8614 get_block_head_tail (b, &head, &tail);
8615 next_tail = NEXT_INSN (tail);
8617 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
8619 /* Handle register life information. */
8620 if (!INSN_P (insn))
8621 continue;
8623 if (mode == SFmode)
8624 INSN_REGMODE_WEIGHT (insn, mode) =
8625 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
8626 else if (mode == SImode)
8627 INSN_REGMODE_WEIGHT (insn, mode) =
8628 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
8632 /* Comparison function for ready queue sorting. */
8633 static int
8634 rank_for_reorder (const void *x, const void *y)
8636 rtx tmp = *(const rtx *) y;
8637 rtx tmp2 = *(const rtx *) x;
8639 /* The insn in a schedule group should be issued first. */
8640 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
8641 return SCHED_GROUP_P (tmp2) ? 1 : -1;
8643 /* If insns are equally good, sort by INSN_LUID (original insn order). This
8644 minimizes instruction movement, thus minimizing sched's effect on
8645 register pressure. */
8646 return INSN_LUID (tmp) - INSN_LUID (tmp2);
8649 /* Resort the array A in which only element at index N may be out of order. */
8650 static void
8651 swap_reorder (rtx *a, int n)
8653 rtx insn = a[n - 1];
8654 int i = n - 2;
8656 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
8658 a[i + 1] = a[i];
8659 i -= 1;
8661 a[i + 1] = insn;
8664 #define SCHED_REORDER(READY, N_READY) \
8665 do \
8667 if ((N_READY) == 2) \
8668 swap_reorder (READY, N_READY); \
8669 else if ((N_READY) > 2) \
8670 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
8672 while (0)
8674 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
8675 macro. */
8676 static void
8677 ready_reorder (rtx *ready, int nready)
8679 SCHED_REORDER (ready, nready);
8682 /* Calculate regmode weights for all insns of all basic blocks. */
8683 static void
8684 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
8685 int verbose ATTRIBUTE_UNUSED,
8686 int old_max_uid)
8688 basic_block b;
8690 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
8691 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
8693 FOR_EACH_BB_REVERSE (b)
8695 find_regmode_weight (b->index, SImode);
8696 find_regmode_weight (b->index, SFmode);
8699 CURR_REGMODE_PRESSURE (SImode) = 0;
8700 CURR_REGMODE_PRESSURE (SFmode) = 0;
8704 /* Cleanup. */
8705 static void
8706 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
8707 int verbose ATTRIBUTE_UNUSED)
8709 if (regmode_weight[0])
8711 free (regmode_weight[0]);
8712 regmode_weight[0] = NULL;
8714 if (regmode_weight[1])
8716 free (regmode_weight[1]);
8717 regmode_weight[1] = NULL;
8721 /* Cache the can_issue_more so that we can return it from reorder2. Also,
8722 keep count of register pressures on SImode and SFmode. */
8723 static int
8724 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
8725 int sched_verbose ATTRIBUTE_UNUSED,
8726 rtx insn,
8727 int can_issue_more)
8729 if (GET_CODE (PATTERN (insn)) != USE
8730 && GET_CODE (PATTERN (insn)) != CLOBBER)
8731 cached_can_issue_more = can_issue_more - 1;
8732 else
8733 cached_can_issue_more = can_issue_more;
8735 if (reload_completed)
8736 return cached_can_issue_more;
8738 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
8739 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
8741 return cached_can_issue_more;
8744 static void
8745 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
8746 int verbose ATTRIBUTE_UNUSED,
8747 int veclen ATTRIBUTE_UNUSED)
8749 CURR_REGMODE_PRESSURE (SImode) = 0;
8750 CURR_REGMODE_PRESSURE (SFmode) = 0;
8753 /* Some magic numbers. */
8754 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8755 functions that already have high pressure on r0. */
8756 #define R0_MAX_LIFE_REGIONS 2
8757 #define R0_MAX_LIVE_LENGTH 12
8758 /* Register Pressure thresholds for SImode and SFmode registers. */
8759 #define SIMODE_MAX_WEIGHT 5
8760 #define SFMODE_MAX_WEIGHT 10
8762 /* Return true if the pressure is high for MODE. */
8763 static short
8764 high_pressure (enum machine_mode mode)
8766 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8767 functions that already have high pressure on r0. */
8768 if ((REG_N_SETS (0) - REG_N_DEATHS (0)) >= R0_MAX_LIFE_REGIONS
8769 && REG_LIVE_LENGTH (0) >= R0_MAX_LIVE_LENGTH)
8770 return 1;
8772 if (mode == SFmode)
8773 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
8774 else
8775 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
8778 /* Reorder ready queue if register pressure is high. */
8779 static int
8780 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
8781 int sched_verbose ATTRIBUTE_UNUSED,
8782 rtx *ready,
8783 int *n_readyp,
8784 int clock_var ATTRIBUTE_UNUSED)
8786 if (reload_completed)
8787 return sh_issue_rate ();
8789 if (high_pressure (SFmode) || high_pressure (SImode))
8791 ready_reorder (ready, *n_readyp);
8794 return sh_issue_rate ();
8797 /* Skip cycles if the current register pressure is high. */
8798 static int
8799 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
8800 int sched_verbose ATTRIBUTE_UNUSED,
8801 rtx *ready ATTRIBUTE_UNUSED,
8802 int *n_readyp ATTRIBUTE_UNUSED,
8803 int clock_var ATTRIBUTE_UNUSED)
8805 if (reload_completed)
8806 return cached_can_issue_more;
8808 if (high_pressure(SFmode) || high_pressure (SImode))
8809 skip_cycles = 1;
8811 return cached_can_issue_more;
8814 /* Skip cycles without sorting the ready queue. This will move insn from
8815 Q->R. If this is the last cycle we are skipping, allow sorting of the ready
8816 queue by sh_reorder. */
8818 /* Generally, skipping this many cycles is sufficient for all insns to move
8819 from Q -> R. */
8820 #define MAX_SKIPS 8
8822 static int
8823 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
8824 int sched_verbose ATTRIBUTE_UNUSED,
8825 rtx insn ATTRIBUTE_UNUSED,
8826 int last_clock_var,
8827 int clock_var,
8828 int *sort_p)
8830 if (reload_completed)
8831 return 0;
8833 if (skip_cycles)
8835 if ((clock_var - last_clock_var) < MAX_SKIPS)
8837 *sort_p = 0;
8838 return 1;
8840 /* If this is the last cycle we are skipping, allow reordering of R. */
8841 if ((clock_var - last_clock_var) == MAX_SKIPS)
8843 *sort_p = 1;
8844 return 1;
8848 skip_cycles = 0;
8850 return 0;
8853 /* SHmedia requires registers for branches, so we can't generate new
8854 branches past reload. */
8855 static bool
8856 sh_cannot_modify_jumps_p (void)
8858 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
8861 static int
8862 sh_target_reg_class (void)
8864 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
8867 static bool
8868 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
8870 HARD_REG_SET dummy;
8871 rtx insn;
8873 if (! shmedia_space_reserved_for_target_registers)
8874 return 0;
8875 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
8876 return 0;
8877 if (calc_live_regs (&dummy) >= 6 * 8)
8878 return 1;
8879 /* This is a borderline case. See if we got a nested loop, or a loop
8880 with a call, or with more than 4 labels inside. */
8881 for (insn = get_insns(); insn; insn = NEXT_INSN (insn))
8883 if (GET_CODE (insn) == NOTE
8884 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
8886 int labels = 0;
8890 insn = NEXT_INSN (insn);
8891 if ((GET_CODE (insn) == NOTE
8892 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
8893 || GET_CODE (insn) == CALL_INSN
8894 || (GET_CODE (insn) == CODE_LABEL && ++labels > 4))
8895 return 1;
8897 while (GET_CODE (insn) != NOTE
8898 || NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_END);
8901 return 0;
8904 static bool
8905 sh_ms_bitfield_layout_p (tree record_type ATTRIBUTE_UNUSED)
8907 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
8911 On the SH1..SH4, the trampoline looks like
8912 2 0002 D202 mov.l l2,r2
8913 1 0000 D301 mov.l l1,r3
8914 3 0004 422B jmp @r2
8915 4 0006 0009 nop
8916 5 0008 00000000 l1: .long area
8917 6 000c 00000000 l2: .long function
8919 SH5 (compact) uses r1 instead of r3 for the static chain. */
8922 /* Emit RTL insns to initialize the variable parts of a trampoline.
8923 FNADDR is an RTX for the address of the function's pure code.
8924 CXT is an RTX for the static chain value for the function. */
8926 void
8927 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
8929 rtx tramp_mem = gen_frame_mem (BLKmode, tramp);
8931 if (TARGET_SHMEDIA64)
8933 rtx tramp_templ;
8934 int fixed_len;
8936 rtx movi1 = GEN_INT (0xcc000010);
8937 rtx shori1 = GEN_INT (0xc8000010);
8938 rtx src, dst;
8940 /* The following trampoline works within a +- 128 KB range for cxt:
8941 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
8942 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
8943 gettr tr1,r1; blink tr0,r63 */
8944 /* Address rounding makes it hard to compute the exact bounds of the
8945 offset for this trampoline, but we have a rather generous offset
8946 range, so frame_offset should do fine as an upper bound. */
8947 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
8949 /* ??? could optimize this trampoline initialization
8950 by writing DImode words with two insns each. */
8951 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
8952 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
8953 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
8954 insn = gen_rtx_AND (DImode, insn, mask);
8955 /* Or in ptb/u .,tr1 pattern */
8956 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
8957 insn = force_operand (insn, NULL_RTX);
8958 insn = gen_lowpart (SImode, insn);
8959 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
8960 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
8961 insn = gen_rtx_AND (DImode, insn, mask);
8962 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
8963 insn = gen_lowpart (SImode, insn);
8964 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
8965 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
8966 insn = gen_rtx_AND (DImode, insn, mask);
8967 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
8968 insn = gen_lowpart (SImode, insn);
8969 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
8970 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
8971 insn = gen_rtx_AND (DImode, insn, mask);
8972 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
8973 insn = gen_lowpart (SImode, insn);
8974 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
8975 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
8976 insn = gen_rtx_AND (DImode, insn, mask);
8977 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
8978 insn = gen_lowpart (SImode, insn);
8979 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
8980 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
8981 GEN_INT (0x6bf10600));
8982 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
8983 GEN_INT (0x4415fc10));
8984 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
8985 GEN_INT (0x4401fff0));
8986 emit_insn (gen_ic_invalidate_line (tramp));
8987 return;
8989 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
8990 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
8992 tramp_templ = gen_datalabel_ref (tramp_templ);
8993 dst = tramp_mem;
8994 src = gen_const_mem (BLKmode, tramp_templ);
8995 set_mem_align (dst, 256);
8996 set_mem_align (src, 64);
8997 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
8999 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
9000 emit_move_insn (adjust_address (tramp_mem, Pmode,
9001 fixed_len + GET_MODE_SIZE (Pmode)),
9002 cxt);
9003 emit_insn (gen_ic_invalidate_line (tramp));
9004 return;
9006 else if (TARGET_SHMEDIA)
9008 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
9009 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
9010 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
9011 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
9012 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
9013 rotated 10 right, and the higher 16 bits of every 32 selected. */
9014 rtx movishori
9015 = force_reg (V2HImode, (simplify_gen_subreg
9016 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
9017 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
9018 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
9020 tramp = force_reg (Pmode, tramp);
9021 fnaddr = force_reg (SImode, fnaddr);
9022 cxt = force_reg (SImode, cxt);
9023 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
9024 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
9025 movishori));
9026 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
9027 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9028 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
9029 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
9030 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
9031 gen_rtx_SUBREG (V2HImode, cxt, 0),
9032 movishori));
9033 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
9034 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9035 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
9036 if (TARGET_LITTLE_ENDIAN)
9038 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
9039 emit_insn (gen_mextr4 (quad2, cxtload, blink));
9041 else
9043 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
9044 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
9046 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
9047 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
9048 emit_insn (gen_ic_invalidate_line (tramp));
9049 return;
9051 else if (TARGET_SHCOMPACT)
9053 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
9054 return;
9056 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
9057 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
9058 SImode));
9059 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
9060 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
9061 SImode));
9062 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
9063 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
9064 if (TARGET_HARVARD)
9066 if (TARGET_USERMODE)
9067 emit_library_call (function_symbol (NULL, "__ic_invalidate",
9068 FUNCTION_ORDINARY),
9069 0, VOIDmode, 1, tramp, SImode);
9070 else
9071 emit_insn (gen_ic_invalidate_line (tramp));
9075 /* FIXME: This is overly conservative. A SHcompact function that
9076 receives arguments ``by reference'' will have them stored in its
9077 own stack frame, so it must not pass pointers or references to
9078 these arguments to other functions by means of sibling calls. */
9079 /* If PIC, we cannot make sibling calls to global functions
9080 because the PLT requires r12 to be live. */
9081 static bool
9082 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9084 return (1
9085 && (! TARGET_SHCOMPACT
9086 || current_function_args_info.stack_regs == 0)
9087 && ! sh_cfun_interrupt_handler_p ()
9088 && (! flag_pic
9089 || (decl && ! TREE_PUBLIC (decl))
9090 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
9093 /* Machine specific built-in functions. */
9095 struct builtin_description
9097 const enum insn_code icode;
9098 const char *const name;
9099 int signature;
9102 /* describe number and signedness of arguments; arg[0] == result
9103 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument)
9104 /* 9: 64 bit pointer, 10: 32 bit pointer */
9105 static const char signature_args[][4] =
9107 #define SH_BLTIN_V2SI2 0
9108 { 4, 4 },
9109 #define SH_BLTIN_V4HI2 1
9110 { 4, 4 },
9111 #define SH_BLTIN_V2SI3 2
9112 { 4, 4, 4 },
9113 #define SH_BLTIN_V4HI3 3
9114 { 4, 4, 4 },
9115 #define SH_BLTIN_V8QI3 4
9116 { 4, 4, 4 },
9117 #define SH_BLTIN_MAC_HISI 5
9118 { 1, 4, 4, 1 },
9119 #define SH_BLTIN_SH_HI 6
9120 { 4, 4, 1 },
9121 #define SH_BLTIN_SH_SI 7
9122 { 4, 4, 1 },
9123 #define SH_BLTIN_V4HI2V2SI 8
9124 { 4, 4, 4 },
9125 #define SH_BLTIN_V4HI2V8QI 9
9126 { 4, 4, 4 },
9127 #define SH_BLTIN_SISF 10
9128 { 4, 2 },
9129 #define SH_BLTIN_LDUA_L 11
9130 { 2, 10 },
9131 #define SH_BLTIN_LDUA_Q 12
9132 { 1, 10 },
9133 #define SH_BLTIN_STUA_L 13
9134 { 0, 10, 2 },
9135 #define SH_BLTIN_STUA_Q 14
9136 { 0, 10, 1 },
9137 #define SH_BLTIN_LDUA_L64 15
9138 { 2, 9 },
9139 #define SH_BLTIN_LDUA_Q64 16
9140 { 1, 9 },
9141 #define SH_BLTIN_STUA_L64 17
9142 { 0, 9, 2 },
9143 #define SH_BLTIN_STUA_Q64 18
9144 { 0, 9, 1 },
9145 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
9146 #define SH_BLTIN_2 19
9147 #define SH_BLTIN_SU 19
9148 { 1, 2 },
9149 #define SH_BLTIN_3 20
9150 #define SH_BLTIN_SUS 20
9151 { 2, 2, 1 },
9152 #define SH_BLTIN_PSSV 21
9153 { 0, 8, 2, 2 },
9154 #define SH_BLTIN_XXUU 22
9155 #define SH_BLTIN_UUUU 22
9156 { 1, 1, 1, 1 },
9157 #define SH_BLTIN_PV 23
9158 { 0, 8 },
9160 /* mcmv: operands considered unsigned. */
9161 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9162 /* mperm: control value considered unsigned int. */
9163 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9164 /* mshards_q: returns signed short. */
9165 /* nsb: takes long long arg, returns unsigned char. */
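/* A hedged reading of the signature encoding (illustration only): for
   example, SH_BLTIN_SH_HI is { 4, 4, 1 }, so a builtin using it, such as
   __builtin_sh_media_MPERM_W below, is registered with a prototype of
   roughly

     v4hi __builtin_sh_media_MPERM_W (v4hi, unsigned int);

   where the "don't care" slots take their modes from the insn pattern and
   v4hi stands for the corresponding vector type.  */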
9166 static const struct builtin_description bdesc[] =
9168 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
9169 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
9170 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
9171 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
9172 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
9173 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
9174 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
9175 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9176 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
9177 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
9178 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
9179 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
9180 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
9181 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
9182 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
9183 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
9184 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
9185 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
9186 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3 },
9187 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3 },
9188 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3 },
9189 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3 },
9190 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3 },
9191 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3 },
9192 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3 },
9193 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
9194 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
9195 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
9196 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
9197 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
9198 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
9199 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
9200 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
9201 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
9202 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
9203 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
9204 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
9205 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
9206 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
9207 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
9208 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
9209 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
9210 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
9211 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
9212 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
9213 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
9214 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
9215 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
9216 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
9217 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
9218 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
9219 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
9220 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
9221 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
9222 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
9223 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
9224 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
9225 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
9226 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
9227 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
9228 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
9229 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3 },
9230 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2 },
9231 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2 },
9232 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
9233 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9234 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9235 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9236 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9237 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9238 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9239 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9240 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9241 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64 },
9242 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64 },
9243 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64 },
9244 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64 },
9245 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64 },
9246 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64 },
9247 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64 },
9248 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64 },
9249 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
9250 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
9251 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
9254 static void
9255 sh_media_init_builtins (void)
9257 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
9258 const struct builtin_description *d;
9260 memset (shared, 0, sizeof shared);
9261 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
9263 tree type, arg_type = 0;
9264 int signature = d->signature;
9265 int i;
9267 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
9268 type = shared[signature];
9269 else
9271 int has_result = signature_args[signature][0] != 0;
9273 if ((signature_args[signature][1] & 8)
9274 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
9275 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
9276 continue;
9277 if (! TARGET_FPU_ANY
9278 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
9279 continue;
9280 type = void_list_node;
9281 for (i = 3; ; i--)
9283 int arg = signature_args[signature][i];
9284 int opno = i - 1 + has_result;
9286 if (arg & 8)
9287 arg_type = ptr_type_node;
9288 else if (arg)
9289 arg_type = (*lang_hooks.types.type_for_mode)
9290 (insn_data[d->icode].operand[opno].mode,
9291 (arg & 1));
9292 else if (i)
9293 continue;
9294 else
9295 arg_type = void_type_node;
9296 if (i == 0)
9297 break;
9298 type = tree_cons (NULL_TREE, arg_type, type);
9300 type = build_function_type (arg_type, type);
9301 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
9302 shared[signature] = type;
9304 lang_hooks.builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
9305 NULL, NULL_TREE);
9309 /* Implements target hook vector_mode_supported_p. */
9310 bool
9311 sh_vector_mode_supported_p (enum machine_mode mode)
9313 if (TARGET_FPU_ANY
9314 && ((mode == V2SFmode)
9315 || (mode == V4SFmode)
9316 || (mode == V16SFmode)))
9317 return true;
9319 else if (TARGET_SHMEDIA
9320 && ((mode == V8QImode)
9321 || (mode == V2HImode)
9322 || (mode == V4HImode)
9323 || (mode == V2SImode)))
9324 return true;
9326 return false;
9329 /* Implements target hook dwarf_calling_convention. Return an enum
9330 of dwarf_calling_convention. */
9332 sh_dwarf_calling_convention (tree func)
9334 if (sh_attr_renesas_p (func))
9335 return DW_CC_GNU_renesas_sh;
9337 return DW_CC_normal;
9340 static void
9341 sh_init_builtins (void)
9343 if (TARGET_SHMEDIA)
9344 sh_media_init_builtins ();
9347 /* Expand an expression EXP that calls a built-in function,
9348 with result going to TARGET if that's convenient
9349 (and in mode MODE if that's convenient).
9350 SUBTARGET may be used as the target for computing one of EXP's operands.
9351 IGNORE is nonzero if the value is to be ignored. */
9353 static rtx
9354 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9355 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
9357 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
9358 tree arglist = TREE_OPERAND (exp, 1);
9359 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9360 const struct builtin_description *d = &bdesc[fcode];
9361 enum insn_code icode = d->icode;
9362 int signature = d->signature;
9363 enum machine_mode tmode = VOIDmode;
9364 int nop = 0, i;
9365 rtx op[4];
9366 rtx pat = 0;
9368 if (signature_args[signature][0])
9370 if (ignore)
9371 return 0;
9373 tmode = insn_data[icode].operand[0].mode;
9374 if (! target
9375 || GET_MODE (target) != tmode
9376 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9377 target = gen_reg_rtx (tmode);
9378 op[nop++] = target;
9380 else
9381 target = 0;
9383 for (i = 1; i <= 3; i++, nop++)
9385 tree arg;
9386 enum machine_mode opmode, argmode;
9387 tree optype;
9389 if (! signature_args[signature][i])
9390 break;
9391 arg = TREE_VALUE (arglist);
9392 if (arg == error_mark_node)
9393 return const0_rtx;
9394 arglist = TREE_CHAIN (arglist);
9395 if (signature_args[signature][i] & 8)
9397 opmode = ptr_mode;
9398 optype = ptr_type_node;
9400 else
9402 opmode = insn_data[icode].operand[nop].mode;
9403 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
9405 argmode = TYPE_MODE (TREE_TYPE (arg));
9406 if (argmode != opmode)
9407 arg = build1 (NOP_EXPR, optype, arg);
9408 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
9409 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
9410 op[nop] = copy_to_mode_reg (opmode, op[nop]);
9413 switch (nop)
9415 case 1:
9416 pat = (*insn_data[d->icode].genfun) (op[0]);
9417 break;
9418 case 2:
9419 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
9420 break;
9421 case 3:
9422 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
9423 break;
9424 case 4:
9425 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
9426 break;
9427 default:
9428 gcc_unreachable ();
9430 if (! pat)
9431 return 0;
9432 emit_insn (pat);
9433 return target;
9436 void
9437 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
9439 rtx sel0 = const0_rtx;
9440 rtx sel1 = const1_rtx;
9441 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
9442 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
9444 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
9445 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
9448 void
9449 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
9451 rtx sel0 = const0_rtx;
9452 rtx sel1 = const1_rtx;
9453 rtx (*fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
9454 = gen_binary_sf_op;
9455 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
9457 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
9458 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
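/* For illustration (an assumed expansion, not verified against sh.md):
   sh_expand_binop_v2sf (PLUS, v0, v1, v2) emits two single-precision adds
   through gen_binary_sf_op, one per lane, with the select constants
   picking element 0 of the operands for the first insn and element 1 for
   the second.  */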
9461 /* Return true if a mode change from FROM to TO is invalid for a register
9462 in class CLASS. */
9463 bool
9464 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9465 enum reg_class class)
9467 /* We want to enable the use of SUBREGs as a means to
9468 VEC_SELECT a single element of a vector. */
9469 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
9470 return (reg_classes_intersect_p (GENERAL_REGS, class));
9472 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
9474 if (TARGET_LITTLE_ENDIAN)
9476 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
9477 return reg_classes_intersect_p (DF_REGS, class);
9479 else
9481 if (GET_MODE_SIZE (from) < 8)
9482 return reg_classes_intersect_p (DF_HI_REGS, class);
9485 return 0;
9489 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
9490 that label is used. */
9492 void
9493 sh_mark_label (rtx address, int nuses)
9495 if (GOTOFF_P (address))
9497 /* Extract the label or symbol. */
9498 address = XEXP (address, 0);
9499 if (GET_CODE (address) == PLUS)
9500 address = XEXP (address, 0);
9501 address = XVECEXP (address, 0, 0);
9503 if (GET_CODE (address) == LABEL_REF
9504 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
9505 LABEL_NUSES (XEXP (address, 0)) += nuses;
9508 /* Compute extra cost of moving data between one register class
9509 and another. */
9511 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
9512 uses this information. Hence, the general register <-> floating point
9513 register information here is not used for SFmode. */
9516 sh_register_move_cost (enum machine_mode mode,
9517 enum reg_class srcclass, enum reg_class dstclass)
9519 if (dstclass == T_REGS || dstclass == PR_REGS)
9520 return 10;
9522 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
9523 return 4;
9525 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
9526 && REGCLASS_HAS_FP_REG (srcclass)
9527 && REGCLASS_HAS_FP_REG (dstclass))
9528 return 4;
9530 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
9531 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
9533 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
9534 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
9535 return 9;
9537 if ((REGCLASS_HAS_FP_REG (dstclass)
9538 && REGCLASS_HAS_GENERAL_REG (srcclass))
9539 || (REGCLASS_HAS_GENERAL_REG (dstclass)
9540 && REGCLASS_HAS_FP_REG (srcclass)))
9541 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
9542 * ((GET_MODE_SIZE (mode) + 7) / 8U));
9544 if ((dstclass == FPUL_REGS
9545 && REGCLASS_HAS_GENERAL_REG (srcclass))
9546 || (srcclass == FPUL_REGS
9547 && REGCLASS_HAS_GENERAL_REG (dstclass)))
9548 return 5;
9550 if ((dstclass == FPUL_REGS
9551 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
9552 || (srcclass == FPUL_REGS
9553 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
9554 return 7;
9556 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9557 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9558 return 20;
9560 /* ??? ptabs faults on (value & 0x3) == 0x3 */
9561 if (TARGET_SHMEDIA
9562 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
9564 if (sh_gettrcost >= 0)
9565 return sh_gettrcost;
9566 else if (!TARGET_PT_FIXED)
9567 return 100;
9570 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9571 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9572 return 4;
9574 if (TARGET_SHMEDIA
9575 || (TARGET_FMOVD
9576 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
9577 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
9578 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
9580 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
9583 static rtx emit_load_ptr (rtx, rtx);
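/* Load a ptr_mode value from memory at ADDR into REG, sign-extending it
   to Pmode when Pmode is wider than ptr_mode.  */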
9585 static rtx
9586 emit_load_ptr (rtx reg, rtx addr)
9588 rtx mem = gen_const_mem (ptr_mode, addr);
9590 if (Pmode != ptr_mode)
9591 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
9592 return emit_move_insn (reg, mem);
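/* Emit RTL for a vcall thunk: adjust the incoming `this' pointer by DELTA
   and, if VCALL_OFFSET is nonzero, also by the value stored VCALL_OFFSET
   bytes past the vtable pointer found at the adjusted `this'; then
   tail-call FUNCTION.  */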
9595 static void
9596 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
9597 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
9598 tree function)
9600 CUMULATIVE_ARGS cum;
9601 int structure_value_byref = 0;
9602 rtx this, this_value, sibcall, insns, funexp;
9603 tree funtype = TREE_TYPE (function);
9604 int simple_add = CONST_OK_FOR_ADD (delta);
9605 int did_load = 0;
9606 rtx scratch0, scratch1, scratch2;
9607 unsigned i;
9609 reload_completed = 1;
9610 epilogue_completed = 1;
9611 no_new_pseudos = 1;
9612 current_function_uses_only_leaf_regs = 1;
9613 reset_block_changes ();
9615 emit_note (NOTE_INSN_PROLOGUE_END);
9617 /* Find the "this" pointer. We have such a wide range of ABIs for the
9618 SH that it's best to do this completely machine independently.
9619 "this" is passed as first argument, unless a structure return pointer
9620 comes first, in which case "this" comes second. */
9621 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
9622 #ifndef PCC_STATIC_STRUCT_RETURN
9623 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
9624 structure_value_byref = 1;
9625 #endif /* not PCC_STATIC_STRUCT_RETURN */
9626 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
9628 tree ptype = build_pointer_type (TREE_TYPE (funtype));
9630 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
9632 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
9634 /* For SHcompact, we only have r0 for a scratch register: r1 is the
9635 static chain pointer (even if you can't have nested virtual functions
9636 right now, someone might implement them sometime), and the rest of the
9637 registers are used for argument passing, are callee-saved, or reserved. */
9638 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
9639 -ffixed-reg has been used. */
9640 if (! call_used_regs[0] || fixed_regs[0])
9641 error ("r0 needs to be available as a call-clobbered register");
9642 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
9643 if (! TARGET_SH5)
9645 if (call_used_regs[1] && ! fixed_regs[1])
9646 scratch1 = gen_rtx_REG (ptr_mode, 1);
9647 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
9648 to the location where struct values are returned, so use register 3. */
9649 if (call_used_regs[3] && ! fixed_regs[3])
9650 scratch2 = gen_rtx_REG (Pmode, 3);
9652 else if (TARGET_SHMEDIA)
9654 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
9655 if (i != REGNO (scratch0) &&
9656 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
9658 scratch1 = gen_rtx_REG (ptr_mode, i);
9659 break;
9661 if (scratch1 == scratch0)
9662 error ("need a second call-clobbered general purpose register");
9663 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
9664 if (call_used_regs[i] && ! fixed_regs[i])
9666 scratch2 = gen_rtx_REG (Pmode, i);
9667 break;
9669 if (scratch2 == scratch0)
9670 error ("need a call-clobbered target register");
9673 this_value = plus_constant (this, delta);
9674 if (vcall_offset
9675 && (simple_add || scratch0 != scratch1)
9676 && strict_memory_address_p (ptr_mode, this_value))
9678 emit_load_ptr (scratch0, this_value);
9679 did_load = 1;
9682 if (!delta)
9683 ; /* Do nothing. */
9684 else if (simple_add)
9685 emit_move_insn (this, this_value);
9686 else
9688 emit_move_insn (scratch1, GEN_INT (delta));
9689 emit_insn (gen_add2_insn (this, scratch1));
9692 if (vcall_offset)
9694 rtx offset_addr;
9696 if (!did_load)
9697 emit_load_ptr (scratch0, this);
9699 offset_addr = plus_constant (scratch0, vcall_offset);
9700 if (strict_memory_address_p (ptr_mode, offset_addr))
9701 ; /* Do nothing. */
9702 else if (! TARGET_SH5 && scratch0 != scratch1)
9704 /* scratch0 != scratch1, and we have indexed loads. Get a better
9705 schedule by loading the offset into r1 and using an indexed
9706 load - then the load of r1 can issue before the load from
9707 (this + delta) finishes. */
9708 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9709 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
9711 else if (CONST_OK_FOR_ADD (vcall_offset))
9713 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
9714 offset_addr = scratch0;
9716 else if (scratch0 != scratch1)
9718 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9719 emit_insn (gen_add2_insn (scratch0, scratch1));
9720 offset_addr = scratch0;
9722 else
9723 gcc_unreachable (); /* FIXME */
9724 emit_load_ptr (scratch0, offset_addr);
9726 if (Pmode != ptr_mode)
9727 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
9728 emit_insn (gen_add2_insn (this, scratch0));
9731 /* Generate a tail call to the target function. */
9732 if (! TREE_USED (function))
9734 assemble_external (function);
9735 TREE_USED (function) = 1;
9737 funexp = XEXP (DECL_RTL (function), 0);
9738 /* If the function is overridden, so is the thunk, hence we don't
9739 need GOT addressing even if this is a public symbol. */
9740 #if 0
9741 if (TARGET_SH1 && ! flag_weak)
9742 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
9743 else
9744 #endif
9745 if (TARGET_SH2 && flag_pic)
9747 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
9748 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
9750 else
9752 if (TARGET_SHMEDIA && flag_pic)
9754 funexp = gen_sym2PIC (funexp);
9755 PUT_MODE (funexp, Pmode);
9757 emit_move_insn (scratch2, funexp);
9758 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
9759 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
9761 sibcall = emit_call_insn (sibcall);
9762 SIBLING_CALL_P (sibcall) = 1;
9763 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
9764 emit_barrier ();
9766 /* Run just enough of rest_of_compilation to do scheduling and get
9767 the insns emitted. Note that use_thunk calls
9768 assemble_start_function and assemble_end_function. */
9770 insn_locators_initialize ();
9771 insns = get_insns ();
9773 if (optimize > 0)
9775 /* Initialize the bitmap obstacks. */
9776 bitmap_obstack_initialize (NULL);
9777 bitmap_obstack_initialize (&reg_obstack);
9778 if (! cfun->cfg)
9779 init_flow ();
9780 rtl_register_cfg_hooks ();
9781 init_rtl_bb_info (ENTRY_BLOCK_PTR);
9782 init_rtl_bb_info (EXIT_BLOCK_PTR);
9783 ENTRY_BLOCK_PTR->flags |= BB_RTL;
9784 EXIT_BLOCK_PTR->flags |= BB_RTL;
9785 find_basic_blocks (insns);
9787 if (flag_schedule_insns_after_reload)
9789 life_analysis (dump_file, PROP_FINAL);
9791 split_all_insns (1);
9793 schedule_insns (dump_file);
9795 /* We must split the jmp insn in the PIC case. */
9796 else if (flag_pic)
9797 split_all_insns_noflow ();
9800 sh_reorg ();
9802 if (optimize > 0 && flag_delayed_branch)
9803 dbr_schedule (insns, dump_file);
9805 shorten_branches (insns);
9806 final_start_function (insns, file, 1);
9807 final (insns, file, 1);
9808 final_end_function ();
9810 if (optimize > 0)
9812 /* Release all memory allocated by flow. */
9813 free_basic_block_vars ();
9815 /* Release the bitmap obstacks. */
9816 bitmap_obstack_release (&reg_obstack);
9817 bitmap_obstack_release (NULL);
9820 reload_completed = 0;
9821 epilogue_completed = 0;
9822 no_new_pseudos = 0;
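/* Return an rtx for the address of the function NAME, materializing it as
   appropriate for KIND when generating PIC code; if TARGET is nonnull,
   the address is moved into TARGET.  */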
9826 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
9828 rtx sym;
9830 /* If this is not an ordinary function, the name usually comes from a
9831 string literal or an sprintf buffer. Make sure we use the same
9832 string consistently, so that cse will be able to unify address loads. */
9833 if (kind != FUNCTION_ORDINARY)
9834 name = IDENTIFIER_POINTER (get_identifier (name));
9835 sym = gen_rtx_SYMBOL_REF (Pmode, name);
9836 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
9837 if (flag_pic)
9838 switch (kind)
9840 case FUNCTION_ORDINARY:
9841 break;
9842 case SFUNC_GOT:
9844 rtx reg = target ? target : gen_reg_rtx (Pmode);
9846 emit_insn (gen_symGOT2reg (reg, sym));
9847 sym = reg;
9848 break;
9850 case SFUNC_STATIC:
9852 /* ??? To allow cse to work, we use GOTOFF relocations.
9853 We could add combiner patterns to transform this into
9854 straight pc-relative calls with sym2PIC / bsrf when
9855 label load and function call are still 1:1 and in the
9856 same basic block during combine. */
9857 rtx reg = target ? target : gen_reg_rtx (Pmode);
9859 emit_insn (gen_symGOTOFF2reg (reg, sym));
9860 sym = reg;
9861 break;
9864 if (target && sym != target)
9866 emit_move_insn (target, sym);
9867 return target;
9869 return sym;
9872 /* Find the number of a general purpose register in S. */
9873 static int
9874 scavenge_reg (HARD_REG_SET *s)
9876 int r;
9877 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
9878 if (TEST_HARD_REG_BIT (*s, r))
9879 return r;
9880 return -1;
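/* Return an rtx holding the initial (function-entry) value of the PR
   register, i.e. the return address of the current function.  */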
9884 sh_get_pr_initial_val (void)
9886 rtx val;
9888 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
9889 PR register on SHcompact, because it might be clobbered by the prologue.
9890 We check first if that is known to be the case. */
9891 if (TARGET_SHCOMPACT
9892 && ((current_function_args_info.call_cookie
9893 & ~ CALL_COOKIE_RET_TRAMP (1))
9894 || current_function_has_nonlocal_label))
9895 return gen_frame_mem (SImode, return_address_pointer_rtx);
9897 /* If we haven't finished rtl generation, there might be a nonlocal label
9898 that we haven't seen yet.
9899 ??? get_hard_reg_initial_val fails if it is called while no_new_pseudos
9900 is set, unless it has been called before for the same register. And even
9901 then, we end up in trouble if we didn't use the register in the same
9902 basic block before. So call get_hard_reg_initial_val now and wrap it
9903 in an unspec if we might need to replace it. */
9904 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
9905 combine can put the pseudo returned by get_hard_reg_initial_val into
9906 instructions that need a general purpose register, which will fail to
9907 be recognized when the pseudo becomes allocated to PR. */
9909 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
9910 if (TARGET_SH1)
9911 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
9912 return val;
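/* Try to expand an EQ or NE comparison of the T register against a
   constant into TARGET.  Return 1 if code was emitted, 0 if the caller
   must fall back to the generic expansion.  */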
9916 sh_expand_t_scc (enum rtx_code code, rtx target)
9918 rtx result = target;
9919 HOST_WIDE_INT val;
9921 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
9922 || GET_CODE (sh_compare_op1) != CONST_INT)
9923 return 0;
9924 if (GET_CODE (result) != REG)
9925 result = gen_reg_rtx (SImode);
9926 val = INTVAL (sh_compare_op1);
9927 if ((code == EQ && val == 1) || (code == NE && val == 0))
9928 emit_insn (gen_movt (result));
9929 else if ((code == EQ && val == 0) || (code == NE && val == 1))
9931 emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
9932 emit_insn (gen_subc (result, result, result));
9933 emit_insn (gen_addsi3 (result, result, const1_rtx));
9935 else if (code == EQ || code == NE)
9936 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
9937 else
9938 return 0;
9939 if (result != target)
9940 emit_move_insn (target, result);
9941 return 1;
9944 /* INSN is an sfunc; return the rtx that describes the address used. */
9945 static rtx
9946 extract_sfunc_addr (rtx insn)
9948 rtx pattern, part = NULL_RTX;
9949 int len, i;
9951 pattern = PATTERN (insn);
9952 len = XVECLEN (pattern, 0);
9953 for (i = 0; i < len; i++)
9955 part = XVECEXP (pattern, 0, i);
9956 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
9957 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
9958 return XEXP (part, 0);
9960 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
9961 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
9964 /* Verify that the register in use_sfunc_addr still agrees with the address
9965 used in the sfunc. This prevents fill_slots_from_thread from changing
9966 use_sfunc_addr.
9967 INSN is the use_sfunc_addr instruction, and REG is the register it
9968 guards. */
9970 check_use_sfunc_addr (rtx insn, rtx reg)
9972 /* Search for the sfunc. It should really come right after INSN. */
9973 while ((insn = NEXT_INSN (insn)))
9975 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
9976 break;
9977 if (! INSN_P (insn))
9978 continue;
9980 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
9981 insn = XVECEXP (PATTERN (insn), 0, 0);
9982 if (GET_CODE (PATTERN (insn)) != PARALLEL
9983 || get_attr_type (insn) != TYPE_SFUNC)
9984 continue;
9985 return rtx_equal_p (extract_sfunc_addr (insn), reg);
9987 gcc_unreachable ();
9990 /* This function returns a constant rtx that represents 2**15 / pi in
9991 SFmode. It's used to scale SFmode angles, in radians, to a
9992 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
9993 maps to 0x10000. */
9995 static GTY(()) rtx sh_fsca_sf2int_rtx;
9998 sh_fsca_sf2int (void)
10000 if (! sh_fsca_sf2int_rtx)
10002 REAL_VALUE_TYPE rv;
10004 real_from_string (&rv, "10430.378350470453");
10005 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
10008 return sh_fsca_sf2int_rtx;
10011 /* This function returns a constant rtx that represents 2**15 / pi in
10012 DFmode. It's used to scale DFmode angles, in radians, to a
10013 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
10014 maps to 0x10000. */
10016 static GTY(()) rtx sh_fsca_df2int_rtx;
10019 sh_fsca_df2int (void)
10021 if (! sh_fsca_df2int_rtx)
10023 REAL_VALUE_TYPE rv;
10025 real_from_string (&rv, "10430.378350470453");
10026 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
10029 return sh_fsca_df2int_rtx;
10032 /* This function returns a constant rtx that represents pi / 2**15 in
10033 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
10034 of a full circle back to an SFmode value, i.e., 0x10000 maps to
10035 2*pi. */
10037 static GTY(()) rtx sh_fsca_int2sf_rtx;
10040 sh_fsca_int2sf (void)
10042 if (! sh_fsca_int2sf_rtx)
10044 REAL_VALUE_TYPE rv;
10046 real_from_string (&rv, "9.587379924285257e-5");
10047 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
10050 return sh_fsca_int2sf_rtx;
10053 /* Initialize the CUMULATIVE_ARGS structure. */
10055 void
10056 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
10057 tree fntype,
10058 rtx libname ATTRIBUTE_UNUSED,
10059 tree fndecl,
10060 signed int n_named_args,
10061 enum machine_mode mode)
10063 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
10064 pcum->free_single_fp_reg = 0;
10065 pcum->stack_regs = 0;
10066 pcum->byref_regs = 0;
10067 pcum->byref = 0;
10068 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
10070 /* XXX - Should we check TARGET_HITACHI here ??? */
10071 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
10073 if (fntype)
10075 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
10076 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
10077 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
10078 pcum->arg_count [(int) SH_ARG_INT]
10079 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
10081 pcum->call_cookie
10082 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10083 && pcum->arg_count [(int) SH_ARG_INT] == 0
10084 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
10085 ? int_size_in_bytes (TREE_TYPE (fntype))
10086 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
10087 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
10088 == FIRST_RET_REG));
10090 else
10092 pcum->arg_count [(int) SH_ARG_INT] = 0;
10093 pcum->prototype_p = FALSE;
10094 if (mode != VOIDmode)
10096 pcum->call_cookie =
10097 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10098 && GET_MODE_SIZE (mode) > 4
10099 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
10101 /* If the default ABI is the Renesas ABI then all library
10102 calls must assume that the library will be using the
10103 Renesas ABI. So if the function would return its result
10104 in memory then we must force the address of this memory
10105 block onto the stack. Ideally we would like to call
10106 targetm.calls.return_in_memory() here but we do not have
10107 the TYPE or the FNDECL available so we synthesize the
10108 contents of that function as best we can. */
10109 pcum->force_mem =
10110 (TARGET_DEFAULT & MASK_HITACHI)
10111 && (mode == BLKmode
10112 || (GET_MODE_SIZE (mode) > 4
10113 && !(mode == DFmode
10114 && TARGET_FPU_DOUBLE)));
10116 else
10118 pcum->call_cookie = 0;
10119 pcum->force_mem = FALSE;
10124 /* Determine if two hard register sets intersect.
10125 Return 1 if they do. */
10127 static int
10128 hard_regs_intersect_p (HARD_REG_SET *a, HARD_REG_SET *b)
10130 HARD_REG_SET c;
10131 COPY_HARD_REG_SET (c, *a);
10132 AND_HARD_REG_SET (c, *b);
10133 GO_IF_HARD_REG_SUBSET (c, reg_class_contents[(int) NO_REGS], lose);
10134 return 1;
10135 lose:
10136 return 0;
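/* Heuristic used on SHmedia to throttle loop unrolling for LOOP: unrolling
   multiplies the number of branch targets, and with only eight target
   registers the extra targets can cost more than the unrolling gains.
   Return the adjusted limit on unrolled instructions.  */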
10139 #ifdef TARGET_ADJUST_UNROLL_MAX
10140 static int
10141 sh_adjust_unroll_max (struct loop * loop, int insn_count,
10142 int max_unrolled_insns, int strength_reduce_p,
10143 int unroll_type)
10145 /* This doesn't work in 4.0 because the old unroller & loop.h is gone. */
10146 if (TARGET_ADJUST_UNROLL && TARGET_SHMEDIA)
10148 /* Throttle back loop unrolling so that the costs of using more
10149 targets than the eight target registers we have don't outweigh
10150 the benefits of unrolling. */
10151 rtx insn;
10152 int n_labels = 0, n_calls = 0, n_exit_dest = 0, n_inner_loops = -1;
10153 int n_barriers = 0;
10154 rtx dest;
10155 int i;
10156 rtx exit_dest[8];
10157 int threshold;
10158 int unroll_benefit = 0, mem_latency = 0;
10159 int base_cost, best_cost, cost;
10160 int factor, best_factor;
10161 int n_dest;
10162 unsigned max_iterations = 32767;
10163 int n_iterations;
10164 int need_precond = 0, precond = 0;
10165 basic_block * bbs = get_loop_body (loop);
10166 struct niter_desc *desc;
10168 /* Assume that all labels inside the loop are used from inside the
10169 loop. If the loop has multiple entry points, it is unlikely to
10170 be unrolled anyway.
10171 Also assume that all calls are to different functions. That is
10172 somewhat pessimistic, but if you have lots of calls, unrolling the
10173 loop is not likely to gain you much in the first place. */
10174 i = loop->num_nodes - 1;
10175 for (insn = BB_HEAD (bbs[i]); ; )
10177 if (GET_CODE (insn) == CODE_LABEL)
10178 n_labels++;
10179 else if (GET_CODE (insn) == CALL_INSN)
10180 n_calls++;
10181 else if (GET_CODE (insn) == NOTE
10182 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
10183 n_inner_loops++;
10184 else if (GET_CODE (insn) == BARRIER)
10185 n_barriers++;
10186 if (insn != BB_END (bbs[i]))
10187 insn = NEXT_INSN (insn);
10188 else if (--i >= 0)
10189 insn = BB_HEAD (bbs[i]);
10190 else
10191 break;
10193 free (bbs);
10194 /* One label for the loop top is normal, and it won't be duplicated by
10195 unrolling. */
10196 if (n_labels <= 1)
10197 return max_unrolled_insns;
10198 if (n_inner_loops > 0)
10199 return 0;
10200 for (dest = loop->exit_labels; dest && n_exit_dest < 8;
10201 dest = LABEL_NEXTREF (dest))
10203 for (i = n_exit_dest - 1;
10204 i >= 0 && XEXP (dest, 0) != XEXP (exit_dest[i], 0); i--);
10205 if (i < 0)
10206 exit_dest[n_exit_dest++] = dest;
10208 /* If the loop top and call and exit destinations are enough to fill up
10209 the target registers, we're unlikely to do any more damage by
10210 unrolling. */
10211 if (n_calls + n_exit_dest >= 7)
10212 return max_unrolled_insns;
10214 /* ??? In the new loop unroller, there is no longer any strength
10215 reduction information available. Thus, when it comes to unrolling,
10216 we know the cost of everything, but we know the value of nothing. */
10217 #if 0
10218 if (strength_reduce_p
10219 && (unroll_type == LPT_UNROLL_RUNTIME
10220 || unroll_type == LPT_UNROLL_CONSTANT
10221 || unroll_type == LPT_PEEL_COMPLETELY))
10223 struct loop_ivs *ivs = LOOP_IVS (loop);
10224 struct iv_class *bl;
10226 /* We'll save one compare-and-branch in each loop body copy
10227 but the last one. */
10228 unroll_benefit = 1;
10229 /* Assess the benefit of removing biv & giv updates. */
10230 for (bl = ivs->list; bl; bl = bl->next)
10232 rtx increment = biv_total_increment (bl);
10233 struct induction *v;
10235 if (increment && GET_CODE (increment) == CONST_INT)
10237 unroll_benefit++;
10238 for (v = bl->giv; v; v = v->next_iv)
10240 if (! v->ignore && v->same == 0
10241 && GET_CODE (v->mult_val) == CONST_INT)
10242 unroll_benefit++;
10243 /* If this giv uses an array, try to determine
10244 a maximum iteration count from the size of the
10245 array. This need not be correct all the time,
10246 but should not be too far off the mark too often. */
10247 while (v->giv_type == DEST_ADDR)
10249 rtx mem = PATTERN (v->insn);
10250 tree mem_expr, type, size_tree;
10252 if (GET_CODE (SET_SRC (mem)) == MEM)
10253 mem = SET_SRC (mem);
10254 else if (GET_CODE (SET_DEST (mem)) == MEM)
10255 mem = SET_DEST (mem);
10256 else
10257 break;
10258 mem_expr = MEM_EXPR (mem);
10259 if (! mem_expr)
10260 break;
10261 type = TREE_TYPE (mem_expr);
10262 if (TREE_CODE (type) != ARRAY_TYPE
10263 || ! TYPE_SIZE (type) || ! TYPE_SIZE_UNIT (type))
10264 break;
10265 size_tree = fold (build (TRUNC_DIV_EXPR,
10266 bitsizetype,
10267 TYPE_SIZE (type),
10268 TYPE_SIZE_UNIT (type)));
10269 if (TREE_CODE (size_tree) == INTEGER_CST
10270 && ! TREE_INT_CST_HIGH (size_tree)
10271 && TREE_INT_CST_LOW (size_tree) < max_iterations)
10272 max_iterations = TREE_INT_CST_LOW (size_tree);
10273 break;
10279 #else /* 0 */
10280 /* Assume there is at least some benefit. */
10281 unroll_benefit = 1;
10282 #endif /* 0 */
10284 desc = get_simple_loop_desc (loop);
10285 n_iterations = desc->const_iter ? desc->niter : 0;
10286 max_iterations
10287 = max_iterations < desc->niter_max ? max_iterations : desc->niter_max;
10289 if (! strength_reduce_p || ! n_iterations)
10290 need_precond = 1;
10291 if (! n_iterations)
10293 n_iterations
10294 = max_iterations < 3 ? max_iterations : max_iterations * 3 / 4;
10295 if (! n_iterations)
10296 return 0;
10298 #if 0 /* ??? See above - missing induction variable information. */
10299 while (unroll_benefit > 1) /* no loop */
10301 /* We include the benefit of biv/ giv updates. Check if some or
10302 all of these updates are likely to fit into a scheduling
10303 bubble of a load.
10304 We check for the following case:
10305 - All the insns leading to the first JUMP_INSN are in a strict
10306 dependency chain.
10307 - there is at least one memory reference in them.
10309 When we find such a pattern, we assume that we can hide as many
10310 updates as the total of the load latency is, if we have an
10311 unroll factor of at least two. We might or might not also do
10312 this without unrolling, so rather than considering this as an
10313 extra unroll benefit, discount it in the unroll benefits of unroll
10314 factors higher than two. */
10316 rtx set, last_set;
10318 insn = next_active_insn (loop->start);
10319 last_set = single_set (insn);
10320 if (! last_set)
10321 break;
10322 if (GET_CODE (SET_SRC (last_set)) == MEM)
10323 mem_latency += 2;
10324 for (insn = NEXT_INSN (insn); insn != end; insn = NEXT_INSN (insn))
10326 if (! INSN_P (insn))
10327 continue;
10328 if (GET_CODE (insn) == JUMP_INSN)
10329 break;
10330 if (! reg_referenced_p (SET_DEST (last_set), PATTERN (insn)))
10332 /* Check if this is a to-be-reduced giv insn. */
10333 struct loop_ivs *ivs = LOOP_IVS (loop);
10334 struct iv_class *bl;
10335 struct induction *v;
10336 for (bl = ivs->list; bl; bl = bl->next)
10338 if (bl->biv->insn == insn)
10339 goto is_biv;
10340 for (v = bl->giv; v; v = v->next_iv)
10341 if (v->insn == insn)
10342 goto is_giv;
10344 mem_latency--;
10345 is_biv:
10346 is_giv:
10347 continue;
10349 set = single_set (insn);
10350 if (! set)
10351 continue;
10352 if (GET_CODE (SET_SRC (set)) == MEM)
10353 mem_latency += 2;
10354 last_set = set;
10356 if (mem_latency < 0)
10357 mem_latency = 0;
10358 else if (mem_latency > unroll_benefit - 1)
10359 mem_latency = unroll_benefit - 1;
10360 break;
10362 #endif /* 0 */
10363 if (n_labels + (unroll_benefit + n_labels * 8) / n_iterations
10364 <= unroll_benefit)
10365 return max_unrolled_insns;
10367 n_dest = n_labels + n_calls + n_exit_dest;
10368 base_cost = n_dest <= 8 ? 0 : n_dest - 7;
10369 best_cost = 0;
10370 best_factor = 1;
10371 if (n_barriers * 2 > n_labels - 1)
10372 n_barriers = (n_labels - 1) / 2;
10373 for (factor = 2; factor <= 8; factor++)
10375 /* Bump up preconditioning cost for each power of two. */
10376 if (! (factor & (factor-1)))
10377 precond += 4;
10378 /* When preconditioning, only powers of two will be considered. */
10379 else if (need_precond)
10380 continue;
10381 n_dest = ((unroll_type != LPT_PEEL_COMPLETELY)
10382 + (n_labels - 1) * factor + n_calls + n_exit_dest
10383 - (n_barriers * factor >> 1)
10384 + need_precond);
10385 cost
10386 = ((n_dest <= 8 ? 0 : n_dest - 7)
10387 - base_cost * factor
10388 - ((factor > 2 ? unroll_benefit - mem_latency : unroll_benefit)
10389 * (factor - (unroll_type != LPT_PEEL_COMPLETELY)))
10390 + ((unroll_benefit + 1 + (n_labels - 1) * factor)
10391 / n_iterations));
10392 if (need_precond)
10393 cost += (precond + unroll_benefit * factor / 2) / n_iterations;
10394 if (cost < best_cost)
10396 best_cost = cost;
10397 best_factor = factor;
10400 threshold = best_factor * insn_count;
10401 if (max_unrolled_insns > threshold)
10402 max_unrolled_insns = threshold;
10404 return max_unrolled_insns;
10406 #endif /* TARGET_ADJUST_UNROLL_MAX */
10408 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
10409 not enter into CONST_DOUBLE for the replace.
10411 Note that copying is not done, so X must not be shared unless all copies
10412 are to be modified.
10414 This is like replace_rtx, except that we operate on N_REPLACEMENTS
10415 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
10416 replacements[n*2+1] - and that we take mode changes into account.
10418 If a replacement is ambiguous, return NULL_RTX.
10420 If MODIFY is zero, don't modify any rtl in place,
10421 just return zero or nonzero for failure / success. */
10424 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
10426 int i, j;
10427 const char *fmt;
10429 /* The following prevents infinite loops when we change a MEM inside a
10430 CONST_DOUBLE into the same CONST_DOUBLE. */
10431 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
10432 return x;
10434 for (i = n_replacements - 1; i >= 0 ; i--)
10435 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
10436 return replacements[i*2+1];
10438 /* Allow this function to make replacements in EXPR_LISTs. */
10439 if (x == 0)
10440 return 0;
10442 if (GET_CODE (x) == SUBREG)
10444 rtx new = replace_n_hard_rtx (SUBREG_REG (x), replacements,
10445 n_replacements, modify);
10447 if (GET_CODE (new) == CONST_INT)
10449 x = simplify_subreg (GET_MODE (x), new,
10450 GET_MODE (SUBREG_REG (x)),
10451 SUBREG_BYTE (x));
10452 if (! x)
10453 abort ();
10455 else if (modify)
10456 SUBREG_REG (x) = new;
10458 return x;
10460 else if (GET_CODE (x) == REG)
10462 unsigned regno = REGNO (x);
10463 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
10464 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
10465 rtx result = NULL_RTX;
10467 for (i = n_replacements - 1; i >= 0; i--)
10469 rtx from = replacements[i*2];
10470 rtx to = replacements[i*2+1];
10471 unsigned from_regno, from_nregs, to_regno, new_regno;
10473 if (GET_CODE (from) != REG)
10474 continue;
10475 from_regno = REGNO (from);
10476 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
10477 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
10478 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
10480 if (regno < from_regno
10481 || regno + nregs > from_regno + nregs
10482 || GET_CODE (to) != REG
10483 || result)
10484 return NULL_RTX;
10485 to_regno = REGNO (to);
10486 if (to_regno < FIRST_PSEUDO_REGISTER)
10488 new_regno = regno + to_regno - from_regno;
10489 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
10490 != nregs)
10491 return NULL_RTX;
10492 result = gen_rtx_REG (GET_MODE (x), new_regno);
10494 else if (GET_MODE (x) <= GET_MODE (to))
10495 result = gen_lowpart_common (GET_MODE (x), to);
10496 else
10497 result = gen_lowpart_SUBREG (GET_MODE (x), to);
10500 return result ? result : x;
10502 else if (GET_CODE (x) == ZERO_EXTEND)
10504 rtx new = replace_n_hard_rtx (XEXP (x, 0), replacements,
10505 n_replacements, modify);
10507 if (GET_CODE (new) == CONST_INT)
10509 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
10510 new, GET_MODE (XEXP (x, 0)));
10511 if (! x)
10512 abort ();
10514 else if (modify)
10515 XEXP (x, 0) = new;
10517 return x;
10520 fmt = GET_RTX_FORMAT (GET_CODE (x));
10521 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10523 rtx new;
10525 if (fmt[i] == 'e')
10527 new = replace_n_hard_rtx (XEXP (x, i), replacements,
10528 n_replacements, modify);
10529 if (!new)
10530 return NULL_RTX;
10531 if (modify)
10532 XEXP (x, i) = new;
10534 else if (fmt[i] == 'E')
10535 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10537 new = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
10538 n_replacements, modify);
10539 if (!new)
10540 return NULL_RTX;
10541 if (modify)
10542 XVECEXP (x, i, j) = new;
10546 return x;
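/* Return an rtx for the value of X in MODE, normally by wrapping it in a
   TRUNCATE.  If X is itself a zero or sign extension of a narrower value,
   reuse that inner value (re-extended if necessary).  NEED_SIGN_EXT is
   nonzero when the high bits of the result must be sign-correct.  */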
10550 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
10552 enum rtx_code code = TRUNCATE;
10554 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
10556 rtx inner = XEXP (x, 0);
10557 enum machine_mode inner_mode = GET_MODE (inner);
10559 if (inner_mode == mode)
10560 return inner;
10561 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
10562 x = inner;
10563 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
10564 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
10566 code = GET_CODE (x);
10567 x = inner;
10570 return gen_rtx_fmt_e (code, mode, x);
10573 /* Called via for_each_rtx after reload, to clean up truncates of
10574 registers that span multiple actual hard registers. */
10576 shmedia_cleanup_truncate (rtx *p, void *n_changes)
10578 rtx x = *p, reg;
10580 if (GET_CODE (x) != TRUNCATE)
10581 return 0;
10582 reg = XEXP (x, 0);
10583 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && GET_CODE (reg) == REG)
10585 enum machine_mode reg_mode = GET_MODE (reg);
10586 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
10587 subreg_lowpart_offset (DImode, reg_mode));
10588 *(int*) n_changes += 1;
10589 return -1;
10591 return 0;
10594 /* Load and store depend on the highpart of the address. However,
10595 set_attr_alternative does not give well-defined results before reload,
10596 so we must look at the rtl ourselves to see if any of the feeding
10597 registers is used in a memref. */
10599 /* Called by sh_contains_memref_p via for_each_rtx. */
10600 static int
10601 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
10603 return (GET_CODE (*loc) == MEM);
10606 /* Return nonzero iff INSN contains a MEM. */
10608 sh_contains_memref_p (rtx insn)
10610 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
10613 /* FNADDR is the MEM expression from a call expander. Return an address
10614 to use in an SHmedia insn pattern. */
10616 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
10618 int is_sym;
10620 fnaddr = XEXP (fnaddr, 0);
10621 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
10622 if (flag_pic && is_sym)
10624 if (! SYMBOL_REF_LOCAL_P (fnaddr))
10626 rtx reg = gen_reg_rtx (Pmode);
10628 /* We must not use GOTPLT for sibcalls, because PIC_REG
10629 must be restored before the PLT code gets to run. */
10630 if (is_sibcall)
10631 emit_insn (gen_symGOT2reg (reg, fnaddr));
10632 else
10633 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
10634 fnaddr = reg;
10636 else
10638 fnaddr = gen_sym2PIC (fnaddr);
10639 PUT_MODE (fnaddr, Pmode);
10642 /* If ptabs might trap, make this visible to the rest of the compiler.
10643 We generally assume that symbols pertain to valid locations, but
10644 it is possible to generate invalid symbols with asm or linker tricks.
10645 In a list of functions where each returns its successor, an invalid
10646 symbol might denote an empty list. */
10647 if (!TARGET_PT_FIXED
10648 && (!is_sym || TARGET_INVALID_SYMBOLS)
10649 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
10651 rtx tr = gen_reg_rtx (PDImode);
10653 emit_insn (gen_ptabs (tr, fnaddr));
10654 fnaddr = tr;
10656 else if (! target_reg_operand (fnaddr, Pmode))
10657 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
10658 return fnaddr;
10661 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
10663 /* This defines the storage for the variable part of a -mboard= option.
10664 It is only required when using the sh-superh-elf target. */
10665 #ifdef _SUPERH_H
10666 const char * boardtype = "7750p2";
10667 const char * osruntime = "bare";
10668 #endif
10670 #include "gt-sh.h"