1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
22 Boston, MA 02110-1301, USA. */
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "insn-config.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "flags.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "function.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "output.h"
38 #include "insn-attr.h"
39 #include "toplev.h"
40 #include "recog.h"
41 #include "c-pragma.h"
42 #include "integrate.h"
43 #include "dwarf2.h"
44 #include "tm_p.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "real.h"
48 #include "langhooks.h"
49 #include "basic-block.h"
50 #include "cfglayout.h"
51 #include "intl.h"
52 #include "sched-int.h"
53 #include "ggc.h"
54 #include "tree-gimple.h"
55 #include "cfgloop.h"
58 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
60 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
61 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
63 /* These are some macros to abstract register modes. */
64 #define CONST_OK_FOR_ADD(size) \
65 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
66 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
67 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
68 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
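/* Example (added for illustration; not part of the original source): a caller
   that wants to add a constant to a register without caring about the
   SHmedia64 word size can write

       emit_insn (GEN_ADD3 (reg, reg, GEN_INT (offset)));

   which expands to gen_adddi3 when TARGET_SHMEDIA64 and to gen_addsi3
   otherwise; GEN_MOV and GEN_SUB3 work the same way for moves and
   subtracts.  Functions such as output_stack_adjust, declared below, are
   the kind of callers these wrappers are meant for.  */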
70 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
71 int current_function_interrupt;
73 /* ??? The pragma interrupt support will not work for SH3. */
74 /* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
75 output code for the next function appropriate for an interrupt handler. */
76 int pragma_interrupt;
78 /* This is set by the trap_exit attribute for functions. It specifies
79 a trap number to be used in a trapa instruction at function exit
80 (instead of an rte instruction). */
81 int trap_exit;
83 /* This is used by the sp_switch attribute for functions. It specifies
84 a variable holding the address of the stack the interrupt function
85 should switch to/from at entry/exit. */
86 rtx sp_switch;
88 /* This is set by #pragma trapa, and is similar to the above, except that
89 the compiler doesn't emit code to preserve all registers. */
90 static int pragma_trapa;
92 /* This is set by #pragma nosave_low_regs. This is useful on the SH3,
93 which has a separate set of low regs for User and Supervisor modes.
94 This should only be used for the lowest level of interrupts. Higher levels
95 of interrupts must save the registers in case they themselves are
96 interrupted. */
97 int pragma_nosave_low_regs;
99 /* This is used for communication between TARGET_SETUP_INCOMING_VARARGS and
100 sh_expand_prologue. */
101 int current_function_anonymous_args;
103 /* Global variables for machine-dependent things. */
105 /* Which cpu are we scheduling for. */
106 enum processor_type sh_cpu;
108 /* Definitions used in ready queue reordering for first scheduling pass. */
110 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
111 static short *regmode_weight[2];
113 /* Total SFmode and SImode weights of scheduled insns. */
114 static int curr_regmode_pressure[2];
116 /* If true, skip cycles for Q -> R movement. */
117 static int skip_cycles = 0;
119 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
120 and returned from sh_reorder2. */
121 static short cached_can_issue_more;
123 /* Saved operands from the last compare to use when we generate an scc
124 or bcc insn. */
126 rtx sh_compare_op0;
127 rtx sh_compare_op1;
129 /* Provides the class number of the smallest class containing
130 reg number. */
132 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
134 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
137 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
138 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
139 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
140 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
141 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
142 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
143 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
144 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
145 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
146 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
147 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
148 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
149 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
150 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
153 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
154 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
155 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
156 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
157 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
158 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
159 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
160 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
161 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
162 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
163 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
164 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
165 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
166 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
167 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
168 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
169 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
170 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
171 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
172 GENERAL_REGS, GENERAL_REGS,
175 char sh_register_names[FIRST_PSEUDO_REGISTER] \
176 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
178 char sh_additional_register_names[ADDREGNAMES_SIZE] \
179 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
180 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
182 /* Provide reg_class from a letter such as appears in the machine
183 description. *: target-independently reserved letter.
184 reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */
186 enum reg_class reg_class_from_letter[] =
188 /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
189 /* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
190 /* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
191 /* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
192 /* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
193 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
194 /* y */ FPUL_REGS, /* z */ R0_REGS
197 int assembler_dialect;
199 static bool shmedia_space_reserved_for_target_registers;
201 static bool sh_handle_option (size_t, const char *, int);
202 static void split_branches (rtx);
203 static int branch_dest (rtx);
204 static void force_into (rtx, rtx);
205 static void print_slot (rtx);
206 static rtx add_constant (rtx, enum machine_mode, rtx);
207 static void dump_table (rtx, rtx);
208 static int hi_const (rtx);
209 static int broken_move (rtx);
210 static int mova_p (rtx);
211 static rtx find_barrier (int, rtx, rtx);
212 static int noncall_uses_reg (rtx, rtx, rtx *);
213 static rtx gen_block_redirect (rtx, int, int);
214 static void sh_reorg (void);
215 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
216 static rtx frame_insn (rtx);
217 static rtx push (int);
218 static void pop (int);
219 static void push_regs (HARD_REG_SET *, int);
220 static int calc_live_regs (HARD_REG_SET *);
221 static void mark_use (rtx, rtx *);
222 static HOST_WIDE_INT rounded_frame_size (int);
223 static rtx mark_constant_pool_use (rtx);
224 const struct attribute_spec sh_attribute_table[];
225 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
226 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
227 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
228 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
229 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
230 static void sh_insert_attributes (tree, tree *);
231 static const char *sh_check_pch_target_flags (int);
232 static int sh_adjust_cost (rtx, rtx, rtx, int);
233 static int sh_issue_rate (void);
234 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
235 static short find_set_regmode_weight (rtx, enum machine_mode);
236 static short find_insn_regmode_weight (rtx, enum machine_mode);
237 static void find_regmode_weight (int, enum machine_mode);
238 static void sh_md_init_global (FILE *, int, int);
239 static void sh_md_finish_global (FILE *, int);
240 static int rank_for_reorder (const void *, const void *);
241 static void swap_reorder (rtx *, int);
242 static void ready_reorder (rtx *, int);
243 static short high_pressure (enum machine_mode);
244 static int sh_reorder (FILE *, int, rtx *, int *, int);
245 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
246 static void sh_md_init (FILE *, int, int);
247 static int sh_variable_issue (FILE *, int, rtx, int);
249 static bool sh_function_ok_for_sibcall (tree, tree);
251 static bool sh_cannot_modify_jumps_p (void);
252 static int sh_target_reg_class (void);
253 static bool sh_optimize_target_register_callee_saved (bool);
254 static bool sh_ms_bitfield_layout_p (tree);
256 static void sh_init_builtins (void);
257 static void sh_media_init_builtins (void);
258 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
259 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
260 static void sh_file_start (void);
261 static int flow_dependent_p (rtx, rtx);
262 static void flow_dependent_p_1 (rtx, rtx, void *);
263 static int shiftcosts (rtx);
264 static int andcosts (rtx);
265 static int addsubcosts (rtx);
266 static int multcosts (rtx);
267 static bool unspec_caller_rtx_p (rtx);
268 static bool sh_cannot_copy_insn_p (rtx);
269 static bool sh_rtx_costs (rtx, int, int, int *);
270 static int sh_address_cost (rtx);
271 #ifdef TARGET_ADJUST_UNROLL_MAX
272 static int sh_adjust_unroll_max (struct loop *, int, int, int, int);
273 #endif
274 static int sh_pr_n_sets (void);
275 static rtx sh_allocate_initial_value (rtx);
276 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
277 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
278 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
279 static int scavenge_reg (HARD_REG_SET *s);
280 struct save_schedule_s;
281 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
282 struct save_schedule_s *, int);
284 static rtx sh_struct_value_rtx (tree, int);
285 static bool sh_return_in_memory (tree, tree);
286 static rtx sh_builtin_saveregs (void);
287 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
288 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
289 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
290 static tree sh_build_builtin_va_list (void);
291 static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
292 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
293 tree, bool);
294 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
295 tree, bool);
296 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
297 tree, bool);
298 static int sh_dwarf_calling_convention (tree);
299 static int hard_regs_intersect_p (HARD_REG_SET *, HARD_REG_SET *);
302 /* Initialize the GCC target structure. */
303 #undef TARGET_ATTRIBUTE_TABLE
304 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
306 /* The next two are used for debug info when compiling with -gdwarf. */
307 #undef TARGET_ASM_UNALIGNED_HI_OP
308 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
309 #undef TARGET_ASM_UNALIGNED_SI_OP
310 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
312 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
313 #undef TARGET_ASM_UNALIGNED_DI_OP
314 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
315 #undef TARGET_ASM_ALIGNED_DI_OP
316 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
318 #undef TARGET_ASM_FUNCTION_EPILOGUE
319 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
321 #undef TARGET_ASM_OUTPUT_MI_THUNK
322 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
324 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
325 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
327 #undef TARGET_ASM_FILE_START
328 #define TARGET_ASM_FILE_START sh_file_start
329 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
330 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
332 #undef TARGET_DEFAULT_TARGET_FLAGS
333 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
334 #undef TARGET_HANDLE_OPTION
335 #define TARGET_HANDLE_OPTION sh_handle_option
337 #undef TARGET_INSERT_ATTRIBUTES
338 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
340 #undef TARGET_SCHED_ADJUST_COST
341 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
343 #undef TARGET_SCHED_ISSUE_RATE
344 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
346 /* The next 5 hooks have been implemented for reenabling sched1. With the
347 help of these macros we are limiting the movement of insns in sched1 to
348 reduce the register pressure. The overall idea is to keep count of SImode
349 and SFmode regs required by already scheduled insns. When these counts
350 cross certain threshold values, give priority to insns that free registers.
351 The insn that frees registers is most likely to be the insn with the lowest
352 LUID (original insn order), but such an insn might be sitting in the stalled
353 queue (Q) instead of the ready queue (R). To solve this, we skip cycles,
354 up to a maximum of 8, so that such insns may move from Q -> R.
356 The descriptions of the hooks are as below:
358 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
359 scheduler; it is called inside the sched_init function just after
360 find_insn_reg_weights function call. It is used to calculate the SImode
361 and SFmode weights of insns of basic blocks, much like what
362 find_insn_reg_weights does.
363 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
365 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
366 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
367 (Q)->(R).
369 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
370 high; reorder the ready queue so that the insn with lowest LUID will be
371 issued next.
373 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
374 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
376 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
377 can be returned from TARGET_SCHED_REORDER2.
379 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
381 #undef TARGET_SCHED_DFA_NEW_CYCLE
382 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
384 #undef TARGET_SCHED_INIT_GLOBAL
385 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
387 #undef TARGET_SCHED_FINISH_GLOBAL
388 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
390 #undef TARGET_SCHED_VARIABLE_ISSUE
391 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
393 #undef TARGET_SCHED_REORDER
394 #define TARGET_SCHED_REORDER sh_reorder
396 #undef TARGET_SCHED_REORDER2
397 #define TARGET_SCHED_REORDER2 sh_reorder2
399 #undef TARGET_SCHED_INIT
400 #define TARGET_SCHED_INIT sh_md_init
402 #undef TARGET_CANNOT_MODIFY_JUMPS_P
403 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
404 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
405 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
406 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
407 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
408 sh_optimize_target_register_callee_saved
410 #undef TARGET_MS_BITFIELD_LAYOUT_P
411 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
413 #undef TARGET_INIT_BUILTINS
414 #define TARGET_INIT_BUILTINS sh_init_builtins
415 #undef TARGET_EXPAND_BUILTIN
416 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
418 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
419 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
421 #undef TARGET_CANNOT_COPY_INSN_P
422 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
423 #undef TARGET_RTX_COSTS
424 #define TARGET_RTX_COSTS sh_rtx_costs
425 #undef TARGET_ADDRESS_COST
426 #define TARGET_ADDRESS_COST sh_address_cost
427 #undef TARGET_ALLOCATE_INITIAL_VALUE
428 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
430 #undef TARGET_MACHINE_DEPENDENT_REORG
431 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
433 #ifdef HAVE_AS_TLS
434 #undef TARGET_HAVE_TLS
435 #define TARGET_HAVE_TLS true
436 #endif
438 #undef TARGET_PROMOTE_PROTOTYPES
439 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
440 #undef TARGET_PROMOTE_FUNCTION_ARGS
441 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
442 #undef TARGET_PROMOTE_FUNCTION_RETURN
443 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
445 #undef TARGET_STRUCT_VALUE_RTX
446 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
447 #undef TARGET_RETURN_IN_MEMORY
448 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
450 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
451 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
452 #undef TARGET_SETUP_INCOMING_VARARGS
453 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
454 #undef TARGET_STRICT_ARGUMENT_NAMING
455 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
456 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
457 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
458 #undef TARGET_MUST_PASS_IN_STACK
459 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
460 #undef TARGET_PASS_BY_REFERENCE
461 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
462 #undef TARGET_CALLEE_COPIES
463 #define TARGET_CALLEE_COPIES sh_callee_copies
464 #undef TARGET_ARG_PARTIAL_BYTES
465 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
467 #undef TARGET_BUILD_BUILTIN_VA_LIST
468 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
469 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
470 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
472 #undef TARGET_VECTOR_MODE_SUPPORTED_P
473 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
475 #undef TARGET_CHECK_PCH_TARGET_FLAGS
476 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
478 #undef TARGET_DWARF_CALLING_CONVENTION
479 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
481 /* Return regmode weight for insn. */
482 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
484 /* Return current register pressure for regmode. */
485 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
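/* Illustrative sketch (added; the exact bookkeeping lives in the scheduling
   hooks defined below and may differ in detail): as insns of a block are
   scheduled, their per-mode weights are accumulated into the pressure
   counters, roughly

       CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);

   and high_pressure (SImode) then compares that counter against a
   mode-specific threshold to decide whether the ready queue should be
   reordered.  */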
487 #ifdef SYMBIAN
489 #undef TARGET_ENCODE_SECTION_INFO
490 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
491 #undef TARGET_STRIP_NAME_ENCODING
492 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
493 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
494 #define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class
496 #endif /* SYMBIAN */
498 #ifdef TARGET_ADJUST_UNROLL_MAX
499 #undef TARGET_ADJUST_UNROLL_MAX
500 #define TARGET_ADJUST_UNROLL_MAX sh_adjust_unroll_max
501 #endif
503 struct gcc_target targetm = TARGET_INITIALIZER;
505 /* Implement TARGET_HANDLE_OPTION. */
507 static bool
508 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
509 int value ATTRIBUTE_UNUSED)
511 switch (code)
513 case OPT_m1:
514 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
515 return true;
517 case OPT_m2:
518 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
519 return true;
521 case OPT_m2a:
522 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
523 return true;
525 case OPT_m2a_nofpu:
526 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
527 return true;
529 case OPT_m2a_single:
530 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
531 return true;
533 case OPT_m2a_single_only:
534 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
535 return true;
537 case OPT_m2e:
538 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
539 return true;
541 case OPT_m3:
542 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
543 return true;
545 case OPT_m3e:
546 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
547 return true;
549 case OPT_m4:
550 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
551 return true;
553 case OPT_m4_nofpu:
554 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
555 return true;
557 case OPT_m4_single:
558 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
559 return true;
561 case OPT_m4_single_only:
562 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
563 return true;
565 case OPT_m4a:
566 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
567 return true;
569 case OPT_m4a_nofpu:
570 case OPT_m4al:
571 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
572 return true;
574 case OPT_m4a_single:
575 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
576 return true;
578 case OPT_m4a_single_only:
579 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
580 return true;
582 case OPT_m5_32media:
583 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
584 return true;
586 case OPT_m5_32media_nofpu:
587 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
588 return true;
590 case OPT_m5_64media:
591 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
592 return true;
594 case OPT_m5_64media_nofpu:
595 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
596 return true;
598 case OPT_m5_compact:
599 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
600 return true;
602 case OPT_m5_compact_nofpu:
603 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
604 return true;
606 default:
607 return true;
611 /* Print the operand address in x to the stream. */
613 void
614 print_operand_address (FILE *stream, rtx x)
616 switch (GET_CODE (x))
618 case REG:
619 case SUBREG:
620 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
621 break;
623 case PLUS:
625 rtx base = XEXP (x, 0);
626 rtx index = XEXP (x, 1);
628 switch (GET_CODE (index))
630 case CONST_INT:
631 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
632 reg_names[true_regnum (base)]);
633 break;
635 case REG:
636 case SUBREG:
638 int base_num = true_regnum (base);
639 int index_num = true_regnum (index);
641 fprintf (stream, "@(r0,%s)",
642 reg_names[MAX (base_num, index_num)]);
643 break;
646 default:
647 gcc_unreachable ();
650 break;
652 case PRE_DEC:
653 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
654 break;
656 case POST_INC:
657 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
658 break;
660 default:
661 x = mark_constant_pool_use (x);
662 output_addr_const (stream, x);
663 break;
667 /* Print operand x (an rtx) in assembler syntax to file stream
668 according to modifier code.
670 '.' print a .s if insn needs delay slot
671 ',' print LOCAL_LABEL_PREFIX
672 '@' print trap, rte or rts depending upon whether the function is an interrupt handler
673 '#' output a nop if there is nothing to put in the delay slot
674 ''' print likelihood suffix (/u for unlikely).
675 '>' print branch target if -fverbose-asm
676 'O' print a constant without the #
677 'R' print the LSW of a dp value - changes if in little endian
678 'S' print the MSW of a dp value - changes if in little endian
679 'T' print the next word of a dp value - same as 'R' in big endian mode.
680 'M' print an `x' if `m' will print `base,index'.
681 'N' print 'r63' if the operand is (const_int 0).
682 'd' print a V2SF reg as dN instead of fpN.
683 'm' print a pair `base,offset' or `base,index', for LD and ST.
684 'U' Likewise for {LD,ST}{HI,LO}.
685 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
686 'o' output an operator. */
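/* Illustration (added note): the double-register move templates later in
   this file, e.g. "sts mach,%S0\n\tsts macl,%R0" and
   "mov %1,%0\n\tmov %T1,%T0" in output_movedouble, rely on the 'S', 'R'
   and 'T' codes above; print_operand resolves 'S' and 'R' through the
   MSW/LSW macros so a single template is correct for either endianness,
   while 'T' simply names the following register of the pair.  */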
688 void
689 print_operand (FILE *stream, rtx x, int code)
691 int regno;
692 enum machine_mode mode;
694 switch (code)
696 case '.':
697 if (final_sequence
698 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
699 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
700 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
701 break;
702 case ',':
703 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
704 break;
705 case '@':
706 if (trap_exit)
707 fprintf (stream, "trapa #%d", trap_exit);
708 else if (sh_cfun_interrupt_handler_p ())
709 fprintf (stream, "rte");
710 else
711 fprintf (stream, "rts");
712 break;
713 case '#':
714 /* Output a nop if there's nothing in the delay slot. */
715 if (dbr_sequence_length () == 0)
716 fprintf (stream, "\n\tnop");
717 break;
718 case '\'':
720 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
722 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
723 fputs ("/u", stream);
724 break;
726 case '>':
727 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
729 fputs ("\t! target: ", stream);
730 output_addr_const (stream, JUMP_LABEL (current_output_insn));
732 break;
733 case 'O':
734 x = mark_constant_pool_use (x);
735 output_addr_const (stream, x);
736 break;
737 case 'R':
738 fputs (reg_names[REGNO (x) + LSW], (stream));
739 break;
740 case 'S':
741 fputs (reg_names[REGNO (x) + MSW], (stream));
742 break;
743 case 'T':
744 /* Next word of a double. */
745 switch (GET_CODE (x))
747 case REG:
748 fputs (reg_names[REGNO (x) + 1], (stream));
749 break;
750 case MEM:
751 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
752 && GET_CODE (XEXP (x, 0)) != POST_INC)
753 x = adjust_address (x, SImode, 4);
754 print_operand_address (stream, XEXP (x, 0));
755 break;
756 default:
757 break;
759 break;
760 case 'o':
761 switch (GET_CODE (x))
763 case PLUS: fputs ("add", stream); break;
764 case MINUS: fputs ("sub", stream); break;
765 case MULT: fputs ("mul", stream); break;
766 case DIV: fputs ("div", stream); break;
767 case EQ: fputs ("eq", stream); break;
768 case NE: fputs ("ne", stream); break;
769 case GT: case LT: fputs ("gt", stream); break;
770 case GE: case LE: fputs ("ge", stream); break;
771 case GTU: case LTU: fputs ("gtu", stream); break;
772 case GEU: case LEU: fputs ("geu", stream); break;
773 default:
774 break;
776 break;
777 case 'M':
778 if (GET_CODE (x) == MEM
779 && GET_CODE (XEXP (x, 0)) == PLUS
780 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
781 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
782 fputc ('x', stream);
783 break;
785 case 'm':
786 gcc_assert (GET_CODE (x) == MEM);
787 x = XEXP (x, 0);
788 /* Fall through. */
789 case 'U':
790 switch (GET_CODE (x))
792 case REG:
793 case SUBREG:
794 print_operand (stream, x, 0);
795 fputs (", 0", stream);
796 break;
798 case PLUS:
799 print_operand (stream, XEXP (x, 0), 0);
800 fputs (", ", stream);
801 print_operand (stream, XEXP (x, 1), 0);
802 break;
804 default:
805 gcc_unreachable ();
807 break;
809 case 'd':
810 gcc_assert (GET_CODE (x) == REG && GET_MODE (x) == V2SFmode);
812 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
813 break;
815 case 'N':
816 if (x == CONST0_RTX (GET_MODE (x)))
818 fprintf ((stream), "r63");
819 break;
821 goto default_output;
822 case 'u':
823 if (GET_CODE (x) == CONST_INT)
825 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
826 break;
828 /* Fall through. */
830 default_output:
831 default:
832 regno = 0;
833 mode = GET_MODE (x);
835 switch (GET_CODE (x))
837 case TRUNCATE:
839 rtx inner = XEXP (x, 0);
840 int offset = 0;
841 enum machine_mode inner_mode;
843 /* We might see SUBREGs with vector mode registers inside. */
844 if (GET_CODE (inner) == SUBREG
845 && (GET_MODE_SIZE (GET_MODE (inner))
846 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
847 && subreg_lowpart_p (inner))
848 inner = SUBREG_REG (inner);
849 if (GET_CODE (inner) == CONST_INT)
851 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
852 goto default_output;
854 inner_mode = GET_MODE (inner);
855 if (GET_CODE (inner) == SUBREG
856 && (GET_MODE_SIZE (GET_MODE (inner))
857 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
858 && GET_CODE (SUBREG_REG (inner)) == REG)
860 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
861 GET_MODE (SUBREG_REG (inner)),
862 SUBREG_BYTE (inner),
863 GET_MODE (inner));
864 inner = SUBREG_REG (inner);
866 if (GET_CODE (inner) != REG || GET_MODE_SIZE (inner_mode) > 8)
867 abort ();
868 /* Floating point register pairs are always big endian;
869 general purpose registers are 64 bit wide. */
870 regno = REGNO (inner);
871 regno = (HARD_REGNO_NREGS (regno, inner_mode)
872 - HARD_REGNO_NREGS (regno, mode))
873 + offset;
874 x = inner;
875 goto reg;
877 case SIGN_EXTEND:
878 x = XEXP (x, 0);
879 goto reg;
880 /* FIXME: We need this on SHmedia32 because reload generates
881 some sign-extended HI or QI loads into DImode registers
882 but, because Pmode is SImode, the address ends up with a
883 subreg:SI of the DImode register. Maybe reload should be
884 fixed so as to apply alter_subreg to such loads? */
885 case IF_THEN_ELSE:
886 gcc_assert (trapping_target_operand (x, VOIDmode));
887 x = XEXP (XEXP (x, 2), 0);
888 goto default_output;
889 case SUBREG:
890 gcc_assert (SUBREG_BYTE (x) == 0
891 && GET_CODE (SUBREG_REG (x)) == REG);
893 x = SUBREG_REG (x);
894 /* Fall through. */
896 reg:
897 case REG:
898 regno += REGNO (x);
899 if (FP_REGISTER_P (regno)
900 && mode == V16SFmode)
901 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
902 else if (FP_REGISTER_P (REGNO (x))
903 && mode == V4SFmode)
904 fprintf ((stream), "fv%s", reg_names[regno] + 2);
905 else if (GET_CODE (x) == REG
906 && mode == V2SFmode)
907 fprintf ((stream), "fp%s", reg_names[regno] + 2);
908 else if (FP_REGISTER_P (REGNO (x))
909 && GET_MODE_SIZE (mode) > 4)
910 fprintf ((stream), "d%s", reg_names[regno] + 1);
911 else
912 fputs (reg_names[regno], (stream));
913 break;
915 case MEM:
916 output_address (XEXP (x, 0));
917 break;
919 case CONST:
920 if (TARGET_SHMEDIA
921 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
922 && (GET_MODE (XEXP (x, 0)) == DImode
923 || GET_MODE (XEXP (x, 0)) == SImode)
924 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
925 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
927 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
929 fputc ('(', stream);
930 if (GET_CODE (val) == ASHIFTRT)
932 fputc ('(', stream);
933 if (GET_CODE (XEXP (val, 0)) == CONST)
934 fputc ('(', stream);
935 output_addr_const (stream, XEXP (val, 0));
936 if (GET_CODE (XEXP (val, 0)) == CONST)
937 fputc (')', stream);
938 fputs (" >> ", stream);
939 output_addr_const (stream, XEXP (val, 1));
940 fputc (')', stream);
942 else
944 if (GET_CODE (val) == CONST)
945 fputc ('(', stream);
946 output_addr_const (stream, val);
947 if (GET_CODE (val) == CONST)
948 fputc (')', stream);
950 fputs (" & 65535)", stream);
951 break;
954 /* Fall through. */
955 default:
956 if (TARGET_SH1)
957 fputc ('#', stream);
958 output_addr_const (stream, x);
959 break;
961 break;
965 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
966 static void
967 force_into (rtx value, rtx target)
969 value = force_operand (value, target);
970 if (! rtx_equal_p (value, target))
971 emit_insn (gen_move_insn (target, value));
974 /* Emit code to perform a block move. Choose the best method.
976 OPERANDS[0] is the destination.
977 OPERANDS[1] is the source.
978 OPERANDS[2] is the size.
979 OPERANDS[3] is the alignment safe to use. */
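/* Sketch of the intended use (added for illustration; the authoritative
   caller is the movmem expander in sh.md): the expander is expected to pass
   its four operands straight through and fall back to a library call when
   this function returns 0, roughly

       if (expand_block_move (operands))
         DONE;
       else
         FAIL;
 */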
982 expand_block_move (rtx *operands)
984 int align = INTVAL (operands[3]);
985 int constp = (GET_CODE (operands[2]) == CONST_INT);
986 int bytes = (constp ? INTVAL (operands[2]) : 0);
988 if (! constp)
989 return 0;
991 /* If we could use mov.l to move words and dest is word-aligned, we
992 can use movua.l for loads and still generate a relatively short
993 and efficient sequence. */
994 if (TARGET_SH4A_ARCH && align < 4
995 && MEM_ALIGN (operands[0]) >= 32
996 && can_move_by_pieces (bytes, 32))
998 rtx dest = copy_rtx (operands[0]);
999 rtx src = copy_rtx (operands[1]);
1000 /* We could use different pseudos for each copied word, but
1001 since movua can only load into r0, it's kind of
1002 pointless. */
1003 rtx temp = gen_reg_rtx (SImode);
1004 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1005 int copied = 0;
1007 while (copied + 4 <= bytes)
1009 rtx to = adjust_address (dest, SImode, copied);
1010 rtx from = adjust_automodify_address (src, SImode, src_addr, copied);
1012 emit_insn (gen_movua (temp, from));
1013 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1014 emit_move_insn (to, temp);
1015 copied += 4;
1018 if (copied < bytes)
1019 move_by_pieces (adjust_address (dest, BLKmode, copied),
1020 adjust_automodify_address (src, BLKmode,
1021 src_addr, copied),
1022 bytes - copied, align, 0);
1024 return 1;
1027 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1028 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1029 if (align < 4 || (bytes % 4 != 0))
1030 return 0;
1032 if (TARGET_HARD_SH4)
1034 if (bytes < 12)
1035 return 0;
1036 else if (bytes == 12)
1038 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1039 rtx r4 = gen_rtx_REG (SImode, 4);
1040 rtx r5 = gen_rtx_REG (SImode, 5);
1042 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1043 force_into (XEXP (operands[0], 0), r4);
1044 force_into (XEXP (operands[1], 0), r5);
1045 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1046 return 1;
1048 else if (! TARGET_SMALLCODE)
1050 const char *entry_name;
1051 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1052 int dwords;
1053 rtx r4 = gen_rtx_REG (SImode, 4);
1054 rtx r5 = gen_rtx_REG (SImode, 5);
1055 rtx r6 = gen_rtx_REG (SImode, 6);
1057 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1058 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1059 force_into (XEXP (operands[0], 0), r4);
1060 force_into (XEXP (operands[1], 0), r5);
1062 dwords = bytes >> 3;
1063 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1064 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1065 return 1;
1067 else
1068 return 0;
1070 if (bytes < 64)
1072 char entry[30];
1073 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1074 rtx r4 = gen_rtx_REG (SImode, 4);
1075 rtx r5 = gen_rtx_REG (SImode, 5);
1077 sprintf (entry, "__movmemSI%d", bytes);
1078 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1079 force_into (XEXP (operands[0], 0), r4);
1080 force_into (XEXP (operands[1], 0), r5);
1081 emit_insn (gen_block_move_real (func_addr_rtx));
1082 return 1;
1085 /* This is the same number of bytes as a memcpy call, but to a different
1086 less common function name, so this will occasionally use more space. */
1087 if (! TARGET_SMALLCODE)
1089 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1090 int final_switch, while_loop;
1091 rtx r4 = gen_rtx_REG (SImode, 4);
1092 rtx r5 = gen_rtx_REG (SImode, 5);
1093 rtx r6 = gen_rtx_REG (SImode, 6);
1095 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1096 force_into (XEXP (operands[0], 0), r4);
1097 force_into (XEXP (operands[1], 0), r5);
1099 /* r6 controls the size of the move. 16 is decremented from it
1100 for each 64 bytes moved. Then the negative value left over is used
1101 as an index into a list of move instructions. e.g., a 72 byte move
1102 would be set up with size(r6) = 14, for one iteration through the
1103 big while loop, and a switch of -2 for the last part. */
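/* Worked numbers for the 72 byte example above (added for clarity):
   bytes / 4 = 18, so final_switch = 16 - (18 % 16) = 14 and
   while_loop = (18 / 16 - 1) * 16 = 0, giving r6 = 0 + 14 = 14;
   after the single 64-byte loop iteration subtracts 16, r6 = -2,
   which selects the moves for the remaining 8 bytes.  */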
1105 final_switch = 16 - ((bytes / 4) % 16);
1106 while_loop = ((bytes / 4) / 16 - 1) * 16;
1107 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1108 emit_insn (gen_block_lump_real (func_addr_rtx));
1109 return 1;
1112 return 0;
1115 /* Prepare operands for a move define_expand; specifically, one of the
1116 operands must be in a register. */
1119 prepare_move_operands (rtx operands[], enum machine_mode mode)
1121 if ((mode == SImode || mode == DImode)
1122 && flag_pic
1123 && ! ((mode == Pmode || mode == ptr_mode)
1124 && tls_symbolic_operand (operands[1], Pmode) != 0))
1126 rtx temp;
1127 if (SYMBOLIC_CONST_P (operands[1]))
1129 if (GET_CODE (operands[0]) == MEM)
1130 operands[1] = force_reg (Pmode, operands[1]);
1131 else if (TARGET_SHMEDIA
1132 && GET_CODE (operands[1]) == LABEL_REF
1133 && target_reg_operand (operands[0], mode))
1134 /* It's ok. */;
1135 else
1137 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
1138 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1141 else if (GET_CODE (operands[1]) == CONST
1142 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1143 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1145 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
1146 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1147 mode, temp);
1148 operands[1] = expand_binop (mode, add_optab, temp,
1149 XEXP (XEXP (operands[1], 0), 1),
1150 no_new_pseudos ? temp
1151 : gen_reg_rtx (Pmode),
1152 0, OPTAB_LIB_WIDEN);
1156 if (! reload_in_progress && ! reload_completed)
1158 /* Copy the source to a register if both operands aren't registers. */
1159 if (! register_operand (operands[0], mode)
1160 && ! sh_register_operand (operands[1], mode))
1161 operands[1] = copy_to_mode_reg (mode, operands[1]);
1163 if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
1165 /* This is like change_address_1 (operands[0], mode, 0, 1),
1166 except that we can't use that function because it is static. */
1167 rtx new = change_address (operands[0], mode, 0);
1168 MEM_COPY_ATTRIBUTES (new, operands[0]);
1169 operands[0] = new;
1172 /* This case can happen while generating code to move the result
1173 of a library call to the target. Reject `st r0,@(rX,rY)' because
1174 reload will fail to find a spill register for rX, since r0 is already
1175 being used for the source. */
1176 else if (TARGET_SH1
1177 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1178 && GET_CODE (operands[0]) == MEM
1179 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1180 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
1181 operands[1] = copy_to_mode_reg (mode, operands[1]);
1184 if (mode == Pmode || mode == ptr_mode)
1186 rtx op0, op1;
1187 enum tls_model tls_kind;
1189 op0 = operands[0];
1190 op1 = operands[1];
1191 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
1193 rtx tga_op1, tga_ret, tmp, tmp2;
1195 switch (tls_kind)
1197 case TLS_MODEL_GLOBAL_DYNAMIC:
1198 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1199 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1200 op1 = tga_ret;
1201 break;
1203 case TLS_MODEL_LOCAL_DYNAMIC:
1204 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1205 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1207 tmp = gen_reg_rtx (Pmode);
1208 emit_move_insn (tmp, tga_ret);
1210 if (register_operand (op0, Pmode))
1211 tmp2 = op0;
1212 else
1213 tmp2 = gen_reg_rtx (Pmode);
1215 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1216 op1 = tmp2;
1217 break;
1219 case TLS_MODEL_INITIAL_EXEC:
1220 if (! flag_pic)
1222 /* Don't schedule insns for getting GOT address when
1223 the first scheduling is enabled, to avoid spill
1224 failures for R0. */
1225 if (flag_schedule_insns)
1226 emit_insn (gen_blockage ());
1227 emit_insn (gen_GOTaddr2picreg ());
1228 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode,
1229 PIC_REG)));
1230 if (flag_schedule_insns)
1231 emit_insn (gen_blockage ());
1233 tga_op1 = no_new_pseudos ? op0 : gen_reg_rtx (Pmode);
1234 tmp = gen_sym2GOTTPOFF (op1);
1235 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1236 op1 = tga_op1;
1237 break;
1239 case TLS_MODEL_LOCAL_EXEC:
1240 tmp2 = gen_reg_rtx (Pmode);
1241 emit_insn (gen_load_gbr (tmp2));
1242 tmp = gen_reg_rtx (Pmode);
1243 emit_insn (gen_symTPOFF2reg (tmp, op1));
1245 if (register_operand (op0, Pmode))
1246 op1 = op0;
1247 else
1248 op1 = gen_reg_rtx (Pmode);
1250 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1251 break;
1253 default:
1254 gcc_unreachable ();
1256 operands[1] = op1;
1260 return 0;
1263 /* Prepare the operands for an scc instruction; make sure that the
1264 compare has been done. */
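/* Example (added): there is no direct "set if less than" form for the
   T bit, so for code == LT the switch below rewrites the comparison as
   GT and swaps sh_compare_op0/sh_compare_op1; "a < b" is thus computed
   as "b > a", and the caller reads the result out of T_REG.  */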
1266 prepare_scc_operands (enum rtx_code code)
1268 rtx t_reg = gen_rtx_REG (SImode, T_REG);
1269 enum rtx_code oldcode = code;
1270 enum machine_mode mode;
1272 /* First need a compare insn. */
1273 switch (code)
1275 case NE:
1276 /* It isn't possible to handle this case. */
1277 gcc_unreachable ();
1278 case LT:
1279 code = GT;
1280 break;
1281 case LE:
1282 code = GE;
1283 break;
1284 case LTU:
1285 code = GTU;
1286 break;
1287 case LEU:
1288 code = GEU;
1289 break;
1290 default:
1291 break;
1293 if (code != oldcode)
1295 rtx tmp = sh_compare_op0;
1296 sh_compare_op0 = sh_compare_op1;
1297 sh_compare_op1 = tmp;
1300 mode = GET_MODE (sh_compare_op0);
1301 if (mode == VOIDmode)
1302 mode = GET_MODE (sh_compare_op1);
1304 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1305 if ((code != EQ && code != NE
1306 && (sh_compare_op1 != const0_rtx
1307 || code == GTU || code == GEU || code == LTU || code == LEU))
1308 || (mode == DImode && sh_compare_op1 != const0_rtx)
1309 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1310 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1312 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1313 (mode == SFmode ? emit_sf_insn : emit_df_insn)
1314 (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
1315 gen_rtx_SET (VOIDmode, t_reg,
1316 gen_rtx_fmt_ee (code, SImode,
1317 sh_compare_op0, sh_compare_op1)),
1318 gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
1319 else
1320 emit_insn (gen_rtx_SET (VOIDmode, t_reg,
1321 gen_rtx_fmt_ee (code, SImode,
1322 sh_compare_op0, sh_compare_op1)));
1324 return t_reg;
1327 /* Called from the md file, set up the operands of a compare instruction. */
1329 void
1330 from_compare (rtx *operands, int code)
1332 enum machine_mode mode = GET_MODE (sh_compare_op0);
1333 rtx insn;
1334 if (mode == VOIDmode)
1335 mode = GET_MODE (sh_compare_op1);
1336 if (code != EQ
1337 || mode == DImode
1338 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1340 /* Force args into regs, since we can't use constants here. */
1341 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1342 if (sh_compare_op1 != const0_rtx
1343 || code == GTU || code == GEU
1344 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1345 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1347 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
1349 from_compare (operands, GT);
1350 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
1352 else
1353 insn = gen_rtx_SET (VOIDmode,
1354 gen_rtx_REG (SImode, T_REG),
1355 gen_rtx_fmt_ee (code, SImode,
1356 sh_compare_op0, sh_compare_op1));
1357 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1359 insn = gen_rtx_PARALLEL (VOIDmode,
1360 gen_rtvec (2, insn,
1361 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
1362 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
1364 else
1365 emit_insn (insn);
1368 /* Functions to output assembly code. */
1370 /* Return a sequence of instructions to perform DI or DF move.
1372 Since the SH cannot move a DI or DF in one instruction, we have
1373 to take care when we see overlapping source and dest registers. */
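/* Example (added): for a register-to-register DImode move where the
   destination overlaps the top half of the source, e.g. src = (r1,r2)
   and dst = (r2,r3), copying r1->r2 first would clobber r2 before it is
   read; the REGNO (src) + 1 == REGNO (dst) case below therefore emits
   "mov %T1,%T0" before "mov %1,%0" (r2->r3, then r1->r2).  */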
1375 const char *
1376 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
1377 enum machine_mode mode)
1379 rtx dst = operands[0];
1380 rtx src = operands[1];
1382 if (GET_CODE (dst) == MEM
1383 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
1384 return "mov.l %T1,%0\n\tmov.l %1,%0";
1386 if (register_operand (dst, mode)
1387 && register_operand (src, mode))
1389 if (REGNO (src) == MACH_REG)
1390 return "sts mach,%S0\n\tsts macl,%R0";
1392 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1393 when mov.d r1,r0 do r1->r0 then r2->r1. */
1395 if (REGNO (src) + 1 == REGNO (dst))
1396 return "mov %T1,%T0\n\tmov %1,%0";
1397 else
1398 return "mov %1,%0\n\tmov %T1,%T0";
1400 else if (GET_CODE (src) == CONST_INT)
1402 if (INTVAL (src) < 0)
1403 output_asm_insn ("mov #-1,%S0", operands);
1404 else
1405 output_asm_insn ("mov #0,%S0", operands);
1407 return "mov %1,%R0";
1409 else if (GET_CODE (src) == MEM)
1411 int ptrreg = -1;
1412 int dreg = REGNO (dst);
1413 rtx inside = XEXP (src, 0);
1415 switch (GET_CODE (inside))
1417 case REG:
1418 ptrreg = REGNO (inside);
1419 break;
1421 case SUBREG:
1422 ptrreg = subreg_regno (inside);
1423 break;
1425 case PLUS:
1426 ptrreg = REGNO (XEXP (inside, 0));
1427 /* ??? An r0+REG address shouldn't be possible here, because it isn't
1428 an offsettable address. Unfortunately, offsettable addresses use
1429 QImode to check the offset, and a QImode offsettable address
1430 requires r0 for the other operand, which is not currently
1431 supported, so we can't use the 'o' constraint.
1432 Thus we must check for and handle r0+REG addresses here.
1433 We punt for now, since this is likely very rare. */
1434 gcc_assert (GET_CODE (XEXP (inside, 1)) != REG);
1435 break;
1437 case LABEL_REF:
1438 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1439 case POST_INC:
1440 return "mov.l %1,%0\n\tmov.l %1,%T0";
1441 default:
1442 gcc_unreachable ();
1445 /* Work out the safe way to copy. Copy into the second half first. */
1446 if (dreg == ptrreg)
1447 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1450 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1453 /* Print an instruction which would have gone into a delay slot after
1454 another instruction, but couldn't because the other instruction expanded
1455 into a sequence where putting the slot insn at the end wouldn't work. */
1457 static void
1458 print_slot (rtx insn)
1460 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
1462 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
1465 const char *
1466 output_far_jump (rtx insn, rtx op)
1468 struct { rtx lab, reg, op; } this;
1469 rtx braf_base_lab = NULL_RTX;
1470 const char *jump;
1471 int far;
1472 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1473 rtx prev;
1475 this.lab = gen_label_rtx ();
1477 if (TARGET_SH2
1478 && offset >= -32764
1479 && offset - get_attr_length (insn) <= 32766)
1481 far = 0;
1482 jump = "mov.w %O0,%1; braf %1";
1484 else
1486 far = 1;
1487 if (flag_pic)
1489 if (TARGET_SH2)
1490 jump = "mov.l %O0,%1; braf %1";
1491 else
1492 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1494 else
1495 jump = "mov.l %O0,%1; jmp @%1";
1497 /* If we have a scratch register available, use it. */
1498 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1499 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1501 this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1502 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1503 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1504 output_asm_insn (jump, &this.lab);
1505 if (dbr_sequence_length ())
1506 print_slot (final_sequence);
1507 else
1508 output_asm_insn ("nop", 0);
1510 else
1512 /* Output the delay slot insn first if any. */
1513 if (dbr_sequence_length ())
1514 print_slot (final_sequence);
1516 this.reg = gen_rtx_REG (SImode, 13);
1517 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1518 Fortunately, MACL is fixed and call-clobbered, and we never
1519 need its value across jumps, so save r13 in it instead of in
1520 the stack. */
1521 if (TARGET_SH5)
1522 output_asm_insn ("lds r13, macl", 0);
1523 else
1524 output_asm_insn ("mov.l r13,@-r15", 0);
1525 output_asm_insn (jump, &this.lab);
1526 if (TARGET_SH5)
1527 output_asm_insn ("sts macl, r13", 0);
1528 else
1529 output_asm_insn ("mov.l @r15+,r13", 0);
1531 if (far && flag_pic && TARGET_SH2)
1533 braf_base_lab = gen_label_rtx ();
1534 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1535 CODE_LABEL_NUMBER (braf_base_lab));
1537 if (far)
1538 output_asm_insn (".align 2", 0);
1539 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1540 this.op = op;
1541 if (far && flag_pic)
1543 if (TARGET_SH2)
1544 this.lab = braf_base_lab;
1545 output_asm_insn (".long %O2-%O0", &this.lab);
1547 else
1548 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
1549 return "";
1552 /* Local label counter, used for constants in the pool and inside
1553 pattern branches. */
1555 static int lf = 100;
1557 /* Output code for ordinary branches. */
1559 const char *
1560 output_branch (int logic, rtx insn, rtx *operands)
1562 switch (get_attr_length (insn))
1564 case 6:
1565 /* This can happen if filling the delay slot has caused a forward
1566 branch to exceed its range (we could reverse it, but only
1567 when we know we won't overextend other branches; this should
1568 best be handled by relaxation).
1569 It can also happen when other condbranches hoist delay slot insn
1570 from their destination, thus leading to code size increase.
1571 But the branch will still be in the range -4092..+4098 bytes. */
1573 if (! TARGET_RELAX)
1575 int label = lf++;
1576 /* The call to print_slot will clobber the operands. */
1577 rtx op0 = operands[0];
1579 /* If the instruction in the delay slot is annulled (true), then
1580 there is no delay slot where we can put it now. The only safe
1581 place for it is after the label. final will do that by default. */
1583 if (final_sequence
1584 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1585 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1587 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
1588 ASSEMBLER_DIALECT ? "/" : ".", label);
1589 print_slot (final_sequence);
1591 else
1592 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
1594 output_asm_insn ("bra\t%l0", &op0);
1595 fprintf (asm_out_file, "\tnop\n");
1596 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1598 return "";
1600 /* When relaxing, handle this like a short branch. The linker
1601 will fix it up if it still doesn't fit after relaxation. */
1602 case 2:
1603 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
1605 /* These are for SH2e, in which we have to account for the
1606 extra nop because of the hardware bug in annulled branches. */
1607 case 8:
1608 if (! TARGET_RELAX)
1610 int label = lf++;
1612 gcc_assert (!final_sequence
1613 || !(INSN_ANNULLED_BRANCH_P
1614 (XVECEXP (final_sequence, 0, 0))));
1615 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
1616 logic ? "f" : "t",
1617 ASSEMBLER_DIALECT ? "/" : ".", label);
1618 fprintf (asm_out_file, "\tnop\n");
1619 output_asm_insn ("bra\t%l0", operands);
1620 fprintf (asm_out_file, "\tnop\n");
1621 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1623 return "";
1625 /* When relaxing, fall through. */
1626 case 4:
1628 char buffer[10];
1630 sprintf (buffer, "b%s%ss\t%%l0",
1631 logic ? "t" : "f",
1632 ASSEMBLER_DIALECT ? "/" : ".");
1633 output_asm_insn (buffer, &operands[0]);
1634 return "nop";
1637 default:
1638 /* There should be no longer branches now - that would
1639 indicate that something has destroyed the branches set
1640 up in machine_dependent_reorg. */
1641 gcc_unreachable ();
1645 const char *
1646 output_branchy_insn (enum rtx_code code, const char *template,
1647 rtx insn, rtx *operands)
1649 rtx next_insn = NEXT_INSN (insn);
1651 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
1653 rtx src = SET_SRC (PATTERN (next_insn));
1654 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
1656 /* Following branch not taken */
1657 operands[9] = gen_label_rtx ();
1658 emit_label_after (operands[9], next_insn);
1659 INSN_ADDRESSES_NEW (operands[9],
1660 INSN_ADDRESSES (INSN_UID (next_insn))
1661 + get_attr_length (next_insn));
1662 return template;
1664 else
1666 int offset = (branch_dest (next_insn)
1667 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
1668 if (offset >= -252 && offset <= 258)
1670 if (GET_CODE (src) == IF_THEN_ELSE)
1671 /* branch_true */
1672 src = XEXP (src, 1);
1673 operands[9] = src;
1674 return template;
1678 operands[9] = gen_label_rtx ();
1679 emit_label_after (operands[9], insn);
1680 INSN_ADDRESSES_NEW (operands[9],
1681 INSN_ADDRESSES (INSN_UID (insn))
1682 + get_attr_length (insn));
1683 return template;
1686 const char *
1687 output_ieee_ccmpeq (rtx insn, rtx *operands)
1689 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
1690 insn, operands);
1693 /* Output the start of the assembler file. */
1695 static void
1696 sh_file_start (void)
1698 default_file_start ();
1700 #ifdef SYMBIAN
1701 /* Declare the .directive section before it is used. */
1702 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
1703 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
1704 #endif
1706 if (TARGET_ELF)
1707 /* We need to show the text section with the proper
1708 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
1709 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
1710 will complain. We can teach GAS specifically about the
1711 default attributes for our choice of text section, but
1712 then we would have to change GAS again if/when we change
1713 the text section name. */
1714 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
1715 else
1716 /* Switch to the data section so that the coffsem symbol
1717 isn't in the text section. */
1718 data_section ();
1720 if (TARGET_LITTLE_ENDIAN)
1721 fputs ("\t.little\n", asm_out_file);
1723 if (!TARGET_ELF)
1725 if (TARGET_SHCOMPACT)
1726 fputs ("\t.mode\tSHcompact\n", asm_out_file);
1727 else if (TARGET_SHMEDIA)
1728 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1729 TARGET_SHMEDIA64 ? 64 : 32);
1733 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
1735 static bool
1736 unspec_caller_rtx_p (rtx pat)
1738 switch (GET_CODE (pat))
1740 case CONST:
1741 return unspec_caller_rtx_p (XEXP (pat, 0));
1742 case PLUS:
1743 case MINUS:
1744 if (unspec_caller_rtx_p (XEXP (pat, 0)))
1745 return true;
1746 return unspec_caller_rtx_p (XEXP (pat, 1));
1747 case UNSPEC:
1748 if (XINT (pat, 1) == UNSPEC_CALLER)
1749 return true;
1750 default:
1751 break;
1754 return false;
1757 /* Indicate that INSN cannot be duplicated. This is true for an insn
1758 that generates a unique label. */
1760 static bool
1761 sh_cannot_copy_insn_p (rtx insn)
1763 rtx pat;
1765 if (!reload_completed || !flag_pic)
1766 return false;
1768 if (GET_CODE (insn) != INSN)
1769 return false;
1770 if (asm_noperands (insn) >= 0)
1771 return false;
1773 pat = PATTERN (insn);
1774 if (GET_CODE (pat) != SET)
1775 return false;
1776 pat = SET_SRC (pat);
1778 if (unspec_caller_rtx_p (pat))
1779 return true;
1781 return false;
1784 /* Actual number of instructions used to make a shift by N. */
1785 static const char ashiftrt_insns[] =
1786 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
1788 /* Left shift and logical right shift are the same. */
1789 static const char shift_insns[] =
1790 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1792 /* Individual shift amounts needed to get the above length sequences.
1793 One bit right shifts clobber the T bit, so when possible, put one bit
1794 shifts in the middle of the sequence, so the ends are eligible for
1795 branch delay slots. */
1796 static const short shift_amounts[32][5] = {
1797 {0}, {1}, {2}, {2, 1},
1798 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
1799 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1800 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
1801 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1802 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1803 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1804 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
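/* As a worked example of how the two tables above fit together: a left
   shift by 13 has shift_insns[13] == 4 and shift_amounts[13] == {8, 2, 1, 2},
   i.e. four constant shifts of 8, 2, 1 and 2 bits (8 + 2 + 1 + 2 == 13),
   with the single-bit shift kept away from the ends of the sequence as
   explained above.  A negative entry denotes a shift in the opposite
   direction, e.g. shift_amounts[14] == {8, -2, 8} shifts left by 8, right
   by 2 and left by 8 again (8 - 2 + 8 == 14); gen_ashift below flips the
   shift code when it sees a negative amount.  */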
1806 /* Likewise, but for shift amounts < 16, up to three highmost bits
1807 might be clobbered. This is typically used when combined with some
1808 kind of sign or zero extension. */
1810 static const char ext_shift_insns[] =
1811 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1813 static const short ext_shift_amounts[32][4] = {
1814 {0}, {1}, {2}, {2, 1},
1815 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
1816 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1817 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
1818 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1819 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1820 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1821 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1823 /* Assuming we have a value that has been sign-extended by at least one bit,
1824 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
1825 to shift it by N without data loss, and quicker than by other means? */
1826 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
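/* ((n) | 8) == 15 holds exactly for n == 7 and n == 15, so those are the
   only shift counts for which the question above is answered yes.  */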
1828 /* This is used in length attributes in sh.md to help compute the length
1829 of arbitrary constant shift instructions. */
1832 shift_insns_rtx (rtx insn)
1834 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1835 int shift_count = INTVAL (XEXP (set_src, 1));
1836 enum rtx_code shift_code = GET_CODE (set_src);
1838 switch (shift_code)
1840 case ASHIFTRT:
1841 return ashiftrt_insns[shift_count];
1842 case LSHIFTRT:
1843 case ASHIFT:
1844 return shift_insns[shift_count];
1845 default:
1846 gcc_unreachable ();
1850 /* Return the cost of a shift. */
1852 static inline int
1853 shiftcosts (rtx x)
1855 int value;
1857 if (TARGET_SHMEDIA)
1858 return 1;
1860 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
1862 if (GET_MODE (x) == DImode
1863 && GET_CODE (XEXP (x, 1)) == CONST_INT
1864 && INTVAL (XEXP (x, 1)) == 1)
1865 return 2;
1867 /* Everything else is invalid, because there is no pattern for it. */
1868 return 10000;
1870 /* If shift by a non constant, then this will be expensive. */
1871 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1872 return SH_DYNAMIC_SHIFT_COST;
1874 value = INTVAL (XEXP (x, 1));
1876 /* Otherwise, return the true cost in instructions. */
1877 if (GET_CODE (x) == ASHIFTRT)
1879 int cost = ashiftrt_insns[value];
1880 /* If SH3, then we put the constant in a reg and use shad. */
1881 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1882 cost = 1 + SH_DYNAMIC_SHIFT_COST;
1883 return cost;
1885 else
1886 return shift_insns[value];
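/* For example, reading the tables above: an arithmetic right shift by 6
   would take ashiftrt_insns[6] == 8 single-shift insns, so its cost is
   capped at 1 + SH_DYNAMIC_SHIFT_COST (load the count and use shad, as
   noted above for SH3), whereas a left or logical right shift by 6 costs
   shift_insns[6] == 3.  These numbers are only illustrative; the effective
   cap depends on how SH_DYNAMIC_SHIFT_COST is defined for the target.  */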
1889 /* Return the cost of an AND operation. */
1891 static inline int
1892 andcosts (rtx x)
1894 int i;
1896 /* Anding with a register is a single cycle and instruction. */
1897 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1898 return 1;
1900 i = INTVAL (XEXP (x, 1));
1902 if (TARGET_SHMEDIA)
1904 if ((GET_CODE (XEXP (x, 1)) == CONST_INT
1905 && CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1906 || EXTRA_CONSTRAINT_C16 (XEXP (x, 1)))
1907 return 1;
1908 else
1909 return 2;
1912 /* These constants are single cycle extu.[bw] instructions. */
1913 if (i == 0xff || i == 0xffff)
1914 return 1;
1915 /* Constants that can be used in an and immediate instruction in a single
1916 cycle, but this requires r0, so make it a little more expensive. */
1917 if (CONST_OK_FOR_K08 (i))
1918 return 2;
1919 /* Constants that can be loaded with a mov immediate and an and.
1920 This case is probably unnecessary. */
1921 if (CONST_OK_FOR_I08 (i))
1922 return 2;
1923 /* Any other constant requires a 2 cycle pc-relative load plus an and.
1924 This case is probably unnecessary. */
1925 return 3;
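/* E.g. (and:SI (reg) (const_int 0xff)) costs 1 because it maps onto a
   single extu.b, while a mask such as 0x7f that only fits the
   and-immediate form costs 2 since that form ties up r0 (assuming
   CONST_OK_FOR_K08 accepts it); any mask that needs a constant-pool load
   falls through to 3.  */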
1928 /* Return the cost of an addition or a subtraction. */
1930 static inline int
1931 addsubcosts (rtx x)
1933 /* Adding a register is a single cycle insn. */
1934 if (GET_CODE (XEXP (x, 1)) == REG
1935 || GET_CODE (XEXP (x, 1)) == SUBREG)
1936 return 1;
1938 /* Likewise for small constants. */
1939 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1940 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
1941 return 1;
1943 if (TARGET_SHMEDIA)
1944 switch (GET_CODE (XEXP (x, 1)))
1946 case CONST:
1947 case LABEL_REF:
1948 case SYMBOL_REF:
1949 return TARGET_SHMEDIA64 ? 5 : 3;
1951 case CONST_INT:
1952 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1953 return 2;
1954 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
1955 return 3;
1956 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
1957 return 4;
1959 /* Fall through. */
1960 default:
1961 return 5;
1964 /* Any other constant requires a 2 cycle pc-relative load plus an
1965 addition. */
1966 return 3;
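/* E.g. (plus:SI (reg) (const_int 4)) costs 1 because 4 satisfies
   CONST_OK_FOR_ADD, whereas adding a constant such as 1000 that is outside
   the add-immediate range costs 3 (a pc-relative load plus the add) on
   non-SHmedia targets.  */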
1969 /* Return the cost of a multiply. */
1970 static inline int
1971 multcosts (rtx x ATTRIBUTE_UNUSED)
1973 if (sh_multcost >= 0)
1974 return sh_multcost;
1975 if (TARGET_SHMEDIA)
1976 /* ??? We have a mul insn, but it has a latency of three, and doesn't
1977 accept constants. Ideally, we would use a cost of one or two and
1978 add the cost of the operand, but disregard the latter when inside loops
1979 and loop invariant code motion is still to follow.
1980 Using a multiply first and splitting it later if it's a loss
1981 doesn't work because of different sign / zero extension semantics
1982 of multiplies vs. shifts. */
1983 return TARGET_SMALLCODE ? 2 : 3;
1985 if (TARGET_SH2)
1987 /* We have a mul insn, so we can never take more than the mul and the
1988 read of the mac reg, but count more because of the latency and extra
1989 reg usage. */
1990 if (TARGET_SMALLCODE)
1991 return 2;
1992 return 3;
1995 /* If we're aiming at small code, then just count the number of
1996 insns in a multiply call sequence. */
1997 if (TARGET_SMALLCODE)
1998 return 5;
2000 /* Otherwise count all the insns in the routine we'd be calling too. */
2001 return 20;
2004 /* Compute a (partial) cost for rtx X. Return true if the complete
2005 cost has been computed, and false if subexpressions should be
2006 scanned. In either case, *TOTAL contains the cost result. */
2008 static bool
2009 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
2011 switch (code)
2013 case CONST_INT:
2014 if (TARGET_SHMEDIA)
2016 if (INTVAL (x) == 0)
2017 *total = 0;
2018 else if (outer_code == AND && and_operand ((x), DImode))
2019 *total = 0;
2020 else if ((outer_code == IOR || outer_code == XOR
2021 || outer_code == PLUS)
2022 && CONST_OK_FOR_I10 (INTVAL (x)))
2023 *total = 0;
2024 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2025 *total = COSTS_N_INSNS (outer_code != SET);
2026 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2027 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2028 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2029 *total = COSTS_N_INSNS (3);
2030 else
2031 *total = COSTS_N_INSNS (4);
2032 return true;
2034 if (CONST_OK_FOR_I08 (INTVAL (x)))
2035 *total = 0;
2036 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2037 && CONST_OK_FOR_K08 (INTVAL (x)))
2038 *total = 1;
2039 else
2040 *total = 8;
2041 return true;
2043 case CONST:
2044 case LABEL_REF:
2045 case SYMBOL_REF:
2046 if (TARGET_SHMEDIA64)
2047 *total = COSTS_N_INSNS (4);
2048 else if (TARGET_SHMEDIA32)
2049 *total = COSTS_N_INSNS (2);
2050 else
2051 *total = 5;
2052 return true;
2054 case CONST_DOUBLE:
2055 if (TARGET_SHMEDIA)
2056 *total = COSTS_N_INSNS (4);
2057 else
2058 *total = 10;
2059 return true;
2060 case CONST_VECTOR:
2061 if (x == CONST0_RTX (GET_MODE (x)))
2062 *total = 0;
2063 else if (sh_1el_vec (x, VOIDmode))
2064 *total = outer_code != SET;
2065 if (sh_rep_vec (x, VOIDmode))
2066 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2067 + (outer_code != SET));
2068 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2069 return true;
2071 case PLUS:
2072 case MINUS:
2073 *total = COSTS_N_INSNS (addsubcosts (x));
2074 return true;
2076 case AND:
2077 *total = COSTS_N_INSNS (andcosts (x));
2078 return true;
2080 case MULT:
2081 *total = COSTS_N_INSNS (multcosts (x));
2082 return true;
2084 case ASHIFT:
2085 case ASHIFTRT:
2086 case LSHIFTRT:
2087 *total = COSTS_N_INSNS (shiftcosts (x));
2088 return true;
2090 case DIV:
2091 case UDIV:
2092 case MOD:
2093 case UMOD:
2094 *total = COSTS_N_INSNS (20);
2095 return true;
2097 case PARALLEL:
2098 if (sh_1el_vec (x, VOIDmode))
2099 *total = outer_code != SET;
2100 if (sh_rep_vec (x, VOIDmode))
2101 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2102 + (outer_code != SET));
2103 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2104 return true;
2106 case FLOAT:
2107 case FIX:
2108 *total = 100;
2109 return true;
2111 default:
2112 return false;
2116 /* Compute the cost of an address. For the SH, all valid addresses are
2117 the same cost. Use a slightly higher cost for reg + reg addressing,
2118 since it increases pressure on r0. */
2120 static int
2121 sh_address_cost (rtx X)
2123 return (GET_CODE (X) == PLUS
2124 && ! CONSTANT_P (XEXP (X, 1))
2125 && ! TARGET_SHMEDIA ? 1 : 0);
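/* In other words, on non-SHmedia targets an indexed address such as
   @(r0,rn) - a PLUS whose second operand is not a constant - is charged 1,
   while @rn, @(disp,rn) and the remaining addressing modes are charged 0.  */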
2128 /* Code to expand a shift. */
2130 void
2131 gen_ashift (int type, int n, rtx reg)
2133 /* Negative values here come from the shift_amounts array. */
2134 if (n < 0)
2136 if (type == ASHIFT)
2137 type = LSHIFTRT;
2138 else
2139 type = ASHIFT;
2140 n = -n;
2143 switch (type)
2145 case ASHIFTRT:
2146 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
2147 break;
2148 case LSHIFTRT:
2149 if (n == 1)
2150 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
2151 else
2152 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
2153 break;
2154 case ASHIFT:
2155 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
2156 break;
2160 /* Same for HImode */
2162 void
2163 gen_ashift_hi (int type, int n, rtx reg)
2165 /* Negative values here come from the shift_amounts array. */
2166 if (n < 0)
2168 if (type == ASHIFT)
2169 type = LSHIFTRT;
2170 else
2171 type = ASHIFT;
2172 n = -n;
2175 switch (type)
2177 case ASHIFTRT:
2178 case LSHIFTRT:
2179 /* We don't have HImode right shift operations because using the
2180 ordinary 32 bit shift instructions for that doesn't generate proper
2181 zero/sign extension.
2182 gen_ashift_hi is only called in contexts where we know that the
2183 sign extension works out correctly. */
2185 int offset = 0;
2186 if (GET_CODE (reg) == SUBREG)
2188 offset = SUBREG_BYTE (reg);
2189 reg = SUBREG_REG (reg);
2191 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
2192 break;
2194 case ASHIFT:
2195 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
2196 break;
2200 /* Output RTL to split a constant shift into its component SH constant
2201 shift instructions. */
2203 void
2204 gen_shifty_op (int code, rtx *operands)
2206 int value = INTVAL (operands[2]);
2207 int max, i;
2209 /* Truncate the shift count in case it is out of bounds. */
2210 value = value & 0x1f;
2212 if (value == 31)
2214 if (code == LSHIFTRT)
2216 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
2217 emit_insn (gen_movt (operands[0]));
2218 return;
2220 else if (code == ASHIFT)
2222 /* There is a two instruction sequence for 31 bit left shifts,
2223 but it requires r0. */
2224 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
2226 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
2227 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
2228 return;
2232 else if (value == 0)
2234 /* This can happen even when optimizing, if there were subregs before
2235 reload. Don't output a nop here, as this is never optimized away;
2236 use a no-op move instead. */
2237 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
2238 return;
2241 max = shift_insns[value];
2242 for (i = 0; i < max; i++)
2243 gen_ashift (code, shift_amounts[value][i], operands[0]);
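/* A minimal usage sketch (see gen_shl_sext below for real call sites): to
   emit "rn <<= 13" a caller sets operands[0] to the destination register
   and operands[2] to GEN_INT (13), then calls
   gen_shifty_op (ASHIFT, operands); per shift_amounts[13] this expands
   into the four component shifts 8, 2, 1 and 2.  operands[1] is not read
   here, so only slots 0 and 2 need to be filled in.  */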
2246 /* Same as above, but optimized for values where the topmost bits don't
2247 matter. */
2249 void
2250 gen_shifty_hi_op (int code, rtx *operands)
2252 int value = INTVAL (operands[2]);
2253 int max, i;
2254 void (*gen_fun) (int, int, rtx);
2256 /* This operation is used by and_shl for SImode values with a few
2257 high bits known to be cleared. */
2258 value &= 31;
2259 if (value == 0)
2261 emit_insn (gen_nop ());
2262 return;
2265 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2266 if (code == ASHIFT)
2268 max = ext_shift_insns[value];
2269 for (i = 0; i < max; i++)
2270 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2272 else
2273 /* When shifting right, emit the shifts in reverse order, so that
2274 solitary negative values come first. */
2275 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2276 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2279 /* Output RTL for an arithmetic right shift. */
2281 /* ??? Rewrite to use super-optimizer sequences. */
2284 expand_ashiftrt (rtx *operands)
2286 rtx wrk;
2287 char func[18];
2288 int value;
2290 if (TARGET_SH3)
2292 if (GET_CODE (operands[2]) != CONST_INT)
2294 rtx count = copy_to_mode_reg (SImode, operands[2]);
2295 emit_insn (gen_negsi2 (count, count));
2296 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2297 return 1;
2299 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2300 > 1 + SH_DYNAMIC_SHIFT_COST)
2302 rtx count
2303 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2304 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2305 return 1;
2308 if (GET_CODE (operands[2]) != CONST_INT)
2309 return 0;
2311 value = INTVAL (operands[2]) & 31;
2313 if (value == 31)
2315 /* If we are called from abs expansion, arrange things so that we
2316 can use a single MT instruction that doesn't clobber the source,
2317 if LICM can hoist out the load of the constant zero. */
2318 if (currently_expanding_to_rtl)
2320 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
2321 operands[1]));
2322 emit_insn (gen_mov_neg_si_t (operands[0]));
2323 return 1;
2325 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2326 return 1;
2328 else if (value >= 16 && value <= 19)
2330 wrk = gen_reg_rtx (SImode);
2331 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2332 value -= 16;
2333 while (value--)
2334 gen_ashift (ASHIFTRT, 1, wrk);
2335 emit_move_insn (operands[0], wrk);
2336 return 1;
2338 /* Expand a short sequence inline; for longer ones, call a magic routine. */
2339 else if (value <= 5)
2341 wrk = gen_reg_rtx (SImode);
2342 emit_move_insn (wrk, operands[1]);
2343 while (value--)
2344 gen_ashift (ASHIFTRT, 1, wrk);
2345 emit_move_insn (operands[0], wrk);
2346 return 1;
2349 wrk = gen_reg_rtx (Pmode);
2351 /* Load the value into an arg reg and call a helper. */
2352 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2353 sprintf (func, "__ashiftrt_r4_%d", value);
2354 function_symbol (wrk, func, SFUNC_STATIC);
2355 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2356 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
2357 return 1;
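/* To illustrate the case split above, assuming the SH3 dynamic-shift path
   is not taken: a constant count of 17 lands in the 16..19 branch (one
   ashrsi2_16 followed by a single-bit shift), a count of 3 is expanded
   inline as three single-bit shifts, and a count such as 9 falls through
   to the __ashiftrt_r4_9 library helper with the value passed in r4.  */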
2361 sh_dynamicalize_shift_p (rtx count)
2363 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
2366 /* Try to find a good way to implement the combiner pattern
2367 [(set (match_operand:SI 0 "register_operand" "r")
2368 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2369 (match_operand:SI 2 "const_int_operand" "n"))
2370 (match_operand:SI 3 "const_int_operand" "n"))) .
2371 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2372 return 0 for simple right / left or left/right shift combination.
2373 return 1 for a combination of shifts with zero_extend.
2374 return 2 for a combination of shifts with an AND that needs r0.
2375 return 3 for a combination of shifts with an AND that needs an extra
2376 scratch register, when the three highmost bits of the AND mask are clear.
2377 return 4 for a combination of shifts with an AND that needs an extra
2378 scratch register, when any of the three highmost bits of the AND mask
2379 is set.
2380 If ATTRP is set, store an initial right shift width in ATTRP[0],
2381 and the instruction length in ATTRP[1] . These values are not valid
2382 when returning 0.
2383 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2384 shift_amounts for the last shift value that is to be used before the
2385 sign extend. */
2387 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2389 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2390 int left = INTVAL (left_rtx), right;
2391 int best = 0;
2392 int cost, best_cost = 10000;
2393 int best_right = 0, best_len = 0;
2394 int i;
2395 int can_ext;
2397 if (left < 0 || left > 31)
2398 return 0;
2399 if (GET_CODE (mask_rtx) == CONST_INT)
2400 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2401 else
2402 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2403 /* Can this be expressed as a right shift / left shift pair? */
2404 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2405 right = exact_log2 (lsb);
2406 mask2 = ~(mask + lsb - 1);
2407 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2408 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
2409 if (! mask2)
2410 best_cost = shift_insns[right] + shift_insns[right + left];
2411 /* mask has no trailing zeroes <==> ! right */
2412 else if (! right && mask2 == ~(lsb2 - 1))
2414 int late_right = exact_log2 (lsb2);
2415 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2417 /* Try to use zero extend. */
2418 if (mask2 == ~(lsb2 - 1))
2420 int width, first;
2422 for (width = 8; width <= 16; width += 8)
2424 /* Can we zero-extend right away? */
2425 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2427 cost
2428 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2429 if (cost < best_cost)
2431 best = 1;
2432 best_cost = cost;
2433 best_right = right;
2434 best_len = cost;
2435 if (attrp)
2436 attrp[2] = -1;
2438 continue;
2440 /* ??? Could try to put zero extend into initial right shift,
2441 or even shift a bit left before the right shift. */
2442 /* Determine value of first part of left shift, to get to the
2443 zero extend cut-off point. */
2444 first = width - exact_log2 (lsb2) + right;
2445 if (first >= 0 && right + left - first >= 0)
2447 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2448 + ext_shift_insns[right + left - first];
2449 if (cost < best_cost)
2451 best = 1;
2452 best_cost = cost;
2453 best_right = right;
2454 best_len = cost;
2455 if (attrp)
2456 attrp[2] = first;
2461 /* Try to use r0 AND pattern */
2462 for (i = 0; i <= 2; i++)
2464 if (i > right)
2465 break;
2466 if (! CONST_OK_FOR_K08 (mask >> i))
2467 continue;
2468 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2469 if (cost < best_cost)
2471 best = 2;
2472 best_cost = cost;
2473 best_right = i;
2474 best_len = cost - 1;
2477 /* Try to use a scratch register to hold the AND operand. */
2478 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2479 for (i = 0; i <= 2; i++)
2481 if (i > right)
2482 break;
2483 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2484 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2485 if (cost < best_cost)
2487 best = 4 - can_ext;
2488 best_cost = cost;
2489 best_right = i;
2490 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2494 if (attrp)
2496 attrp[0] = best_right;
2497 attrp[1] = best_len;
2499 return best;
2502 /* This is used in length attributes of the unnamed instructions
2503 corresponding to shl_and_kind return values of 1 and 2. */
2505 shl_and_length (rtx insn)
2507 rtx set_src, left_rtx, mask_rtx;
2508 int attributes[3];
2510 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2511 left_rtx = XEXP (XEXP (set_src, 0), 1);
2512 mask_rtx = XEXP (set_src, 1);
2513 shl_and_kind (left_rtx, mask_rtx, attributes);
2514 return attributes[1];
2517 /* This is used in length attribute of the and_shl_scratch instruction. */
2520 shl_and_scr_length (rtx insn)
2522 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2523 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2524 rtx op = XEXP (set_src, 0);
2525 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2526 op = XEXP (XEXP (op, 0), 0);
2527 return len + shift_insns[INTVAL (XEXP (op, 1))];
2530 /* Generate rtl for instructions for which shl_and_kind advised a particular
2531 method of generating them, i.e. returned zero. */
2534 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2536 int attributes[3];
2537 unsigned HOST_WIDE_INT mask;
2538 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2539 int right, total_shift;
2540 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2542 right = attributes[0];
2543 total_shift = INTVAL (left_rtx) + right;
2544 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2545 switch (kind)
2547 default:
2548 return -1;
2549 case 1:
2551 int first = attributes[2];
2552 rtx operands[3];
2554 if (first < 0)
2556 emit_insn ((mask << right) <= 0xff
2557 ? gen_zero_extendqisi2 (dest,
2558 gen_lowpart (QImode, source))
2559 : gen_zero_extendhisi2 (dest,
2560 gen_lowpart (HImode, source)));
2561 source = dest;
2563 if (source != dest)
2564 emit_insn (gen_movsi (dest, source));
2565 operands[0] = dest;
2566 if (right)
2568 operands[2] = GEN_INT (right);
2569 gen_shifty_hi_op (LSHIFTRT, operands);
2571 if (first > 0)
2573 operands[2] = GEN_INT (first);
2574 gen_shifty_hi_op (ASHIFT, operands);
2575 total_shift -= first;
2576 mask <<= first;
2578 if (first >= 0)
2579 emit_insn (mask <= 0xff
2580 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
2581 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2582 if (total_shift > 0)
2584 operands[2] = GEN_INT (total_shift);
2585 gen_shifty_hi_op (ASHIFT, operands);
2587 break;
2589 case 4:
2590 shift_gen_fun = gen_shifty_op;
2591 case 3:
2592 /* If the topmost bit that matters is set, set the topmost bits
2593 that don't matter. This way, we might be able to get a shorter
2594 signed constant. */
2595 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
2596 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
2597 case 2:
2598 /* Don't expand fine-grained when combining, because that will
2599 make the pattern fail. */
2600 if (currently_expanding_to_rtl
2601 || reload_in_progress || reload_completed)
2603 rtx operands[3];
2605 /* Cases 3 and 4 should be handled by this split
2606 only while combining */
2607 gcc_assert (kind <= 2);
2608 if (right)
2610 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2611 source = dest;
2613 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
2614 if (total_shift)
2616 operands[0] = dest;
2617 operands[1] = dest;
2618 operands[2] = GEN_INT (total_shift);
2619 shift_gen_fun (ASHIFT, operands);
2621 break;
2623 else
2625 int neg = 0;
2626 if (kind != 4 && total_shift < 16)
2628 neg = -ext_shift_amounts[total_shift][1];
2629 if (neg > 0)
2630 neg -= ext_shift_amounts[total_shift][2];
2631 else
2632 neg = 0;
2634 emit_insn (gen_and_shl_scratch (dest, source,
2635 GEN_INT (right),
2636 GEN_INT (mask),
2637 GEN_INT (total_shift + neg),
2638 GEN_INT (neg)));
2639 emit_insn (gen_movsi (dest, dest));
2640 break;
2643 return 0;
2646 /* Try to find a good way to implement the combiner pattern
2647 [(set (match_operand:SI 0 "register_operand" "=r")
2648 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2649 (match_operand:SI 2 "const_int_operand" "n")
2650 (match_operand:SI 3 "const_int_operand" "n")
2651 (const_int 0)))
2652 (clobber (reg:SI T_REG))]
2653 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2654 return 0 for simple left / right shift combination.
2655 return 1 for left shift / 8 bit sign extend / left shift.
2656 return 2 for left shift / 16 bit sign extend / left shift.
2657 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2658 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2659 return 5 for left shift / 16 bit sign extend / right shift
2660 return 6 for < 8 bit sign extend / left shift.
2661 return 7 for < 8 bit sign extend / left shift / single right shift.
2662 If COSTP is nonzero, assign the calculated cost to *COSTP. */
2665 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
2667 int left, size, insize, ext;
2668 int cost = 0, best_cost;
2669 int kind;
2671 left = INTVAL (left_rtx);
2672 size = INTVAL (size_rtx);
2673 insize = size - left;
2674 gcc_assert (insize > 0);
2675 /* Default to left / right shift. */
2676 kind = 0;
2677 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
2678 if (size <= 16)
2680 /* 16 bit shift / sign extend / 16 bit shift */
2681 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2682 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2683 below, by alternative 3 or something even better. */
2684 if (cost < best_cost)
2686 kind = 5;
2687 best_cost = cost;
2690 /* Try a plain sign extend between two shifts. */
2691 for (ext = 16; ext >= insize; ext -= 8)
2693 if (ext <= size)
2695 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2696 if (cost < best_cost)
2698 kind = ext / (unsigned) 8;
2699 best_cost = cost;
2702 /* Check if we can do a sloppy shift with a final signed shift
2703 restoring the sign. */
2704 if (EXT_SHIFT_SIGNED (size - ext))
2705 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2706 /* If not, maybe it's still cheaper to do the second shift sloppy,
2707 and do a final sign extend? */
2708 else if (size <= 16)
2709 cost = ext_shift_insns[ext - insize] + 1
2710 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2711 else
2712 continue;
2713 if (cost < best_cost)
2715 kind = ext / (unsigned) 8 + 2;
2716 best_cost = cost;
2719 /* Check if we can sign extend in r0 */
2720 if (insize < 8)
2722 cost = 3 + shift_insns[left];
2723 if (cost < best_cost)
2725 kind = 6;
2726 best_cost = cost;
2728 /* Try the same with a final signed shift. */
2729 if (left < 31)
2731 cost = 3 + ext_shift_insns[left + 1] + 1;
2732 if (cost < best_cost)
2734 kind = 7;
2735 best_cost = cost;
2739 if (TARGET_SH3)
2741 /* Try to use a dynamic shift. */
2742 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2743 if (cost < best_cost)
2745 kind = 0;
2746 best_cost = cost;
2749 if (costp)
2750 *costp = cost;
2751 return kind;
2754 /* Function to be used in the length attribute of the instructions
2755 implementing this pattern. */
2758 shl_sext_length (rtx insn)
2760 rtx set_src, left_rtx, size_rtx;
2761 int cost;
2763 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2764 left_rtx = XEXP (XEXP (set_src, 0), 1);
2765 size_rtx = XEXP (set_src, 1);
2766 shl_sext_kind (left_rtx, size_rtx, &cost);
2767 return cost;
2770 /* Generate rtl for this pattern */
2773 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
2775 int kind;
2776 int left, size, insize, cost;
2777 rtx operands[3];
2779 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2780 left = INTVAL (left_rtx);
2781 size = INTVAL (size_rtx);
2782 insize = size - left;
2783 switch (kind)
2785 case 1:
2786 case 2:
2787 case 3:
2788 case 4:
2790 int ext = kind & 1 ? 8 : 16;
2791 int shift2 = size - ext;
2793 /* Don't expand fine-grained when combining, because that will
2794 make the pattern fail. */
2795 if (! currently_expanding_to_rtl
2796 && ! reload_in_progress && ! reload_completed)
2798 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2799 emit_insn (gen_movsi (dest, source));
2800 break;
2802 if (dest != source)
2803 emit_insn (gen_movsi (dest, source));
2804 operands[0] = dest;
2805 if (ext - insize)
2807 operands[2] = GEN_INT (ext - insize);
2808 gen_shifty_hi_op (ASHIFT, operands);
2810 emit_insn (kind & 1
2811 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2812 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2813 if (kind <= 2)
2815 if (shift2)
2817 operands[2] = GEN_INT (shift2);
2818 gen_shifty_op (ASHIFT, operands);
2821 else
2823 if (shift2 > 0)
2825 if (EXT_SHIFT_SIGNED (shift2))
2827 operands[2] = GEN_INT (shift2 + 1);
2828 gen_shifty_op (ASHIFT, operands);
2829 operands[2] = const1_rtx;
2830 gen_shifty_op (ASHIFTRT, operands);
2831 break;
2833 operands[2] = GEN_INT (shift2);
2834 gen_shifty_hi_op (ASHIFT, operands);
2836 else if (shift2)
2838 operands[2] = GEN_INT (-shift2);
2839 gen_shifty_hi_op (LSHIFTRT, operands);
2841 emit_insn (size <= 8
2842 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2843 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2845 break;
2847 case 5:
2849 int i = 16 - size;
2850 if (! currently_expanding_to_rtl
2851 && ! reload_in_progress && ! reload_completed)
2852 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2853 else
2855 operands[0] = dest;
2856 operands[2] = GEN_INT (16 - insize);
2857 gen_shifty_hi_op (ASHIFT, operands);
2858 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2860 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2861 while (--i >= 0)
2862 gen_ashift (ASHIFTRT, 1, dest);
2863 break;
2865 case 6:
2866 case 7:
2867 /* Don't expand fine-grained when combining, because that will
2868 make the pattern fail. */
2869 if (! currently_expanding_to_rtl
2870 && ! reload_in_progress && ! reload_completed)
2872 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2873 emit_insn (gen_movsi (dest, source));
2874 break;
2876 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2877 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2878 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2879 operands[0] = dest;
2880 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2881 gen_shifty_op (ASHIFT, operands);
2882 if (kind == 7)
2883 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
2884 break;
2885 default:
2886 return -1;
2888 return 0;
2891 /* Prefix a symbol_ref name with "datalabel". */
2894 gen_datalabel_ref (rtx sym)
2896 const char *str;
2898 if (GET_CODE (sym) == LABEL_REF)
2899 return gen_rtx_CONST (GET_MODE (sym),
2900 gen_rtx_UNSPEC (GET_MODE (sym),
2901 gen_rtvec (1, sym),
2902 UNSPEC_DATALABEL));
2904 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
2906 str = XSTR (sym, 0);
2907 /* Share all SYMBOL_REF strings with the same value - that is important
2908 for cse. */
2909 str = IDENTIFIER_POINTER (get_identifier (str));
2910 XSTR (sym, 0) = str;
2912 return sym;
2916 /* The SH cannot load a large constant into a register; constants have to
2917 come from a pc relative load. The reference of a pc relative load
2918 instruction must be less than 1k in front of the instruction. This
2919 means that we often have to dump a constant inside a function, and
2920 generate code to branch around it.
2922 It is important to minimize this, since the branches will slow things
2923 down and make things bigger.
2925 Worst case code looks like:
2927 mov.l L1,rn
2928 bra L2
2930 align
2931 L1: .long value
2935 mov.l L3,rn
2936 bra L4
2938 align
2939 L3: .long value
2943 We fix this by performing a scan before scheduling, which notices which
2944 instructions need to have their operands fetched from the constant table
2945 and builds the table.
2947 The algorithm is:
2949 scan, find an instruction which needs a pcrel move. Look forward, find the
2950 last barrier which is within MAX_COUNT bytes of the requirement.
2951 If there isn't one, make one. Process all the instructions between
2952 the find and the barrier.
2954 In the above example, we can tell that L3 is within 1k of L1, so
2955 the first move can be shrunk from the 3 insn+constant sequence into
2956 just 1 insn, and the constant moved to L3 to make:
2958 mov.l L1,rn
2960 mov.l L3,rn
2961 bra L4
2963 align
2964 L3:.long value
2965 L4:.long value
2967 Then the second move becomes the target for the shortening process. */
2969 typedef struct
2971 rtx value; /* Value in table. */
2972 rtx label; /* Label of value. */
2973 rtx wend; /* End of window. */
2974 enum machine_mode mode; /* Mode of value. */
2976 /* True if this constant is accessed as part of a post-increment
2977 sequence. Note that HImode constants are never accessed in this way. */
2978 bool part_of_sequence_p;
2979 } pool_node;
2981 /* The maximum number of constants that can fit into one pool, since
2982 constants in the range 0..510 are at least 2 bytes long, and in the
2983 range from there to 1018 at least 4 bytes. */
2985 #define MAX_POOL_SIZE 372
2986 static pool_node pool_vector[MAX_POOL_SIZE];
2987 static int pool_size;
2988 static rtx pool_window_label;
2989 static int pool_window_last;
2991 /* ??? If we need a constant in HImode which is the truncated value of a
2992 constant we need in SImode, we could combine the two entries thus saving
2993 two bytes. Is this common enough to be worth the effort of implementing
2994 it? */
2996 /* ??? This stuff should be done at the same time that we shorten branches.
2997 As it is now, we must assume that all branches are the maximum size, and
2998 this causes us to almost always output constant pools sooner than
2999 necessary. */
3001 /* Add a constant to the pool and return its label. */
3003 static rtx
3004 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3006 int i;
3007 rtx lab, new, ref, newref;
3009 /* First see if we've already got it. */
3010 for (i = 0; i < pool_size; i++)
3012 if (x->code == pool_vector[i].value->code
3013 && mode == pool_vector[i].mode)
3015 if (x->code == CODE_LABEL)
3017 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3018 continue;
3020 if (rtx_equal_p (x, pool_vector[i].value))
3022 lab = new = 0;
3023 if (! last_value
3024 || ! i
3025 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3027 new = gen_label_rtx ();
3028 LABEL_REFS (new) = pool_vector[i].label;
3029 pool_vector[i].label = lab = new;
3031 if (lab && pool_window_label)
3033 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
3034 ref = pool_vector[pool_window_last].wend;
3035 LABEL_NEXTREF (newref) = ref;
3036 pool_vector[pool_window_last].wend = newref;
3038 if (new)
3039 pool_window_label = new;
3040 pool_window_last = i;
3041 return lab;
3046 /* Need a new one. */
3047 pool_vector[pool_size].value = x;
3048 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
3050 lab = 0;
3051 pool_vector[pool_size - 1].part_of_sequence_p = true;
3053 else
3054 lab = gen_label_rtx ();
3055 pool_vector[pool_size].mode = mode;
3056 pool_vector[pool_size].label = lab;
3057 pool_vector[pool_size].wend = NULL_RTX;
3058 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
3059 if (lab && pool_window_label)
3061 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
3062 ref = pool_vector[pool_window_last].wend;
3063 LABEL_NEXTREF (newref) = ref;
3064 pool_vector[pool_window_last].wend = newref;
3066 if (lab)
3067 pool_window_label = lab;
3068 pool_window_last = pool_size;
3069 pool_size++;
3070 return lab;
3073 /* Output the literal table. START, if nonzero, is the first instruction
3074 this table is needed for, and also indicates that there is at least one
3075 casesi_worker_2 instruction; we have to emit the operand3 labels from
3076 these insns at a 4-byte aligned position. BARRIER is the barrier
3077 after which we are to place the table. */
3079 static void
3080 dump_table (rtx start, rtx barrier)
3082 rtx scan = barrier;
3083 int i;
3084 int need_align = 1;
3085 rtx lab, ref;
3086 int have_df = 0;
3088 /* Do two passes, first time dump out the HI sized constants. */
3090 for (i = 0; i < pool_size; i++)
3092 pool_node *p = &pool_vector[i];
3094 if (p->mode == HImode)
3096 if (need_align)
3098 scan = emit_insn_after (gen_align_2 (), scan);
3099 need_align = 0;
3101 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3102 scan = emit_label_after (lab, scan);
3103 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
3104 scan);
3105 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3107 lab = XEXP (ref, 0);
3108 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3111 else if (p->mode == DFmode)
3112 have_df = 1;
3115 need_align = 1;
3117 if (start)
3119 scan = emit_insn_after (gen_align_4 (), scan);
3120 need_align = 0;
3121 for (; start != barrier; start = NEXT_INSN (start))
3122 if (GET_CODE (start) == INSN
3123 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
3125 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
3126 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
3128 scan = emit_label_after (lab, scan);
3131 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
3133 rtx align_insn = NULL_RTX;
3135 scan = emit_label_after (gen_label_rtx (), scan);
3136 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3137 need_align = 0;
3139 for (i = 0; i < pool_size; i++)
3141 pool_node *p = &pool_vector[i];
3143 switch (p->mode)
3145 case HImode:
3146 break;
3147 case SImode:
3148 case SFmode:
3149 if (align_insn && !p->part_of_sequence_p)
3151 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3152 emit_label_before (lab, align_insn);
3153 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
3154 align_insn);
3155 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3157 lab = XEXP (ref, 0);
3158 emit_insn_before (gen_consttable_window_end (lab),
3159 align_insn);
3161 delete_insn (align_insn);
3162 align_insn = NULL_RTX;
3163 continue;
3165 else
3167 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3168 scan = emit_label_after (lab, scan);
3169 scan = emit_insn_after (gen_consttable_4 (p->value,
3170 const0_rtx), scan);
3171 need_align = ! need_align;
3173 break;
3174 case DFmode:
3175 if (need_align)
3177 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3178 align_insn = scan;
3179 need_align = 0;
3181 case DImode:
3182 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3183 scan = emit_label_after (lab, scan);
3184 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3185 scan);
3186 break;
3187 default:
3188 gcc_unreachable ();
3191 if (p->mode != HImode)
3193 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3195 lab = XEXP (ref, 0);
3196 scan = emit_insn_after (gen_consttable_window_end (lab),
3197 scan);
3202 pool_size = 0;
3205 for (i = 0; i < pool_size; i++)
3207 pool_node *p = &pool_vector[i];
3209 switch (p->mode)
3211 case HImode:
3212 break;
3213 case SImode:
3214 case SFmode:
3215 if (need_align)
3217 need_align = 0;
3218 scan = emit_label_after (gen_label_rtx (), scan);
3219 scan = emit_insn_after (gen_align_4 (), scan);
3221 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3222 scan = emit_label_after (lab, scan);
3223 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
3224 scan);
3225 break;
3226 case DFmode:
3227 case DImode:
3228 if (need_align)
3230 need_align = 0;
3231 scan = emit_label_after (gen_label_rtx (), scan);
3232 scan = emit_insn_after (gen_align_4 (), scan);
3234 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3235 scan = emit_label_after (lab, scan);
3236 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3237 scan);
3238 break;
3239 default:
3240 gcc_unreachable ();
3243 if (p->mode != HImode)
3245 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3247 lab = XEXP (ref, 0);
3248 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3253 scan = emit_insn_after (gen_consttable_end (), scan);
3254 scan = emit_barrier_after (scan);
3255 pool_size = 0;
3256 pool_window_label = NULL_RTX;
3257 pool_window_last = 0;
3260 /* Return nonzero if constant would be an ok source for a
3261 mov.w instead of a mov.l. */
3263 static int
3264 hi_const (rtx src)
3266 return (GET_CODE (src) == CONST_INT
3267 && INTVAL (src) >= -32768
3268 && INTVAL (src) <= 32767);
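/* I.e. anything representable as a signed 16-bit immediate: hi_const is
   true for (const_int 1000) or (const_int -32768), and false for
   (const_int 70000) or for symbolic constants.  */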
3271 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3273 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
3274 CONST_DOUBLE input value is CONST_OK_FOR_I08. For an SFmode move, we don't
3275 need to fix it if the input value is CONST_OK_FOR_I08. */
3277 static int
3278 broken_move (rtx insn)
3280 if (GET_CODE (insn) == INSN)
3282 rtx pat = PATTERN (insn);
3283 if (GET_CODE (pat) == PARALLEL)
3284 pat = XVECEXP (pat, 0, 0);
3285 if (GET_CODE (pat) == SET
3286 /* We can load any 8 bit value if we don't care what the high
3287 order bits end up as. */
3288 && GET_MODE (SET_DEST (pat)) != QImode
3289 && (CONSTANT_P (SET_SRC (pat))
3290 /* Match mova_const. */
3291 || (GET_CODE (SET_SRC (pat)) == UNSPEC
3292 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3293 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
3294 && ! (TARGET_SH2E
3295 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3296 && (fp_zero_operand (SET_SRC (pat))
3297 || fp_one_operand (SET_SRC (pat)))
3298 /* ??? If this is a -m4 or -m4-single compilation, in general
3299 we don't know the current setting of fpscr, so disable fldi.
3300 There is an exception if this was a register-register move
3301 before reload - and hence it was ascertained that we have
3302 single precision setting - and in a post-reload optimization
3303 we changed this to do a constant load. In that case
3304 we don't have an r0 clobber, hence we must use fldi. */
3305 && (! TARGET_SH4 || TARGET_FMOVD
3306 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3307 == SCRATCH))
3308 && GET_CODE (SET_DEST (pat)) == REG
3309 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3310 && ! (TARGET_SH2A
3311 && GET_MODE (SET_DEST (pat)) == SImode
3312 && GET_CODE (SET_SRC (pat)) == CONST_INT
3313 && CONST_OK_FOR_I20 (INTVAL (SET_SRC (pat))))
3314 && (GET_CODE (SET_SRC (pat)) != CONST_INT
3315 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
3316 return 1;
3319 return 0;
3322 static int
3323 mova_p (rtx insn)
3325 return (GET_CODE (insn) == INSN
3326 && GET_CODE (PATTERN (insn)) == SET
3327 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3328 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3329 /* Don't match mova_const. */
3330 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
3333 /* Fix up a mova from a switch that went out of range. */
3334 static void
3335 fixup_mova (rtx mova)
3337 if (! flag_pic)
3339 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3340 INSN_CODE (mova) = -1;
3342 else
3344 rtx worker = mova;
3345 rtx lab = gen_label_rtx ();
3346 rtx wpat, wpat0, wpat1, wsrc, diff;
3350 worker = NEXT_INSN (worker);
3351 gcc_assert (worker
3352 && GET_CODE (worker) != CODE_LABEL
3353 && GET_CODE (worker) != JUMP_INSN);
3354 } while (recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3355 wpat = PATTERN (worker);
3356 wpat0 = XVECEXP (wpat, 0, 0);
3357 wpat1 = XVECEXP (wpat, 0, 1);
3358 wsrc = SET_SRC (wpat0);
3359 PATTERN (worker) = (gen_casesi_worker_2
3360 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3361 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3362 XEXP (wpat1, 0)));
3363 INSN_CODE (worker) = -1;
3364 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3365 gen_rtx_LABEL_REF (Pmode, lab));
3366 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3367 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3368 INSN_CODE (mova) = -1;
3372 /* Find the last barrier from insn FROM which is close enough to hold the
3373 constant pool. If we can't find one, then create one near the end of
3374 the range. */
3376 static rtx
3377 find_barrier (int num_mova, rtx mova, rtx from)
3379 int count_si = 0;
3380 int count_hi = 0;
3381 int found_hi = 0;
3382 int found_si = 0;
3383 int found_di = 0;
3384 int hi_align = 2;
3385 int si_align = 2;
3386 int leading_mova = num_mova;
3387 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3388 int si_limit;
3389 int hi_limit;
3391 /* For HImode: range is 510, add 4 because pc counts from address of
3392 second instruction after this one, subtract 2 for the jump instruction
3393 that we may need to emit before the table, subtract 2 for the instruction
3394 that fills the jump delay slot (in very rare cases, reorg will take an
3395 instruction from after the constant pool or will leave the delay slot
3396 empty). This gives 510.
3397 For SImode: range is 1020, add 4 because pc counts from address of
3398 second instruction after this one, subtract 2 in case pc is 2 byte
3399 aligned, subtract 2 for the jump instruction that we may need to emit
3400 before the table, subtract 2 for the instruction that fills the jump
3401 delay slot. This gives 1018. */
3403 /* The branch will always be shortened now that the reference address for
3404 forward branches is the successor address, thus we need no longer make
3405 adjustments to the [sh]i_limit for -O0. */
3407 si_limit = 1018;
3408 hi_limit = 510;
3410 while (from && count_si < si_limit && count_hi < hi_limit)
3412 int inc = get_attr_length (from);
3413 int new_align = 1;
3415 if (GET_CODE (from) == CODE_LABEL)
3417 if (optimize)
3418 new_align = 1 << label_to_alignment (from);
3419 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3420 new_align = 1 << barrier_align (from);
3421 else
3422 new_align = 1;
3423 inc = 0;
3426 if (GET_CODE (from) == BARRIER)
3429 found_barrier = from;
3431 /* If we are at the end of the function, or in front of an alignment
3432 instruction, we need not insert an extra alignment. We prefer
3433 this kind of barrier. */
3434 if (barrier_align (from) > 2)
3435 good_barrier = from;
3438 if (broken_move (from))
3440 rtx pat, src, dst;
3441 enum machine_mode mode;
3443 pat = PATTERN (from);
3444 if (GET_CODE (pat) == PARALLEL)
3445 pat = XVECEXP (pat, 0, 0);
3446 src = SET_SRC (pat);
3447 dst = SET_DEST (pat);
3448 mode = GET_MODE (dst);
3450 /* We must explicitly check the mode, because sometimes the
3451 front end will generate code to load unsigned constants into
3452 HImode targets without properly sign extending them. */
3453 if (mode == HImode
3454 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3456 found_hi += 2;
3457 /* We put the short constants before the long constants, so
3458 we must count the length of short constants in the range
3459 for the long constants. */
3460 /* ??? This isn't optimal, but is easy to do. */
3461 si_limit -= 2;
3463 else
3465 /* We dump DF/DI constants before SF/SI ones, because
3466 the limit is the same, but the alignment requirements
3467 are higher. We may waste up to 4 additional bytes
3468 for alignment, and the DF/DI constant may have
3469 another SF/SI constant placed before it. */
3470 if (TARGET_SHCOMPACT
3471 && ! found_di
3472 && (mode == DFmode || mode == DImode))
3474 found_di = 1;
3475 si_limit -= 8;
3477 while (si_align > 2 && found_si + si_align - 2 > count_si)
3478 si_align >>= 1;
3479 if (found_si > count_si)
3480 count_si = found_si;
3481 found_si += GET_MODE_SIZE (mode);
3482 if (num_mova)
3483 si_limit -= GET_MODE_SIZE (mode);
3487 if (mova_p (from))
3489 if (! num_mova++)
3491 leading_mova = 0;
3492 mova = from;
3493 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
3495 if (found_si > count_si)
3496 count_si = found_si;
3498 else if (GET_CODE (from) == JUMP_INSN
3499 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3500 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3502 if (num_mova)
3503 num_mova--;
3504 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3506 /* We have just passed the barrier in front of the
3507 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3508 the ADDR_DIFF_VEC is accessed as data, just like our pool
3509 constants, this is a good opportunity to accommodate what
3510 we have gathered so far.
3511 If we waited any longer, we could end up at a barrier in
3512 front of code, which gives worse cache usage for separated
3513 instruction / data caches. */
3514 good_barrier = found_barrier;
3515 break;
3517 else
3519 rtx body = PATTERN (from);
3520 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
3523 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3524 else if (GET_CODE (from) == JUMP_INSN
3525 && ! TARGET_SH2
3526 && ! TARGET_SMALLCODE)
3527 new_align = 4;
3529 if (found_si)
3531 count_si += inc;
3532 if (new_align > si_align)
3534 si_limit -= (count_si - 1) & (new_align - si_align);
3535 si_align = new_align;
3537 count_si = (count_si + new_align - 1) & -new_align;
3539 if (found_hi)
3541 count_hi += inc;
3542 if (new_align > hi_align)
3544 hi_limit -= (count_hi - 1) & (new_align - hi_align);
3545 hi_align = new_align;
3547 count_hi = (count_hi + new_align - 1) & -new_align;
3549 from = NEXT_INSN (from);
3552 if (num_mova)
3554 if (leading_mova)
3556 /* Try as we might, the leading mova is out of range. Change
3557 it into a load (which will become a pcload) and retry. */
3558 fixup_mova (mova);
3559 return find_barrier (0, 0, mova);
3561 else
3563 /* Insert the constant pool table before the mova instruction,
3564 to prevent the mova label reference from going out of range. */
3565 from = mova;
3566 good_barrier = found_barrier = barrier_before_mova;
3570 if (found_barrier)
3572 if (good_barrier && next_real_insn (found_barrier))
3573 found_barrier = good_barrier;
3575 else
3577 /* We didn't find a barrier in time to dump our stuff,
3578 so we'll make one. */
3579 rtx label = gen_label_rtx ();
3581 /* If we exceeded the range, then we must back up over the last
3582 instruction we looked at. Otherwise, we just need to undo the
3583 NEXT_INSN at the end of the loop. */
3584 if (count_hi > hi_limit || count_si > si_limit)
3585 from = PREV_INSN (PREV_INSN (from));
3586 else
3587 from = PREV_INSN (from);
3589 /* Walk back to be just before any jump or label.
3590 Putting it before a label reduces the number of times the branch
3591 around the constant pool table will be hit. Putting it before
3592 a jump makes it more likely that the bra delay slot will be
3593 filled. */
3594 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
3595 || GET_CODE (from) == CODE_LABEL)
3596 from = PREV_INSN (from);
3598 from = emit_jump_insn_after (gen_jump (label), from);
3599 JUMP_LABEL (from) = label;
3600 LABEL_NUSES (label) = 1;
3601 found_barrier = emit_barrier_after (from);
3602 emit_label_after (label, found_barrier);
3605 return found_barrier;
3608 /* If the instruction INSN is implemented by a special function, and we can
3609 positively find the register that is used to call the sfunc, and this
3610 register is not used anywhere else in this instruction - except as the
3611 destination of a set, return this register; else, return 0. */
3613 sfunc_uses_reg (rtx insn)
3615 int i;
3616 rtx pattern, part, reg_part, reg;
3618 if (GET_CODE (insn) != INSN)
3619 return 0;
3620 pattern = PATTERN (insn);
3621 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
3622 return 0;
3624 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3626 part = XVECEXP (pattern, 0, i);
3627 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
3628 reg_part = part;
3630 if (! reg_part)
3631 return 0;
3632 reg = XEXP (reg_part, 0);
3633 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
3635 part = XVECEXP (pattern, 0, i);
3636 if (part == reg_part || GET_CODE (part) == CLOBBER)
3637 continue;
3638 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
3639 && GET_CODE (SET_DEST (part)) == REG)
3640 ? SET_SRC (part) : part)))
3641 return 0;
3643 return reg;
3646 /* See if the only way in which INSN uses REG is by calling it, or by
3647 setting it while calling it. Set *SET to a SET rtx if the register
3648 is set by INSN. */
3650 static int
3651 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
3653 rtx pattern, reg2;
3655 *set = NULL_RTX;
3657 reg2 = sfunc_uses_reg (insn);
3658 if (reg2 && REGNO (reg2) == REGNO (reg))
3660 pattern = single_set (insn);
3661 if (pattern
3662 && GET_CODE (SET_DEST (pattern)) == REG
3663 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3664 *set = pattern;
3665 return 0;
3667 if (GET_CODE (insn) != CALL_INSN)
3669 /* We don't use rtx_equal_p because we don't care if the mode is
3670 different. */
3671 pattern = single_set (insn);
3672 if (pattern
3673 && GET_CODE (SET_DEST (pattern)) == REG
3674 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3676 rtx par, part;
3677 int i;
3679 *set = pattern;
3680 par = PATTERN (insn);
3681 if (GET_CODE (par) == PARALLEL)
3682 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
3684 part = XVECEXP (par, 0, i);
3685 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
3686 return 1;
3688 return reg_mentioned_p (reg, SET_SRC (pattern));
3691 return 1;
3694 pattern = PATTERN (insn);
3696 if (GET_CODE (pattern) == PARALLEL)
3698 int i;
3700 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3701 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
3702 return 1;
3703 pattern = XVECEXP (pattern, 0, 0);
3706 if (GET_CODE (pattern) == SET)
3708 if (reg_mentioned_p (reg, SET_DEST (pattern)))
3710 /* We don't use rtx_equal_p, because we don't care if the
3711 mode is different. */
3712 if (GET_CODE (SET_DEST (pattern)) != REG
3713 || REGNO (reg) != REGNO (SET_DEST (pattern)))
3714 return 1;
3716 *set = pattern;
3719 pattern = SET_SRC (pattern);
3722 if (GET_CODE (pattern) != CALL
3723 || GET_CODE (XEXP (pattern, 0)) != MEM
3724 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
3725 return 1;
3727 return 0;
3730 /* Given X, a pattern of an insn or a part of it, return a mask of used
3731 general registers. Bits 0..15 mean that the respective registers
3732 are used as inputs in the instruction. Bits 16..31 mean that the
3733 registers 0..15, respectively, are used as outputs, or are clobbered.
3734 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
3736 regs_used (rtx x, int is_dest)
3738 enum rtx_code code;
3739 const char *fmt;
3740 int i, used = 0;
3742 if (! x)
3743 return used;
3744 code = GET_CODE (x);
3745 switch (code)
3747 case REG:
3748 if (REGNO (x) < 16)
3749 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3750 << (REGNO (x) + is_dest));
3751 return 0;
3752 case SUBREG:
3754 rtx y = SUBREG_REG (x);
3756 if (GET_CODE (y) != REG)
3757 break;
3758 if (REGNO (y) < 16)
3759 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3760 << (REGNO (y) +
3761 subreg_regno_offset (REGNO (y),
3762 GET_MODE (y),
3763 SUBREG_BYTE (x),
3764 GET_MODE (x)) + is_dest));
3765 return 0;
3767 case SET:
3768 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
3769 case RETURN:
3770 /* If there was a return value, it must have been indicated with USE. */
3771 return 0x00ffff00;
3772 case CLOBBER:
3773 is_dest = 1;
3774 break;
3775 case MEM:
3776 is_dest = 0;
3777 break;
3778 case CALL:
3779 used |= 0x00ff00f0;
3780 break;
3781 default:
3782 break;
3785 fmt = GET_RTX_FORMAT (code);
3787 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3789 if (fmt[i] == 'E')
3791 register int j;
3792 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3793 used |= regs_used (XVECEXP (x, i, j), is_dest);
3795 else if (fmt[i] == 'e')
3796 used |= regs_used (XEXP (x, i), is_dest);
3798 return used;
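/* A worked example of the mask layout described above: for the pattern
   (set (reg:SI 1) (plus:SI (reg:SI 2) (reg:SI 3))), regs_used returns
   (1 << 17) | (1 << 3) | (1 << 2) == 0x2000c - r2 and r3 show up in the
   input half of the mask, r1 in the output half (assuming SImode occupies
   a single general register, so that HARD_REGNO_NREGS yields 1 here).  */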
3801 /* Create an instruction that prevents redirection of a conditional branch
3802 to the destination of the JUMP with address ADDR.
3803 If the branch needs to be implemented as an indirect jump, try to find
3804 a scratch register for it.
3805 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3806 If any preceding insn that doesn't fit into a delay slot is good enough,
3807 pass 1. Pass 2 if a definite blocking insn is needed.
3808 -1 is used internally to avoid deep recursion.
3809 If a blocking instruction is made or recognized, return it. */
3811 static rtx
3812 gen_block_redirect (rtx jump, int addr, int need_block)
3814 int dead = 0;
3815 rtx prev = prev_nonnote_insn (jump);
3816 rtx dest;
3818 /* First, check if we already have an instruction that satisfies our need. */
3819 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3821 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3822 return prev;
3823 if (GET_CODE (PATTERN (prev)) == USE
3824 || GET_CODE (PATTERN (prev)) == CLOBBER
3825 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3826 prev = jump;
3827 else if ((need_block &= ~1) < 0)
3828 return prev;
3829 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
3830 need_block = 0;
3832 if (GET_CODE (PATTERN (jump)) == RETURN)
3834 if (! need_block)
3835 return prev;
3836 /* Reorg even does nasty things with return insns that cause branches
3837 to go out of range - see find_end_label and callers. */
3838 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
3840 /* We can't use JUMP_LABEL here because it might be undefined
3841 when not optimizing. */
3842 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
3843 /* If the branch is out of range, try to find a scratch register for it. */
3844 if (optimize
3845 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3846 > 4092 + 4098))
3848 rtx scan;
3849 /* Don't look for the stack pointer as a scratch register,
3850 it would cause trouble if an interrupt occurred. */
3851 unsigned try = 0x7fff, used;
3852 int jump_left = flag_expensive_optimizations + 1;
3854 /* It is likely that the most recent eligible instruction is wanted for
3855 the delay slot. Therefore, find out which registers it uses, and
3856 try to avoid using them. */
3858 for (scan = jump; (scan = PREV_INSN (scan)); )
3860 enum rtx_code code;
3862 if (INSN_DELETED_P (scan))
3863 continue;
3864 code = GET_CODE (scan);
3865 if (code == CODE_LABEL || code == JUMP_INSN)
3866 break;
3867 if (code == INSN
3868 && GET_CODE (PATTERN (scan)) != USE
3869 && GET_CODE (PATTERN (scan)) != CLOBBER
3870 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
3872 try &= ~regs_used (PATTERN (scan), 0);
3873 break;
3876 for (used = dead = 0, scan = JUMP_LABEL (jump);
3877 (scan = NEXT_INSN (scan)); )
3879 enum rtx_code code;
3881 if (INSN_DELETED_P (scan))
3882 continue;
3883 code = GET_CODE (scan);
3884 if (INSN_P (scan))
3886 used |= regs_used (PATTERN (scan), 0);
3887 if (code == CALL_INSN)
3888 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
3889 dead |= (used >> 16) & ~used;
3890 if (dead & try)
3892 dead &= try;
3893 break;
3895 if (code == JUMP_INSN)
3897 if (jump_left-- && simplejump_p (scan))
3898 scan = JUMP_LABEL (scan);
3899 else
3900 break;
3904 /* Mask out the stack pointer again, in case it was
3905 the only 'free' register we have found. */
3906 dead &= 0x7fff;
3908 /* If the immediate destination is still in range, check for possible
3909 threading with a jump beyond the delay slot insn.
3910 Don't check if we are called recursively; the jump has been or will be
3911 checked in a different invocation.
3913 else if (optimize && need_block >= 0)
3915 rtx next = next_active_insn (next_active_insn (dest));
3916 if (next && GET_CODE (next) == JUMP_INSN
3917 && GET_CODE (PATTERN (next)) == SET
3918 && recog_memoized (next) == CODE_FOR_jump_compact)
3920 dest = JUMP_LABEL (next);
3921 if (dest
3922 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3923 > 4092 + 4098))
3924 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
3928 if (dead)
3930 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
3932 /* It would be nice if we could convert the jump into an indirect
3933 jump / far branch right now, thus exposing all constituent
3934 instructions to further optimization. However, reorg uses
3935 simplejump_p to determine if there is an unconditional jump where
3936 it should try to schedule instructions from the target of the
3937 branch; simplejump_p fails for indirect jumps even if they have
3938 a JUMP_LABEL. */
3939 rtx insn = emit_insn_before (gen_indirect_jump_scratch
3940 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
3941 , jump);
3942 /* ??? We would like this to have the scope of the jump, but that
3943 scope will change when a delay slot insn of an inner scope is added.
3944 Hence, after delay slot scheduling, we'll have to expect
3945 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
3946 the jump. */
3948 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
3949 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
3950 return insn;
3952 else if (need_block)
3953 /* We can't use JUMP_LABEL here because it might be undefined
3954 when not optimizing. */
3955 return emit_insn_before (gen_block_branch_redirect
3956 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
3957 , jump);
3958 return prev;
3961 #define CONDJUMP_MIN -252
3962 #define CONDJUMP_MAX 262
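/* These bounds appear to express, in bytes, how far a conditional branch
   (8-bit pc-relative displacement, scaled by 2) can reach from the branch
   insn once the pc offset is taken into account.  */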
3963 struct far_branch
3965 /* A label (to be placed) in front of the jump
3966 that jumps to our ultimate destination. */
3967 rtx near_label;
3968 /* Where we are going to insert it if we cannot move the jump any farther,
3969 or the jump itself if we have picked up an existing jump. */
3970 rtx insert_place;
3971 /* The ultimate destination. */
3972 rtx far_label;
3973 struct far_branch *prev;
3974 /* If the branch has already been created, its address;
3975 else the address of its first prospective user. */
3976 int address;
3979 static void gen_far_branch (struct far_branch *);
3980 enum mdep_reorg_phase_e mdep_reorg_phase;
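/* Emit the far branch sequence for BP.  Roughly (summary added for
   readability): a new local label is placed after BP->insert_place, an
   unconditional jump (or return) to the far destination is emitted before
   that label, BP->near_label is placed in front of this far jump so that
   other out-of-range conditional branches can target it, and the original
   conditional branch is inverted so that it skips over the far jump.  */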
3981 static void
3982 gen_far_branch (struct far_branch *bp)
3984 rtx insn = bp->insert_place;
3985 rtx jump;
3986 rtx label = gen_label_rtx ();
3987 int ok;
3989 emit_label_after (label, insn);
3990 if (bp->far_label)
3992 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
3993 LABEL_NUSES (bp->far_label)++;
3995 else
3996 jump = emit_jump_insn_after (gen_return (), insn);
3997 /* Emit a barrier so that reorg knows that any following instructions
3998 are not reachable via a fall-through path.
3999 But don't do this when not optimizing, since we wouldn't suppress the
4000 alignment for the barrier then, and could end up with out-of-range
4001 pc-relative loads. */
4002 if (optimize)
4003 emit_barrier_after (jump);
4004 emit_label_after (bp->near_label, insn);
4005 JUMP_LABEL (jump) = bp->far_label;
4006 ok = invert_jump (insn, label, 1);
4007 gcc_assert (ok);
4009 /* If we are branching around a jump (rather than a return), prevent
4010 reorg from using an insn from the jump target as the delay slot insn -
4011 when reorg did this, it pessimized code (we'd rather hide the delay slot)
4012 and it could cause branches to go out of range. */
4013 if (bp->far_label)
4014 (emit_insn_after
4015 (gen_stuff_delay_slot
4016 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
4017 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
4018 insn));
4019 /* Prevent reorg from undoing our splits. */
4020 gen_block_redirect (jump, bp->address += 2, 2);
4023 /* Fix up ADDR_DIFF_VECs. */
4024 void
4025 fixup_addr_diff_vecs (rtx first)
4027 rtx insn;
4029 for (insn = first; insn; insn = NEXT_INSN (insn))
4031 rtx vec_lab, pat, prev, prevpat, x, braf_label;
4033 if (GET_CODE (insn) != JUMP_INSN
4034 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
4035 continue;
4036 pat = PATTERN (insn);
4037 vec_lab = XEXP (XEXP (pat, 0), 0);
4039 /* Search the matching casesi_jump_2. */
4040 for (prev = vec_lab; ; prev = PREV_INSN (prev))
4042 if (GET_CODE (prev) != JUMP_INSN)
4043 continue;
4044 prevpat = PATTERN (prev);
4045 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
4046 continue;
4047 x = XVECEXP (prevpat, 0, 1);
4048 if (GET_CODE (x) != USE)
4049 continue;
4050 x = XEXP (x, 0);
4051 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
4052 break;
4054 /* FIXME: This is a bug in the optimizer, but it seems harmless
4055 to just avoid panicking. */
4056 if (!prev)
4057 continue;
4059 /* Emit the reference label of the braf where it belongs, right after
4060 the casesi_jump_2 (i.e. braf). */
4061 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
4062 emit_label_after (braf_label, prev);
4064 /* Fix up the ADDR_DIFF_VEC to be relative
4065 to the reference address of the braf. */
4066 XEXP (XEXP (pat, 0), 0) = braf_label;
4070 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
4071 a barrier. Return the base 2 logarithm of the desired alignment. */
4072 int
4073 barrier_align (rtx barrier_or_label)
4075 rtx next = next_real_insn (barrier_or_label), pat, prev;
4076 int slot, credit, jump_to_next = 0;
4078 if (! next)
4079 return 0;
4081 pat = PATTERN (next);
4083 if (GET_CODE (pat) == ADDR_DIFF_VEC)
4084 return 2;
4086 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
4087 /* This is a barrier in front of a constant table. */
4088 return 0;
4090 prev = prev_real_insn (barrier_or_label);
4091 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
4093 pat = PATTERN (prev);
4094 /* If this is a very small table, we want to keep the alignment after
4095 the table to the minimum for proper code alignment. */
4096 return ((TARGET_SMALLCODE
4097 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
4098 <= (unsigned) 1 << (CACHE_LOG - 2)))
4099 ? 1 << TARGET_SHMEDIA : align_jumps_log);
4102 if (TARGET_SMALLCODE)
4103 return 0;
4105 if (! TARGET_SH2 || ! optimize)
4106 return align_jumps_log;
4108 /* When fixing up pcloads, a constant table might be inserted just before
4109 the basic block that ends with the barrier. Thus, we can't trust the
4110 instruction lengths before that. */
4111 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
4113 /* Check if there is an immediately preceding branch to the insn beyond
4114 the barrier. We must weigh the cost of discarding useful information
4115 from the current cache line when executing this branch and there is
4116 an alignment, against that of fetching unneeded insns in front of the
4117 branch target when there is no alignment. */
4119 /* There are two delay_slot cases to consider. One is the simple case
4120 where the preceding branch is to the insn beyond the barrier (simple
4121 delay slot filling), and the other is where the preceding branch has
4122 a delay slot that is a duplicate of the insn after the barrier
4123 (fill_eager_delay_slots) and the branch is to the insn after the insn
4124 after the barrier. */
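/* (Loosely, the loop below walks backwards spending a "credit" of about one
   cache line -- (1 << (CACHE_LOG - 2)) + 2 bytes -- of instruction lengths;
   if, within that budget, a branch to the insn following the barrier is
   found, aligning that target is judged not worthwhile and 0 is returned.)  */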
4126 /* PREV is presumed to be the JUMP_INSN for the barrier under
4127 investigation. Skip to the insn before it. */
4128 prev = prev_real_insn (prev);
4130 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
4131 credit >= 0 && prev && GET_CODE (prev) == INSN;
4132 prev = prev_real_insn (prev))
4134 jump_to_next = 0;
4135 if (GET_CODE (PATTERN (prev)) == USE
4136 || GET_CODE (PATTERN (prev)) == CLOBBER)
4137 continue;
4138 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
4140 prev = XVECEXP (PATTERN (prev), 0, 1);
4141 if (INSN_UID (prev) == INSN_UID (next))
4143 /* Delay slot was filled with insn at jump target. */
4144 jump_to_next = 1;
4145 continue;
4149 if (slot &&
4150 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4151 slot = 0;
4152 credit -= get_attr_length (prev);
4154 if (prev
4155 && GET_CODE (prev) == JUMP_INSN
4156 && JUMP_LABEL (prev))
4158 rtx x;
4159 if (jump_to_next
4160 || next_real_insn (JUMP_LABEL (prev)) == next
4161 /* If relax_delay_slots() decides NEXT was redundant
4162 with some previous instruction, it will have
4163 redirected PREV's jump to the following insn. */
4164 || JUMP_LABEL (prev) == next_nonnote_insn (next)
4165 /* There is no upper bound on redundant instructions
4166 that might have been skipped, but we must not put an
4167 alignment where none had been before. */
4168 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
4169 (INSN_P (x)
4170 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
4171 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
4172 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
4174 rtx pat = PATTERN (prev);
4175 if (GET_CODE (pat) == PARALLEL)
4176 pat = XVECEXP (pat, 0, 0);
4177 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
4178 return 0;
4183 return align_jumps_log;
4186 /* If we are inside a phony loop, almost any kind of label can turn up as the
4187 first one in the loop. Aligning a braf label causes incorrect switch
4188 destination addresses; we can detect braf labels because they are
4189 followed by a BARRIER.
4190 Applying loop alignment to small constant or switch tables is a waste
4191 of space, so we suppress this too. */
4192 int
4193 sh_loop_align (rtx label)
4195 rtx next = label;
4197 do
4198 next = next_nonnote_insn (next);
4199 while (next && GET_CODE (next) == CODE_LABEL);
4201 if (! next
4202 || ! INSN_P (next)
4203 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
4204 || recog_memoized (next) == CODE_FOR_consttable_2)
4205 return 0;
4207 return align_loops_log;
4210 /* Do a final pass over the function, just before delayed branch
4211 scheduling. */
4213 static void
4214 sh_reorg (void)
4216 rtx first, insn, mova = NULL_RTX;
4217 int num_mova;
4218 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
4219 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
4221 first = get_insns ();
4223 /* We must split call insns before introducing `mova's. If we're
4224 optimizing, they'll have already been split. Otherwise, make
4225 sure we don't split them too late. */
4226 if (! optimize)
4227 split_all_insns_noflow ();
4229 if (TARGET_SHMEDIA)
4230 return;
4232 /* If relaxing, generate pseudo-ops to associate function calls with
4233 the symbols they call. It does no harm to not generate these
4234 pseudo-ops. However, when we can generate them, it enables to
4235 linker to potentially relax the jsr to a bsr, and eliminate the
4236 register load and, possibly, the constant pool entry. */
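/* (The pseudo-ops in question are the ".uses" directives emitted by
   final_prescan_insn further down, keyed off the REG_LABEL notes that this
   pass attaches to the insn loading the call target and to each call insn.)  */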
4238 mdep_reorg_phase = SH_INSERT_USES_LABELS;
4239 if (TARGET_RELAX)
4241 /* Remove all REG_LABEL notes. We want to use them for our own
4242 purposes. This works because none of the remaining passes
4243 need to look at them.
4245 ??? But it may break in the future. We should use a machine
4246 dependent REG_NOTE, or some other approach entirely. */
4247 for (insn = first; insn; insn = NEXT_INSN (insn))
4249 if (INSN_P (insn))
4251 rtx note;
4253 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
4254 remove_note (insn, note);
4258 for (insn = first; insn; insn = NEXT_INSN (insn))
4260 rtx pattern, reg, link, set, scan, dies, label;
4261 int rescan = 0, foundinsn = 0;
4263 if (GET_CODE (insn) == CALL_INSN)
4265 pattern = PATTERN (insn);
4267 if (GET_CODE (pattern) == PARALLEL)
4268 pattern = XVECEXP (pattern, 0, 0);
4269 if (GET_CODE (pattern) == SET)
4270 pattern = SET_SRC (pattern);
4272 if (GET_CODE (pattern) != CALL
4273 || GET_CODE (XEXP (pattern, 0)) != MEM)
4274 continue;
4276 reg = XEXP (XEXP (pattern, 0), 0);
4278 else
4280 reg = sfunc_uses_reg (insn);
4281 if (! reg)
4282 continue;
4285 if (GET_CODE (reg) != REG)
4286 continue;
4288 /* This is a function call via REG. If the only uses of REG
4289 between the time that it is set and the time that it dies
4290 are in function calls, then we can associate all the
4291 function calls with the setting of REG. */
4293 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
4295 if (REG_NOTE_KIND (link) != 0)
4296 continue;
4297 set = single_set (XEXP (link, 0));
4298 if (set && rtx_equal_p (reg, SET_DEST (set)))
4300 link = XEXP (link, 0);
4301 break;
4305 if (! link)
4307 /* ??? Sometimes global register allocation will have
4308 deleted the insn pointed to by LOG_LINKS. Try
4309 scanning backward to find where the register is set. */
4310 for (scan = PREV_INSN (insn);
4311 scan && GET_CODE (scan) != CODE_LABEL;
4312 scan = PREV_INSN (scan))
4314 if (! INSN_P (scan))
4315 continue;
4317 if (! reg_mentioned_p (reg, scan))
4318 continue;
4320 if (noncall_uses_reg (reg, scan, &set))
4321 break;
4323 if (set)
4325 link = scan;
4326 break;
4331 if (! link)
4332 continue;
4334 /* The register is set at LINK. */
4336 /* We can only optimize the function call if the register is
4337 being set to a symbol. In theory, we could sometimes
4338 optimize calls to a constant location, but the assembler
4339 and linker do not support that at present. */
4340 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4341 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4342 continue;
4344 /* Scan forward from LINK to the place where REG dies, and
4345 make sure that the only insns which use REG are
4346 themselves function calls. */
4348 /* ??? This doesn't work for call targets that were allocated
4349 by reload, since there may not be a REG_DEAD note for the
4350 register. */
4352 dies = NULL_RTX;
4353 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4355 rtx scanset;
4357 /* Don't try to trace forward past a CODE_LABEL if we haven't
4358 seen INSN yet. Ordinarily, we will only find the setting insn
4359 in LOG_LINKS if it is in the same basic block. However,
4360 cross-jumping can insert code labels in between the load and
4361 the call, and can result in situations where a single call
4362 insn may have two targets depending on where we came from. */
4364 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4365 break;
4367 if (! INSN_P (scan))
4368 continue;
4370 /* Don't try to trace forward past a JUMP. To optimize
4371 safely, we would have to check that all the
4372 instructions at the jump destination did not use REG. */
4374 if (GET_CODE (scan) == JUMP_INSN)
4375 break;
4377 if (! reg_mentioned_p (reg, scan))
4378 continue;
4380 if (noncall_uses_reg (reg, scan, &scanset))
4381 break;
4383 if (scan == insn)
4384 foundinsn = 1;
4386 if (scan != insn
4387 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4389 /* There is a function call to this register other
4390 than the one we are checking. If we optimize
4391 this call, we need to rescan again below. */
4392 rescan = 1;
4395 /* ??? We shouldn't have to worry about SCANSET here.
4396 We should just be able to check for a REG_DEAD note
4397 on a function call. However, the REG_DEAD notes are
4398 apparently not dependable around libcalls; c-torture
4399 execute/920501-2 is a test case. If SCANSET is set,
4400 then this insn sets the register, so it must have
4401 died earlier. Unfortunately, this will only handle
4402 the cases in which the register is, in fact, set in a
4403 later insn. */
4405 /* ??? We shouldn't have to use FOUNDINSN here.
4406 However, the LOG_LINKS fields are apparently not
4407 entirely reliable around libcalls;
4408 newlib/libm/math/e_pow.c is a test case. Sometimes
4409 an insn will appear in LOG_LINKS even though it is
4410 not the most recent insn which sets the register. */
4412 if (foundinsn
4413 && (scanset
4414 || find_reg_note (scan, REG_DEAD, reg)))
4416 dies = scan;
4417 break;
4421 if (! dies)
4423 /* Either there was a branch, or some insn used REG
4424 other than as a function call address. */
4425 continue;
4428 /* Create a code label, and put it in a REG_LABEL note on
4429 the insn which sets the register, and on each call insn
4430 which uses the register. In final_prescan_insn we look
4431 for the REG_LABEL notes, and output the appropriate label
4432 or pseudo-op. */
4434 label = gen_label_rtx ();
4435 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
4436 REG_NOTES (link));
4437 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
4438 REG_NOTES (insn));
4439 if (rescan)
4441 scan = link;
4444 rtx reg2;
4446 scan = NEXT_INSN (scan);
4447 if (scan != insn
4448 && ((GET_CODE (scan) == CALL_INSN
4449 && reg_mentioned_p (reg, scan))
4450 || ((reg2 = sfunc_uses_reg (scan))
4451 && REGNO (reg2) == REGNO (reg))))
4452 REG_NOTES (scan)
4453 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
4455 while (scan != dies);
4460 if (TARGET_SH2)
4461 fixup_addr_diff_vecs (first);
4463 if (optimize)
4465 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4466 shorten_branches (first);
4468 /* Scan the function looking for move instructions which have to be
4469 changed to pc-relative loads and insert the literal tables. */
4471 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4472 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4474 if (mova_p (insn))
4476 /* ??? basic block reordering can move a switch table dispatch
4477 below the switch table. Check if that has happened.
4478 We only have the addresses available when optimizing; but then,
4479 this check shouldn't be needed when not optimizing. */
4480 rtx label_ref = XVECEXP (SET_SRC (PATTERN (insn)), 0, 0);
4481 if (optimize
4482 && (INSN_ADDRESSES (INSN_UID (insn))
4483 > INSN_ADDRESSES (INSN_UID (XEXP (label_ref, 0)))))
4485 /* Change the mova into a load.
4486 broken_move will then return true for it. */
4487 fixup_mova (insn);
4489 else if (! num_mova++)
4490 mova = insn;
4492 else if (GET_CODE (insn) == JUMP_INSN
4493 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4494 && num_mova)
4496 rtx scan;
4497 int total;
4499 num_mova--;
4501 /* Some code might have been inserted between the mova and
4502 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4503 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4504 total += get_attr_length (scan);
4506 /* The range of mova is 1020; add 4 because the pc counts from the address
4507 of the second instruction after this one, and subtract 2 in case the pc
4508 is 2-byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
4509 cancels out with alignment effects of the mova itself. */
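/* I.e. the conservative limit used below is 1020 + 4 - 2 = 1022 bytes.  */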
4510 if (total > 1022)
4512 /* Change the mova into a load, and restart scanning
4513 there. broken_move will then return true for mova. */
4514 fixup_mova (mova);
4515 insn = mova;
4518 if (broken_move (insn)
4519 || (GET_CODE (insn) == INSN
4520 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
4522 rtx scan;
4523 /* Scan ahead looking for a barrier to stick the constant table
4524 behind. */
4525 rtx barrier = find_barrier (num_mova, mova, insn);
4526 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4527 int need_aligned_label = 0;
4529 if (num_mova && ! mova_p (mova))
4531 /* find_barrier had to change the first mova into a
4532 pcload; thus, we have to start with this new pcload. */
4533 insn = mova;
4534 num_mova = 0;
4536 /* Now find all the moves between the points and modify them. */
4537 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
4539 if (GET_CODE (scan) == CODE_LABEL)
4540 last_float = 0;
4541 if (GET_CODE (scan) == INSN
4542 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
4543 need_aligned_label = 1;
4544 if (broken_move (scan))
4546 rtx *patp = &PATTERN (scan), pat = *patp;
4547 rtx src, dst;
4548 rtx lab;
4549 rtx newsrc;
4550 enum machine_mode mode;
4552 if (GET_CODE (pat) == PARALLEL)
4553 patp = &XVECEXP (pat, 0, 0), pat = *patp;
4554 src = SET_SRC (pat);
4555 dst = SET_DEST (pat);
4556 mode = GET_MODE (dst);
4558 if (mode == SImode && hi_const (src)
4559 && REGNO (dst) != FPUL_REG)
4561 int offset = 0;
4563 mode = HImode;
4564 while (GET_CODE (dst) == SUBREG)
4566 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
4567 GET_MODE (SUBREG_REG (dst)),
4568 SUBREG_BYTE (dst),
4569 GET_MODE (dst));
4570 dst = SUBREG_REG (dst);
4572 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
4574 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
4576 /* This must be an insn that clobbers r0. */
4577 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
4578 XVECLEN (PATTERN (scan), 0)
4579 - 1);
4580 rtx clobber = *clobberp;
4582 gcc_assert (GET_CODE (clobber) == CLOBBER
4583 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
4585 if (last_float
4586 && reg_set_between_p (r0_rtx, last_float_move, scan))
4587 last_float = 0;
4588 if (last_float
4589 && TARGET_SHCOMPACT
4590 && GET_MODE_SIZE (mode) != 4
4591 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
4592 last_float = 0;
4593 lab = add_constant (src, mode, last_float);
4594 if (lab)
4595 emit_insn_before (gen_mova (lab), scan);
4596 else
4598 /* There will be a REG_UNUSED note for r0 on
4599 LAST_FLOAT_MOVE; we have to change it to REG_INC,
4600 otherwise reorg's mark_target_live_regs will not
4601 consider r0 to be used, and we could end up with a delay
4602 slot insn in front of SCAN that clobbers r0. */
4603 rtx note
4604 = find_regno_note (last_float_move, REG_UNUSED, 0);
4606 /* If we are not optimizing, then there may not be
4607 a note. */
4608 if (note)
4609 PUT_MODE (note, REG_INC);
4611 *last_float_addr = r0_inc_rtx;
4613 last_float_move = scan;
4614 last_float = src;
4615 newsrc = gen_rtx_MEM (mode,
4616 (((TARGET_SH4 && ! TARGET_FMOVD)
4617 || REGNO (dst) == FPUL_REG)
4618 ? r0_inc_rtx
4619 : r0_rtx));
4620 last_float_addr = &XEXP (newsrc, 0);
4622 /* Remove the clobber of r0. */
4623 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
4624 gen_rtx_SCRATCH (Pmode));
4626 /* This is a mova needing a label. Create it. */
4627 else if (GET_CODE (src) == UNSPEC
4628 && XINT (src, 1) == UNSPEC_MOVA
4629 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
4631 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
4632 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4633 newsrc = gen_rtx_UNSPEC (SImode,
4634 gen_rtvec (1, newsrc),
4635 UNSPEC_MOVA);
4637 else
4639 lab = add_constant (src, mode, 0);
4640 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4641 newsrc = gen_const_mem (mode, newsrc);
4643 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
4644 INSN_CODE (scan) = -1;
4647 dump_table (need_aligned_label ? insn : 0, barrier);
4648 insn = barrier;
4652 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
4653 INSN_ADDRESSES_FREE ();
4654 split_branches (first);
4656 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4657 also has an effect on the register that holds the address of the sfunc.
4658 Insert an extra dummy insn in front of each sfunc that pretends to
4659 use this register. */
4660 if (flag_delayed_branch)
4662 for (insn = first; insn; insn = NEXT_INSN (insn))
4664 rtx reg = sfunc_uses_reg (insn);
4666 if (! reg)
4667 continue;
4668 emit_insn_before (gen_use_sfunc_addr (reg), insn);
4671 #if 0
4672 /* fpscr is not actually a user variable, but we pretend it is for the
4673 sake of the previous optimization passes, since we want it handled like
4674 one. However, we don't have any debugging information for it, so turn
4675 it into a non-user variable now. */
4676 if (TARGET_SH4)
4677 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4678 #endif
4679 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
4682 static int
4683 get_dest_uid (rtx label, int max_uid)
4685 rtx dest = next_real_insn (label);
4686 int dest_uid;
4687 if (! dest)
4688 /* This can happen for an undefined label. */
4689 return 0;
4690 dest_uid = INSN_UID (dest);
4691 /* If this is a newly created branch redirection blocking instruction,
4692 we cannot index the branch_uid or insn_addresses arrays with its
4693 uid. But then, we won't need to, because the actual destination is
4694 the following branch. */
4695 while (dest_uid >= max_uid)
4697 dest = NEXT_INSN (dest);
4698 dest_uid = INSN_UID (dest);
4700 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
4701 return 0;
4702 return dest_uid;
4705 /* Split condbranches that are out of range. Also add clobbers for
4706 scratch registers that are needed in far jumps.
4707 We do this before delay slot scheduling, so that it can take our
4708 newly created instructions into account. It also allows us to
4709 find branches with common targets more easily. */
4711 static void
4712 split_branches (rtx first)
4714 rtx insn;
4715 struct far_branch **uid_branch, *far_branch_list = 0;
4716 int max_uid = get_max_uid ();
4717 int ok;
4719 /* Find out which branches are out of range. */
4720 shorten_branches (first);
4722 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
4723 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
4725 for (insn = first; insn; insn = NEXT_INSN (insn))
4726 if (! INSN_P (insn))
4727 continue;
4728 else if (INSN_DELETED_P (insn))
4730 /* Shorten_branches would split this instruction again,
4731 so transform it into a note. */
4732 PUT_CODE (insn, NOTE);
4733 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4734 NOTE_SOURCE_FILE (insn) = 0;
4736 else if (GET_CODE (insn) == JUMP_INSN
4737 /* Don't mess with ADDR_DIFF_VEC */
4738 && (GET_CODE (PATTERN (insn)) == SET
4739 || GET_CODE (PATTERN (insn)) == RETURN))
4741 enum attr_type type = get_attr_type (insn);
4742 if (type == TYPE_CBRANCH)
4744 rtx next, beyond;
4746 if (get_attr_length (insn) > 4)
4748 rtx src = SET_SRC (PATTERN (insn));
4749 rtx olabel = XEXP (XEXP (src, 1), 0);
4750 int addr = INSN_ADDRESSES (INSN_UID (insn));
4751 rtx label = 0;
4752 int dest_uid = get_dest_uid (olabel, max_uid);
4753 struct far_branch *bp = uid_branch[dest_uid];
4755 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4756 the label if the LABEL_NUSES count drops to zero. There is
4757 always a jump_optimize pass that sets these values, but it
4758 proceeds to delete unreferenced code, and then if not
4759 optimizing, to un-delete the deleted instructions, thus
4760 leaving labels with use counts that are too low. */
4761 if (! optimize)
4763 JUMP_LABEL (insn) = olabel;
4764 LABEL_NUSES (olabel)++;
4766 if (! bp)
4768 bp = (struct far_branch *) alloca (sizeof *bp);
4769 uid_branch[dest_uid] = bp;
4770 bp->prev = far_branch_list;
4771 far_branch_list = bp;
4772 bp->far_label
4773 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
4774 LABEL_NUSES (bp->far_label)++;
4776 else
4778 label = bp->near_label;
4779 if (! label && bp->address - addr >= CONDJUMP_MIN)
4781 rtx block = bp->insert_place;
4783 if (GET_CODE (PATTERN (block)) == RETURN)
4784 block = PREV_INSN (block);
4785 else
4786 block = gen_block_redirect (block,
4787 bp->address, 2);
4788 label = emit_label_after (gen_label_rtx (),
4789 PREV_INSN (block));
4790 bp->near_label = label;
4792 else if (label && ! NEXT_INSN (label))
4794 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4795 bp->insert_place = insn;
4796 else
4797 gen_far_branch (bp);
4800 if (! label
4801 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4803 bp->near_label = label = gen_label_rtx ();
4804 bp->insert_place = insn;
4805 bp->address = addr;
4807 ok = redirect_jump (insn, label, 1);
4808 gcc_assert (ok);
4810 else
4812 /* get_attr_length (insn) == 2 */
4813 /* Check if we have a pattern where reorg wants to redirect
4814 the branch to a label from an unconditional branch that
4815 is too far away. */
4816 /* We can't use JUMP_LABEL here because it might be undefined
4817 when not optimizing. */
4818 /* A syntax error might cause beyond to be NULL_RTX. */
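/* (The address comparison further below appears to check whether the target
   of that following unconditional jump lies outside roughly -252..+260
   bytes of this insn -- essentially the CONDJUMP_MIN / CONDJUMP_MAX
   window defined above.)  */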
4819 beyond
4820 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4821 0));
4823 if (beyond
4824 && (GET_CODE (beyond) == JUMP_INSN
4825 || ((beyond = next_active_insn (beyond))
4826 && GET_CODE (beyond) == JUMP_INSN))
4827 && GET_CODE (PATTERN (beyond)) == SET
4828 && recog_memoized (beyond) == CODE_FOR_jump_compact
4829 && ((INSN_ADDRESSES
4830 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4831 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4832 > 252 + 258 + 2))
4833 gen_block_redirect (beyond,
4834 INSN_ADDRESSES (INSN_UID (beyond)), 1);
4837 next = next_active_insn (insn);
4839 if ((GET_CODE (next) == JUMP_INSN
4840 || ((next = next_active_insn (next))
4841 && GET_CODE (next) == JUMP_INSN))
4842 && GET_CODE (PATTERN (next)) == SET
4843 && recog_memoized (next) == CODE_FOR_jump_compact
4844 && ((INSN_ADDRESSES
4845 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
4846 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4847 > 252 + 258 + 2))
4848 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
4850 else if (type == TYPE_JUMP || type == TYPE_RETURN)
4852 int addr = INSN_ADDRESSES (INSN_UID (insn));
4853 rtx far_label = 0;
4854 int dest_uid = 0;
4855 struct far_branch *bp;
4857 if (type == TYPE_JUMP)
4859 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
4860 dest_uid = get_dest_uid (far_label, max_uid);
4861 if (! dest_uid)
4863 /* Parse errors can lead to labels outside
4864 the insn stream. */
4865 if (! NEXT_INSN (far_label))
4866 continue;
4868 if (! optimize)
4870 JUMP_LABEL (insn) = far_label;
4871 LABEL_NUSES (far_label)++;
4873 redirect_jump (insn, NULL_RTX, 1);
4874 far_label = 0;
4877 bp = uid_branch[dest_uid];
4878 if (! bp)
4880 bp = (struct far_branch *) alloca (sizeof *bp);
4881 uid_branch[dest_uid] = bp;
4882 bp->prev = far_branch_list;
4883 far_branch_list = bp;
4884 bp->near_label = 0;
4885 bp->far_label = far_label;
4886 if (far_label)
4887 LABEL_NUSES (far_label)++;
4889 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
4890 if (addr - bp->address <= CONDJUMP_MAX)
4891 emit_label_after (bp->near_label, PREV_INSN (insn));
4892 else
4894 gen_far_branch (bp);
4895 bp->near_label = 0;
4897 else
4898 bp->near_label = 0;
4899 bp->address = addr;
4900 bp->insert_place = insn;
4901 if (! far_label)
4902 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
4903 else
4904 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
4907 /* Generate all pending far branches,
4908 and free our references to the far labels. */
4909 while (far_branch_list)
4911 if (far_branch_list->near_label
4912 && ! NEXT_INSN (far_branch_list->near_label))
4913 gen_far_branch (far_branch_list);
4914 if (optimize
4915 && far_branch_list->far_label
4916 && ! --LABEL_NUSES (far_branch_list->far_label))
4917 delete_insn (far_branch_list->far_label);
4918 far_branch_list = far_branch_list->prev;
4921 /* Instruction length information is no longer valid due to the new
4922 instructions that have been generated. */
4923 init_insn_lengths ();
4926 /* Dump out instruction addresses, which is useful for debugging the
4927 constant pool table stuff.
4929 If relaxing, output the label and pseudo-ops used to link together
4930 calls and the instructions which set the registers. */
4932 /* ??? The addresses printed by this routine for insns are nonsense for
4933 insns which are inside of a sequence where none of the inner insns have
4934 variable length. This is because the second pass of shorten_branches
4935 does not bother to update them. */
4937 void
4938 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
4939 int noperands ATTRIBUTE_UNUSED)
4941 if (TARGET_DUMPISIZE)
4942 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
4944 if (TARGET_RELAX)
4946 rtx note;
4948 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
4949 if (note)
4951 rtx pattern;
4953 pattern = PATTERN (insn);
4954 if (GET_CODE (pattern) == PARALLEL)
4955 pattern = XVECEXP (pattern, 0, 0);
4956 switch (GET_CODE (pattern))
4958 case SET:
4959 if (GET_CODE (SET_SRC (pattern)) != CALL
4960 && get_attr_type (insn) != TYPE_SFUNC)
4962 targetm.asm_out.internal_label
4963 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
4964 break;
4966 /* else FALLTHROUGH */
4967 case CALL:
4968 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
4969 CODE_LABEL_NUMBER (XEXP (note, 0)));
4970 break;
4972 default:
4973 gcc_unreachable ();
4979 /* Dump out any constants accumulated in the final pass. These will
4980 only be labels. */
4982 const char *
4983 output_jump_label_table (void)
4985 int i;
4987 if (pool_size)
4989 fprintf (asm_out_file, "\t.align 2\n");
4990 for (i = 0; i < pool_size; i++)
4992 pool_node *p = &pool_vector[i];
4994 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4995 CODE_LABEL_NUMBER (p->label));
4996 output_asm_insn (".long %O0", &p->value);
4998 pool_size = 0;
5001 return "";
5004 /* A full frame looks like:
5006 arg-5
5007 arg-4
5008 [ if current_function_anonymous_args
5009 arg-3
5010 arg-2
5011 arg-1
5012 arg-0 ]
5013 saved-fp
5014 saved-r10
5015 saved-r11
5016 saved-r12
5017 saved-pr
5018 local-n
5020 local-1
5021 local-0 <- fp points here. */
5023 /* Number of bytes pushed for anonymous args, used to pass information
5024 between expand_prologue and expand_epilogue. */
5026 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
5027 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
5028 for an epilogue and a negative value means that it's for a sibcall
5029 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
5030 all the registers that are about to be restored, and hence dead. */
5032 static void
5033 output_stack_adjust (int size, rtx reg, int epilogue_p,
5034 HARD_REG_SET *live_regs_mask)
5036 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
5037 if (size)
5039 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5041 /* This test is bogus, as output_stack_adjust is used to re-align the
5042 stack. */
5043 #if 0
5044 gcc_assert (!(size % align));
5045 #endif
5047 if (CONST_OK_FOR_ADD (size))
5048 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
5049 /* Try to do it with two partial adjustments; however, we must make
5050 sure that the stack is properly aligned at all times, in case
5051 an interrupt occurs between the two partial adjustments. */
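/* (Rough illustration, assuming an 8-byte stack boundary and a short
   add-immediate form covering about -128..127: an adjustment of 192 would
   be emitted as two adds of 96 each, leaving the stack 8-byte aligned
   between the two steps.)  */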
5052 else if (CONST_OK_FOR_ADD (size / 2 & -align)
5053 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
5055 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
5056 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
5058 else
5060 rtx const_reg;
5061 rtx insn;
5062 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
5063 int i;
5065 /* If TEMP is invalid, we could temporarily save a general
5066 register to MACL. However, there is currently no need
5067 to handle this case, so just die when we see it. */
5068 if (epilogue_p < 0
5069 || current_function_interrupt
5070 || ! call_really_used_regs[temp] || fixed_regs[temp])
5071 temp = -1;
5072 if (temp < 0 && ! current_function_interrupt
5073 && (TARGET_SHMEDIA || epilogue_p >= 0))
5075 HARD_REG_SET temps;
5076 COPY_HARD_REG_SET (temps, call_used_reg_set);
5077 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
5078 if (epilogue_p > 0)
5080 int nreg = 0;
5081 if (current_function_return_rtx)
5083 enum machine_mode mode;
5084 mode = GET_MODE (current_function_return_rtx);
5085 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
5086 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
5088 for (i = 0; i < nreg; i++)
5089 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
5090 if (current_function_calls_eh_return)
5092 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
5093 for (i = 0; i <= 3; i++)
5094 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
5097 if (TARGET_SHMEDIA && epilogue_p < 0)
5098 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
5099 CLEAR_HARD_REG_BIT (temps, i);
5100 if (epilogue_p <= 0)
5102 for (i = FIRST_PARM_REG;
5103 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
5104 CLEAR_HARD_REG_BIT (temps, i);
5105 if (cfun->static_chain_decl != NULL)
5106 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
5108 temp = scavenge_reg (&temps);
5110 if (temp < 0 && live_regs_mask)
5111 temp = scavenge_reg (live_regs_mask);
5112 if (temp < 0)
5114 rtx adj_reg, tmp_reg, mem;
5116 /* If we reached here, the most likely case is the (sibcall)
5117 epilogue for non-SHmedia. Put a special push/pop sequence
5118 for such a case as a last resort. This looks lengthy, but it
5119 should not be a problem because it seems to be very
5120 rare. */
5122 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
5125 /* ??? There is still the slight possibility that r4 or
5126 r5 have been reserved as fixed registers or assigned
5127 as global registers, and they change during an
5128 interrupt. There are possible ways to handle this:
5130 - If we are adjusting the frame pointer (r14), we can do
5131 with a single temp register and an ordinary push / pop
5132 on the stack.
5133 - Grab any call-used or call-saved registers (i.e. not
5134 fixed or globals) for the temps we need. We might
5135 also grab r14 if we are adjusting the stack pointer.
5136 If we can't find enough available registers, issue
5137 a diagnostic and die - the user must have reserved
5138 way too many registers.
5139 But since all this is rather unlikely to happen and
5140 would require extra testing, we just die if r4 / r5
5141 are not available. */
5142 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
5143 && !global_regs[4] && !global_regs[5]);
5145 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
5146 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
5147 emit_move_insn (gen_rtx_MEM (Pmode, reg), adj_reg);
5148 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
5149 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
5150 mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5151 emit_move_insn (mem, tmp_reg);
5152 emit_move_insn (tmp_reg, gen_rtx_MEM (Pmode, reg));
5153 mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5154 emit_move_insn (mem, tmp_reg);
5155 emit_move_insn (reg, adj_reg);
5156 mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
5157 emit_move_insn (adj_reg, mem);
5158 mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
5159 emit_move_insn (tmp_reg, mem);
5160 return;
5162 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
5164 /* If SIZE is negative, subtract the positive value.
5165 This sometimes allows a constant pool entry to be shared
5166 between prologue and epilogue code. */
5167 if (size < 0)
5169 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
5170 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
5172 else
5174 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
5175 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
5177 if (! epilogue_p)
5178 REG_NOTES (insn)
5179 = (gen_rtx_EXPR_LIST
5180 (REG_FRAME_RELATED_EXPR,
5181 gen_rtx_SET (VOIDmode, reg,
5182 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
5183 REG_NOTES (insn)));
5188 static rtx
5189 frame_insn (rtx x)
5191 x = emit_insn (x);
5192 RTX_FRAME_RELATED_P (x) = 1;
5193 return x;
5196 /* Output RTL to push register RN onto the stack. */
5198 static rtx
5199 push (int rn)
5201 rtx x;
5202 if (rn == FPUL_REG)
5203 x = gen_push_fpul ();
5204 else if (rn == FPSCR_REG)
5205 x = gen_push_fpscr ();
5206 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5207 && FP_OR_XD_REGISTER_P (rn))
5209 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5210 return NULL_RTX;
5211 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
5213 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5214 x = gen_push_e (gen_rtx_REG (SFmode, rn));
5215 else
5216 x = gen_push (gen_rtx_REG (SImode, rn));
5218 x = frame_insn (x);
5219 REG_NOTES (x)
5220 = gen_rtx_EXPR_LIST (REG_INC,
5221 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5222 return x;
5225 /* Output RTL to pop register RN from the stack. */
5227 static void
5228 pop (int rn)
5230 rtx x;
5231 if (rn == FPUL_REG)
5232 x = gen_pop_fpul ();
5233 else if (rn == FPSCR_REG)
5234 x = gen_pop_fpscr ();
5235 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5236 && FP_OR_XD_REGISTER_P (rn))
5238 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5239 return;
5240 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
5242 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5243 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
5244 else
5245 x = gen_pop (gen_rtx_REG (SImode, rn));
5247 x = emit_insn (x);
5248 REG_NOTES (x)
5249 = gen_rtx_EXPR_LIST (REG_INC,
5250 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5253 /* Generate code to push the regs specified in the mask. */
5255 static void
5256 push_regs (HARD_REG_SET *mask, int interrupt_handler)
5258 int i;
5259 int skip_fpscr = 0;
5261 /* Push PR last; this gives better latencies after the prologue, and
5262 candidates for the return delay slot when there are no general
5263 registers pushed. */
5264 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5266 /* If this is an interrupt handler, and the SZ bit varies,
5267 and we have to push any floating point register, we need
5268 to switch to the correct precision first. */
5269 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
5270 && hard_regs_intersect_p (mask, &reg_class_contents[DF_REGS]))
5272 HARD_REG_SET unsaved;
5274 push (FPSCR_REG);
5275 COMPL_HARD_REG_SET (unsaved, *mask);
5276 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
5277 skip_fpscr = 1;
5279 if (i != PR_REG
5280 && (i != FPSCR_REG || ! skip_fpscr)
5281 && TEST_HARD_REG_BIT (*mask, i))
5282 push (i);
5284 if (TEST_HARD_REG_BIT (*mask, PR_REG))
5285 push (PR_REG);
5288 /* Calculate how much extra space is needed to save all callee-saved
5289 target registers.
5290 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5292 static int
5293 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
5295 int reg;
5296 int stack_space = 0;
5297 int interrupt_handler = sh_cfun_interrupt_handler_p ();
5299 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5300 if ((! call_really_used_regs[reg] || interrupt_handler)
5301 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5302 /* Leave space to save this target register on the stack,
5303 in case target register allocation wants to use it. */
5304 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5305 return stack_space;
5308 /* Decide whether we should reserve space for callee-save target registers,
5309 in case target register allocation wants to use them. REGS_SAVED is
5310 the space, in bytes, that is already required for register saves.
5311 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5313 static int
5314 shmedia_reserve_space_for_target_registers_p (int regs_saved,
5315 HARD_REG_SET *live_regs_mask)
5317 if (optimize_size)
5318 return 0;
5319 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
5322 /* Decide how much space to reserve for callee-save target registers
5323 in case target register allocation wants to use them.
5324 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5326 static int
5327 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5329 if (shmedia_space_reserved_for_target_registers)
5330 return shmedia_target_regs_stack_space (live_regs_mask);
5331 else
5332 return 0;
5335 /* Work out the registers which need to be saved, both as a mask and a
5336 count of saved words. Return the count.
5338 If doing a pragma interrupt function, then push all regs used by the
5339 function, and if we call another function (we can tell by looking at PR),
5340 make sure that all the regs it clobbers are safe too. */
5342 static int
5343 calc_live_regs (HARD_REG_SET *live_regs_mask)
5345 unsigned int reg;
5346 int count;
5347 int interrupt_handler;
5348 int pr_live, has_call;
5350 interrupt_handler = sh_cfun_interrupt_handler_p ();
5352 CLEAR_HARD_REG_SET (*live_regs_mask);
5353 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
5354 && regs_ever_live[FPSCR_REG])
5355 target_flags &= ~MASK_FPU_SINGLE;
5356 /* If we can avoid a lot of saves by switching to double mode, do that. */
5357 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
5358 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5359 if (regs_ever_live[reg] && regs_ever_live[reg+1]
5360 && (! call_really_used_regs[reg]
5361 || (interrupt_handler && ! pragma_trapa))
5362 && ++count > 2)
5364 target_flags &= ~MASK_FPU_SINGLE;
5365 break;
5367 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5368 knows how to use it. That means the pseudo originally allocated for
5369 the initial value can become the PR_MEDIA_REG hard register, as seen for
5370 execute/20010122-1.c:test9. */
5371 if (TARGET_SHMEDIA)
5372 /* ??? this function is called from initial_elimination_offset, hence we
5373 can't use the result of sh_media_register_for_return here. */
5374 pr_live = sh_pr_n_sets ();
5375 else
5377 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5378 pr_live = (pr_initial
5379 ? (GET_CODE (pr_initial) != REG
5380 || REGNO (pr_initial) != (PR_REG))
5381 : regs_ever_live[PR_REG]);
5382 /* For Shcompact, if not optimizing, we end up with a memory reference
5383 using the return address pointer for __builtin_return_address even
5384 though there is no actual need to put the PR register on the stack. */
5385 pr_live |= regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM];
5387 /* Force PR to be live if the prologue has to call the SHmedia
5388 argument decoder or register saver. */
5389 if (TARGET_SHCOMPACT
5390 && ((current_function_args_info.call_cookie
5391 & ~ CALL_COOKIE_RET_TRAMP (1))
5392 || current_function_has_nonlocal_label))
5393 pr_live = 1;
5394 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
5395 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
5397 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5398 ? pr_live
5399 : (interrupt_handler && ! pragma_trapa)
5400 ? (/* Need to save all the regs ever live. */
5401 (regs_ever_live[reg]
5402 || (call_really_used_regs[reg]
5403 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
5404 || reg == PIC_OFFSET_TABLE_REGNUM)
5405 && has_call)
5406 || (has_call && REGISTER_NATURAL_MODE (reg) == SImode
5407 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5408 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5409 && reg != RETURN_ADDRESS_POINTER_REGNUM
5410 && reg != T_REG && reg != GBR_REG
5411 /* Push fpscr only on targets which have an FPU. */
5412 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5413 : (/* Only push those regs which are used and need to be saved. */
5414 (TARGET_SHCOMPACT
5415 && flag_pic
5416 && current_function_args_info.call_cookie
5417 && reg == PIC_OFFSET_TABLE_REGNUM)
5418 || (regs_ever_live[reg] && ! call_really_used_regs[reg])
5419 || (current_function_calls_eh_return
5420 && (reg == EH_RETURN_DATA_REGNO (0)
5421 || reg == EH_RETURN_DATA_REGNO (1)
5422 || reg == EH_RETURN_DATA_REGNO (2)
5423 || reg == EH_RETURN_DATA_REGNO (3)))
5424 || ((reg == MACL_REG || reg == MACH_REG)
5425 && regs_ever_live[reg]
5426 && sh_cfun_attr_renesas_p ())
5429 SET_HARD_REG_BIT (*live_regs_mask, reg);
5430 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5432 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
5433 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
5435 if (FP_REGISTER_P (reg))
5437 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
5439 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5440 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
5443 else if (XD_REGISTER_P (reg))
5445 /* Must switch to double mode to access these registers. */
5446 target_flags &= ~MASK_FPU_SINGLE;
5451 /* If we have a target register optimization pass after prologue / epilogue
5452 threading, we need to assume all target registers will be live even if
5453 they aren't now. */
5454 if (flag_branch_target_load_optimize2
5455 && TARGET_SAVE_ALL_TARGET_REGS
5456 && shmedia_space_reserved_for_target_registers)
5457 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5458 if ((! call_really_used_regs[reg] || interrupt_handler)
5459 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5461 SET_HARD_REG_BIT (*live_regs_mask, reg);
5462 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5464 /* If this is an interrupt handler, we don't have any call-clobbered
5465 registers we can conveniently use for target register save/restore.
5466 Make sure we save at least one general purpose register when we need
5467 to save target registers. */
5468 if (interrupt_handler
5469 && hard_regs_intersect_p (live_regs_mask,
5470 &reg_class_contents[TARGET_REGS])
5471 && ! hard_regs_intersect_p (live_regs_mask,
5472 &reg_class_contents[GENERAL_REGS]))
5474 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
5475 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
5478 return count;
5481 /* Code to generate prologue and epilogue sequences */
5483 /* PUSHED is the number of bytes that are being pushed on the
5484 stack for register saves. Return the frame size, padded
5485 appropriately so that the stack stays properly aligned. */
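/* (Example of the computation below: with get_frame_size () == 20,
   PUSHED == 12 and an 8-byte boundary, (20 + 12 + 7) & -8 == 32, minus 12
   gives a frame size of 20, so frame plus pushed registers together stay a
   multiple of the boundary.)  */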
5486 static HOST_WIDE_INT
5487 rounded_frame_size (int pushed)
5489 HOST_WIDE_INT size = get_frame_size ();
5490 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5492 return ((size + pushed + align - 1) & -align) - pushed;
5495 /* Choose a call-clobbered target-branch register that remains
5496 unchanged along the whole function. We set it up as the return
5497 value in the prologue. */
5498 int
5499 sh_media_register_for_return (void)
5501 int regno;
5502 int tr0_used;
5504 if (! current_function_is_leaf)
5505 return -1;
5506 if (lookup_attribute ("interrupt_handler",
5507 DECL_ATTRIBUTES (current_function_decl)))
5508 return -1;
5509 if (sh_cfun_interrupt_handler_p ())
5510 return -1;
5512 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
5514 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
5515 if (call_really_used_regs[regno] && ! regs_ever_live[regno])
5516 return regno;
5518 return -1;
5521 /* The maximum registers we need to save are:
5522 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
5523 - 32 floating point registers (for each pair, we save none,
5524 one single precision value, or a double precision value).
5525 - 8 target registers
5526 - add 1 entry for a delimiter. */
5527 #define MAX_SAVED_REGS (62+32+8)
5529 typedef struct save_entry_s
5531 unsigned char reg;
5532 unsigned char mode;
5533 short offset;
5534 } save_entry;
5536 #define MAX_TEMPS 4
5538 /* There will be a delimiter entry with VOIDmode both at the start and the
5539 end of a filled in schedule. The end delimiter has the offset of the
5540 save with the smallest (i.e. most negative) offset. */
5541 typedef struct save_schedule_s
5543 save_entry entries[MAX_SAVED_REGS + 2];
5544 int temps[MAX_TEMPS+1];
5545 } save_schedule;
5547 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
5548 use reverse order. Returns the last entry written to (not counting
5549 the delimiter). OFFSET_BASE is a number to be added to all offset
5550 entries. */
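/* (So, per the comments above: ENTRIES[0] is the start delimiter at
   OFFSET_BASE, the saves follow at successively lower offsets, and the
   entry just past the last one written is the VOIDmode end delimiter that
   carries the lowest offset.)  */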
5552 static save_entry *
5553 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
5554 int offset_base)
5556 int align, i;
5557 save_entry *entry = schedule->entries;
5558 int tmpx = 0;
5559 int offset;
5561 if (! current_function_interrupt)
5562 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
5563 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
5564 && ! FUNCTION_ARG_REGNO_P (i)
5565 && i != FIRST_RET_REG
5566 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
5567 && ! (current_function_calls_eh_return
5568 && (i == EH_RETURN_STACKADJ_REGNO
5569 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
5570 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
5571 schedule->temps[tmpx++] = i;
5572 entry->reg = -1;
5573 entry->mode = VOIDmode;
5574 entry->offset = offset_base;
5575 entry++;
5576 /* We loop twice: first, we save 8-byte aligned registers at the
5577 higher addresses, which are known to be aligned. Then, we
5578 proceed to saving 32-bit registers that don't need 8-byte
5579 alignment.
5580 If this is an interrupt function, all registers that need saving
5581 need to be saved in full. Moreover, we need to postpone saving
5582 target registers till we have saved some general purpose registers
5583 we can then use as scratch registers. */
5584 offset = offset_base;
5585 for (align = 1; align >= 0; align--)
5587 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
5588 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5590 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5591 int reg = i;
5593 if (current_function_interrupt)
5595 if (TARGET_REGISTER_P (i))
5596 continue;
5597 if (GENERAL_REGISTER_P (i))
5598 mode = DImode;
5600 if (mode == SFmode && (i % 2) == 1
5601 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5602 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
5604 mode = DFmode;
5605 i--;
5606 reg--;
5609 /* If we're doing the aligned pass and this is not aligned,
5610 or we're doing the unaligned pass and this is aligned,
5611 skip it. */
5612 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
5613 != align)
5614 continue;
5616 if (current_function_interrupt
5617 && GENERAL_REGISTER_P (i)
5618 && tmpx < MAX_TEMPS)
5619 schedule->temps[tmpx++] = i;
5621 offset -= GET_MODE_SIZE (mode);
5622 entry->reg = i;
5623 entry->mode = mode;
5624 entry->offset = offset;
5625 entry++;
5627 if (align && current_function_interrupt)
5628 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
5629 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5631 offset -= GET_MODE_SIZE (DImode);
5632 entry->reg = i;
5633 entry->mode = DImode;
5634 entry->offset = offset;
5635 entry++;
5638 entry->reg = -1;
5639 entry->mode = VOIDmode;
5640 entry->offset = offset;
5641 schedule->temps[tmpx] = -1;
5642 return entry - 1;
5645 void
5646 sh_expand_prologue (void)
5648 HARD_REG_SET live_regs_mask;
5649 int d, i;
5650 int d_rounding = 0;
5651 int save_flags = target_flags;
5652 int pretend_args;
5654 current_function_interrupt = sh_cfun_interrupt_handler_p ();
5656 /* We have pretend args if we had an object sent partially in registers
5657 and partially on the stack, e.g. a large structure. */
5658 pretend_args = current_function_pretend_args_size;
5659 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
5660 && (NPARM_REGS(SImode)
5661 > current_function_args_info.arg_count[(int) SH_ARG_INT]))
5662 pretend_args = 0;
5663 output_stack_adjust (-pretend_args
5664 - current_function_args_info.stack_regs * 8,
5665 stack_pointer_rtx, 0, NULL);
5667 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
5668 /* We're going to use the PIC register to load the address of the
5669 incoming-argument decoder and/or of the return trampoline from
5670 the GOT, so make sure the PIC register is preserved and
5671 initialized. */
5672 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5674 if (TARGET_SHCOMPACT
5675 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5677 int reg;
5679 /* First, make all registers with incoming arguments that will
5680 be pushed onto the stack live, so that register renaming
5681 doesn't overwrite them. */
5682 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
5683 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
5684 >= NPARM_REGS (SImode) - reg)
5685 for (; reg < NPARM_REGS (SImode); reg++)
5686 emit_insn (gen_shcompact_preserve_incoming_args
5687 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5688 else if (CALL_COOKIE_INT_REG_GET
5689 (current_function_args_info.call_cookie, reg) == 1)
5690 emit_insn (gen_shcompact_preserve_incoming_args
5691 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5693 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
5694 stack_pointer_rtx);
5695 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
5696 GEN_INT (current_function_args_info.call_cookie));
5697 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
5698 gen_rtx_REG (SImode, R0_REG));
5700 else if (TARGET_SHMEDIA)
5702 int tr = sh_media_register_for_return ();
5704 if (tr >= 0)
5706 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
5707 gen_rtx_REG (DImode, PR_MEDIA_REG));
5709 /* ??? We should suppress saving pr when we don't need it, but this
5710 is tricky because of builtin_return_address. */
5712 /* If this function only exits with sibcalls, this copy
5713 will be flagged as dead. */
5714 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5715 const0_rtx,
5716 REG_NOTES (insn));
5720 /* Emit the code for SETUP_VARARGS. */
5721 if (current_function_stdarg)
5723 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
5725 /* Push arg regs as if they'd been provided by the caller on the stack. */
5726 for (i = 0; i < NPARM_REGS(SImode); i++)
5728 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
5729 rtx insn;
5731 if (i >= (NPARM_REGS(SImode)
5732 - current_function_args_info.arg_count[(int) SH_ARG_INT]
5734 break;
5735 insn = push (rn);
5736 RTX_FRAME_RELATED_P (insn) = 0;
5741 /* If we're supposed to switch stacks at function entry, do so now. */
5742 if (sp_switch)
5743 emit_insn (gen_sp_switch_1 ());
5745 d = calc_live_regs (&live_regs_mask);
5746 /* ??? Maybe we could save some switching if we can move a mode switch
5747 that already happens to be at the function start into the prologue. */
5748 if (target_flags != save_flags && ! current_function_interrupt)
5749 emit_insn (gen_toggle_sz ());
5751 if (TARGET_SH5)
5753 int offset_base, offset;
5754 rtx r0 = NULL_RTX;
5755 int offset_in_r0 = -1;
5756 int sp_in_r0 = 0;
5757 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5758 int total_size, save_size;
5759 save_schedule schedule;
5760 save_entry *entry;
5761 int *tmp_pnt;
5763 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
5764 && ! current_function_interrupt)
5765 r0 = gen_rtx_REG (Pmode, R0_REG);
5767 /* D is the actual number of bytes that we need for saving registers;
5768 however, in initial_elimination_offset we have committed to using
5769 an additional TREGS_SPACE bytes.  In order to keep both
5770 addresses to arguments supplied by the caller and local variables
5771 valid, we must keep this gap. Place it between the incoming
5772 arguments and the actually saved registers in a bid to optimize
5773 locality of reference. */
5774 total_size = d + tregs_space;
5775 total_size += rounded_frame_size (total_size);
5776 save_size = total_size - rounded_frame_size (d);
5777 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
5778 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5779 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
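   /* As a worked example: with an 8-byte stack boundary, a save_size of
      20 bytes leaves a remainder of 4, so d_rounding becomes 4 and the
      register save area is padded out to 24 bytes.  */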
5781 /* If adjusting the stack in a single step costs nothing extra, do so.
5782 I.e. either if a single addi is enough, or we need a movi anyway,
5783 and we don't exceed the maximum offset range (the test for the
5784 latter is conservative for simplicity). */
5785 if (TARGET_SHMEDIA
5786 && (CONST_OK_FOR_I10 (-total_size)
5787 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
5788 && total_size <= 2044)))
5789 d_rounding = total_size - save_size;
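   /* Descriptive note: when the test above fires, d_rounding equals
      rounded_frame_size (d), so save_size + d_rounding == total_size; the
      single output_stack_adjust below then allocates the whole frame at
      once and the later adjustment by -rounded_frame_size (d) + d_rounding
      is zero.  */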
5791 offset_base = d + d_rounding;
5793 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
5794 0, NULL);
5796 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
5797 tmp_pnt = schedule.temps;
5798 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
5800 enum machine_mode mode = entry->mode;
5801 unsigned int reg = entry->reg;
5802 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
5803 rtx orig_reg_rtx;
5805 offset = entry->offset;
5807 reg_rtx = gen_rtx_REG (mode, reg);
5809 mem_rtx = gen_rtx_MEM (mode,
5810 gen_rtx_PLUS (Pmode,
5811 stack_pointer_rtx,
5812 GEN_INT (offset)));
5814 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
5816 gcc_assert (r0);
5817 mem_rtx = NULL_RTX;
5819 try_pre_dec:
5821 if (HAVE_PRE_DECREMENT
5822 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
5823 || mem_rtx == NULL_RTX
5824 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5826 pre_dec = gen_rtx_MEM (mode,
5827 gen_rtx_PRE_DEC (Pmode, r0));
5829 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
5830 pre_dec_ok);
5832 pre_dec = NULL_RTX;
5834 break;
5836 pre_dec_ok:
5837 mem_rtx = NULL_RTX;
5838 offset += GET_MODE_SIZE (mode);
5840 while (0);
5842 if (mem_rtx != NULL_RTX)
5843 goto addr_ok;
5845 if (offset_in_r0 == -1)
5847 emit_move_insn (r0, GEN_INT (offset));
5848 offset_in_r0 = offset;
5850 else if (offset != offset_in_r0)
5852 emit_move_insn (r0,
5853 gen_rtx_PLUS
5854 (Pmode, r0,
5855 GEN_INT (offset - offset_in_r0)));
5856 offset_in_r0 += offset - offset_in_r0;
5859 if (pre_dec != NULL_RTX)
5861 if (! sp_in_r0)
5863 emit_move_insn (r0,
5864 gen_rtx_PLUS
5865 (Pmode, r0, stack_pointer_rtx));
5866 sp_in_r0 = 1;
5869 offset -= GET_MODE_SIZE (mode);
5870 offset_in_r0 -= GET_MODE_SIZE (mode);
5872 mem_rtx = pre_dec;
5874 else if (sp_in_r0)
5875 mem_rtx = gen_rtx_MEM (mode, r0);
5876 else
5877 mem_rtx = gen_rtx_MEM (mode,
5878 gen_rtx_PLUS (Pmode,
5879 stack_pointer_rtx,
5880 r0));
5882 /* We must not use an r0-based address for target-branch
5883 registers or for special registers without pre-dec
5884 memory addresses, since we store their values in r0
5885 first. */
5886 gcc_assert (!TARGET_REGISTER_P (reg)
5887 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
5888 || mem_rtx == pre_dec));
5890 addr_ok:
5891 orig_reg_rtx = reg_rtx;
5892 if (TARGET_REGISTER_P (reg)
5893 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5894 && mem_rtx != pre_dec))
5896 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
5898 emit_move_insn (tmp_reg, reg_rtx);
5900 if (REGNO (tmp_reg) == R0_REG)
5902 offset_in_r0 = -1;
5903 sp_in_r0 = 0;
5904 gcc_assert (!refers_to_regno_p
5905 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
5908 if (*++tmp_pnt <= 0)
5909 tmp_pnt = schedule.temps;
5911 reg_rtx = tmp_reg;
5914 rtx insn;
5916 /* Mark as interesting for the dwarf cfi generator. */
5917 insn = emit_move_insn (mem_rtx, reg_rtx);
5918 RTX_FRAME_RELATED_P (insn) = 1;
5919 /* If we use an intermediate register for the save, we can't
5920 describe this exactly in cfi as a copy of the to-be-saved
5921 register into the temporary register and then a store of the
5922 temporary register to the stack, because the temporary register can
5923 have a different natural size than the to-be-saved register.
5924 Thus, we gloss over the intermediate copy and pretend we do
5925 a direct save from the to-be-saved register. */
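   /* Roughly, the REG_FRAME_RELATED_EXPR note attached below has the shape
        (set (mem:MODE <stack slot>) (reg:MODE <original register>))
      where the stack slot uses whichever address form was chosen above
      (sp + offset, a pre-decrement through r0, or an r0-based address).  */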
5926 if (REGNO (reg_rtx) != reg)
5928 rtx set, note_rtx;
5930 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
5931 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
5932 REG_NOTES (insn));
5933 REG_NOTES (insn) = note_rtx;
5936 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
5938 rtx reg_rtx = gen_rtx_REG (mode, reg);
5939 rtx set, note_rtx;
5940 rtx mem_rtx = gen_rtx_MEM (mode,
5941 gen_rtx_PLUS (Pmode,
5942 stack_pointer_rtx,
5943 GEN_INT (offset)));
5945 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
5946 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
5947 REG_NOTES (insn));
5948 REG_NOTES (insn) = note_rtx;
5953 gcc_assert (entry->offset == d_rounding);
5955 else
5956 push_regs (&live_regs_mask, current_function_interrupt);
5958 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
5960 rtx insn = get_last_insn ();
5961 rtx last = emit_insn (gen_GOTaddr2picreg ());
5963 /* Mark these insns as possibly dead. Sometimes, flow2 may
5964 delete all uses of the PIC register. In this case, let it
5965 delete the initialization too. */
5968 insn = NEXT_INSN (insn);
5970 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5971 const0_rtx,
5972 REG_NOTES (insn));
5974 while (insn != last);
5977 if (SHMEDIA_REGS_STACK_ADJUST ())
5979 /* This must NOT go through the PLT, otherwise mach and macl
5980 may be clobbered. */
5981 function_symbol (gen_rtx_REG (Pmode, R0_REG),
5982 (TARGET_FPU_ANY
5983 ? "__GCC_push_shmedia_regs"
5984 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
5985 emit_insn (gen_shmedia_save_restore_regs_compact
5986 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
5989 if (target_flags != save_flags && ! current_function_interrupt)
5991 rtx insn = emit_insn (gen_toggle_sz ());
5993 /* If we're lucky, a mode switch in the function body will
5994 overwrite fpscr, turning this insn dead. Tell flow this
5995 insn is ok to delete. */
5996 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5997 const0_rtx,
5998 REG_NOTES (insn));
6001 target_flags = save_flags;
6003 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
6004 stack_pointer_rtx, 0, NULL);
6006 if (frame_pointer_needed)
6007 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
6009 if (TARGET_SHCOMPACT
6010 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6012 /* This must NOT go through the PLT, otherwise mach and macl
6013 may be clobbered. */
6014 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6015 "__GCC_shcompact_incoming_args", SFUNC_GOT);
6016 emit_insn (gen_shcompact_incoming_args ());
6020 void
6021 sh_expand_epilogue (bool sibcall_p)
6023 HARD_REG_SET live_regs_mask;
6024 int d, i;
6025 int d_rounding = 0;
6027 int save_flags = target_flags;
6028 int frame_size, save_size;
6029 int fpscr_deferred = 0;
6030 int e = sibcall_p ? -1 : 1;
6032 d = calc_live_regs (&live_regs_mask);
6034 save_size = d;
6035 frame_size = rounded_frame_size (d);
6037 if (TARGET_SH5)
6039 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6040 int total_size;
6041 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
6042 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6043 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
6045 total_size = d + tregs_space;
6046 total_size += rounded_frame_size (total_size);
6047 save_size = total_size - frame_size;
6049 /* If adjusting the stack in a single step costs nothing extra, do so.
6050 I.e. either if a single addi is enough, or we need a movi anyway,
6051 and we don't exceed the maximum offset range (the test for the
6052 latter is conservative for simplicity). */
6053 if (TARGET_SHMEDIA
6054 && ! frame_pointer_needed
6055 && (CONST_OK_FOR_I10 (total_size)
6056 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
6057 && total_size <= 2044)))
6058 d_rounding = frame_size;
6060 frame_size -= d_rounding;
6063 if (frame_pointer_needed)
6065 /* We must avoid scheduling the epilogue with previous basic blocks
6066 when exception handling is enabled. See PR/18032. */
6067 if (flag_exceptions)
6068 emit_insn (gen_blockage ());
6069 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
6070 &live_regs_mask);
6072 /* We must avoid moving the stack pointer adjustment past code
6073 which reads from the local frame, else an interrupt could
6074 occur after the SP adjustment and clobber data in the local
6075 frame. */
6076 emit_insn (gen_blockage ());
6077 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
6079 else if (frame_size)
6081 /* We must avoid moving the stack pointer adjustment past code
6082 which reads from the local frame, else an interrupt could
6083 occur after the SP adjustment and clobber data in the local
6084 frame. */
6085 emit_insn (gen_blockage ());
6086 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
6089 if (SHMEDIA_REGS_STACK_ADJUST ())
6091 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6092 (TARGET_FPU_ANY
6093 ? "__GCC_pop_shmedia_regs"
6094 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
6095 /* This must NOT go through the PLT, otherwise mach and macl
6096 may be clobbered. */
6097 emit_insn (gen_shmedia_save_restore_regs_compact
6098 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
6101 /* Pop all the registers. */
6103 if (target_flags != save_flags && ! current_function_interrupt)
6104 emit_insn (gen_toggle_sz ());
6105 if (TARGET_SH5)
6107 int offset_base, offset;
6108 int offset_in_r0 = -1;
6109 int sp_in_r0 = 0;
6110 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
6111 save_schedule schedule;
6112 save_entry *entry;
6113 int *tmp_pnt;
6115 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
6116 offset_base = -entry[1].offset + d_rounding;
6117 tmp_pnt = schedule.temps;
6118 for (; entry->mode != VOIDmode; entry--)
6120 enum machine_mode mode = entry->mode;
6121 int reg = entry->reg;
6122 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
6124 offset = offset_base + entry->offset;
6125 reg_rtx = gen_rtx_REG (mode, reg);
6127 mem_rtx = gen_rtx_MEM (mode,
6128 gen_rtx_PLUS (Pmode,
6129 stack_pointer_rtx,
6130 GEN_INT (offset)));
6132 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
6134 mem_rtx = NULL_RTX;
6136 try_post_inc:
6138 if (HAVE_POST_INCREMENT
6139 && (offset == offset_in_r0
6140 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
6141 && mem_rtx == NULL_RTX)
6142 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6144 post_inc = gen_rtx_MEM (mode,
6145 gen_rtx_POST_INC (Pmode, r0));
6147 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
6148 post_inc_ok);
6150 post_inc = NULL_RTX;
6152 break;
6154 post_inc_ok:
6155 mem_rtx = NULL_RTX;
6157 while (0);
6159 if (mem_rtx != NULL_RTX)
6160 goto addr_ok;
6162 if (offset_in_r0 == -1)
6164 emit_move_insn (r0, GEN_INT (offset));
6165 offset_in_r0 = offset;
6167 else if (offset != offset_in_r0)
6169 emit_move_insn (r0,
6170 gen_rtx_PLUS
6171 (Pmode, r0,
6172 GEN_INT (offset - offset_in_r0)));
6173 offset_in_r0 += offset - offset_in_r0;
6176 if (post_inc != NULL_RTX)
6178 if (! sp_in_r0)
6180 emit_move_insn (r0,
6181 gen_rtx_PLUS
6182 (Pmode, r0, stack_pointer_rtx));
6183 sp_in_r0 = 1;
6186 mem_rtx = post_inc;
6188 offset_in_r0 += GET_MODE_SIZE (mode);
6190 else if (sp_in_r0)
6191 mem_rtx = gen_rtx_MEM (mode, r0);
6192 else
6193 mem_rtx = gen_rtx_MEM (mode,
6194 gen_rtx_PLUS (Pmode,
6195 stack_pointer_rtx,
6196 r0));
6198 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6199 || mem_rtx == post_inc);
6201 addr_ok:
6202 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6203 && mem_rtx != post_inc)
6205 insn = emit_move_insn (r0, mem_rtx);
6206 mem_rtx = r0;
6208 else if (TARGET_REGISTER_P (reg))
6210 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
6212 /* Give the scheduler a bit of freedom by using up to
6213 MAX_TEMPS registers in a round-robin fashion. */
6214 insn = emit_move_insn (tmp_reg, mem_rtx);
6215 mem_rtx = tmp_reg;
6216 if (*++tmp_pnt < 0)
6217 tmp_pnt = schedule.temps;
6220 insn = emit_move_insn (reg_rtx, mem_rtx);
6221 if (reg == PR_MEDIA_REG && sh_media_register_for_return () >= 0)
6222 /* This is dead, unless we return with a sibcall. */
6223 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6224 const0_rtx,
6225 REG_NOTES (insn));
6228 gcc_assert (entry->offset + offset_base == d + d_rounding);
6230 else /* ! TARGET_SH5 */
6232 save_size = 0;
6233 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6234 pop (PR_REG);
6235 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
6237 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
6239 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
6240 && hard_regs_intersect_p (&live_regs_mask,
6241 &reg_class_contents[DF_REGS]))
6242 fpscr_deferred = 1;
6243 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
6244 pop (j);
6245 if (j == FIRST_FP_REG && fpscr_deferred)
6246 pop (FPSCR_REG);
6250 if (target_flags != save_flags && ! current_function_interrupt)
6251 emit_insn (gen_toggle_sz ());
6252 target_flags = save_flags;
6254 output_stack_adjust (current_function_pretend_args_size
6255 + save_size + d_rounding
6256 + current_function_args_info.stack_regs * 8,
6257 stack_pointer_rtx, e, NULL);
6259 if (current_function_calls_eh_return)
6260 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
6261 EH_RETURN_STACKADJ_RTX));
6263 /* Switch back to the normal stack if necessary. */
6264 if (sp_switch)
6265 emit_insn (gen_sp_switch_2 ());
6267 /* Tell flow the insn that pops PR isn't dead. */
6268 /* PR_REG will never be live in SHmedia mode, and we don't need to
6269 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6270 by the return pattern. */
6271 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6272 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
6275 static int sh_need_epilogue_known = 0;
6278 sh_need_epilogue (void)
6280 if (! sh_need_epilogue_known)
6282 rtx epilogue;
6284 start_sequence ();
6285 sh_expand_epilogue (0);
6286 epilogue = get_insns ();
6287 end_sequence ();
6288 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
6290 return sh_need_epilogue_known > 0;
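/* Descriptive note: sh_need_epilogue_known caches the answer computed
   above: 0 means not yet computed, -1 means the expanded epilogue came
   out empty, 1 means an epilogue is needed.  sh_output_function_epilogue
   resets it to 0 for the next function.  */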
6293 /* Emit code to change the current function's return address to RA.
6294 TEMP is available as a scratch register, if needed. */
6296 void
6297 sh_set_return_address (rtx ra, rtx tmp)
6299 HARD_REG_SET live_regs_mask;
6300 int d;
6301 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6302 int pr_offset;
6304 d = calc_live_regs (&live_regs_mask);
6306 /* If pr_reg isn't live, we can set it (or the register given in
6307 sh_media_register_for_return) directly. */
6308 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6310 rtx rr;
6312 if (TARGET_SHMEDIA)
6314 int rr_regno = sh_media_register_for_return ();
6316 if (rr_regno < 0)
6317 rr_regno = pr_reg;
6319 rr = gen_rtx_REG (DImode, rr_regno);
6321 else
6322 rr = gen_rtx_REG (SImode, pr_reg);
6324 emit_insn (GEN_MOV (rr, ra));
6325 /* Tell flow the register for return isn't dead. */
6326 emit_insn (gen_rtx_USE (VOIDmode, rr));
6327 return;
6330 if (TARGET_SH5)
6332 int offset;
6333 save_schedule schedule;
6334 save_entry *entry;
6336 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
6337 offset = entry[1].offset;
6338 for (; entry->mode != VOIDmode; entry--)
6339 if (entry->reg == pr_reg)
6340 goto found;
6342 /* We can't find the pr register. */
6343 gcc_unreachable ();
6345 found:
6346 offset = entry->offset - offset;
6347 pr_offset = (rounded_frame_size (d) + offset
6348 + SHMEDIA_REGS_STACK_ADJUST ());
6350 else
6351 pr_offset = rounded_frame_size (d);
6353 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6354 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
6356 tmp = gen_rtx_MEM (Pmode, tmp);
6357 emit_insn (GEN_MOV (tmp, ra));
6360 /* Clear variables at function end. */
6362 static void
6363 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6364 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6366 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
6367 sh_need_epilogue_known = 0;
6368 sp_switch = NULL_RTX;
6371 static rtx
6372 sh_builtin_saveregs (void)
6374 /* First unnamed integer register. */
6375 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
6376 /* Number of integer registers we need to save. */
6377 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6378 /* First unnamed SFmode float reg */
6379 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6380 /* Number of SFmode float regs to save. */
6381 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6382 rtx regbuf, fpregs;
6383 int bufsize, regno;
6384 HOST_WIDE_INT alias_set;
6386 if (TARGET_SH5)
6388 if (n_intregs)
6390 int pushregs = n_intregs;
6392 while (pushregs < NPARM_REGS (SImode) - 1
6393 && (CALL_COOKIE_INT_REG_GET
6394 (current_function_args_info.call_cookie,
6395 NPARM_REGS (SImode) - pushregs)
6396 == 1))
6398 current_function_args_info.call_cookie
6399 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6400 - pushregs, 1);
6401 pushregs++;
6404 if (pushregs == NPARM_REGS (SImode))
6405 current_function_args_info.call_cookie
6406 |= (CALL_COOKIE_INT_REG (0, 1)
6407 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6408 else
6409 current_function_args_info.call_cookie
6410 |= CALL_COOKIE_STACKSEQ (pushregs);
6412 current_function_pretend_args_size += 8 * n_intregs;
6414 if (TARGET_SHCOMPACT)
6415 return const0_rtx;
6418 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
6420 error ("__builtin_saveregs not supported by this subtarget");
6421 return const0_rtx;
6424 if (TARGET_SHMEDIA)
6425 n_floatregs = 0;
6427 /* Allocate block of memory for the regs. */
6428 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6429 Or can assign_stack_local accept a 0 SIZE argument? */
6430 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
6432 if (TARGET_SHMEDIA)
6433 regbuf = gen_rtx_MEM (BLKmode,
6434 gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
6435 else if (n_floatregs & 1)
6437 rtx addr;
6439 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6440 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
6441 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6442 regbuf = change_address (regbuf, BLKmode, addr);
6444 else
6445 regbuf = assign_stack_local (BLKmode, bufsize, 0);
6446 alias_set = get_varargs_alias_set ();
6447 set_mem_alias_set (regbuf, alias_set);
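   /* Illustrative layout of regbuf in the non-SHmedia case: the unnamed
      FP argument registers are stored first, followed by the unnamed
      integer argument registers:

          offset 0                    offset n_floatregs * UNITS_PER_WORD
          [ SFmode arg regs ........ ][ SImode arg regs ................ ]  */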
6449 /* Save int args.
6450 This is optimized to only save the regs that are necessary. Explicitly
6451 named args need not be saved. */
6452 if (n_intregs > 0)
6453 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
6454 adjust_address (regbuf, BLKmode,
6455 n_floatregs * UNITS_PER_WORD),
6456 n_intregs);
6458 if (TARGET_SHMEDIA)
6459 /* Return the address of the regbuf. */
6460 return XEXP (regbuf, 0);
6462 /* Save float args.
6463 This is optimized to only save the regs that are necessary. Explicitly
6464 named args need not be saved.
6465 We explicitly build a pointer to the buffer because it halves the insn
6466 count when not optimizing (otherwise the pointer is built for each reg
6467 saved).
6468 We emit the moves in reverse order so that we can use predecrement. */
6470 fpregs = copy_to_mode_reg (Pmode,
6471 plus_constant (XEXP (regbuf, 0),
6472 n_floatregs * UNITS_PER_WORD));
6473 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
6475 rtx mem;
6476 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
6478 emit_insn (gen_addsi3 (fpregs, fpregs,
6479 GEN_INT (-2 * UNITS_PER_WORD)));
6480 mem = gen_rtx_MEM (DFmode, fpregs);
6481 set_mem_alias_set (mem, alias_set);
6482 emit_move_insn (mem,
6483 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
6485 regno = first_floatreg;
6486 if (regno & 1)
6488 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6489 mem = gen_rtx_MEM (SFmode, fpregs);
6490 set_mem_alias_set (mem, alias_set);
6491 emit_move_insn (mem,
6492 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
6493 - (TARGET_LITTLE_ENDIAN != 0)));
6496 else
6497 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
6499 rtx mem;
6501 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6502 mem = gen_rtx_MEM (SFmode, fpregs);
6503 set_mem_alias_set (mem, alias_set);
6504 emit_move_insn (mem,
6505 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
6508 /* Return the address of the regbuf. */
6509 return XEXP (regbuf, 0);
6512 /* Define the `__builtin_va_list' type for the ABI. */
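/* As a rough C-level sketch (for the SH2E/SH4 non-Renesas case; other
   configurations fall back to a plain pointer below), the record built
   here corresponds to:

       struct __builtin_va_list
       {
         void *__va_next_o;          next unconsumed integer register slot
         void *__va_next_o_limit;    end of the integer register save area
         void *__va_next_fp;         next unconsumed FP register slot
         void *__va_next_fp_limit;   end of the FP register save area
         void *__va_next_stack;      next argument passed on the stack
       };  */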
6514 static tree
6515 sh_build_builtin_va_list (void)
6517 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6518 tree record;
6520 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
6521 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6522 return ptr_type_node;
6524 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6526 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
6527 ptr_type_node);
6528 f_next_o_limit = build_decl (FIELD_DECL,
6529 get_identifier ("__va_next_o_limit"),
6530 ptr_type_node);
6531 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
6532 ptr_type_node);
6533 f_next_fp_limit = build_decl (FIELD_DECL,
6534 get_identifier ("__va_next_fp_limit"),
6535 ptr_type_node);
6536 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
6537 ptr_type_node);
6539 DECL_FIELD_CONTEXT (f_next_o) = record;
6540 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
6541 DECL_FIELD_CONTEXT (f_next_fp) = record;
6542 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
6543 DECL_FIELD_CONTEXT (f_next_stack) = record;
6545 TYPE_FIELDS (record) = f_next_o;
6546 TREE_CHAIN (f_next_o) = f_next_o_limit;
6547 TREE_CHAIN (f_next_o_limit) = f_next_fp;
6548 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
6549 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
6551 layout_type (record);
6553 return record;
6556 /* Implement `va_start' for varargs and stdarg. */
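/* Roughly, for the SH2E/SH4 non-Renesas case the code below ends up with:

       next_fp       = __builtin_saveregs ()
       next_fp_limit = next_fp + (8 - <named FP args>) * UNITS_PER_WORD
       next_o        = next_fp_limit
       next_o_limit  = next_o + (4 - <named int args>) * UNITS_PER_WORD
       next_stack    = nextarg (the first anonymous argument on the stack)

   with the register counts clamped at zero once all 8 FP or 4 integer
   argument registers are taken by named parameters.  */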
6558 void
6559 sh_va_start (tree valist, rtx nextarg)
6561 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6562 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6563 tree t, u;
6564 int nfp, nint;
6566 if (TARGET_SH5)
6568 expand_builtin_saveregs ();
6569 std_expand_builtin_va_start (valist, nextarg);
6570 return;
6573 if ((! TARGET_SH2E && ! TARGET_SH4)
6574 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6576 std_expand_builtin_va_start (valist, nextarg);
6577 return;
6580 f_next_o = TYPE_FIELDS (va_list_type_node);
6581 f_next_o_limit = TREE_CHAIN (f_next_o);
6582 f_next_fp = TREE_CHAIN (f_next_o_limit);
6583 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6584 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6586 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6587 NULL_TREE);
6588 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6589 valist, f_next_o_limit, NULL_TREE);
6590 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
6591 NULL_TREE);
6592 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6593 valist, f_next_fp_limit, NULL_TREE);
6594 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6595 valist, f_next_stack, NULL_TREE);
6597 /* Call __builtin_saveregs. */
6598 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
6599 t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
6600 TREE_SIDE_EFFECTS (t) = 1;
6601 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6603 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
6604 if (nfp < 8)
6605 nfp = 8 - nfp;
6606 else
6607 nfp = 0;
6608 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6609 build_int_cst (NULL_TREE, UNITS_PER_WORD * nfp)));
6610 t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
6611 TREE_SIDE_EFFECTS (t) = 1;
6612 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6614 t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
6615 TREE_SIDE_EFFECTS (t) = 1;
6616 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6618 nint = current_function_args_info.arg_count[SH_ARG_INT];
6619 if (nint < 4)
6620 nint = 4 - nint;
6621 else
6622 nint = 0;
6623 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6624 build_int_cst (NULL_TREE, UNITS_PER_WORD * nint)));
6625 t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
6626 TREE_SIDE_EFFECTS (t) = 1;
6627 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6629 u = make_tree (ptr_type_node, nextarg);
6630 t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
6631 TREE_SIDE_EFFECTS (t) = 1;
6632 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6635 /* Implement `va_arg'. */
6637 static tree
6638 sh_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p,
6639 tree *post_p ATTRIBUTE_UNUSED)
6641 HOST_WIDE_INT size, rsize;
6642 tree tmp, pptr_type_node;
6643 tree addr, lab_over = NULL, result = NULL;
6644 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
6646 if (pass_by_ref)
6647 type = build_pointer_type (type);
6649 size = int_size_in_bytes (type);
6650 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6651 pptr_type_node = build_pointer_type (ptr_type_node);
6653 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
6654 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
6656 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6657 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6658 int pass_as_float;
6659 tree lab_false;
6661 f_next_o = TYPE_FIELDS (va_list_type_node);
6662 f_next_o_limit = TREE_CHAIN (f_next_o);
6663 f_next_fp = TREE_CHAIN (f_next_o_limit);
6664 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6665 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6667 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6668 NULL_TREE);
6669 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6670 valist, f_next_o_limit, NULL_TREE);
6671 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
6672 valist, f_next_fp, NULL_TREE);
6673 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6674 valist, f_next_fp_limit, NULL_TREE);
6675 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6676 valist, f_next_stack, NULL_TREE);
6678 /* Structures with a single member with a distinct mode are passed
6679 like their member. This is relevant if the latter has a REAL_TYPE
6680 or COMPLEX_TYPE type. */
6681 if (TREE_CODE (type) == RECORD_TYPE
6682 && TYPE_FIELDS (type)
6683 && TREE_CODE (TYPE_FIELDS (type)) == FIELD_DECL
6684 && (TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == REAL_TYPE
6685 || TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == COMPLEX_TYPE)
6686 && TREE_CHAIN (TYPE_FIELDS (type)) == NULL_TREE)
6687 type = TREE_TYPE (TYPE_FIELDS (type));
6689 if (TARGET_SH4)
6691 pass_as_float = ((TREE_CODE (type) == REAL_TYPE && size <= 8)
6692 || (TREE_CODE (type) == COMPLEX_TYPE
6693 && TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
6694 && size <= 16));
6696 else
6698 pass_as_float = (TREE_CODE (type) == REAL_TYPE && size == 4);
6701 addr = create_tmp_var (pptr_type_node, NULL);
6702 lab_false = create_artificial_label ();
6703 lab_over = create_artificial_label ();
6705 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
6707 if (pass_as_float)
6709 int first_floatreg
6710 = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6711 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6713 tmp = build (GE_EXPR, boolean_type_node, next_fp, next_fp_limit);
6714 tmp = build (COND_EXPR, void_type_node, tmp,
6715 build (GOTO_EXPR, void_type_node, lab_false),
6716 NULL);
6717 gimplify_and_add (tmp, pre_p);
6719 if (TYPE_ALIGN (type) > BITS_PER_WORD
6720 || (((TREE_CODE (type) == REAL_TYPE && size == 8) || size == 16)
6721 && (n_floatregs & 1)))
6723 tmp = fold_convert (ptr_type_node, size_int (UNITS_PER_WORD));
6724 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp, tmp);
6725 tmp = build (PLUS_EXPR, ptr_type_node, next_fp, tmp);
6726 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp, tmp);
6727 gimplify_and_add (tmp, pre_p);
6730 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
6731 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6732 gimplify_and_add (tmp, pre_p);
6734 #ifdef FUNCTION_ARG_SCmode_WART
6735 if (TYPE_MODE (type) == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
6737 tree subtype = TREE_TYPE (type);
6738 tree real, imag;
6740 imag = std_gimplify_va_arg_expr (valist, subtype, pre_p, NULL);
6741 imag = get_initialized_tmp_var (imag, pre_p, NULL);
6743 real = std_gimplify_va_arg_expr (valist, subtype, pre_p, NULL);
6744 real = get_initialized_tmp_var (real, pre_p, NULL);
6746 result = build (COMPLEX_EXPR, type, real, imag);
6747 result = get_initialized_tmp_var (result, pre_p, NULL);
6749 #endif /* FUNCTION_ARG_SCmode_WART */
6751 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6752 gimplify_and_add (tmp, pre_p);
6754 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6755 gimplify_and_add (tmp, pre_p);
6757 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6758 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6759 gimplify_and_add (tmp, pre_p);
6761 else
6763 tmp = fold_convert (ptr_type_node, size_int (rsize));
6764 tmp = build (PLUS_EXPR, ptr_type_node, next_o, tmp);
6765 tmp = build (GT_EXPR, boolean_type_node, tmp, next_o_limit);
6766 tmp = build (COND_EXPR, void_type_node, tmp,
6767 build (GOTO_EXPR, void_type_node, lab_false),
6768 NULL);
6769 gimplify_and_add (tmp, pre_p);
6771 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
6772 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6773 gimplify_and_add (tmp, pre_p);
6775 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6776 gimplify_and_add (tmp, pre_p);
6778 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6779 gimplify_and_add (tmp, pre_p);
6781 if (size > 4 && ! TARGET_SH4)
6783 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
6784 gimplify_and_add (tmp, pre_p);
6787 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6788 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6789 gimplify_and_add (tmp, pre_p);
6792 if (!result)
6794 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6795 gimplify_and_add (tmp, pre_p);
6799 /* ??? In va-sh.h, there had been code to make values larger than
6800 size 8 indirect. This does not match the FUNCTION_ARG macros. */
6802 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
6803 if (result)
6805 tmp = build (MODIFY_EXPR, void_type_node, result, tmp);
6806 gimplify_and_add (tmp, pre_p);
6808 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6809 gimplify_and_add (tmp, pre_p);
6811 else
6812 result = tmp;
6814 if (pass_by_ref)
6815 result = build_va_arg_indirect_ref (result);
6817 return result;
6820 bool
6821 sh_promote_prototypes (tree type)
6823 if (TARGET_HITACHI)
6824 return 0;
6825 if (! type)
6826 return 1;
6827 return ! sh_attr_renesas_p (type);
6830 /* Whether an argument must be passed by reference. On SHcompact, we
6831 pretend arguments wider than 32 bits that would have been passed in
6832 registers are passed by reference, so that an SHmedia trampoline
6833 loads them into the full 64-bit registers. */
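/* Descriptive note: the nonzero value returned below is the argument's
   size in bytes; sh_pass_by_reference stores it in cum->byref, and
   sh_function_arg_advance later consults it when updating the
   byref / byref_regs bookkeeping in CUMULATIVE_ARGS.  */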
6835 static int
6836 shcompact_byref (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6837 tree type, bool named)
6839 unsigned HOST_WIDE_INT size;
6841 if (type)
6842 size = int_size_in_bytes (type);
6843 else
6844 size = GET_MODE_SIZE (mode);
6846 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
6847 && (!named
6848 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
6849 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
6850 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
6851 && size > 4
6852 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
6853 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
6854 return size;
6855 else
6856 return 0;
6859 static bool
6860 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6861 tree type, bool named)
6863 if (targetm.calls.must_pass_in_stack (mode, type))
6864 return true;
6866 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
6867 wants to know about pass-by-reference semantics for incoming
6868 arguments. */
6869 if (! cum)
6870 return false;
6872 if (TARGET_SHCOMPACT)
6874 cum->byref = shcompact_byref (cum, mode, type, named);
6875 return cum->byref != 0;
6878 return false;
6881 static bool
6882 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6883 tree type, bool named ATTRIBUTE_UNUSED)
6885 /* ??? How can it possibly be correct to return true only on the
6886 caller side of the equation? Is there someplace else in the
6887 sh backend that's magically producing the copies? */
6888 return (cum->outgoing
6889 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
6890 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
6893 static int
6894 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6895 tree type, bool named ATTRIBUTE_UNUSED)
6897 int words = 0;
6899 if (!TARGET_SH5
6900 && PASS_IN_REG_P (*cum, mode, type)
6901 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
6902 && (ROUND_REG (*cum, mode)
6903 + (mode != BLKmode
6904 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
6905 : ROUND_ADVANCE (int_size_in_bytes (type)))
6906 > NPARM_REGS (mode)))
6907 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
6909 else if (!TARGET_SHCOMPACT
6910 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
6911 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
6913 return words * UNITS_PER_WORD;
6917 /* Define where to put the arguments to a function.
6918 Value is zero to push the argument on the stack,
6919 or a hard register in which to store the argument.
6921 MODE is the argument's machine mode.
6922 TYPE is the data type of the argument (as a tree).
6923 This is null for libcalls where that information may
6924 not be available.
6925 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6926 the preceding args and about the function being called.
6927 NAMED is nonzero if this argument is a named parameter
6928 (otherwise it is an extra parameter matching an ellipsis).
6930 On SH the first args are normally in registers
6931 and the rest are pushed. Any arg that starts within the first
6932 NPARM_REGS words is at least partially passed in a register unless
6933 its data type forbids. */
6937 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
6938 tree type, int named)
6940 if (! TARGET_SH5 && mode == VOIDmode)
6941 return GEN_INT (ca->renesas_abi ? 1 : 0);
6943 if (! TARGET_SH5
6944 && PASS_IN_REG_P (*ca, mode, type)
6945 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
6947 int regno;
6949 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
6950 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
6952 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
6953 gen_rtx_REG (SFmode,
6954 BASE_ARG_REG (mode)
6955 + (ROUND_REG (*ca, mode) ^ 1)),
6956 const0_rtx);
6957 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
6958 gen_rtx_REG (SFmode,
6959 BASE_ARG_REG (mode)
6960 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
6961 GEN_INT (4));
6962 return gen_rtx_PARALLEL (SCmode, gen_rtvec (2, r1, r2));
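   /* Roughly: the PARALLEL above returns the two SFmode halves of the
      SCmode argument with their register numbers XORed with 1, i.e.
      swapped within each FP register pair, apparently to match the layout
      a little-endian SH4 callee expects (the FUNCTION_ARG_SCmode_WART
      case).  */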
6965 /* If the alignment of a DF value causes an SF register to be
6966 skipped, we will use that skipped register for the next SF
6967 value. */
6968 if ((TARGET_HITACHI || ca->renesas_abi)
6969 && ca->free_single_fp_reg
6970 && mode == SFmode)
6971 return gen_rtx_REG (mode, ca->free_single_fp_reg);
6973 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
6974 ^ (mode == SFmode && TARGET_SH4
6975 && TARGET_LITTLE_ENDIAN != 0
6976 && ! TARGET_HITACHI && ! ca->renesas_abi);
6977 return gen_rtx_REG (mode, regno);
6981 if (TARGET_SH5)
6983 if (mode == VOIDmode && TARGET_SHCOMPACT)
6984 return GEN_INT (ca->call_cookie);
6986 /* The following test assumes unnamed arguments are promoted to
6987 DFmode. */
6988 if (mode == SFmode && ca->free_single_fp_reg)
6989 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
6991 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
6992 && (named || ! ca->prototype_p)
6993 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
6995 if (! ca->prototype_p && TARGET_SHMEDIA)
6996 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
6998 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
6999 FIRST_FP_PARM_REG
7000 + ca->arg_count[(int) SH_ARG_FLOAT]);
7003 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
7004 && (! TARGET_SHCOMPACT
7005 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
7006 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
7007 type, named))))
7009 return gen_rtx_REG (mode, (FIRST_PARM_REG
7010 + ca->arg_count[(int) SH_ARG_INT]));
7013 return 0;
7016 return 0;
7019 /* Update the data in CUM to advance over an argument
7020 of mode MODE and data type TYPE.
7021 (TYPE is null for libcalls where that information may not be
7022 available.) */
7024 void
7025 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7026 tree type, int named)
7028 if (ca->force_mem)
7029 ca->force_mem = 0;
7030 else if (TARGET_SH5)
7032 tree type2 = (ca->byref && type
7033 ? TREE_TYPE (type)
7034 : type);
7035 enum machine_mode mode2 = (ca->byref && type
7036 ? TYPE_MODE (type2)
7037 : mode);
7038 int dwords = ((ca->byref
7039 ? ca->byref
7040 : mode2 == BLKmode
7041 ? int_size_in_bytes (type2)
7042 : GET_MODE_SIZE (mode2)) + 7) / 8;
7043 int numregs = MIN (dwords, NPARM_REGS (SImode)
7044 - ca->arg_count[(int) SH_ARG_INT]);
7046 if (numregs)
7048 ca->arg_count[(int) SH_ARG_INT] += numregs;
7049 if (TARGET_SHCOMPACT
7050 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
7052 ca->call_cookie
7053 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7054 - numregs, 1);
7055 /* N.B. We want this also for outgoing. */
7056 ca->stack_regs += numregs;
7058 else if (ca->byref)
7060 if (! ca->outgoing)
7061 ca->stack_regs += numregs;
7062 ca->byref_regs += numregs;
7063 ca->byref = 0;
7065 ca->call_cookie
7066 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7067 - numregs, 2);
7068 while (--numregs);
7069 ca->call_cookie
7070 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7071 - 1, 1);
7073 else if (dwords > numregs)
7075 int pushregs = numregs;
7077 if (TARGET_SHCOMPACT)
7078 ca->stack_regs += numregs;
7079 while (pushregs < NPARM_REGS (SImode) - 1
7080 && (CALL_COOKIE_INT_REG_GET
7081 (ca->call_cookie,
7082 NPARM_REGS (SImode) - pushregs)
7083 == 1))
7085 ca->call_cookie
7086 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7087 - pushregs, 1);
7088 pushregs++;
7090 if (numregs == NPARM_REGS (SImode))
7091 ca->call_cookie
7092 |= CALL_COOKIE_INT_REG (0, 1)
7093 | CALL_COOKIE_STACKSEQ (numregs - 1);
7094 else
7095 ca->call_cookie
7096 |= CALL_COOKIE_STACKSEQ (numregs);
7099 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
7100 && (named || ! ca->prototype_p))
7102 if (mode2 == SFmode && ca->free_single_fp_reg)
7103 ca->free_single_fp_reg = 0;
7104 else if (ca->arg_count[(int) SH_ARG_FLOAT]
7105 < NPARM_REGS (SFmode))
7107 int numfpregs
7108 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
7109 NPARM_REGS (SFmode)
7110 - ca->arg_count[(int) SH_ARG_FLOAT]);
7112 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
7114 if (TARGET_SHCOMPACT && ! ca->prototype_p)
7116 if (ca->outgoing && numregs > 0)
7119 ca->call_cookie
7120 |= (CALL_COOKIE_INT_REG
7121 (ca->arg_count[(int) SH_ARG_INT]
7122 - numregs + ((numfpregs - 2) / 2),
7123 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
7124 - numfpregs) / 2));
7126 while (numfpregs -= 2);
7128 else if (mode2 == SFmode && (named)
7129 && (ca->arg_count[(int) SH_ARG_FLOAT]
7130 < NPARM_REGS (SFmode)))
7131 ca->free_single_fp_reg
7132 = FIRST_FP_PARM_REG - numfpregs
7133 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
7136 return;
7139 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
7141 /* Note that we've used the skipped register. */
7142 if (mode == SFmode && ca->free_single_fp_reg)
7144 ca->free_single_fp_reg = 0;
7145 return;
7147 /* When we have a DF after an SF, there's an SF register that gets
7148 skipped in order to align the DF value. We note this skipped
7149 register, because the next SF value will use it, and not the
7150 SF that follows the DF. */
7151 if (mode == DFmode
7152 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
7154 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
7155 + BASE_ARG_REG (mode));
7159 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
7160 || PASS_IN_REG_P (*ca, mode, type))
7161 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
7162 = (ROUND_REG (*ca, mode)
7163 + (mode == BLKmode
7164 ? ROUND_ADVANCE (int_size_in_bytes (type))
7165 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
7168 /* The Renesas calling convention doesn't quite fit into this scheme since
7169 the address is passed like an invisible argument, but one that is always
7170 passed in memory. */
7171 static rtx
7172 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
7174 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7175 return 0;
7176 return gen_rtx_REG (Pmode, 2);
7179 /* Worker function for TARGET_RETURN_IN_MEMORY. */
7181 static bool
7182 sh_return_in_memory (tree type, tree fndecl)
7184 if (TARGET_SH5)
7186 if (TYPE_MODE (type) == BLKmode)
7187 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
7188 else
7189 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
7191 else
7193 return (TYPE_MODE (type) == BLKmode
7194 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7195 && TREE_CODE (type) == RECORD_TYPE));
7199 /* We actually emit the code in sh_expand_prologue. We used to use
7200 a static variable to flag that we need to emit this code, but that
7201 doesn't work when inlining, when functions are deferred and then emitted
7202 later. Fortunately, we already have two flags that are part of struct
7203 function that tell if a function uses varargs or stdarg. */
7204 static void
7205 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
7206 enum machine_mode mode,
7207 tree type,
7208 int *pretend_arg_size,
7209 int second_time ATTRIBUTE_UNUSED)
7211 gcc_assert (current_function_stdarg);
7212 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7214 int named_parm_regs, anon_parm_regs;
7216 named_parm_regs = (ROUND_REG (*ca, mode)
7217 + (mode == BLKmode
7218 ? ROUND_ADVANCE (int_size_in_bytes (type))
7219 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
7220 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
7221 if (anon_parm_regs > 0)
7222 *pretend_arg_size = anon_parm_regs * 4;
7226 static bool
7227 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
7229 return TARGET_SH5;
7232 static bool
7233 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
7235 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
7239 /* Define the offset between two registers, one to be eliminated, and
7240 the other its replacement, at the start of a routine. */
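/* In outline, the cases handled below are:
     arg pointer -> hard frame pointer or stack pointer:
         saved registers + local frame (+ byref_regs * 8);
     hard frame pointer -> stack pointer: 0;
     frame pointer -> stack pointer or hard frame pointer:
         rounded_frame_size (0);
     return address pointer -> hard frame pointer or stack pointer:
         the offset of the saved PR register within the frame.  */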
7243 initial_elimination_offset (int from, int to)
7245 int regs_saved;
7246 int regs_saved_rounding = 0;
7247 int total_saved_regs_space;
7248 int total_auto_space;
7249 int save_flags = target_flags;
7250 int copy_flags;
7251 HARD_REG_SET live_regs_mask;
7253 shmedia_space_reserved_for_target_registers = false;
7254 regs_saved = calc_live_regs (&live_regs_mask);
7255 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
7257 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
7259 shmedia_space_reserved_for_target_registers = true;
7260 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
7263 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
7264 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7265 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
7267 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
7268 copy_flags = target_flags;
7269 target_flags = save_flags;
7271 total_saved_regs_space = regs_saved + regs_saved_rounding;
7273 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7274 return total_saved_regs_space + total_auto_space
7275 + current_function_args_info.byref_regs * 8;
7277 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7278 return total_saved_regs_space + total_auto_space
7279 + current_function_args_info.byref_regs * 8;
7281 /* Initial gap between fp and sp is 0. */
7282 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7283 return 0;
7285 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7286 return rounded_frame_size (0);
7288 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7289 return rounded_frame_size (0);
7291 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
7292 && (to == HARD_FRAME_POINTER_REGNUM
7293 || to == STACK_POINTER_REGNUM));
7294 if (TARGET_SH5)
7296 int n = total_saved_regs_space;
7297 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7298 save_schedule schedule;
7299 save_entry *entry;
7301 n += total_auto_space;
7303 /* If it wasn't saved, there's not much we can do. */
7304 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7305 return n;
7307 target_flags = copy_flags;
7309 sh5_schedule_saves (&live_regs_mask, &schedule, n);
7310 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7311 if (entry->reg == pr_reg)
7313 target_flags = save_flags;
7314 return entry->offset;
7316 gcc_unreachable ();
7318 else
7319 return total_auto_space;
7322 /* Handle machine specific pragmas to be semi-compatible with Renesas
7323 compiler. */
7325 void
7326 sh_pr_interrupt (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7328 pragma_interrupt = 1;
7331 void
7332 sh_pr_trapa (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7334 pragma_interrupt = pragma_trapa = 1;
7337 void
7338 sh_pr_nosave_low_regs (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7340 pragma_nosave_low_regs = 1;
7343 /* Generate an 'interrupt_handler' attribute for decls. */
7345 static void
7346 sh_insert_attributes (tree node, tree *attributes)
7348 if (! pragma_interrupt
7349 || TREE_CODE (node) != FUNCTION_DECL)
7350 return;
7352 /* We are only interested in declarations. */
7353 if (!DECL_P (node))
7354 return;
7356 /* Add an 'interrupt_handler' attribute. */
7357 * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
7359 return;
7362 /* Supported attributes:
7364 interrupt_handler -- specifies this function is an interrupt handler.
7366 sp_switch -- specifies an alternate stack for an interrupt handler
7367 to run on.
7369 trap_exit -- use a trapa to exit an interrupt function instead of
7370 an rte instruction.
7372 renesas -- use Renesas calling/layout conventions (functions and
7373 structures). */
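/* Hedged usage sketch (illustrative only; "alt_stack" is a made-up symbol
   name for a variable holding the alternate stack address):

       void isr (void) __attribute__ ((interrupt_handler));

       #pragma interrupt
       void isr2 (void) __attribute__ ((sp_switch ("alt_stack"),
                                        trap_exit (11)));

       int f (int) __attribute__ ((renesas));  */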
7377 const struct attribute_spec sh_attribute_table[] =
7379 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
7380 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7381 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
7382 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
7383 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
7384 #ifdef SYMBIAN
7385 /* Symbian support adds two new attributes:
7386 dllexport - for exporting a function/variable that will live in a dll
7387 dllimport - for importing a function/variable from a dll
7389 Microsoft allows multiple declspecs in one __declspec, separating
7390 them with spaces. We do NOT support this. Instead, use __declspec
7391 multiple times. */
7392 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7393 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7394 #endif
7395 { NULL, 0, 0, false, false, false, NULL }
7398 /* Handle an "interrupt_handler" attribute; arguments as in
7399 struct attribute_spec.handler. */
7400 static tree
7401 sh_handle_interrupt_handler_attribute (tree *node, tree name,
7402 tree args ATTRIBUTE_UNUSED,
7403 int flags ATTRIBUTE_UNUSED,
7404 bool *no_add_attrs)
7406 if (TREE_CODE (*node) != FUNCTION_DECL)
7408 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7409 IDENTIFIER_POINTER (name));
7410 *no_add_attrs = true;
7412 else if (TARGET_SHCOMPACT)
7414 error ("attribute interrupt_handler is not compatible with -m5-compact");
7415 *no_add_attrs = true;
7418 return NULL_TREE;
7421 /* Handle an "sp_switch" attribute; arguments as in
7422 struct attribute_spec.handler. */
7423 static tree
7424 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
7425 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7427 if (TREE_CODE (*node) != FUNCTION_DECL)
7429 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7430 IDENTIFIER_POINTER (name));
7431 *no_add_attrs = true;
7433 else if (!pragma_interrupt)
7435 /* The sp_switch attribute only has meaning for interrupt functions. */
7436 warning (OPT_Wattributes, "%qs attribute only applies to "
7437 "interrupt functions", IDENTIFIER_POINTER (name));
7438 *no_add_attrs = true;
7440 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
7442 /* The argument must be a constant string. */
7443 warning (OPT_Wattributes, "%qs attribute argument not a string constant",
7444 IDENTIFIER_POINTER (name));
7445 *no_add_attrs = true;
7447 else
7449 const char *s = ggc_strdup (TREE_STRING_POINTER (TREE_VALUE (args)));
7450 sp_switch = gen_rtx_SYMBOL_REF (VOIDmode, s);
7453 return NULL_TREE;
7456 /* Handle a "trap_exit" attribute; arguments as in
7457 struct attribute_spec.handler. */
7458 static tree
7459 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
7460 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7462 if (TREE_CODE (*node) != FUNCTION_DECL)
7464 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7465 IDENTIFIER_POINTER (name));
7466 *no_add_attrs = true;
7468 else if (!pragma_interrupt)
7470 /* The trap_exit attribute only has meaning for interrupt functions. */
7471 warning (OPT_Wattributes, "%qs attribute only applies to "
7472 "interrupt functions", IDENTIFIER_POINTER (name));
7473 *no_add_attrs = true;
7475 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
7477 /* The argument must be a constant integer. */
7478 warning (OPT_Wattributes, "%qs attribute argument not an "
7479 "integer constant", IDENTIFIER_POINTER (name));
7480 *no_add_attrs = true;
7482 else
7484 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
7487 return NULL_TREE;
7490 static tree
7491 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
7492 tree name ATTRIBUTE_UNUSED,
7493 tree args ATTRIBUTE_UNUSED,
7494 int flags ATTRIBUTE_UNUSED,
7495 bool *no_add_attrs ATTRIBUTE_UNUSED)
7497 return NULL_TREE;
7500 /* True if __attribute__((renesas)) or -mrenesas. */
7502 sh_attr_renesas_p (tree td)
7504 if (TARGET_HITACHI)
7505 return 1;
7506 if (td == 0)
7507 return 0;
7508 if (DECL_P (td))
7509 td = TREE_TYPE (td);
7510 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
7511 != NULL_TREE);
7514 /* True if __attribute__((renesas)) or -mrenesas, for the current
7515 function. */
7517 sh_cfun_attr_renesas_p (void)
7519 return sh_attr_renesas_p (current_function_decl);
7523 sh_cfun_interrupt_handler_p (void)
7525 return (lookup_attribute ("interrupt_handler",
7526 DECL_ATTRIBUTES (current_function_decl))
7527 != NULL_TREE);
7530 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
7532 static const char *
7533 sh_check_pch_target_flags (int old_flags)
7535 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
7536 | MASK_SH_E | MASK_HARD_SH4
7537 | MASK_FPU_SINGLE | MASK_SH4))
7538 return _("created and used with different architectures / ABIs");
7539 if ((old_flags ^ target_flags) & MASK_HITACHI)
7540 return _("created and used with different ABIs");
7541 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
7542 return _("created and used with different endianness");
7543 return NULL;
7546 /* Predicates used by the templates. */
7548 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
7549 Used only in general_movsrc_operand. */
7552 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7554 switch (REGNO (op))
7556 case PR_REG:
7557 case MACL_REG:
7558 case MACH_REG:
7559 return 1;
7561 return 0;
7564 /* Nonzero if OP is a floating point value with value 0.0. */
7567 fp_zero_operand (rtx op)
7569 REAL_VALUE_TYPE r;
7571 if (GET_MODE (op) != SFmode)
7572 return 0;
7574 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7575 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
7578 /* Nonzero if OP is a floating point value with value 1.0. */
7581 fp_one_operand (rtx op)
7583 REAL_VALUE_TYPE r;
7585 if (GET_MODE (op) != SFmode)
7586 return 0;
7588 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7589 return REAL_VALUES_EQUAL (r, dconst1);
7592 /* For -m4 and -m4-single-only, mode switching is used. If we are
7593 compiling without -mfmovd, movsf_ie isn't taken into account for
7594 mode switching. We could check in machine_dependent_reorg for
7595 cases where we know we are in single precision mode, but there is
7596 no interface to find that out during reload, so we must avoid
7597 choosing an fldi alternative during reload and thus failing to
7598 allocate a scratch register for the constant loading. */
7600 fldi_ok (void)
7602 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
7606 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7608 enum rtx_code code = GET_CODE (op);
7609 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
7612 /* Return the TLS type for TLS symbols, 0 otherwise. */
7614 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7616 if (GET_CODE (op) != SYMBOL_REF)
7617 return 0;
7618 return SYMBOL_REF_TLS_MODEL (op);
7621 /* Return the destination address of a branch. */
7623 static int
7624 branch_dest (rtx branch)
7626 rtx dest = SET_SRC (PATTERN (branch));
7627 int dest_uid;
7629 if (GET_CODE (dest) == IF_THEN_ELSE)
7630 dest = XEXP (dest, 1);
7631 dest = XEXP (dest, 0);
7632 dest_uid = INSN_UID (dest);
7633 return INSN_ADDRESSES (dest_uid);
7636 /* Return nonzero if REG is not used after INSN.
7637 We assume REG is a reload reg, and therefore does
7638 not live past labels. It may live past calls or jumps though. */
7640 reg_unused_after (rtx reg, rtx insn)
7642 enum rtx_code code;
7643 rtx set;
7645 /* If the reg is set by this instruction, then it is safe for our
7646 case. Disregard the case where this is a store to memory, since
7647 we are checking a register used in the store address. */
7648 set = single_set (insn);
7649 if (set && GET_CODE (SET_DEST (set)) != MEM
7650 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7651 return 1;
7653 while ((insn = NEXT_INSN (insn)))
7655 rtx set;
7656 if (!INSN_P (insn))
7657 continue;
7659 code = GET_CODE (insn);
7661 #if 0
7662 /* If this is a label that existed before reload, then the register
7663 is dead here. However, if this is a label added by reorg, then
7664 the register may still be live here. We can't tell the difference,
7665 so we just ignore labels completely. */
7666 if (code == CODE_LABEL)
7667 return 1;
7668 /* else */
7669 #endif
7671 if (code == JUMP_INSN)
7672 return 0;
7674 /* If this is a sequence, we must handle all of its insns at once.
7675 We could have for instance a call that sets the target register,
7676 and an insn in a delay slot that uses the register. In this case,
7677 we must return 0. */
7678 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
7680 int i;
7681 int retval = 0;
7683 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
7685 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
7686 rtx set = single_set (this_insn);
7688 if (GET_CODE (this_insn) == CALL_INSN)
7689 code = CALL_INSN;
7690 else if (GET_CODE (this_insn) == JUMP_INSN)
7692 if (INSN_ANNULLED_BRANCH_P (this_insn))
7693 return 0;
7694 code = JUMP_INSN;
7697 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7698 return 0;
7699 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7701 if (GET_CODE (SET_DEST (set)) != MEM)
7702 retval = 1;
7703 else
7704 return 0;
7706 if (set == 0
7707 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
7708 return 0;
7710 if (retval == 1)
7711 return 1;
7712 else if (code == JUMP_INSN)
7713 return 0;
7716 set = single_set (insn);
7717 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7718 return 0;
7719 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7720 return GET_CODE (SET_DEST (set)) != MEM;
7721 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
7722 return 0;
7724 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
7725 return 1;
7727 return 1;
7730 #include "ggc.h"
7732 static GTY(()) rtx fpscr_rtx;
7734 get_fpscr_rtx (void)
7736 if (! fpscr_rtx)
7738 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
7739 REG_USERVAR_P (fpscr_rtx) = 1;
7740 mark_user_reg (fpscr_rtx);
7742 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
7743 mark_user_reg (fpscr_rtx);
7744 return fpscr_rtx;
7747 void
7748 emit_sf_insn (rtx pat)
7750 emit_insn (pat);
7753 void
7754 emit_df_insn (rtx pat)
7756 emit_insn (pat);
7759 void
7760 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
7762 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7765 void
7766 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
7768 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
7769 get_fpscr_rtx ()));
7772 void
7773 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
7775 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7778 void
7779 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
7781 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
7782 get_fpscr_rtx ()));
7785 /* ??? gcc does flow analysis strictly after common subexpression
7786 elimination. As a result, common subexpression elimination fails
7787 when there are some intervening statements setting the same register.
7788 If we did nothing about this, this would hurt the precision switching
7789 for SH4 badly. There is some cse after reload, but it is unable to
7790 undo the extra register pressure from the unused instructions, and
7791 it cannot remove auto-increment loads.
7793 A C code example that shows this flow/cse weakness for (at least) SH
7794 and sparc (as of gcc ss-970706) is this:
7796 double
7797 f(double a)
7799 double d;
7800 d = 0.1;
7801 a += d;
7802 d = 1.1;
7803 d = 0.1;
7804 a *= d;
7805 return a;
7808 So we add another pass before common subexpression elimination, to
7809 remove assignments that are dead due to a following assignment in the
7810 same basic block. */
7812 static void
7813 mark_use (rtx x, rtx *reg_set_block)
7815 enum rtx_code code;
7817 if (! x)
7818 return;
7819 code = GET_CODE (x);
7820 switch (code)
7822 case REG:
7824 int regno = REGNO (x);
7825 int nregs = (regno < FIRST_PSEUDO_REGISTER
7826 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
7827 : 1);
7830 reg_set_block[regno + nregs - 1] = 0;
7832 while (--nregs);
7833 break;
7835 case SET:
7837 rtx dest = SET_DEST (x);
7839 if (GET_CODE (dest) == SUBREG)
7840 dest = SUBREG_REG (dest);
7841 if (GET_CODE (dest) != REG)
7842 mark_use (dest, reg_set_block);
7843 mark_use (SET_SRC (x), reg_set_block);
7844 break;
7846 case CLOBBER:
7847 break;
7848 default:
7850 const char *fmt = GET_RTX_FORMAT (code);
7851 int i, j;
7852 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
7854 if (fmt[i] == 'e')
7855 mark_use (XEXP (x, i), reg_set_block);
7856 else if (fmt[i] == 'E')
7857 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
7858 mark_use (XVECEXP (x, i, j), reg_set_block);
7860 break;
7865 static rtx get_free_reg (HARD_REG_SET);
7867 /* This function returns a register to use to load the address to load
7868 the fpscr from. Currently it always returns r1 or r7, but when we are
7869 able to use pseudo registers after combine, or have a better mechanism
7870 for choosing a register, it should be done here. */
7871 /* REGS_LIVE is the liveness information for the point for which we
7872 need this allocation. In some bare-bones exit blocks, r1 is live at the
7873 start. We can even have all of r0..r3 being live:
7874 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
7875 The INSN before which new insns are placed will clobber the register
7876 we return. If a basic block consists only of setting the return value
7877 register to a pseudo and using that register, the return value is not
7878 live before or after this block, yet we'll insert our insns right in
7879 the middle. */
7881 static rtx
7882 get_free_reg (HARD_REG_SET regs_live)
7884 if (! TEST_HARD_REG_BIT (regs_live, 1))
7885 return gen_rtx_REG (Pmode, 1);
7887 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
7888 there shouldn't be anything but a jump before the function end. */
7889 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
7890 return gen_rtx_REG (Pmode, 7);
7893 /* This function will set the fpscr from memory.
7894 MODE is the mode we are setting it to. */
7895 void
7896 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
7898 enum attr_fp_mode fp_mode = mode;
7899 rtx addr_reg = get_free_reg (regs_live);
7901 if (fp_mode == (enum attr_fp_mode) ACTUAL_NORMAL_MODE (FP_MODE))
7902 emit_insn (gen_fpu_switch1 (addr_reg));
7903 else
7904 emit_insn (gen_fpu_switch0 (addr_reg));
7907 /* Is the given character a logical line separator for the assembler? */
7908 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
7909 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
7910 #endif
7913 sh_insn_length_adjustment (rtx insn)
7915 /* Instructions with unfilled delay slots take up an extra two bytes for
7916 the nop in the delay slot. */
7917 if (((GET_CODE (insn) == INSN
7918 && GET_CODE (PATTERN (insn)) != USE
7919 && GET_CODE (PATTERN (insn)) != CLOBBER)
7920 || GET_CODE (insn) == CALL_INSN
7921 || (GET_CODE (insn) == JUMP_INSN
7922 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
7923 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
7924 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
7925 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
7926 return 2;
7928 /* SH2e has a bug that prevents the use of annulled branches, so if
7929 the delay slot is not filled, we'll have to put a NOP in it. */
7930 if (sh_cpu == CPU_SH2E
7931 && GET_CODE (insn) == JUMP_INSN
7932 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
7933 && GET_CODE (PATTERN (insn)) != ADDR_VEC
7934 && get_attr_type (insn) == TYPE_CBRANCH
7935 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
7936 return 2;
7938 /* sh-dsp parallel processing insns take four bytes instead of two. */
7940 if (GET_CODE (insn) == INSN)
7942 int sum = 0;
7943 rtx body = PATTERN (insn);
7944 const char *template;
7945 char c;
7946 int maybe_label = 1;
7948 if (GET_CODE (body) == ASM_INPUT)
7949 template = XSTR (body, 0);
7950 else if (asm_noperands (body) >= 0)
7951 template
7952 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
7953 else
7954 return 0;
7957 int ppi_adjust = 0;
7960 c = *template++;
7961 while (c == ' ' || c == '\t');
7962 /* All sh-dsp parallel-processing insns start with p.
7963 The only non-ppi sh insn starting with p is pref.
7964 The only ppi starting with pr is prnd. */
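/* For illustration: an asm template such as "padd x0,y0,a0" would get
   the two byte ppi adjustment below (four bytes total), whereas
   "pref @r0" would not.  */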
7965 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
7966 ppi_adjust = 2;
7967 /* The repeat pseudo-insn expands to three insns, a total of
7968 six bytes in size. */
7969 else if ((c == 'r' || c == 'R')
7970 && ! strncasecmp ("epeat", template, 5))
7971 ppi_adjust = 4;
7972 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
7974 /* If this is a label, it is obviously not a ppi insn. */
7975 if (c == ':' && maybe_label)
7977 ppi_adjust = 0;
7978 break;
7980 else if (c == '\'' || c == '"')
7981 maybe_label = 0;
7982 c = *template++;
7984 sum += ppi_adjust;
7985 maybe_label = c != ':';
7987 while (c);
7988 return sum;
7990 return 0;
7993 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
7994 isn't protected by a PIC unspec. */
7996 nonpic_symbol_mentioned_p (rtx x)
7998 register const char *fmt;
7999 register int i;
8001 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8002 || GET_CODE (x) == PC)
8003 return 1;
8005 /* We don't want to look into the possible MEM location of a
8006 CONST_DOUBLE, since we're not going to use it, in general. */
8007 if (GET_CODE (x) == CONST_DOUBLE)
8008 return 0;
8010 if (GET_CODE (x) == UNSPEC
8011 && (XINT (x, 1) == UNSPEC_PIC
8012 || XINT (x, 1) == UNSPEC_GOT
8013 || XINT (x, 1) == UNSPEC_GOTOFF
8014 || XINT (x, 1) == UNSPEC_GOTPLT
8015 || XINT (x, 1) == UNSPEC_GOTTPOFF
8016 || XINT (x, 1) == UNSPEC_DTPOFF
8017 || XINT (x, 1) == UNSPEC_PLT))
8018 return 0;
8020 fmt = GET_RTX_FORMAT (GET_CODE (x));
8021 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8023 if (fmt[i] == 'E')
8025 register int j;
8027 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8028 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8029 return 1;
8031 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8032 return 1;
8035 return 0;
8038 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8039 @GOTOFF in `reg'. */
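/* Local symbols and labels can be reached via a link-time constant
   @GOTOFF offset from the GOT base; other symbols go through a @GOT
   slot so that the dynamic linker can interpose them.  */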
8041 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8042 rtx reg)
8044 if (tls_symbolic_operand (orig, Pmode))
8045 return orig;
8047 if (GET_CODE (orig) == LABEL_REF
8048 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8050 if (reg == 0)
8051 reg = gen_reg_rtx (Pmode);
8053 emit_insn (gen_symGOTOFF2reg (reg, orig));
8054 return reg;
8056 else if (GET_CODE (orig) == SYMBOL_REF)
8058 if (reg == 0)
8059 reg = gen_reg_rtx (Pmode);
8061 emit_insn (gen_symGOT2reg (reg, orig));
8062 return reg;
8064 return orig;
8067 /* Mark the use of a constant in the literal table. If the constant
8068 has multiple labels, make it unique. */
8069 static rtx
8070 mark_constant_pool_use (rtx x)
8072 rtx insn, lab, pattern;
8074 if (x == NULL)
8075 return x;
8077 switch (GET_CODE (x))
8079 case LABEL_REF:
8080 x = XEXP (x, 0);
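/* Fall through.  */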
8081 case CODE_LABEL:
8082 break;
8083 default:
8084 return x;
8087 /* Get the first label in the list of labels for the same constant
8088 and delete the other labels in the list. */
8089 lab = x;
8090 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8092 if (GET_CODE (insn) != CODE_LABEL
8093 || LABEL_REFS (insn) != NEXT_INSN (insn))
8094 break;
8095 lab = insn;
8098 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8099 INSN_DELETED_P (insn) = 1;
8101 /* Mark constants in a window. */
8102 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8104 if (GET_CODE (insn) != INSN)
8105 continue;
8107 pattern = PATTERN (insn);
8108 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8109 continue;
8111 switch (XINT (pattern, 1))
8113 case UNSPECV_CONST2:
8114 case UNSPECV_CONST4:
8115 case UNSPECV_CONST8:
8116 XVECEXP (pattern, 0, 1) = const1_rtx;
8117 break;
8118 case UNSPECV_WINDOW_END:
8119 if (XVECEXP (pattern, 0, 0) == x)
8120 return lab;
8121 break;
8122 case UNSPECV_CONST_END:
8123 return lab;
8124 default:
8125 break;
8129 return lab;
8132 /* Return true if it's possible to redirect BRANCH1 to the destination
8133 of an unconditional jump BRANCH2. We only want to do this if the
8134 resulting branch will have a short displacement. */
8136 sh_can_redirect_branch (rtx branch1, rtx branch2)
8138 if (flag_expensive_optimizations && simplejump_p (branch2))
8140 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8141 rtx insn;
8142 int distance;
8144 for (distance = 0, insn = NEXT_INSN (branch1);
8145 insn && distance < 256;
8146 insn = PREV_INSN (insn))
8148 if (insn == dest)
8149 return 1;
8150 else
8151 distance += get_attr_length (insn);
8153 for (distance = 0, insn = NEXT_INSN (branch1);
8154 insn && distance < 256;
8155 insn = NEXT_INSN (insn))
8157 if (insn == dest)
8158 return 1;
8159 else
8160 distance += get_attr_length (insn);
8163 return 0;
8166 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8168 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8169 unsigned int new_reg)
8171 /* Interrupt functions can only use registers that have already been
8172 saved by the prologue, even if they would normally be
8173 call-clobbered. */
8175 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
8176 return 0;
8178 return 1;
8181 /* Function to update the integer COST
8182 based on the relationship between INSN that is dependent on
8183 DEP_INSN through the dependence LINK. The default is to make no
8184 adjustment to COST. This can be used for example to specify to
8185 the scheduler that an output- or anti-dependence does not incur
8186 the same cost as a data-dependence. The return value should be
8187 the new value for COST. */
8188 static int
8189 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8191 rtx reg, use_pat;
8193 if (TARGET_SHMEDIA)
8195 /* On SHmedia, if the dependence is an anti-dependence or
8196 output-dependence, there is no cost. */
8197 if (REG_NOTE_KIND (link) != 0)
8199 /* However, dependencies between target register loads and
8200 uses of the register in a subsequent block that are separated
8201 by a conditional branch are not modelled - we have to make do with
8202 the anti-dependency between the target register load and the
8203 conditional branch that ends the current block. */
8204 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8205 && GET_CODE (PATTERN (dep_insn)) == SET
8206 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
8207 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
8208 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
8210 int orig_cost = cost;
8211 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
8212 rtx target = ((! note
8213 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
8214 ? insn : JUMP_LABEL (insn));
8215 /* On the likely path, the branch costs 1, on the unlikely path,
8216 it costs 3. */
8217 cost--;
8219 target = next_active_insn (target);
8220 while (target && ! flow_dependent_p (target, dep_insn)
8221 && --cost > 0);
8222 /* If two branches are executed in immediate succession, with the
8223 first branch properly predicted, this causes a stall at the
8224 second branch, hence we won't need the target for the
8225 second branch for two cycles after the launch of the first
8226 branch. */
8227 if (cost > orig_cost - 2)
8228 cost = orig_cost - 2;
8230 else
8231 cost = 0;
8234 else if (get_attr_is_mac_media (insn)
8235 && get_attr_is_mac_media (dep_insn))
8236 cost = 1;
8238 else if (! reload_completed
8239 && GET_CODE (PATTERN (insn)) == SET
8240 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
8241 && GET_CODE (PATTERN (dep_insn)) == SET
8242 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
8243 && cost < 4)
8244 cost = 4;
8245 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
8246 that is needed at the target. */
8247 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
8248 && ! flow_dependent_p (insn, dep_insn))
8249 cost--;
8251 else if (REG_NOTE_KIND (link) == 0)
8253 enum attr_type dep_type, type;
8255 if (recog_memoized (insn) < 0
8256 || recog_memoized (dep_insn) < 0)
8257 return cost;
8259 dep_type = get_attr_type (dep_insn);
8260 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
8261 cost--;
8262 if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
8263 && (type = get_attr_type (insn)) != TYPE_CALL
8264 && type != TYPE_SFUNC)
8265 cost--;
8267 /* The only input for a call that is timing-critical is the
8268 function's address. */
8269 if (GET_CODE(insn) == CALL_INSN)
8271 rtx call = PATTERN (insn);
8273 if (GET_CODE (call) == PARALLEL)
8274 call = XVECEXP (call, 0 ,0);
8275 if (GET_CODE (call) == SET)
8276 call = SET_SRC (call);
8277 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
8278 /* sibcalli_thunk uses a symbol_ref in an unspec. */
8279 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
8280 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
8281 cost = 0;
8283 /* Likewise, the most timing-critical input for an sfunc call
8284 is the function address. However, sfuncs typically start
8285 using their arguments pretty quickly.
8286 Assume a four cycle delay before they are needed. */
8287 /* All sfunc calls are parallels with at least four components.
8288 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8289 else if (GET_CODE (PATTERN (insn)) == PARALLEL
8290 && XVECLEN (PATTERN (insn), 0) >= 4
8291 && (reg = sfunc_uses_reg (insn)))
8293 if (! reg_set_p (reg, dep_insn))
8294 cost -= 4;
8296 /* When the preceding instruction loads the shift amount of
8297 the following SHAD/SHLD, the latency of the load is increased
8298 by 1 cycle. */
8299 else if (TARGET_SH4
8300 && get_attr_type (insn) == TYPE_DYN_SHIFT
8301 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
8302 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
8303 XEXP (SET_SRC (single_set (insn)),
8304 1)))
8305 cost++;
8306 /* When an LS group instruction with a latency of less than
8307 3 cycles is followed by a double-precision floating-point
8308 instruction, FIPR, or FTRV, the latency of the first
8309 instruction is increased to 3 cycles. */
8310 else if (cost < 3
8311 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
8312 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
8313 cost = 3;
8314 /* The lsw register of a double-precision computation is ready one
8315 cycle earlier. */
8316 else if (reload_completed
8317 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
8318 && (use_pat = single_set (insn))
8319 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
8320 SET_SRC (use_pat)))
8321 cost -= 1;
8323 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
8324 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
8325 cost -= 1;
8327 /* An anti-dependence penalty of two applies if the first insn is a double
8328 precision fadd / fsub / fmul. */
8329 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8330 && recog_memoized (dep_insn) >= 0
8331 && get_attr_type (dep_insn) == TYPE_DFP_ARITH
8332 /* A lot of alleged anti-flow dependences are fake,
8333 so check this one is real. */
8334 && flow_dependent_p (dep_insn, insn))
8335 cost = 2;
8338 return cost;
8341 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
8342 if DEP_INSN is anti-flow dependent on INSN. */
8343 static int
8344 flow_dependent_p (rtx insn, rtx dep_insn)
8346 rtx tmp = PATTERN (insn);
8348 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
8349 return tmp == NULL_RTX;
8352 /* A helper function for flow_dependent_p called through note_stores. */
8353 static void
8354 flow_dependent_p_1 (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
8356 rtx * pinsn = (rtx *) data;
8358 if (*pinsn && reg_referenced_p (x, *pinsn))
8359 *pinsn = NULL_RTX;
8362 /* For use by sh_allocate_initial_value. Note that sh.md contains some
8363 'special function' patterns (type sfunc) that clobber pr, but that
8364 do not look like function calls to leaf_function_p. Hence we must
8365 do this extra check. */
8366 static int
8367 sh_pr_n_sets (void)
8369 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
8372 /* Return where to allocate pseudo for a given hard register initial
8373 value. */
8374 static rtx
8375 sh_allocate_initial_value (rtx hard_reg)
8377 rtx x;
8379 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
8381 if (current_function_is_leaf
8382 && ! sh_pr_n_sets ()
8383 && ! (TARGET_SHCOMPACT
8384 && ((current_function_args_info.call_cookie
8385 & ~ CALL_COOKIE_RET_TRAMP (1))
8386 || current_function_has_nonlocal_label)))
8387 x = hard_reg;
8388 else
8389 x = gen_rtx_MEM (Pmode, return_address_pointer_rtx);
8391 else
8392 x = NULL_RTX;
8394 return x;
8397 /* This function returns "2" to indicate dual issue for the SH4
8398 processor. To be used by the DFA pipeline description. */
8399 static int
8400 sh_issue_rate (void)
8402 if (TARGET_SUPERSCALAR)
8403 return 2;
8404 else
8405 return 1;
8408 /* Functions for ready queue reordering for sched1. */
8410 /* Get the regmode weight of a set X for mode MODE. */
8411 static short
8412 find_set_regmode_weight (rtx x, enum machine_mode mode)
8414 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
8415 return 1;
8416 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
8418 if (GET_CODE (SET_DEST (x)) == REG)
8420 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
8421 return 1;
8422 else
8423 return 0;
8425 return 1;
8427 return 0;
8430 /* Get regmode weight for insn. */
8431 static short
8432 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
8434 short reg_weight = 0;
8435 rtx x;
8437 /* Increment weight for each register born here. */
8438 x = PATTERN (insn);
8439 reg_weight += find_set_regmode_weight (x, mode);
8440 if (GET_CODE (x) == PARALLEL)
8442 int j;
8443 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
8445 x = XVECEXP (PATTERN (insn), 0, j);
8446 reg_weight += find_set_regmode_weight (x, mode);
8449 /* Decrement weight for each register that dies here. */
8450 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
8452 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
8454 rtx note = XEXP (x, 0);
8455 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
8456 reg_weight--;
8459 return reg_weight;
8462 /* Calculate regmode weights for all insns of a basic block. */
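/* Note that a DFmode set counts twice toward the SFmode weight and a
   DImode set twice toward the SImode weight, since each occupies a pair
   of the narrower registers.  */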
8463 static void
8464 find_regmode_weight (int b, enum machine_mode mode)
8466 rtx insn, next_tail, head, tail;
8468 get_block_head_tail (b, &head, &tail);
8469 next_tail = NEXT_INSN (tail);
8471 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
8473 /* Handle register life information. */
8474 if (!INSN_P (insn))
8475 continue;
8477 if (mode == SFmode)
8478 INSN_REGMODE_WEIGHT (insn, mode) =
8479 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
8480 else if (mode == SImode)
8481 INSN_REGMODE_WEIGHT (insn, mode) =
8482 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
8486 /* Comparison function for ready queue sorting. */
8487 static int
8488 rank_for_reorder (const void *x, const void *y)
8490 rtx tmp = *(const rtx *) y;
8491 rtx tmp2 = *(const rtx *) x;
8493 /* The insn in a schedule group should be issued first. */
8494 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
8495 return SCHED_GROUP_P (tmp2) ? 1 : -1;
8497 /* If insns are equally good, sort by INSN_LUID (original insn order); this
8498 minimizes instruction movement, thus minimizing sched's effect on
8499 register pressure. */
8500 return INSN_LUID (tmp) - INSN_LUID (tmp2);
8503 /* Resort the array A, in which only the element at index N may be out of order. */
8504 static void
8505 swap_reorder (rtx *a, int n)
8507 rtx insn = a[n - 1];
8508 int i = n - 2;
8510 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
8512 a[i + 1] = a[i];
8513 i -= 1;
8515 a[i + 1] = insn;
8518 #define SCHED_REORDER(READY, N_READY) \
8519 do \
8521 if ((N_READY) == 2) \
8522 swap_reorder (READY, N_READY); \
8523 else if ((N_READY) > 2) \
8524 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
8526 while (0)
8528 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
8529 macro. */
8530 static void
8531 ready_reorder (rtx *ready, int nready)
8533 SCHED_REORDER (ready, nready);
8536 /* Calculate regmode weights for all insns of all basic blocks. */
8537 static void
8538 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
8539 int verbose ATTRIBUTE_UNUSED,
8540 int old_max_uid)
8542 basic_block b;
8544 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
8545 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
8547 FOR_EACH_BB_REVERSE (b)
8549 find_regmode_weight (b->index, SImode);
8550 find_regmode_weight (b->index, SFmode);
8553 CURR_REGMODE_PRESSURE (SImode) = 0;
8554 CURR_REGMODE_PRESSURE (SFmode) = 0;
8558 /* Cleanup. */
8559 static void
8560 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
8561 int verbose ATTRIBUTE_UNUSED)
8563 if (regmode_weight[0])
8565 free (regmode_weight[0]);
8566 regmode_weight[0] = NULL;
8568 if (regmode_weight[1])
8570 free (regmode_weight[1]);
8571 regmode_weight[1] = NULL;
8575 /* Cache the can_issue_more so that we can return it from reorder2. Also,
8576 keep count of register pressures on SImode and SFmode. */
8577 static int
8578 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
8579 int sched_verbose ATTRIBUTE_UNUSED,
8580 rtx insn,
8581 int can_issue_more)
8583 if (GET_CODE (PATTERN (insn)) != USE
8584 && GET_CODE (PATTERN (insn)) != CLOBBER)
8585 cached_can_issue_more = can_issue_more - 1;
8586 else
8587 cached_can_issue_more = can_issue_more;
8589 if (reload_completed)
8590 return cached_can_issue_more;
8592 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
8593 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
8595 return cached_can_issue_more;
8598 static void
8599 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
8600 int verbose ATTRIBUTE_UNUSED,
8601 int veclen ATTRIBUTE_UNUSED)
8603 CURR_REGMODE_PRESSURE (SImode) = 0;
8604 CURR_REGMODE_PRESSURE (SFmode) = 0;
8607 /* Some magic numbers. */
8608 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8609 functions that already have high pressure on r0. */
8610 #define R0_MAX_LIFE_REGIONS 2
8611 #define R0_MAX_LIVE_LENGTH 12
8612 /* Register Pressure thresholds for SImode and SFmode registers. */
8613 #define SIMODE_MAX_WEIGHT 5
8614 #define SFMODE_MAX_WEIGHT 10
8616 /* Return true if the pressure is high for MODE. */
8617 static short
8618 high_pressure (enum machine_mode mode)
8620 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8621 functions that already have high pressure on r0. */
8622 if ((REG_N_SETS (0) - REG_N_DEATHS (0)) >= R0_MAX_LIFE_REGIONS
8623 && REG_LIVE_LENGTH (0) >= R0_MAX_LIVE_LENGTH)
8624 return 1;
8626 if (mode == SFmode)
8627 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
8628 else
8629 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
8632 /* Reorder ready queue if register pressure is high. */
8633 static int
8634 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
8635 int sched_verbose ATTRIBUTE_UNUSED,
8636 rtx *ready,
8637 int *n_readyp,
8638 int clock_var ATTRIBUTE_UNUSED)
8640 if (reload_completed)
8641 return sh_issue_rate ();
8643 if (high_pressure (SFmode) || high_pressure (SImode))
8645 ready_reorder (ready, *n_readyp);
8648 return sh_issue_rate ();
8651 /* Skip cycles if the current register pressure is high. */
8652 static int
8653 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
8654 int sched_verbose ATTRIBUTE_UNUSED,
8655 rtx *ready ATTRIBUTE_UNUSED,
8656 int *n_readyp ATTRIBUTE_UNUSED,
8657 int clock_var ATTRIBUTE_UNUSED)
8659 if (reload_completed)
8660 return cached_can_issue_more;
8662 if (high_pressure(SFmode) || high_pressure (SImode))
8663 skip_cycles = 1;
8665 return cached_can_issue_more;
8668 /* Skip cycles without sorting the ready queue. This will move insns from
8669 Q->R. If this is the last cycle we are skipping, allow sorting of the ready
8670 queue by sh_reorder. */
8672 /* Generally, skipping this many cycles is sufficient for all insns to move
8673 from Q -> R. */
8674 #define MAX_SKIPS 8
8676 static int
8677 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
8678 int sched_verbose ATTRIBUTE_UNUSED,
8679 rtx insn ATTRIBUTE_UNUSED,
8680 int last_clock_var,
8681 int clock_var,
8682 int *sort_p)
8684 if (reload_completed)
8685 return 0;
8687 if (skip_cycles)
8689 if ((clock_var - last_clock_var) < MAX_SKIPS)
8691 *sort_p = 0;
8692 return 1;
8694 /* If this is the last cycle we are skipping, allow reordering of R. */
8695 if ((clock_var - last_clock_var) == MAX_SKIPS)
8697 *sort_p = 1;
8698 return 1;
8702 skip_cycles = 0;
8704 return 0;
8707 /* SHmedia requires registers for branches, so we can't generate new
8708 branches past reload. */
8709 static bool
8710 sh_cannot_modify_jumps_p (void)
8712 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
8715 static int
8716 sh_target_reg_class (void)
8718 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
8721 static bool
8722 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
8724 HARD_REG_SET dummy;
8725 rtx insn;
8727 if (! shmedia_space_reserved_for_target_registers)
8728 return 0;
8729 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
8730 return 0;
8731 if (calc_live_regs (&dummy) >= 6 * 8)
8732 return 1;
8733 /* This is a borderline case. See if we got a nested loop, or a loop
8734 with a call, or with more than 4 labels inside. */
8735 for (insn = get_insns(); insn; insn = NEXT_INSN (insn))
8737 if (GET_CODE (insn) == NOTE
8738 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
8740 int labels = 0;
8744 insn = NEXT_INSN (insn);
8745 if ((GET_CODE (insn) == NOTE
8746 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
8747 || GET_CODE (insn) == CALL_INSN
8748 || (GET_CODE (insn) == CODE_LABEL && ++labels > 4))
8749 return 1;
8751 while (GET_CODE (insn) != NOTE
8752 || NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_END);
8755 return 0;
8758 static bool
8759 sh_ms_bitfield_layout_p (tree record_type ATTRIBUTE_UNUSED)
8761 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
8765 On the SH1..SH4, the trampoline looks like
8766 2 0002 D202 mov.l l2,r2
8767 1 0000 D301 mov.l l1,r3
8768 3 0004 422B jmp @r2
8769 4 0006 0009 nop
8770 5 0008 00000000 l1: .long area
8771 6 000c 00000000 l2: .long function
8773 SH5 (compact) uses r1 instead of r3 for the static chain. */
8776 /* Emit RTL insns to initialize the variable parts of a trampoline.
8777 FNADDR is an RTX for the address of the function's pure code.
8778 CXT is an RTX for the static chain value for the function. */
8780 void
8781 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
8783 if (TARGET_SHMEDIA64)
8785 rtx tramp_templ;
8786 int fixed_len;
8788 rtx movi1 = GEN_INT (0xcc000010);
8789 rtx shori1 = GEN_INT (0xc8000010);
8790 rtx src, dst;
8792 /* The following trampoline works within a +- 128 KB range for cxt:
8793 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
8794 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
8795 gettr tr1,r1; blink tr0,r63 */
8796 /* Address rounding makes it hard to compute the exact bounds of the
8797 offset for this trampoline, but we have a rather generous offset
8798 range, so frame_offset should do fine as an upper bound. */
8799 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
8801 /* ??? could optimize this trampoline initialization
8802 by writing DImode words with two insns each. */
8803 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
8804 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
8805 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
8806 insn = gen_rtx_AND (DImode, insn, mask);
8807 /* Or in ptb/u .,tr1 pattern */
8808 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
8809 insn = force_operand (insn, NULL_RTX);
8810 insn = gen_lowpart (SImode, insn);
8811 emit_move_insn (gen_rtx_MEM (SImode, tramp), insn);
8812 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
8813 insn = gen_rtx_AND (DImode, insn, mask);
8814 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
8815 insn = gen_lowpart (SImode, insn);
8816 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)), insn);
8817 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
8818 insn = gen_rtx_AND (DImode, insn, mask);
8819 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
8820 insn = gen_lowpart (SImode, insn);
8821 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)), insn);
8822 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
8823 insn = gen_rtx_AND (DImode, insn, mask);
8824 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
8825 insn = gen_lowpart (SImode, insn);
8826 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
8827 insn);
8828 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
8829 insn = gen_rtx_AND (DImode, insn, mask);
8830 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
8831 insn = gen_lowpart (SImode, insn);
8832 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 16)),
8833 insn);
8834 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 20)),
8835 GEN_INT (0x6bf10600));
8836 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 24)),
8837 GEN_INT (0x4415fc10));
8838 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 28)),
8839 GEN_INT (0x4401fff0));
8840 emit_insn (gen_ic_invalidate_line (tramp));
8841 return;
8843 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
8844 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
8846 tramp_templ = gen_datalabel_ref (tramp_templ);
8847 dst = gen_rtx_MEM (BLKmode, tramp);
8848 src = gen_rtx_MEM (BLKmode, tramp_templ);
8849 set_mem_align (dst, 256);
8850 set_mem_align (src, 64);
8851 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
8853 emit_move_insn (gen_rtx_MEM (Pmode, plus_constant (tramp, fixed_len)),
8854 fnaddr);
8855 emit_move_insn (gen_rtx_MEM (Pmode,
8856 plus_constant (tramp,
8857 fixed_len
8858 + GET_MODE_SIZE (Pmode))),
8859 cxt);
8860 emit_insn (gen_ic_invalidate_line (tramp));
8861 return;
8863 else if (TARGET_SHMEDIA)
8865 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
8866 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
8867 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
8868 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
8869 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
8870 rotated 10 right, with the higher 16 bits of every 32 selected. */
8871 rtx movishori
8872 = force_reg (V2HImode, (simplify_gen_subreg
8873 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
8874 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
8875 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
8877 tramp = force_reg (Pmode, tramp);
8878 fnaddr = force_reg (SImode, fnaddr);
8879 cxt = force_reg (SImode, cxt);
8880 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
8881 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
8882 movishori));
8883 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
8884 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
8885 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
8886 emit_move_insn (gen_rtx_MEM (DImode, tramp), quad0);
8887 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
8888 gen_rtx_SUBREG (V2HImode, cxt, 0),
8889 movishori));
8890 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
8891 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
8892 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
8893 if (TARGET_LITTLE_ENDIAN)
8895 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
8896 emit_insn (gen_mextr4 (quad2, cxtload, blink));
8898 else
8900 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
8901 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
8903 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 8)), quad1);
8904 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), quad2);
8905 emit_insn (gen_ic_invalidate_line (tramp));
8906 return;
8908 else if (TARGET_SHCOMPACT)
8910 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
8911 return;
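/* Plain SH1..SH4: store the four 16-bit opcodes shown in the layout
   comment above, followed by the static chain and the function address.
   On a little-endian target the two opcodes of each pair are swapped
   within their 32-bit word.  */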
8913 emit_move_insn (gen_rtx_MEM (SImode, tramp),
8914 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
8915 SImode));
8916 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
8917 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
8918 SImode));
8919 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
8920 cxt);
8921 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
8922 fnaddr);
8923 if (TARGET_HARVARD)
8925 if (TARGET_USERMODE)
8926 emit_library_call (function_symbol (NULL, "__ic_invalidate",
8927 FUNCTION_ORDINARY),
8928 0, VOIDmode, 1, tramp, SImode);
8929 else
8930 emit_insn (gen_ic_invalidate_line (tramp));
8934 /* FIXME: This is overly conservative. A SHcompact function that
8935 receives arguments ``by reference'' will have them stored in its
8936 own stack frame, so it must not pass pointers or references to
8937 these arguments to other functions by means of sibling calls. */
8938 /* If PIC, we cannot make sibling calls to global functions
8939 because the PLT requires r12 to be live. */
8940 static bool
8941 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
8943 return (1
8944 && (! TARGET_SHCOMPACT
8945 || current_function_args_info.stack_regs == 0)
8946 && ! sh_cfun_interrupt_handler_p ()
8947 && (! flag_pic
8948 || (decl && ! TREE_PUBLIC (decl))
8949 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
8952 /* Machine specific built-in functions. */
8954 struct builtin_description
8956 const enum insn_code icode;
8957 const char *const name;
8958 int signature;
8961 /* Describe the number and signedness of arguments; arg[0] == result
8962 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
8963 /* 9: 64 bit pointer, 10: 32 bit pointer */
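/* For example, an entry of { 4, 4, 1 } describes a builtin whose result
   and first argument have don't-care signedness and whose second
   argument is unsigned; { 0, 8 } describes one that returns void and
   takes a single pointer argument.  */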
8964 static const char signature_args[][4] =
8966 #define SH_BLTIN_V2SI2 0
8967 { 4, 4 },
8968 #define SH_BLTIN_V4HI2 1
8969 { 4, 4 },
8970 #define SH_BLTIN_V2SI3 2
8971 { 4, 4, 4 },
8972 #define SH_BLTIN_V4HI3 3
8973 { 4, 4, 4 },
8974 #define SH_BLTIN_V8QI3 4
8975 { 4, 4, 4 },
8976 #define SH_BLTIN_MAC_HISI 5
8977 { 1, 4, 4, 1 },
8978 #define SH_BLTIN_SH_HI 6
8979 { 4, 4, 1 },
8980 #define SH_BLTIN_SH_SI 7
8981 { 4, 4, 1 },
8982 #define SH_BLTIN_V4HI2V2SI 8
8983 { 4, 4, 4 },
8984 #define SH_BLTIN_V4HI2V8QI 9
8985 { 4, 4, 4 },
8986 #define SH_BLTIN_SISF 10
8987 { 4, 2 },
8988 #define SH_BLTIN_LDUA_L 11
8989 { 2, 10 },
8990 #define SH_BLTIN_LDUA_Q 12
8991 { 1, 10 },
8992 #define SH_BLTIN_STUA_L 13
8993 { 0, 10, 2 },
8994 #define SH_BLTIN_STUA_Q 14
8995 { 0, 10, 1 },
8996 #define SH_BLTIN_LDUA_L64 15
8997 { 2, 9 },
8998 #define SH_BLTIN_LDUA_Q64 16
8999 { 1, 9 },
9000 #define SH_BLTIN_STUA_L64 17
9001 { 0, 9, 2 },
9002 #define SH_BLTIN_STUA_Q64 18
9003 { 0, 9, 1 },
9004 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
9005 #define SH_BLTIN_2 19
9006 #define SH_BLTIN_SU 19
9007 { 1, 2 },
9008 #define SH_BLTIN_3 20
9009 #define SH_BLTIN_SUS 20
9010 { 2, 2, 1 },
9011 #define SH_BLTIN_PSSV 21
9012 { 0, 8, 2, 2 },
9013 #define SH_BLTIN_XXUU 22
9014 #define SH_BLTIN_UUUU 22
9015 { 1, 1, 1, 1 },
9016 #define SH_BLTIN_PV 23
9017 { 0, 8 },
9019 /* mcmv: operands considered unsigned. */
9020 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9021 /* mperm: control value considered unsigned int. */
9022 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9023 /* mshards_q: returns signed short. */
9024 /* nsb: takes long long arg, returns unsigned char. */
9025 static const struct builtin_description bdesc[] =
9027 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
9028 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
9029 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
9030 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
9031 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
9032 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
9033 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
9034 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9035 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
9036 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
9037 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
9038 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
9039 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
9040 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
9041 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
9042 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
9043 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
9044 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
9045 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3 },
9046 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3 },
9047 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3 },
9048 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3 },
9049 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3 },
9050 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3 },
9051 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3 },
9052 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
9053 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
9054 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
9055 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
9056 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
9057 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
9058 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
9059 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
9060 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
9061 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
9062 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
9063 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
9064 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
9065 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
9066 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
9067 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
9068 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
9069 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
9070 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
9071 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
9072 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
9073 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
9074 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
9075 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
9076 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
9077 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
9078 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
9079 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
9080 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
9081 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
9082 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
9083 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
9084 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
9085 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
9086 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
9087 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
9088 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3 },
9089 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2 },
9090 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2 },
9091 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
9092 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9093 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9094 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9095 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9096 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9097 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9098 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9099 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9100 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64 },
9101 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64 },
9102 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64 },
9103 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64 },
9104 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64 },
9105 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64 },
9106 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64 },
9107 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64 },
9108 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
9109 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
9110 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
9113 static void
9114 sh_media_init_builtins (void)
9116 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
9117 const struct builtin_description *d;
9119 memset (shared, 0, sizeof shared);
9120 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
9122 tree type, arg_type = 0;
9123 int signature = d->signature;
9124 int i;
9126 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
9127 type = shared[signature];
9128 else
9130 int has_result = signature_args[signature][0] != 0;
9132 if ((signature_args[signature][1] & 8)
9133 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
9134 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
9135 continue;
9136 if (! TARGET_FPU_ANY
9137 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
9138 continue;
9139 type = void_list_node;
9140 for (i = 3; ; i--)
9142 int arg = signature_args[signature][i];
9143 int opno = i - 1 + has_result;
9145 if (arg & 8)
9146 arg_type = ptr_type_node;
9147 else if (arg)
9148 arg_type = (*lang_hooks.types.type_for_mode)
9149 (insn_data[d->icode].operand[opno].mode,
9150 (arg & 1));
9151 else if (i)
9152 continue;
9153 else
9154 arg_type = void_type_node;
9155 if (i == 0)
9156 break;
9157 type = tree_cons (NULL_TREE, arg_type, type);
9159 type = build_function_type (arg_type, type);
9160 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
9161 shared[signature] = type;
9163 lang_hooks.builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
9164 NULL, NULL_TREE);
9168 /* Implements target hook vector_mode_supported_p. */
9169 bool
9170 sh_vector_mode_supported_p (enum machine_mode mode)
9172 if (TARGET_FPU_ANY
9173 && ((mode == V2SFmode)
9174 || (mode == V4SFmode)
9175 || (mode == V16SFmode)))
9176 return true;
9178 else if (TARGET_SHMEDIA
9179 && ((mode == V8QImode)
9180 || (mode == V2HImode)
9181 || (mode == V4HImode)
9182 || (mode == V2SImode)))
9183 return true;
9185 return false;
9188 /* Implements target hook dwarf_calling_convention. Return an enum
9189 of dwarf_calling_convention. */
9191 sh_dwarf_calling_convention (tree func)
9193 if (sh_attr_renesas_p (func))
9194 return DW_CC_GNU_renesas_sh;
9196 return DW_CC_normal;
9199 static void
9200 sh_init_builtins (void)
9202 if (TARGET_SHMEDIA)
9203 sh_media_init_builtins ();
9206 /* Expand an expression EXP that calls a built-in function,
9207 with result going to TARGET if that's convenient
9208 (and in mode MODE if that's convenient).
9209 SUBTARGET may be used as the target for computing one of EXP's operands.
9210 IGNORE is nonzero if the value is to be ignored. */
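/* The operand modes and predicates are taken from insn_data for the
   builtin's icode; signature_args supplies the signedness, and pointer
   arguments are expanded in ptr_mode.  */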
9212 static rtx
9213 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9214 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
9216 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
9217 tree arglist = TREE_OPERAND (exp, 1);
9218 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9219 const struct builtin_description *d = &bdesc[fcode];
9220 enum insn_code icode = d->icode;
9221 int signature = d->signature;
9222 enum machine_mode tmode = VOIDmode;
9223 int nop = 0, i;
9224 rtx op[4];
9225 rtx pat = 0;
9227 if (signature_args[signature][0])
9229 if (ignore)
9230 return 0;
9232 tmode = insn_data[icode].operand[0].mode;
9233 if (! target
9234 || GET_MODE (target) != tmode
9235 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9236 target = gen_reg_rtx (tmode);
9237 op[nop++] = target;
9239 else
9240 target = 0;
9242 for (i = 1; i <= 3; i++, nop++)
9244 tree arg;
9245 enum machine_mode opmode, argmode;
9246 tree optype;
9248 if (! signature_args[signature][i])
9249 break;
9250 arg = TREE_VALUE (arglist);
9251 if (arg == error_mark_node)
9252 return const0_rtx;
9253 arglist = TREE_CHAIN (arglist);
9254 if (signature_args[signature][i] & 8)
9256 opmode = ptr_mode;
9257 optype = ptr_type_node;
9259 else
9261 opmode = insn_data[icode].operand[nop].mode;
9262 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
9264 argmode = TYPE_MODE (TREE_TYPE (arg));
9265 if (argmode != opmode)
9266 arg = build1 (NOP_EXPR, optype, arg);
9267 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
9268 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
9269 op[nop] = copy_to_mode_reg (opmode, op[nop]);
9272 switch (nop)
9274 case 1:
9275 pat = (*insn_data[d->icode].genfun) (op[0]);
9276 break;
9277 case 2:
9278 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
9279 break;
9280 case 3:
9281 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
9282 break;
9283 case 4:
9284 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
9285 break;
9286 default:
9287 gcc_unreachable ();
9289 if (! pat)
9290 return 0;
9291 emit_insn (pat);
9292 return target;
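/* The V2SF expanders below emit two SFmode operations, selecting
   element 0 and element 1 in turn via the sel0 / sel1 operands.  */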
9295 void
9296 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
9298 rtx sel0 = const0_rtx;
9299 rtx sel1 = const1_rtx;
9300 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
9301 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
9303 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
9304 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
9307 void
9308 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
9310 rtx sel0 = const0_rtx;
9311 rtx sel1 = const1_rtx;
9312 rtx (*fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
9313 = gen_binary_sf_op;
9314 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
9316 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
9317 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
9320 /* Return true if a mode change from FROM to TO is invalid for a
9321 register in class CLASS. */
9322 bool
9323 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9324 enum reg_class class)
9326 /* We want to enable the use of SUBREGs as a means to
9327 VEC_SELECT a single element of a vector. */
9328 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
9329 return (reg_classes_intersect_p (GENERAL_REGS, class));
9331 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
9333 if (TARGET_LITTLE_ENDIAN)
9335 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
9336 return reg_classes_intersect_p (DF_REGS, class);
9338 else
9340 if (GET_MODE_SIZE (from) < 8)
9341 return reg_classes_intersect_p (DF_HI_REGS, class);
9344 return 0;
9348 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
9349 that label is used. */
9351 void
9352 sh_mark_label (rtx address, int nuses)
9354 if (GOTOFF_P (address))
9356 /* Extract the label or symbol. */
9357 address = XEXP (address, 0);
9358 if (GET_CODE (address) == PLUS)
9359 address = XEXP (address, 0);
9360 address = XVECEXP (address, 0, 0);
9362 if (GET_CODE (address) == LABEL_REF
9363 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
9364 LABEL_NUSES (XEXP (address, 0)) += nuses;
9367 /* Compute extra cost of moving data between one register class
9368 and another. */
9370 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
9371 uses this information. Hence, the general register <-> floating point
9372 register information here is not used for SFmode. */
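/* As a rough worked example of the costs below: on SH4 a DFmode move
   between a general register and a floating point register costs 12
   without -mfmovd (8 with it), while a DFmode move between general
   registers falls through to the final case and costs 4.  */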
9375 sh_register_move_cost (enum machine_mode mode,
9376 enum reg_class srcclass, enum reg_class dstclass)
9378 if (dstclass == T_REGS || dstclass == PR_REGS)
9379 return 10;
9381 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
9382 return 4;
9384 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
9385 && REGCLASS_HAS_FP_REG (srcclass)
9386 && REGCLASS_HAS_FP_REG (dstclass))
9387 return 4;
9389 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
9390 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
9391 return 9;
9393 if ((REGCLASS_HAS_FP_REG (dstclass)
9394 && REGCLASS_HAS_GENERAL_REG (srcclass))
9395 || (REGCLASS_HAS_GENERAL_REG (dstclass)
9396 && REGCLASS_HAS_FP_REG (srcclass)))
9397 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
9398 * ((GET_MODE_SIZE (mode) + 7) / 8U));
9400 if ((dstclass == FPUL_REGS
9401 && REGCLASS_HAS_GENERAL_REG (srcclass))
9402 || (srcclass == FPUL_REGS
9403 && REGCLASS_HAS_GENERAL_REG (dstclass)))
9404 return 5;
9406 if ((dstclass == FPUL_REGS
9407 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
9408 || (srcclass == FPUL_REGS
9409 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
9410 return 7;
9412 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9413 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9414 return 20;
9416 /* ??? ptabs faults on (value & 0x3) == 0x3 */
9417 if (TARGET_SHMEDIA
9418 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
9420 if (sh_gettrcost >= 0)
9421 return sh_gettrcost;
9422 else if (!TARGET_PT_FIXED)
9423 return 100;
9426 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9427 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9428 return 4;
9430 if (TARGET_SHMEDIA
9431 || (TARGET_FMOVD
9432 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
9433 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
9434 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
9436 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
9439 static rtx emit_load_ptr (rtx, rtx);
9441 static rtx
9442 emit_load_ptr (rtx reg, rtx addr)
9444 rtx mem = gen_rtx_MEM (ptr_mode, addr);
9446 if (Pmode != ptr_mode)
9447 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
9448 return emit_move_insn (reg, mem);
9451 static void
9452 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
9453 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
9454 tree function)
9456 CUMULATIVE_ARGS cum;
9457 int structure_value_byref = 0;
9458 rtx this, this_value, sibcall, insns, funexp;
9459 tree funtype = TREE_TYPE (function);
9460 int simple_add = CONST_OK_FOR_ADD (delta);
9461 int did_load = 0;
9462 rtx scratch0, scratch1, scratch2;
9463 unsigned i;
9465 reload_completed = 1;
9466 epilogue_completed = 1;
9467 no_new_pseudos = 1;
9468 current_function_uses_only_leaf_regs = 1;
9469 reset_block_changes ();
9471 emit_note (NOTE_INSN_PROLOGUE_END);
9473 /* Find the "this" pointer. We have such a wide range of ABIs for the
9474 SH that it's best to do this completely machine independently.
9475 "this" is passed as first argument, unless a structure return pointer
9476 comes first, in which case "this" comes second. */
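/* In outline (a rough C sketch, assuming the usual layout where the first
   word of the object is the vtable pointer), the thunk emitted below has
   the generic MI-thunk effect:

     this += delta;
     if (vcall_offset)
       this += *(ptrdiff_t *) (*(char **) this + vcall_offset);
     tail-call FUNCTION with the adjusted "this";

   the code that follows selects scratch registers and open-codes these
   additions in RTL.  */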
9477 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
9478 #ifndef PCC_STATIC_STRUCT_RETURN
9479 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
9480 structure_value_byref = 1;
9481 #endif /* not PCC_STATIC_STRUCT_RETURN */
9482 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
9484 tree ptype = build_pointer_type (TREE_TYPE (funtype));
9486 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
9488 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
9490 /* For SHcompact, we only have r0 for a scratch register: r1 is the
9491 static chain pointer (even if you can't have nested virtual functions
9492 right now, someone might implement them sometime), and the rest of the
9493 registers are used for argument passing, are callee-saved, or reserved. */
9494 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
9495 -ffixed-reg has been used. */
9496 if (! call_used_regs[0] || fixed_regs[0])
9497 error ("r0 needs to be available as a call-clobbered register");
9498 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
9499 if (! TARGET_SH5)
9501 if (call_used_regs[1] && ! fixed_regs[1])
9502 scratch1 = gen_rtx_REG (ptr_mode, 1);
9503 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
9504 to the location where struct values are to be returned. */
9505 if (call_used_regs[3] && ! fixed_regs[3])
9506 scratch2 = gen_rtx_REG (Pmode, 3);
9508 else if (TARGET_SHMEDIA)
9510 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
9511 if (i != REGNO (scratch0) &&
9512 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
9514 scratch1 = gen_rtx_REG (ptr_mode, i);
9515 break;
9517 if (scratch1 == scratch0)
9518 error ("Need a second call-clobbered general purpose register");
9519 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
9520 if (call_used_regs[i] && ! fixed_regs[i])
9522 scratch2 = gen_rtx_REG (Pmode, i);
9523 break;
9525 if (scratch2 == scratch0)
9526 error ("Need a call-clobbered target register");
9529 this_value = plus_constant (this, delta);
9530 if (vcall_offset
9531 && (simple_add || scratch0 != scratch1)
9532 && strict_memory_address_p (ptr_mode, this_value))
9534 emit_load_ptr (scratch0, this_value);
9535 did_load = 1;
9538 if (!delta)
9539 ; /* Do nothing. */
9540 else if (simple_add)
9541 emit_move_insn (this, this_value);
9542 else
9544 emit_move_insn (scratch1, GEN_INT (delta));
9545 emit_insn (gen_add2_insn (this, scratch1));
9548 if (vcall_offset)
9550 rtx offset_addr;
9552 if (!did_load)
9553 emit_load_ptr (scratch0, this);
9555 offset_addr = plus_constant (scratch0, vcall_offset);
9556 if (strict_memory_address_p (ptr_mode, offset_addr))
9557 ; /* Do nothing. */
9558 else if (! TARGET_SH5 && scratch0 != scratch1)
9560 /* scratch0 != scratch1, and we have indexed loads. Get a better
9561 schedule by loading the offset into r1 and using an indexed
9562 load - then the load of r1 can issue before the load from
9563 (this + delta) finishes. */
9564 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9565 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
9567 else if (CONST_OK_FOR_ADD (vcall_offset))
9569 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
9570 offset_addr = scratch0;
9572 else if (scratch0 != scratch1)
9574 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9575 emit_insn (gen_add2_insn (scratch0, scratch1));
9576 offset_addr = scratch0;
9578 else
9579 gcc_unreachable (); /* FIXME */
9580 emit_load_ptr (scratch0, offset_addr);
9582 if (Pmode != ptr_mode)
9583 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
9584 emit_insn (gen_add2_insn (this, scratch0));
9587 /* Generate a tail call to the target function. */
9588 if (! TREE_USED (function))
9590 assemble_external (function);
9591 TREE_USED (function) = 1;
9593 funexp = XEXP (DECL_RTL (function), 0);
9594 /* If the function is overridden, so is the thunk, hence we don't
9595 need GOT addressing even if this is a public symbol. */
9596 #if 0
9597 if (TARGET_SH1 && ! flag_weak)
9598 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
9599 else
9600 #endif
9601 if (TARGET_SH2 && flag_pic)
9603 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
9604 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
9606 else
9608 if (TARGET_SHMEDIA && flag_pic)
9610 funexp = gen_sym2PIC (funexp);
9611 PUT_MODE (funexp, Pmode);
9613 emit_move_insn (scratch2, funexp);
9614 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
9615 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
9617 sibcall = emit_call_insn (sibcall);
9618 SIBLING_CALL_P (sibcall) = 1;
9619 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
9620 emit_barrier ();
9622 /* Run just enough of rest_of_compilation to do scheduling and get
9623 the insns emitted. Note that use_thunk calls
9624 assemble_start_function and assemble_end_function. */
9626 insn_locators_initialize ();
9627 insns = get_insns ();
9629 if (optimize > 0)
9631 /* Initialize the bitmap obstacks. */
9632 bitmap_obstack_initialize (NULL);
9633 bitmap_obstack_initialize (&reg_obstack);
9634 if (! cfun->cfg)
9635 init_flow ();
9636 rtl_register_cfg_hooks ();
9637 init_rtl_bb_info (ENTRY_BLOCK_PTR);
9638 init_rtl_bb_info (EXIT_BLOCK_PTR);
9639 ENTRY_BLOCK_PTR->flags |= BB_RTL;
9640 EXIT_BLOCK_PTR->flags |= BB_RTL;
9641 find_basic_blocks (insns);
9643 if (flag_schedule_insns_after_reload)
9645 life_analysis (dump_file, PROP_FINAL);
9647 split_all_insns (1);
9649 schedule_insns (dump_file);
9651 /* We must split jmp insn in PIC case. */
9652 else if (flag_pic)
9653 split_all_insns_noflow ();
9656 sh_reorg ();
9658 if (optimize > 0 && flag_delayed_branch)
9659 dbr_schedule (insns, dump_file);
9661 shorten_branches (insns);
9662 final_start_function (insns, file, 1);
9663 final (insns, file, 1);
9664 final_end_function ();
9666 if (optimize > 0)
9668 /* Release all memory allocated by flow. */
9669 free_basic_block_vars ();
9671 /* Release the bitmap obstacks. */
9672 bitmap_obstack_release (&reg_obstack);
9673 bitmap_obstack_release (NULL);
9676 reload_completed = 0;
9677 epilogue_completed = 0;
9678 no_new_pseudos = 0;
9681 rtx
9682 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
9684 rtx sym;
9686 /* If this is not an ordinary function, the name usually comes from a
9687 string literal or an sprintf buffer. Make sure we use the same
9688 string consistently, so that cse will be able to unify address loads. */
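/* A minimal usage sketch (hypothetical caller and name): two separately
   built copies of the same helper name end up sharing one string:

     char buf[32];
     sprintf (buf, "__%cdivsi3", sign ? 's' : 'u');
     rtx sym = function_symbol (NULL_RTX, buf, SFUNC_STATIC);

   each call canonicalizes BUF through get_identifier below, so repeated
   requests for the same name yield SYMBOL_REFs whose name pointers are
   identical and whose address loads can therefore be CSEd.  */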
9689 if (kind != FUNCTION_ORDINARY)
9690 name = IDENTIFIER_POINTER (get_identifier (name));
9691 sym = gen_rtx_SYMBOL_REF (Pmode, name);
9692 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
9693 if (flag_pic)
9694 switch (kind)
9696 case FUNCTION_ORDINARY:
9697 break;
9698 case SFUNC_GOT:
9700 rtx reg = target ? target : gen_reg_rtx (Pmode);
9702 emit_insn (gen_symGOT2reg (reg, sym));
9703 sym = reg;
9704 break;
9706 case SFUNC_STATIC:
9708 /* ??? To allow cse to work, we use GOTOFF relocations.
9709 We could add combiner patterns to transform this into
9710 straight pc-relative calls with sym2PIC / bsrf when
9711 label load and function call are still 1:1 and in the
9712 same basic block during combine. */
9713 rtx reg = target ? target : gen_reg_rtx (Pmode);
9715 emit_insn (gen_symGOTOFF2reg (reg, sym));
9716 sym = reg;
9717 break;
9720 if (target && sym != target)
9722 emit_move_insn (target, sym);
9723 return target;
9725 return sym;
9728 /* Find the number of a general purpose register in S. */
9729 static int
9730 scavenge_reg (HARD_REG_SET *s)
9732 int r;
9733 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
9734 if (TEST_HARD_REG_BIT (*s, r))
9735 return r;
9736 return -1;
9739 rtx
9740 sh_get_pr_initial_val (void)
9742 rtx val;
9744 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
9745 PR register on SHcompact, because it might be clobbered by the prologue.
9746 We check first if that is known to be the case. */
9747 if (TARGET_SHCOMPACT
9748 && ((current_function_args_info.call_cookie
9749 & ~ CALL_COOKIE_RET_TRAMP (1))
9750 || current_function_has_nonlocal_label))
9751 return gen_rtx_MEM (SImode, return_address_pointer_rtx);
9753 /* If we haven't finished rtl generation, there might be a nonlocal label
9754 that we haven't seen yet.
9755 ??? get_hard_reg_initial_val fails if it is called while no_new_pseudos
9756 is set, unless it has been called before for the same register. And even
9757 then, we end up in trouble if we didn't use the register in the same
9758 basic block before. So call get_hard_reg_initial_val now and wrap it
9759 in an unspec if we might need to replace it. */
9760 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
9761 combine can put the pseudo returned by get_hard_reg_initial_val into
9762 instructions that need a general purpose register, which will fail to
9763 be recognized when the pseudo becomes allocated to PR. */
9764 val
9765 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
9766 if (TARGET_SH1)
9767 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
9768 return val;
9771 int
9772 sh_expand_t_scc (enum rtx_code code, rtx target)
9774 rtx result = target;
9775 HOST_WIDE_INT val;
9777 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
9778 || GET_CODE (sh_compare_op1) != CONST_INT)
9779 return 0;
9780 if (GET_CODE (result) != REG)
9781 result = gen_reg_rtx (SImode);
9782 val = INTVAL (sh_compare_op1);
9783 if ((code == EQ && val == 1) || (code == NE && val == 0))
9784 emit_insn (gen_movt (result));
9785 else if ((code == EQ && val == 0) || (code == NE && val == 1))
9787 emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
9788 emit_insn (gen_subc (result, result, result));
9789 emit_insn (gen_addsi3 (result, result, const1_rtx));
9791 else if (code == EQ || code == NE)
9792 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
9793 else
9794 return 0;
9795 if (result != target)
9796 emit_move_insn (target, result);
9797 return 1;
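/* Note on the CLOBBER/subc/addsi3 sequence above: subc with identical
   source and destination computes result = result - result - T = -T
   (all zeros or all ones), and adding 1 then gives 1 - T, i.e. 1 when
   the T bit was clear and 0 when it was set, so !T is produced without
   a branch.  */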
9800 /* INSN is an sfunc; return the rtx that describes the address used. */
9801 static rtx
9802 extract_sfunc_addr (rtx insn)
9804 rtx pattern, part = NULL_RTX;
9805 int len, i;
9807 pattern = PATTERN (insn);
9808 len = XVECLEN (pattern, 0);
9809 for (i = 0; i < len; i++)
9811 part = XVECEXP (pattern, 0, i);
9812 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
9813 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
9814 return XEXP (part, 0);
9816 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
9817 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
9820 /* Verify that the register in use_sfunc_addr still agrees with the address
9821 used in the sfunc. This prevents fill_slots_from_thread from changing
9822 use_sfunc_addr.
9823 INSN is the use_sfunc_addr instruction, and REG is the register it
9824 guards. */
9825 int
9826 check_use_sfunc_addr (rtx insn, rtx reg)
9828 /* Search for the sfunc. It should really come right after INSN. */
9829 while ((insn = NEXT_INSN (insn)))
9831 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
9832 break;
9833 if (! INSN_P (insn))
9834 continue;
9836 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
9837 insn = XVECEXP (PATTERN (insn), 0, 0);
9838 if (GET_CODE (PATTERN (insn)) != PARALLEL
9839 || get_attr_type (insn) != TYPE_SFUNC)
9840 continue;
9841 return rtx_equal_p (extract_sfunc_addr (insn), reg);
9843 gcc_unreachable ();
9846 /* This function returns a constant rtx that represents 2**15 / pi in
9847 SFmode. It's used to scale SFmode angles, in radians, to a
9848 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
9849 maps to 0x10000). */
9851 static GTY(()) rtx sh_fsca_sf2int_rtx;
9853 rtx
9854 sh_fsca_sf2int (void)
9856 if (! sh_fsca_sf2int_rtx)
9858 REAL_VALUE_TYPE rv;
9860 real_from_string (&rv, "10430.378350470453");
9861 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
9864 return sh_fsca_sf2int_rtx;
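/* Worked example (illustrative): 2**15 / pi is approximately
   32768 / 3.14159265 = 10430.378..., so an angle of pi/2 radians scales
   to about 1.5707963 * 10430.378 = 16384 = 0x4000, i.e. a quarter of
   the 0x10000 units that make up a full circle.  */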
9867 /* This function returns a constant rtx that represents 2**15 / pi in
9868 DFmode. It's used to scale DFmode angles, in radians, to a
9869 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
9870 maps to 0x10000). */
9872 static GTY(()) rtx sh_fsca_df2int_rtx;
9874 rtx
9875 sh_fsca_df2int (void)
9877 if (! sh_fsca_df2int_rtx)
9879 REAL_VALUE_TYPE rv;
9881 real_from_string (&rv, "10430.378350470453");
9882 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
9885 return sh_fsca_df2int_rtx;
9888 /* This function returns a constant rtx that represents pi / 2**15 in
9889 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
9890 of a full circle back to an SFmode value (i.e., 0x10000 maps to
9891 2*pi). */
9893 static GTY(()) rtx sh_fsca_int2sf_rtx;
9895 rtx
9896 sh_fsca_int2sf (void)
9898 if (! sh_fsca_int2sf_rtx)
9900 REAL_VALUE_TYPE rv;
9902 real_from_string (&rv, "9.587379924285257e-5");
9903 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
9906 return sh_fsca_int2sf_rtx;
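/* Worked example (illustrative): pi / 2**15 is approximately
   3.14159265 / 32768 = 9.5874e-5, so the fixed-point value 0x4000
   (a quarter circle) converts back to 16384 * 9.5874e-5 = 1.5708,
   i.e. pi/2 radians, the inverse of the sh_fsca_sf2int scaling.  */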
9909 /* Initialize the CUMULATIVE_ARGS structure. */
9911 void
9912 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
9913 tree fntype,
9914 rtx libname ATTRIBUTE_UNUSED,
9915 tree fndecl,
9916 signed int n_named_args,
9917 enum machine_mode mode)
9919 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
9920 pcum->free_single_fp_reg = 0;
9921 pcum->stack_regs = 0;
9922 pcum->byref_regs = 0;
9923 pcum->byref = 0;
9924 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
9926 /* XXX - Should we check TARGET_HITACHI here ??? */
9927 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
9929 if (fntype)
9931 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
9932 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
9933 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
9934 pcum->arg_count [(int) SH_ARG_INT]
9935 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
9937 pcum->call_cookie
9938 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
9939 && pcum->arg_count [(int) SH_ARG_INT] == 0
9940 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
9941 ? int_size_in_bytes (TREE_TYPE (fntype))
9942 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
9943 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
9944 == FIRST_RET_REG));
9946 else
9948 pcum->arg_count [(int) SH_ARG_INT] = 0;
9949 pcum->prototype_p = FALSE;
9950 if (mode != VOIDmode)
9952 pcum->call_cookie =
9953 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
9954 && GET_MODE_SIZE (mode) > 4
9955 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
9957 /* If the default ABI is the Renesas ABI then all library
9958 calls must assume that the library will be using the
9959 Renesas ABI. So if the function would return its result
9960 in memory then we must force the address of this memory
9961 block onto the stack. Ideally we would like to call
9962 targetm.calls.return_in_memory() here but we do not have
9963 the TYPE or the FNDECL available so we synthesize the
9964 contents of that function as best we can. */
9965 pcum->force_mem =
9966 (TARGET_DEFAULT & MASK_HITACHI)
9967 && (mode == BLKmode
9968 || (GET_MODE_SIZE (mode) > 4
9969 && !(mode == DFmode
9970 && TARGET_FPU_DOUBLE)));
9972 else
9974 pcum->call_cookie = 0;
9975 pcum->force_mem = FALSE;
9980 /* Determine if two hard register sets intersect.
9981 Return 1 if they do. */
9983 static int
9984 hard_regs_intersect_p (HARD_REG_SET *a, HARD_REG_SET *b)
9986 HARD_REG_SET c;
9987 COPY_HARD_REG_SET (c, *a);
9988 AND_HARD_REG_SET (c, *b);
9989 GO_IF_HARD_REG_SUBSET (c, reg_class_contents[(int) NO_REGS], lose);
9990 return 1;
9991 lose:
9992 return 0;
9995 #ifdef TARGET_ADJUST_UNROLL_MAX
9996 static int
9997 sh_adjust_unroll_max (struct loop * loop, int insn_count,
9998 int max_unrolled_insns, int strength_reduce_p,
9999 int unroll_type)
10001 /* This doesn't work in 4.0 because the old unroller & loop.h is gone. */
10002 if (TARGET_ADJUST_UNROLL && TARGET_SHMEDIA)
10004 /* Throttle back loop unrolling so that the costs of using more
10005 targets than the eight target registers we have don't outweigh
10006 the benefits of unrolling. */
10007 rtx insn;
10008 int n_labels = 0, n_calls = 0, n_exit_dest = 0, n_inner_loops = -1;
10009 int n_barriers = 0;
10010 rtx dest;
10011 int i;
10012 rtx exit_dest[8];
10013 int threshold;
10014 int unroll_benefit = 0, mem_latency = 0;
10015 int base_cost, best_cost, cost;
10016 int factor, best_factor;
10017 int n_dest;
10018 unsigned max_iterations = 32767;
10019 int n_iterations;
10020 int need_precond = 0, precond = 0;
10021 basic_block * bbs = get_loop_body (loop);
10022 struct niter_desc *desc;
10024 /* Assume that all labels inside the loop are used from inside the
10025 loop. If the loop has multiple entry points, it is unlikely to
10026 be unrolled anyway.
10027 Also assume that all calls are to different functions. That is
10028 somewhat pessimistic, but if you have lots of calls, unrolling the
10029 loop is not likely to gain you much in the first place. */
10030 i = loop->num_nodes - 1;
10031 for (insn = BB_HEAD (bbs[i]); ; )
10033 if (GET_CODE (insn) == CODE_LABEL)
10034 n_labels++;
10035 else if (GET_CODE (insn) == CALL_INSN)
10036 n_calls++;
10037 else if (GET_CODE (insn) == NOTE
10038 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
10039 n_inner_loops++;
10040 else if (GET_CODE (insn) == BARRIER)
10041 n_barriers++;
10042 if (insn != BB_END (bbs[i]))
10043 insn = NEXT_INSN (insn);
10044 else if (--i >= 0)
10045 insn = BB_HEAD (bbs[i]);
10046 else
10047 break;
10049 free (bbs);
10050 /* One label for the loop top is normal, and it won't be duplicated by
10051 unrolling. */
10052 if (n_labels <= 1)
10053 return max_unrolled_insns;
10054 if (n_inner_loops > 0)
10055 return 0;
10056 for (dest = loop->exit_labels; dest && n_exit_dest < 8;
10057 dest = LABEL_NEXTREF (dest))
10059 for (i = n_exit_dest - 1;
10060 i >= 0 && XEXP (dest, 0) != XEXP (exit_dest[i], 0); i--);
10061 if (i < 0)
10062 exit_dest[n_exit_dest++] = dest;
10064 /* If the loop top and call and exit destinations are enough to fill up
10065 the target registers, we're unlikely to do any more damage by
10066 unrolling. */
10067 if (n_calls + n_exit_dest >= 7)
10068 return max_unrolled_insns;
10070 /* ??? In the new loop unroller, there is no longer any strength
10071 reduction information available. Thus, when it comes to unrolling,
10072 we know the cost of everything, but we know the value of nothing. */
10073 #if 0
10074 if (strength_reduce_p
10075 && (unroll_type == LPT_UNROLL_RUNTIME
10076 || unroll_type == LPT_UNROLL_CONSTANT
10077 || unroll_type == LPT_PEEL_COMPLETELY))
10079 struct loop_ivs *ivs = LOOP_IVS (loop);
10080 struct iv_class *bl;
10082 /* We'll save one compare-and-branch in each loop body copy
10083 but the last one. */
10084 unroll_benefit = 1;
10085 /* Assess the benefit of removing biv & giv updates. */
10086 for (bl = ivs->list; bl; bl = bl->next)
10088 rtx increment = biv_total_increment (bl);
10089 struct induction *v;
10091 if (increment && GET_CODE (increment) == CONST_INT)
10093 unroll_benefit++;
10094 for (v = bl->giv; v; v = v->next_iv)
10096 if (! v->ignore && v->same == 0
10097 && GET_CODE (v->mult_val) == CONST_INT)
10098 unroll_benefit++;
10099 /* If this giv uses an array, try to determine
10100 a maximum iteration count from the size of the
10101 array. This need not be correct all the time,
10102 but should not be too far off the mark too often. */
10103 while (v->giv_type == DEST_ADDR)
10105 rtx mem = PATTERN (v->insn);
10106 tree mem_expr, type, size_tree;
10108 if (GET_CODE (SET_SRC (mem)) == MEM)
10109 mem = SET_SRC (mem);
10110 else if (GET_CODE (SET_DEST (mem)) == MEM)
10111 mem = SET_DEST (mem);
10112 else
10113 break;
10114 mem_expr = MEM_EXPR (mem);
10115 if (! mem_expr)
10116 break;
10117 type = TREE_TYPE (mem_expr);
10118 if (TREE_CODE (type) != ARRAY_TYPE
10119 || ! TYPE_SIZE (type) || ! TYPE_SIZE_UNIT (type))
10120 break;
10121 size_tree = fold (build (TRUNC_DIV_EXPR,
10122 bitsizetype,
10123 TYPE_SIZE (type),
10124 TYPE_SIZE_UNIT (type)));
10125 if (TREE_CODE (size_tree) == INTEGER_CST
10126 && ! TREE_INT_CST_HIGH (size_tree)
10127 && TREE_INT_CST_LOW (size_tree) < max_iterations)
10128 max_iterations = TREE_INT_CST_LOW (size_tree);
10129 break;
10135 #else /* 0 */
10136 /* Assume there is at least some benefit. */
10137 unroll_benefit = 1;
10138 #endif /* 0 */
10140 desc = get_simple_loop_desc (loop);
10141 n_iterations = desc->const_iter ? desc->niter : 0;
10142 max_iterations
10143 = max_iterations < desc->niter_max ? max_iterations : desc->niter_max;
10145 if (! strength_reduce_p || ! n_iterations)
10146 need_precond = 1;
10147 if (! n_iterations)
10149 n_iterations
10150 = max_iterations < 3 ? max_iterations : max_iterations * 3 / 4;
10151 if (! n_iterations)
10152 return 0;
10154 #if 0 /* ??? See above - missing induction variable information. */
10155 while (unroll_benefit > 1) /* no loop */
10157 /* We include the benefit of biv/ giv updates. Check if some or
10158 all of these updates are likely to fit into a scheduling
10159 bubble of a load.
10160 We check for the following case:
10161 - All the insns leading to the first JUMP_INSN are in a strict
10162 dependency chain.
10163 - there is at least one memory reference in them.
10165 When we find such a pattern, we assume that we can hide as many
10166 updates as the total of the load latency is, if we have an
10167 unroll factor of at least two. We might or might not also do
10168 this without unrolling, so rather than considering this as an
10169 extra unroll benefit, discount it in the unroll benefits of unroll
10170 factors higher than two. */
10172 rtx set, last_set;
10174 insn = next_active_insn (loop->start);
10175 last_set = single_set (insn);
10176 if (! last_set)
10177 break;
10178 if (GET_CODE (SET_SRC (last_set)) == MEM)
10179 mem_latency += 2;
10180 for (insn = NEXT_INSN (insn); insn != end; insn = NEXT_INSN (insn))
10182 if (! INSN_P (insn))
10183 continue;
10184 if (GET_CODE (insn) == JUMP_INSN)
10185 break;
10186 if (! reg_referenced_p (SET_DEST (last_set), PATTERN (insn)))
10188 /* Check if this is a to-be-reduced giv insn. */
10189 struct loop_ivs *ivs = LOOP_IVS (loop);
10190 struct iv_class *bl;
10191 struct induction *v;
10192 for (bl = ivs->list; bl; bl = bl->next)
10194 if (bl->biv->insn == insn)
10195 goto is_biv;
10196 for (v = bl->giv; v; v = v->next_iv)
10197 if (v->insn == insn)
10198 goto is_giv;
10200 mem_latency--;
10201 is_biv:
10202 is_giv:
10203 continue;
10205 set = single_set (insn);
10206 if (! set)
10207 continue;
10208 if (GET_CODE (SET_SRC (set)) == MEM)
10209 mem_latency += 2;
10210 last_set = set;
10212 if (mem_latency < 0)
10213 mem_latency = 0;
10214 else if (mem_latency > unroll_benefit - 1)
10215 mem_latency = unroll_benefit - 1;
10216 break;
10218 #endif /* 0 */
10219 if (n_labels + (unroll_benefit + n_labels * 8) / n_iterations
10220 <= unroll_benefit)
10221 return max_unrolled_insns;
10223 n_dest = n_labels + n_calls + n_exit_dest;
10224 base_cost = n_dest <= 8 ? 0 : n_dest - 7;
10225 best_cost = 0;
10226 best_factor = 1;
10227 if (n_barriers * 2 > n_labels - 1)
10228 n_barriers = (n_labels - 1) / 2;
10229 for (factor = 2; factor <= 8; factor++)
10231 /* Bump up preconditioning cost for each power of two. */
10232 if (! (factor & (factor-1)))
10233 precond += 4;
10234 /* When preconditioning, only powers of two will be considered. */
10235 else if (need_precond)
10236 continue;
10237 n_dest = ((unroll_type != LPT_PEEL_COMPLETELY)
10238 + (n_labels - 1) * factor + n_calls + n_exit_dest
10239 - (n_barriers * factor >> 1)
10240 + need_precond);
10241 cost
10242 = ((n_dest <= 8 ? 0 : n_dest - 7)
10243 - base_cost * factor
10244 - ((factor > 2 ? unroll_benefit - mem_latency : unroll_benefit)
10245 * (factor - (unroll_type != LPT_PEEL_COMPLETELY)))
10246 + ((unroll_benefit + 1 + (n_labels - 1) * factor)
10247 / n_iterations));
10248 if (need_precond)
10249 cost += (precond + unroll_benefit * factor / 2) / n_iterations;
10250 if (cost < best_cost)
10252 best_cost = cost;
10253 best_factor = factor;
10256 threshold = best_factor * insn_count;
10257 if (max_unrolled_insns > threshold)
10258 max_unrolled_insns = threshold;
10260 return max_unrolled_insns;
10262 #endif /* TARGET_ADJUST_UNROLL_MAX */
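/* Worked example for the cost model above (illustrative, with assumed
   inputs): for a loop with n_labels = 4, n_calls = 2, n_exit_dest = 2,
   n_barriers = 0, unroll_benefit = 1, need_precond = 0 and a constant
   16-iteration count, base_cost is 0 (4 + 2 + 2 <= 8).  At factor 2,
   n_dest = 1 + 3*2 + 2 + 2 = 11, so the target-register pressure term
   is 11 - 7 = 4 and cost = 4 - 1*(2 - 1) + 0 = 3; larger factors only
   get worse, so best_factor stays 1 and max_unrolled_insns is clamped
   to the loop's own insn count, i.e. unrolling is suppressed once
   branch targets threaten to spill out of the eight target registers.  */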
10264 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
10265 not recurse into CONST_DOUBLEs when doing the replacement.
10267 Note that copying is not done so X must not be shared unless all copies
10268 are to be modified.
10270 This is like replace_rtx, except that we operate on N_REPLACEMENTS
10271 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
10272 replacements[n*2+1] - and that we take mode changes into account.
10274 If a replacement is ambiguous, return NULL_RTX.
10276 If MODIFY is zero, don't modify any rtl in place,
10277 just return zero or nonzero for failure / success. */
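/* A minimal usage sketch (hypothetical operands): to rename r4/r5 to
   r6/r7 in one pass, a caller could do

     rtx repl[4];
     repl[0] = gen_rtx_REG (SImode, 4);  repl[1] = gen_rtx_REG (SImode, 6);
     repl[2] = gen_rtx_REG (SImode, 5);  repl[3] = gen_rtx_REG (SImode, 7);
     if (replace_n_hard_rtx (pat, repl, 2, 0))
       replace_n_hard_rtx (pat, repl, 2, 1);

   i.e. first probe with MODIFY == 0, then rewrite in place only if no
   replacement was ambiguous.  */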
10279 rtx
10280 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
10282 int i, j;
10283 const char *fmt;
10285 /* The following prevents a loop when we change a MEM inside a
10286 CONST_DOUBLE into the same CONST_DOUBLE. */
10287 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
10288 return x;
10290 for (i = n_replacements - 1; i >= 0 ; i--)
10291 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
10292 return replacements[i*2+1];
10294 /* Allow this function to make replacements in EXPR_LISTs. */
10295 if (x == 0)
10296 return 0;
10298 if (GET_CODE (x) == SUBREG)
10300 rtx new = replace_n_hard_rtx (SUBREG_REG (x), replacements,
10301 n_replacements, modify);
10303 if (GET_CODE (new) == CONST_INT)
10305 x = simplify_subreg (GET_MODE (x), new,
10306 GET_MODE (SUBREG_REG (x)),
10307 SUBREG_BYTE (x));
10308 if (! x)
10309 abort ();
10311 else if (modify)
10312 SUBREG_REG (x) = new;
10314 return x;
10316 else if (GET_CODE (x) == REG)
10318 unsigned regno = REGNO (x);
10319 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
10320 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
10321 rtx result = NULL_RTX;
10323 for (i = n_replacements - 1; i >= 0; i--)
10325 rtx from = replacements[i*2];
10326 rtx to = replacements[i*2+1];
10327 unsigned from_regno, from_nregs, to_regno, new_regno;
10329 if (GET_CODE (from) != REG)
10330 continue;
10331 from_regno = REGNO (from);
10332 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
10333 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
10334 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
10336 if (regno < from_regno
10337 || regno + nregs > from_regno + nregs
10338 || GET_CODE (to) != REG
10339 || result)
10340 return NULL_RTX;
10341 to_regno = REGNO (to);
10342 if (to_regno < FIRST_PSEUDO_REGISTER)
10344 new_regno = regno + to_regno - from_regno;
10345 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
10346 != nregs)
10347 return NULL_RTX;
10348 result = gen_rtx_REG (GET_MODE (x), new_regno);
10350 else if (GET_MODE (x) <= GET_MODE (to))
10351 result = gen_lowpart_common (GET_MODE (x), to);
10352 else
10353 result = gen_lowpart_SUBREG (GET_MODE (x), to);
10356 return result ? result : x;
10358 else if (GET_CODE (x) == ZERO_EXTEND)
10360 rtx new = replace_n_hard_rtx (XEXP (x, 0), replacements,
10361 n_replacements, modify);
10363 if (GET_CODE (new) == CONST_INT)
10365 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
10366 new, GET_MODE (XEXP (x, 0)));
10367 if (! x)
10368 abort ();
10370 else if (modify)
10371 XEXP (x, 0) = new;
10373 return x;
10376 fmt = GET_RTX_FORMAT (GET_CODE (x));
10377 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10379 rtx new;
10381 if (fmt[i] == 'e')
10383 new = replace_n_hard_rtx (XEXP (x, i), replacements,
10384 n_replacements, modify);
10385 if (!new)
10386 return NULL_RTX;
10387 if (modify)
10388 XEXP (x, i) = new;
10390 else if (fmt[i] == 'E')
10391 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10393 new = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
10394 n_replacements, modify);
10395 if (!new)
10396 return NULL_RTX;
10397 if (modify)
10398 XVECEXP (x, i, j) = new;
10402 return x;
10405 rtx
10406 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
10408 enum rtx_code code = TRUNCATE;
10410 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
10412 rtx inner = XEXP (x, 0);
10413 enum machine_mode inner_mode = GET_MODE (inner);
10415 if (inner_mode == mode)
10416 return inner;
10417 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
10418 x = inner;
10419 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
10420 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
10422 code = GET_CODE (x);
10423 x = inner;
10426 return gen_rtx_fmt_e (code, mode, x);
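/* Illustrative results (not exhaustive): with MODE == SImode,
   (sign_extend:DI (reg:SI r)) collapses to (reg:SI r) since the inner
   mode already matches; a bare (reg:DI r) instead yields
   (truncate:SI (reg:DI r)); and (zero_extend:DI (reg:HI r)) with
   NEED_SIGN_EXT == 0 becomes (zero_extend:SI (reg:HI r)).  */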
10429 /* Called via for_each_rtx after reload, to clean up truncates of
10430 registers that span multiple actual hard registers. */
10431 int
10432 shmedia_cleanup_truncate (rtx *p, void *n_changes)
10434 rtx x = *p, reg;
10436 if (GET_CODE (x) != TRUNCATE)
10437 return 0;
10438 reg = XEXP (x, 0);
10439 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && GET_CODE (reg) == REG)
10441 enum machine_mode reg_mode = GET_MODE (reg);
10442 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
10443 subreg_lowpart_offset (DImode, reg_mode));
10444 *(int*) n_changes += 1;
10445 return -1;
10447 return 0;
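/* A minimal usage sketch (hypothetical caller): walk an insn pattern and
   count how many oversized truncates were narrowed:

     int n_changes = 0;
     for_each_rtx (&PATTERN (insn), shmedia_cleanup_truncate, &n_changes);
     if (n_changes)
       ... re-recognize or re-split the insn as needed ...

   the callback rewrites TRUNCATEs of registers wider than 8 bytes to
   operate on the low DImode part and bumps *N_CHANGES for each rewrite.  */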
10450 /* Load and store depend on the highpart of the address. However,
10451 set_attr_alternative does not give well-defined results before reload,
10452 so we must look at the rtl ourselves to see if any of the feeding
10453 registers is used in a memref. */
10455 /* Called by sh_contains_memref_p via for_each_rtx. */
10456 static int
10457 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
10459 return (GET_CODE (*loc) == MEM);
10462 /* Return non-zero iff INSN contains a MEM. */
10463 int
10464 sh_contains_memref_p (rtx insn)
10466 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
10469 /* FNADDR is the MEM expression from a call expander. Return an address
10470 to use in an SHmedia insn pattern. */
10471 rtx
10472 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
10474 int is_sym;
10476 fnaddr = XEXP (fnaddr, 0);
10477 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
10478 if (flag_pic && is_sym)
10480 if (! SYMBOL_REF_LOCAL_P (fnaddr))
10482 rtx reg = gen_reg_rtx (Pmode);
10484 /* We must not use GOTPLT for sibcalls, because PIC_REG
10485 must be restored before the PLT code gets to run. */
10486 if (is_sibcall)
10487 emit_insn (gen_symGOT2reg (reg, fnaddr));
10488 else
10489 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
10490 fnaddr = reg;
10492 else
10494 fnaddr = gen_sym2PIC (fnaddr);
10495 PUT_MODE (fnaddr, Pmode);
10498 /* If ptabs might trap, make this visible to the rest of the compiler.
10499 We generally assume that symbols pertain to valid locations, but
10500 it is possible to generate invalid symbols with asm or linker tricks.
10501 In a list of functions where each returns its successor, an invalid
10502 symbol might denote an empty list. */
10503 if (!TARGET_PT_FIXED
10504 && (!is_sym || TARGET_INVALID_SYMBOLS)
10505 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
10507 rtx tr = gen_reg_rtx (PDImode);
10509 emit_insn (gen_ptabs (tr, fnaddr));
10510 fnaddr = tr;
10512 else if (! target_reg_operand (fnaddr, Pmode))
10513 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
10514 return fnaddr;
10517 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
10519 /* This defines the storage for the variable part of a -mboard= option.
10520 It is only required when using the sh-superh-elf target. */
10521 #ifdef _SUPERH_H
10522 const char * boardtype = "7750p2";
10523 const char * osruntime = "bare";
10524 #endif
10526 #include "gt-sh.h"