1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "insn-config.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "flags.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "function.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "output.h"
38 #include "insn-attr.h"
39 #include "toplev.h"
40 #include "recog.h"
41 #include "c-pragma.h"
42 #include "integrate.h"
43 #include "dwarf2.h"
44 #include "tm_p.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "real.h"
48 #include "langhooks.h"
49 #include "basic-block.h"
50 #include "cfglayout.h"
51 #include "intl.h"
52 #include "sched-int.h"
53 #include "ggc.h"
54 #include "tree-gimple.h"
55 #include "cfgloop.h"
58 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
60 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
61 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
63 /* These are some macros to abstract register modes. */
64 #define CONST_OK_FOR_ADD(size) \
65 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
66 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
67 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
68 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
70 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
71 int current_function_interrupt;
73 /* ??? The pragma interrupt support will not work for SH3. */
74 /* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
75 output code for the next function appropriate for an interrupt handler. */
76 int pragma_interrupt;
78 /* This is set by the trap_exit attribute for functions. It specifies
79 a trap number to be used in a trapa instruction at function exit
80 (instead of an rte instruction). */
81 int trap_exit;
83 /* This is used by the sp_switch attribute for functions. It specifies
84 a variable holding the address of the stack the interrupt function
85 should switch to/from at entry/exit. */
86 rtx sp_switch;
88 /* This is set by #pragma trapa, and is similar to the above, except that
89 the compiler doesn't emit code to preserve all registers. */
90 static int pragma_trapa;
92 /* This is set by #pragma nosave_low_regs. This is useful on the SH3,
93 which has a separate set of low regs for User and Supervisor modes.
94 This should only be used for the lowest level of interrupts. Higher levels
95 of interrupts must save the registers in case they themselves are
96 interrupted. */
97 int pragma_nosave_low_regs;
99 /* This is used for communication between TARGET_SETUP_INCOMING_VARARGS and
100 sh_expand_prologue. */
101 int current_function_anonymous_args;
103 /* Global variables for machine-dependent things. */
105 /* Which cpu we are scheduling for.  */
106 enum processor_type sh_cpu;
108 /* Definitions used in ready queue reordering for first scheduling pass. */
110 /* Register weight arrays for modes SFmode and SImode, indexed by insn LUID.  */
111 static short *regmode_weight[2];
113 /* Total SFmode and SImode weights of scheduled insns. */
114 static int curr_regmode_pressure[2];
116 /* If true, skip cycles for Q -> R movement. */
117 static int skip_cycles = 0;
119 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
120 and returned from sh_reorder2. */
121 static short cached_can_issue_more;
123 /* Saved operands from the last compare to use when we generate an scc
124 or bcc insn. */
126 rtx sh_compare_op0;
127 rtx sh_compare_op1;
129 /* Provides the class number of the smallest class containing
130 each reg number.  */
132 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
134 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
137 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
138 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
139 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
140 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
141 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
142 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
143 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
144 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
145 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
146 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
147 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
148 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
149 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
150 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
153 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
154 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
155 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
156 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
157 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
158 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
159 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
160 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
161 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
162 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
163 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
164 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
165 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
166 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
167 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
168 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
169 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
170 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
171 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
172 GENERAL_REGS,
175 char sh_register_names[FIRST_PSEUDO_REGISTER] \
176 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
178 char sh_additional_register_names[ADDREGNAMES_SIZE] \
179 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
180 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
182 /* Provide the reg_class for a constraint letter as it appears in the machine
183 description.  A letter marked with an extra '*' below is reserved
184 target-independently.  reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD.  */
186 enum reg_class reg_class_from_letter[] =
188 /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
189 /* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
190 /* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
191 /* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
192 /* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
193 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
194 /* y */ FPUL_REGS, /* z */ R0_REGS
197 int assembler_dialect;
199 static bool shmedia_space_reserved_for_target_registers;
201 static bool sh_handle_option (size_t, const char *, int);
202 static void split_branches (rtx);
203 static int branch_dest (rtx);
204 static void force_into (rtx, rtx);
205 static void print_slot (rtx);
206 static rtx add_constant (rtx, enum machine_mode, rtx);
207 static void dump_table (rtx, rtx);
208 static int hi_const (rtx);
209 static int broken_move (rtx);
210 static int mova_p (rtx);
211 static rtx find_barrier (int, rtx, rtx);
212 static int noncall_uses_reg (rtx, rtx, rtx *);
213 static rtx gen_block_redirect (rtx, int, int);
214 static void sh_reorg (void);
215 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
216 static rtx frame_insn (rtx);
217 static rtx push (int);
218 static void pop (int);
219 static void push_regs (HARD_REG_SET *, int);
220 static int calc_live_regs (HARD_REG_SET *);
221 static void mark_use (rtx, rtx *);
222 static HOST_WIDE_INT rounded_frame_size (int);
223 static rtx mark_constant_pool_use (rtx);
224 const struct attribute_spec sh_attribute_table[];
225 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
226 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
227 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
228 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
229 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
230 static void sh_insert_attributes (tree, tree *);
231 static const char *sh_check_pch_target_flags (int);
232 static int sh_adjust_cost (rtx, rtx, rtx, int);
233 static int sh_issue_rate (void);
234 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
235 static short find_set_regmode_weight (rtx, enum machine_mode);
236 static short find_insn_regmode_weight (rtx, enum machine_mode);
237 static void find_regmode_weight (int, enum machine_mode);
238 static void sh_md_init_global (FILE *, int, int);
239 static void sh_md_finish_global (FILE *, int);
240 static int rank_for_reorder (const void *, const void *);
241 static void swap_reorder (rtx *, int);
242 static void ready_reorder (rtx *, int);
243 static short high_pressure (enum machine_mode);
244 static int sh_reorder (FILE *, int, rtx *, int *, int);
245 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
246 static void sh_md_init (FILE *, int, int);
247 static int sh_variable_issue (FILE *, int, rtx, int);
249 static bool sh_function_ok_for_sibcall (tree, tree);
251 static bool sh_cannot_modify_jumps_p (void);
252 static int sh_target_reg_class (void);
253 static bool sh_optimize_target_register_callee_saved (bool);
254 static bool sh_ms_bitfield_layout_p (tree);
256 static void sh_init_builtins (void);
257 static void sh_media_init_builtins (void);
258 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
259 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
260 static void sh_file_start (void);
261 static int flow_dependent_p (rtx, rtx);
262 static void flow_dependent_p_1 (rtx, rtx, void *);
263 static int shiftcosts (rtx);
264 static int andcosts (rtx);
265 static int addsubcosts (rtx);
266 static int multcosts (rtx);
267 static bool unspec_caller_rtx_p (rtx);
268 static bool sh_cannot_copy_insn_p (rtx);
269 static bool sh_rtx_costs (rtx, int, int, int *);
270 static int sh_address_cost (rtx);
271 #ifdef TARGET_ADJUST_UNROLL_MAX
272 static int sh_adjust_unroll_max (struct loop *, int, int, int, int);
273 #endif
274 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
275 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
276 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
277 static int scavenge_reg (HARD_REG_SET *s);
278 struct save_schedule_s;
279 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
280 struct save_schedule_s *, int);
282 static rtx sh_struct_value_rtx (tree, int);
283 static bool sh_return_in_memory (tree, tree);
284 static rtx sh_builtin_saveregs (void);
285 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
286 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
287 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
288 static tree sh_build_builtin_va_list (void);
289 static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
290 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
291 tree, bool);
292 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
293 tree, bool);
294 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
295 tree, bool);
296 static int sh_dwarf_calling_convention (tree);
297 static int hard_regs_intersect_p (HARD_REG_SET *, HARD_REG_SET *);
300 /* Initialize the GCC target structure. */
301 #undef TARGET_ATTRIBUTE_TABLE
302 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
304 /* The next two are used for debug info when compiling with -gdwarf. */
305 #undef TARGET_ASM_UNALIGNED_HI_OP
306 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
307 #undef TARGET_ASM_UNALIGNED_SI_OP
308 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
310 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
311 #undef TARGET_ASM_UNALIGNED_DI_OP
312 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
313 #undef TARGET_ASM_ALIGNED_DI_OP
314 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
316 #undef TARGET_ASM_FUNCTION_EPILOGUE
317 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
319 #undef TARGET_ASM_OUTPUT_MI_THUNK
320 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
322 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
323 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
325 #undef TARGET_ASM_FILE_START
326 #define TARGET_ASM_FILE_START sh_file_start
327 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
328 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
330 #undef TARGET_DEFAULT_TARGET_FLAGS
331 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
332 #undef TARGET_HANDLE_OPTION
333 #define TARGET_HANDLE_OPTION sh_handle_option
335 #undef TARGET_INSERT_ATTRIBUTES
336 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
338 #undef TARGET_SCHED_ADJUST_COST
339 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
341 #undef TARGET_SCHED_ISSUE_RATE
342 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
344 /* The following hooks have been implemented to re-enable sched1.  With the
345 help of these hooks we limit the movement of insns in sched1 in order to
346 reduce the register pressure.  The overall idea is to keep count of the SImode
347 and SFmode regs required by already scheduled insns.  When these counts
348 cross certain threshold values, we give priority to insns that free registers.
349 The insn that frees registers is most likely to be the insn with the lowest
350 LUID (original insn order), but such an insn might be sitting in the stalled
351 queue (Q) instead of the ready queue (R).  To solve this, we skip up to
352 a maximum of 8 cycles so that such insns may move from Q -> R.
354 The hooks are described below:
356 TARGET_SCHED_INIT_GLOBAL: A new target hook added to the generic
357 scheduler; it is called inside the sched_init function just after the
358 call to find_insn_reg_weights.  It is used to calculate the SImode
359 and SFmode weights of the insns in each basic block, much as
360 find_insn_reg_weights does.
361 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
363 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
364 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
365 (Q)->(R).
367 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
368 high, reorder the ready queue so that the insn with the lowest LUID will be
369 issued next.
371 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
372 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
374 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
375 can be returned from TARGET_SCHED_REORDER2.
377 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
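/* A rough sketch of how these hooks fit together in this file (illustrative
   only; the names refer to the statics declared above): sh_md_init_global
   computes regmode_weight[] (see find_regmode_weight); sh_reorder and
   sh_reorder2 compare CURR_REGMODE_PRESSURE against high_pressure () and,
   when it is exceeded, ready_reorder sorts the ready queue by LUID;
   sh_variable_issue caches can_issue_more in cached_can_issue_more; and
   sh_dfa_new_cycle consults skip_cycles to skip up to 8 cycles.  */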
379 #undef TARGET_SCHED_DFA_NEW_CYCLE
380 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
382 #undef TARGET_SCHED_INIT_GLOBAL
383 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
385 #undef TARGET_SCHED_FINISH_GLOBAL
386 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
388 #undef TARGET_SCHED_VARIABLE_ISSUE
389 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
391 #undef TARGET_SCHED_REORDER
392 #define TARGET_SCHED_REORDER sh_reorder
394 #undef TARGET_SCHED_REORDER2
395 #define TARGET_SCHED_REORDER2 sh_reorder2
397 #undef TARGET_SCHED_INIT
398 #define TARGET_SCHED_INIT sh_md_init
400 #undef TARGET_CANNOT_MODIFY_JUMPS_P
401 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
402 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
403 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
404 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
405 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
406 sh_optimize_target_register_callee_saved
408 #undef TARGET_MS_BITFIELD_LAYOUT_P
409 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
411 #undef TARGET_INIT_BUILTINS
412 #define TARGET_INIT_BUILTINS sh_init_builtins
413 #undef TARGET_EXPAND_BUILTIN
414 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
416 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
417 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
419 #undef TARGET_CANNOT_COPY_INSN_P
420 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
421 #undef TARGET_RTX_COSTS
422 #define TARGET_RTX_COSTS sh_rtx_costs
423 #undef TARGET_ADDRESS_COST
424 #define TARGET_ADDRESS_COST sh_address_cost
426 #undef TARGET_MACHINE_DEPENDENT_REORG
427 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
429 #ifdef HAVE_AS_TLS
430 #undef TARGET_HAVE_TLS
431 #define TARGET_HAVE_TLS true
432 #endif
434 #undef TARGET_PROMOTE_PROTOTYPES
435 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
436 #undef TARGET_PROMOTE_FUNCTION_ARGS
437 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
438 #undef TARGET_PROMOTE_FUNCTION_RETURN
439 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
441 #undef TARGET_STRUCT_VALUE_RTX
442 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
443 #undef TARGET_RETURN_IN_MEMORY
444 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
446 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
447 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
448 #undef TARGET_SETUP_INCOMING_VARARGS
449 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
450 #undef TARGET_STRICT_ARGUMENT_NAMING
451 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
452 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
453 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
454 #undef TARGET_MUST_PASS_IN_STACK
455 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
456 #undef TARGET_PASS_BY_REFERENCE
457 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
458 #undef TARGET_CALLEE_COPIES
459 #define TARGET_CALLEE_COPIES sh_callee_copies
460 #undef TARGET_ARG_PARTIAL_BYTES
461 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
463 #undef TARGET_BUILD_BUILTIN_VA_LIST
464 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
465 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
466 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
468 #undef TARGET_VECTOR_MODE_SUPPORTED_P
469 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
471 #undef TARGET_CHECK_PCH_TARGET_FLAGS
472 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
474 #undef TARGET_DWARF_CALLING_CONVENTION
475 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
477 /* Return regmode weight for insn. */
478 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
480 /* Return current register pressure for regmode. */
481 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
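/* Illustrative note: both macros above index with 0 for SImode and 1 for
   anything else (in practice SFmode), matching the two-element layout of
   regmode_weight[] and curr_regmode_pressure[] declared near the top of
   this file.  */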
483 #ifdef SYMBIAN
485 #undef TARGET_ENCODE_SECTION_INFO
486 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
487 #undef TARGET_STRIP_NAME_ENCODING
488 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
489 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
490 #define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class
492 #endif /* SYMBIAN */
494 #ifdef TARGET_ADJUST_UNROLL_MAX
495 #undef TARGET_ADJUST_UNROLL_MAX
496 #define TARGET_ADJUST_UNROLL_MAX sh_adjust_unroll_max
497 #endif
499 struct gcc_target targetm = TARGET_INITIALIZER;
501 /* Implement TARGET_HANDLE_OPTION. */
503 static bool
504 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
505 int value ATTRIBUTE_UNUSED)
507 switch (code)
509 case OPT_m1:
510 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
511 return true;
513 case OPT_m2:
514 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
515 return true;
517 case OPT_m2a:
518 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
519 return true;
521 case OPT_m2a_nofpu:
522 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
523 return true;
525 case OPT_m2a_single:
526 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
527 return true;
529 case OPT_m2a_single_only:
530 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
531 return true;
533 case OPT_m2e:
534 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
535 return true;
537 case OPT_m3:
538 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
539 return true;
541 case OPT_m3e:
542 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
543 return true;
545 case OPT_m4:
546 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
547 return true;
549 case OPT_m4_nofpu:
550 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
551 return true;
553 case OPT_m4_single:
554 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
555 return true;
557 case OPT_m4_single_only:
558 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
559 return true;
561 case OPT_m4a:
562 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
563 return true;
565 case OPT_m4a_nofpu:
566 case OPT_m4al:
567 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
568 return true;
570 case OPT_m4a_single:
571 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
572 return true;
574 case OPT_m4a_single_only:
575 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
576 return true;
578 case OPT_m5_32media:
579 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
580 return true;
582 case OPT_m5_32media_nofpu:
583 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
584 return true;
586 case OPT_m5_64media:
587 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
588 return true;
590 case OPT_m5_64media_nofpu:
591 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
592 return true;
594 case OPT_m5_compact:
595 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
596 return true;
598 case OPT_m5_compact_nofpu:
599 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
600 return true;
602 default:
603 return true;
607 /* Print the operand address in x to the stream. */
609 void
610 print_operand_address (FILE *stream, rtx x)
612 switch (GET_CODE (x))
614 case REG:
615 case SUBREG:
616 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
617 break;
619 case PLUS:
621 rtx base = XEXP (x, 0);
622 rtx index = XEXP (x, 1);
624 switch (GET_CODE (index))
626 case CONST_INT:
627 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
628 reg_names[true_regnum (base)]);
629 break;
631 case REG:
632 case SUBREG:
634 int base_num = true_regnum (base);
635 int index_num = true_regnum (index);
637 fprintf (stream, "@(r0,%s)",
638 reg_names[MAX (base_num, index_num)]);
639 break;
642 default:
643 gcc_unreachable ();
646 break;
648 case PRE_DEC:
649 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
650 break;
652 case POST_INC:
653 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
654 break;
656 default:
657 x = mark_constant_pool_use (x);
658 output_addr_const (stream, x);
659 break;
663 /* Print operand x (an rtx) in assembler syntax to file stream
664 according to modifier code.
666 '.' print a .s if insn needs delay slot
667 ',' print LOCAL_LABEL_PREFIX
668 '@' print trapa, rte or rts, depending on the trap_exit and interrupt-handler settings
669 '#' output a nop if there is nothing to put in the delay slot
670 ''' print likelihood suffix (/u for unlikely).
671 '>' print branch target if -fverbose-asm
672 'O' print a constant without the #
673 'R' print the LSW of a dp value - changes if in little endian
674 'S' print the MSW of a dp value - changes if in little endian
675 'T' print the next word of a dp value - same as 'R' in big endian mode.
676 'M' print an `x' if `m' will print `base,index'.
677 'N' print 'r63' if the operand is (const_int 0).
678 'd' print a V2SF reg as dN instead of fpN.
679 'm' print a pair `base,offset' or `base,index', for LD and ST.
680 'U' Likewise for {LD,ST}{HI,LO}.
681 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
682 'o' output an operator. */
684 void
685 print_operand (FILE *stream, rtx x, int code)
687 int regno;
688 enum machine_mode mode;
690 switch (code)
692 case '.':
693 if (final_sequence
694 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
695 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
696 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
697 break;
698 case ',':
699 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
700 break;
701 case '@':
702 if (trap_exit)
703 fprintf (stream, "trapa #%d", trap_exit);
704 else if (sh_cfun_interrupt_handler_p ())
705 fprintf (stream, "rte");
706 else
707 fprintf (stream, "rts");
708 break;
709 case '#':
710 /* Output a nop if there's nothing in the delay slot. */
711 if (dbr_sequence_length () == 0)
712 fprintf (stream, "\n\tnop");
713 break;
714 case '\'':
716 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
718 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
719 fputs ("/u", stream);
720 break;
722 case '>':
723 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
725 fputs ("\t! target: ", stream);
726 output_addr_const (stream, JUMP_LABEL (current_output_insn));
728 break;
729 case 'O':
730 x = mark_constant_pool_use (x);
731 output_addr_const (stream, x);
732 break;
733 case 'R':
734 fputs (reg_names[REGNO (x) + LSW], (stream));
735 break;
736 case 'S':
737 fputs (reg_names[REGNO (x) + MSW], (stream));
738 break;
739 case 'T':
740 /* Next word of a double. */
741 switch (GET_CODE (x))
743 case REG:
744 fputs (reg_names[REGNO (x) + 1], (stream));
745 break;
746 case MEM:
747 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
748 && GET_CODE (XEXP (x, 0)) != POST_INC)
749 x = adjust_address (x, SImode, 4);
750 print_operand_address (stream, XEXP (x, 0));
751 break;
752 default:
753 break;
755 break;
756 case 'o':
757 switch (GET_CODE (x))
759 case PLUS: fputs ("add", stream); break;
760 case MINUS: fputs ("sub", stream); break;
761 case MULT: fputs ("mul", stream); break;
762 case DIV: fputs ("div", stream); break;
763 case EQ: fputs ("eq", stream); break;
764 case NE: fputs ("ne", stream); break;
765 case GT: case LT: fputs ("gt", stream); break;
766 case GE: case LE: fputs ("ge", stream); break;
767 case GTU: case LTU: fputs ("gtu", stream); break;
768 case GEU: case LEU: fputs ("geu", stream); break;
769 default:
770 break;
772 break;
773 case 'M':
774 if (GET_CODE (x) == MEM
775 && GET_CODE (XEXP (x, 0)) == PLUS
776 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
777 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
778 fputc ('x', stream);
779 break;
781 case 'm':
782 gcc_assert (GET_CODE (x) == MEM);
783 x = XEXP (x, 0);
784 /* Fall through. */
785 case 'U':
786 switch (GET_CODE (x))
788 case REG:
789 case SUBREG:
790 print_operand (stream, x, 0);
791 fputs (", 0", stream);
792 break;
794 case PLUS:
795 print_operand (stream, XEXP (x, 0), 0);
796 fputs (", ", stream);
797 print_operand (stream, XEXP (x, 1), 0);
798 break;
800 default:
801 gcc_unreachable ();
803 break;
805 case 'd':
806 gcc_assert (GET_CODE (x) == REG && GET_MODE (x) == V2SFmode);
808 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
809 break;
811 case 'N':
812 if (x == CONST0_RTX (GET_MODE (x)))
814 fprintf ((stream), "r63");
815 break;
817 goto default_output;
818 case 'u':
819 if (GET_CODE (x) == CONST_INT)
821 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
822 break;
824 /* Fall through. */
826 default_output:
827 default:
828 regno = 0;
829 mode = GET_MODE (x);
831 switch (GET_CODE (x))
833 case TRUNCATE:
835 rtx inner = XEXP (x, 0);
836 int offset = 0;
837 enum machine_mode inner_mode;
839 /* We might see SUBREGs with vector mode registers inside. */
840 if (GET_CODE (inner) == SUBREG
841 && (GET_MODE_SIZE (GET_MODE (inner))
842 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
843 && subreg_lowpart_p (inner))
844 inner = SUBREG_REG (inner);
845 if (GET_CODE (inner) == CONST_INT)
847 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
848 goto default_output;
850 inner_mode = GET_MODE (inner);
851 if (GET_CODE (inner) == SUBREG
852 && (GET_MODE_SIZE (GET_MODE (inner))
853 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
854 && GET_CODE (SUBREG_REG (inner)) == REG)
856 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
857 GET_MODE (SUBREG_REG (inner)),
858 SUBREG_BYTE (inner),
859 GET_MODE (inner));
860 inner = SUBREG_REG (inner);
862 if (GET_CODE (inner) != REG || GET_MODE_SIZE (inner_mode) > 8)
863 abort ();
864 /* Floating point register pairs are always big endian;
865 general purpose registers are 64 bit wide. */
866 regno = REGNO (inner);
867 regno = (HARD_REGNO_NREGS (regno, inner_mode)
868 - HARD_REGNO_NREGS (regno, mode))
869 + offset;
870 x = inner;
871 goto reg;
873 case SIGN_EXTEND:
874 x = XEXP (x, 0);
875 goto reg;
876 /* FIXME: We need this on SHmedia32 because reload generates
877 some sign-extended HI or QI loads into DImode registers
878 but, because Pmode is SImode, the address ends up with a
879 subreg:SI of the DImode register. Maybe reload should be
880 fixed so as to apply alter_subreg to such loads? */
881 case IF_THEN_ELSE:
882 gcc_assert (trapping_target_operand (x, VOIDmode));
883 x = XEXP (XEXP (x, 2), 0);
884 goto default_output;
885 case SUBREG:
886 gcc_assert (SUBREG_BYTE (x) == 0
887 && GET_CODE (SUBREG_REG (x)) == REG);
889 x = SUBREG_REG (x);
890 /* Fall through. */
892 reg:
893 case REG:
894 regno += REGNO (x);
895 if (FP_REGISTER_P (regno)
896 && mode == V16SFmode)
897 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
898 else if (FP_REGISTER_P (REGNO (x))
899 && mode == V4SFmode)
900 fprintf ((stream), "fv%s", reg_names[regno] + 2);
901 else if (GET_CODE (x) == REG
902 && mode == V2SFmode)
903 fprintf ((stream), "fp%s", reg_names[regno] + 2);
904 else if (FP_REGISTER_P (REGNO (x))
905 && GET_MODE_SIZE (mode) > 4)
906 fprintf ((stream), "d%s", reg_names[regno] + 1);
907 else
908 fputs (reg_names[regno], (stream));
909 break;
911 case MEM:
912 output_address (XEXP (x, 0));
913 break;
915 case CONST:
916 if (TARGET_SHMEDIA
917 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
918 && (GET_MODE (XEXP (x, 0)) == DImode
919 || GET_MODE (XEXP (x, 0)) == SImode)
920 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
921 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
923 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
925 fputc ('(', stream);
926 if (GET_CODE (val) == ASHIFTRT)
928 fputc ('(', stream);
929 if (GET_CODE (XEXP (val, 0)) == CONST)
930 fputc ('(', stream);
931 output_addr_const (stream, XEXP (val, 0));
932 if (GET_CODE (XEXP (val, 0)) == CONST)
933 fputc (')', stream);
934 fputs (" >> ", stream);
935 output_addr_const (stream, XEXP (val, 1));
936 fputc (')', stream);
938 else
940 if (GET_CODE (val) == CONST)
941 fputc ('(', stream);
942 output_addr_const (stream, val);
943 if (GET_CODE (val) == CONST)
944 fputc (')', stream);
946 fputs (" & 65535)", stream);
947 break;
950 /* Fall through. */
951 default:
952 if (TARGET_SH1)
953 fputc ('#', stream);
954 output_addr_const (stream, x);
955 break;
957 break;
961 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
962 static void
963 force_into (rtx value, rtx target)
965 value = force_operand (value, target);
966 if (! rtx_equal_p (value, target))
967 emit_insn (gen_move_insn (target, value));
970 /* Emit code to perform a block move. Choose the best method.
972 OPERANDS[0] is the destination.
973 OPERANDS[1] is the source.
974 OPERANDS[2] is the size.
975 OPERANDS[3] is the alignment safe to use. */
977 int
978 expand_block_move (rtx *operands)
980 int align = INTVAL (operands[3]);
981 int constp = (GET_CODE (operands[2]) == CONST_INT);
982 int bytes = (constp ? INTVAL (operands[2]) : 0);
984 if (! constp)
985 return 0;
987 /* If we could use mov.l to move words and dest is word-aligned, we
988 can use movua.l for loads and still generate a relatively short
989 and efficient sequence. */
990 if (TARGET_SH4A_ARCH && align < 4
991 && MEM_ALIGN (operands[0]) >= 32
992 && can_move_by_pieces (bytes, 32))
994 rtx dest = copy_rtx (operands[0]);
995 rtx src = copy_rtx (operands[1]);
996 /* We could use different pseudos for each copied word, but
997 since movua can only load into r0, it's kind of
998 pointless. */
999 rtx temp = gen_reg_rtx (SImode);
1000 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1001 int copied = 0;
1003 while (copied + 4 <= bytes)
1005 rtx to = adjust_address (dest, SImode, copied);
1006 rtx from = adjust_automodify_address (src, SImode, src_addr, copied);
1008 emit_insn (gen_movua (temp, from));
1009 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1010 emit_move_insn (to, temp);
1011 copied += 4;
1014 if (copied < bytes)
1015 move_by_pieces (adjust_address (dest, BLKmode, copied),
1016 adjust_automodify_address (src, BLKmode,
1017 src_addr, copied),
1018 bytes - copied, align, 0);
1020 return 1;
1023 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1024 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1025 if (align < 4 || (bytes % 4 != 0))
1026 return 0;
1028 if (TARGET_HARD_SH4)
1030 if (bytes < 12)
1031 return 0;
1032 else if (bytes == 12)
1034 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1035 rtx r4 = gen_rtx_REG (SImode, 4);
1036 rtx r5 = gen_rtx_REG (SImode, 5);
1038 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1039 force_into (XEXP (operands[0], 0), r4);
1040 force_into (XEXP (operands[1], 0), r5);
1041 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1042 return 1;
1044 else if (! TARGET_SMALLCODE)
1046 const char *entry_name;
1047 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1048 int dwords;
1049 rtx r4 = gen_rtx_REG (SImode, 4);
1050 rtx r5 = gen_rtx_REG (SImode, 5);
1051 rtx r6 = gen_rtx_REG (SImode, 6);
1053 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1054 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1055 force_into (XEXP (operands[0], 0), r4);
1056 force_into (XEXP (operands[1], 0), r5);
1058 dwords = bytes >> 3;
1059 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1060 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1061 return 1;
1063 else
1064 return 0;
1066 if (bytes < 64)
1068 char entry[30];
1069 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1070 rtx r4 = gen_rtx_REG (SImode, 4);
1071 rtx r5 = gen_rtx_REG (SImode, 5);
1073 sprintf (entry, "__movmemSI%d", bytes);
1074 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1075 force_into (XEXP (operands[0], 0), r4);
1076 force_into (XEXP (operands[1], 0), r5);
1077 emit_insn (gen_block_move_real (func_addr_rtx));
1078 return 1;
1081 /* This is the same number of bytes as a memcpy call, but to a different
1082 less common function name, so this will occasionally use more space. */
1083 if (! TARGET_SMALLCODE)
1085 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1086 int final_switch, while_loop;
1087 rtx r4 = gen_rtx_REG (SImode, 4);
1088 rtx r5 = gen_rtx_REG (SImode, 5);
1089 rtx r6 = gen_rtx_REG (SImode, 6);
1091 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1092 force_into (XEXP (operands[0], 0), r4);
1093 force_into (XEXP (operands[1], 0), r5);
1095 /* r6 controls the size of the move. 16 is decremented from it
1096 for each 64 bytes moved. Then the negative bit left over is used
1097 as an index into a list of move instructions.  E.g., a 72-byte move
1098 would be set up with size(r6) = 14, for one iteration through the
1099 big while loop, and a switch of -2 for the last part. */
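/* To make the 72-byte example above concrete (a sketch of the arithmetic
   performed just below): bytes / 4 = 18 words, so final_switch
   = 16 - (18 % 16) = 14 and while_loop = (18 / 16 - 1) * 16 = 0, giving
   r6 = 14; one 16-step decrement in the library loop leaves -2, which
   selects the moves for the trailing 8 bytes.  */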
1101 final_switch = 16 - ((bytes / 4) % 16);
1102 while_loop = ((bytes / 4) / 16 - 1) * 16;
1103 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1104 emit_insn (gen_block_lump_real (func_addr_rtx));
1105 return 1;
1108 return 0;
1111 /* Prepare operands for a move define_expand; specifically, one of the
1112 operands must be in a register. */
1114 int
1115 prepare_move_operands (rtx operands[], enum machine_mode mode)
1117 if ((mode == SImode || mode == DImode)
1118 && flag_pic
1119 && ! ((mode == Pmode || mode == ptr_mode)
1120 && tls_symbolic_operand (operands[1], Pmode) != 0))
1122 rtx temp;
1123 if (SYMBOLIC_CONST_P (operands[1]))
1125 if (GET_CODE (operands[0]) == MEM)
1126 operands[1] = force_reg (Pmode, operands[1]);
1127 else if (TARGET_SHMEDIA
1128 && GET_CODE (operands[1]) == LABEL_REF
1129 && target_reg_operand (operands[0], mode))
1130 /* It's ok. */;
1131 else
1133 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
1134 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1137 else if (GET_CODE (operands[1]) == CONST
1138 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1139 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1141 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
1142 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1143 mode, temp);
1144 operands[1] = expand_binop (mode, add_optab, temp,
1145 XEXP (XEXP (operands[1], 0), 1),
1146 no_new_pseudos ? temp
1147 : gen_reg_rtx (Pmode),
1148 0, OPTAB_LIB_WIDEN);
1152 if (! reload_in_progress && ! reload_completed)
1154 /* Copy the source to a register if both operands aren't registers. */
1155 if (! register_operand (operands[0], mode)
1156 && ! sh_register_operand (operands[1], mode))
1157 operands[1] = copy_to_mode_reg (mode, operands[1]);
1159 if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
1161 /* This is like change_address_1 (operands[0], mode, 0, 1),
1162 except that we can't use that function because it is static. */
1163 rtx new = change_address (operands[0], mode, 0);
1164 MEM_COPY_ATTRIBUTES (new, operands[0]);
1165 operands[0] = new;
1168 /* This case can happen while generating code to move the result
1169 of a library call to the target. Reject `st r0,@(rX,rY)' because
1170 reload will fail to find a spill register for rX, since r0 is already
1171 being used for the source. */
1172 else if (TARGET_SH1
1173 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1174 && GET_CODE (operands[0]) == MEM
1175 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1176 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
1177 operands[1] = copy_to_mode_reg (mode, operands[1]);
1180 if (mode == Pmode || mode == ptr_mode)
1182 rtx op0, op1;
1183 enum tls_model tls_kind;
1185 op0 = operands[0];
1186 op1 = operands[1];
1187 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
1189 rtx tga_op1, tga_ret, tmp, tmp2;
1191 switch (tls_kind)
1193 case TLS_MODEL_GLOBAL_DYNAMIC:
1194 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1195 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1196 op1 = tga_ret;
1197 break;
1199 case TLS_MODEL_LOCAL_DYNAMIC:
1200 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1201 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1203 tmp = gen_reg_rtx (Pmode);
1204 emit_move_insn (tmp, tga_ret);
1206 if (register_operand (op0, Pmode))
1207 tmp2 = op0;
1208 else
1209 tmp2 = gen_reg_rtx (Pmode);
1211 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1212 op1 = tmp2;
1213 break;
1215 case TLS_MODEL_INITIAL_EXEC:
1216 if (! flag_pic)
1218 /* Don't schedule insns for getting GOT address when
1219 the first scheduling pass is enabled, to avoid spill
1220 failures for R0. */
1221 if (flag_schedule_insns)
1222 emit_insn (gen_blockage ());
1223 emit_insn (gen_GOTaddr2picreg ());
1224 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode,
1225 PIC_REG)));
1226 if (flag_schedule_insns)
1227 emit_insn (gen_blockage ());
1229 tga_op1 = no_new_pseudos ? op0 : gen_reg_rtx (Pmode);
1230 tmp = gen_sym2GOTTPOFF (op1);
1231 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1232 op1 = tga_op1;
1233 break;
1235 case TLS_MODEL_LOCAL_EXEC:
1236 tmp2 = gen_reg_rtx (Pmode);
1237 emit_insn (gen_load_gbr (tmp2));
1238 tmp = gen_reg_rtx (Pmode);
1239 emit_insn (gen_symTPOFF2reg (tmp, op1));
1241 if (register_operand (op0, Pmode))
1242 op1 = op0;
1243 else
1244 op1 = gen_reg_rtx (Pmode);
1246 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1247 break;
1249 default:
1250 gcc_unreachable ();
1252 operands[1] = op1;
1256 return 0;
1259 /* Prepare the operands for an scc instruction; make sure that the
1260 compare has been done. */
1261 rtx
1262 prepare_scc_operands (enum rtx_code code)
1264 rtx t_reg = gen_rtx_REG (SImode, T_REG);
1265 enum rtx_code oldcode = code;
1266 enum machine_mode mode;
1268 /* First need a compare insn. */
1269 switch (code)
1271 case NE:
1272 /* It isn't possible to handle this case. */
1273 gcc_unreachable ();
1274 case LT:
1275 code = GT;
1276 break;
1277 case LE:
1278 code = GE;
1279 break;
1280 case LTU:
1281 code = GTU;
1282 break;
1283 case LEU:
1284 code = GEU;
1285 break;
1286 default:
1287 break;
1289 if (code != oldcode)
1291 rtx tmp = sh_compare_op0;
1292 sh_compare_op0 = sh_compare_op1;
1293 sh_compare_op1 = tmp;
1296 mode = GET_MODE (sh_compare_op0);
1297 if (mode == VOIDmode)
1298 mode = GET_MODE (sh_compare_op1);
1300 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1301 if ((code != EQ && code != NE
1302 && (sh_compare_op1 != const0_rtx
1303 || code == GTU || code == GEU || code == LTU || code == LEU))
1304 || (mode == DImode && sh_compare_op1 != const0_rtx)
1305 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1306 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1308 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1309 (mode == SFmode ? emit_sf_insn : emit_df_insn)
1310 (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
1311 gen_rtx_SET (VOIDmode, t_reg,
1312 gen_rtx_fmt_ee (code, SImode,
1313 sh_compare_op0, sh_compare_op1)),
1314 gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
1315 else
1316 emit_insn (gen_rtx_SET (VOIDmode, t_reg,
1317 gen_rtx_fmt_ee (code, SImode,
1318 sh_compare_op0, sh_compare_op1)));
1320 return t_reg;
1323 /* Called from the md file; sets up the operands of a compare instruction.  */
1325 void
1326 from_compare (rtx *operands, int code)
1328 enum machine_mode mode = GET_MODE (sh_compare_op0);
1329 rtx insn;
1330 if (mode == VOIDmode)
1331 mode = GET_MODE (sh_compare_op1);
1332 if (code != EQ
1333 || mode == DImode
1334 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1336 /* Force args into regs, since we can't use constants here. */
1337 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1338 if (sh_compare_op1 != const0_rtx
1339 || code == GTU || code == GEU
1340 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1341 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1343 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
1345 from_compare (operands, GT);
1346 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
1348 else
1349 insn = gen_rtx_SET (VOIDmode,
1350 gen_rtx_REG (SImode, T_REG),
1351 gen_rtx_fmt_ee (code, SImode,
1352 sh_compare_op0, sh_compare_op1));
1353 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1355 insn = gen_rtx_PARALLEL (VOIDmode,
1356 gen_rtvec (2, insn,
1357 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
1358 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
1360 else
1361 emit_insn (insn);
1364 /* Functions to output assembly code. */
1366 /* Return a sequence of instructions to perform a DI or DF move.
1368 Since the SH cannot move a DI or DF in one instruction, we have
1369 to take care when we see overlapping source and dest registers. */
1371 const char *
1372 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
1373 enum machine_mode mode)
1375 rtx dst = operands[0];
1376 rtx src = operands[1];
1378 if (GET_CODE (dst) == MEM
1379 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
1380 return "mov.l %T1,%0\n\tmov.l %1,%0";
1382 if (register_operand (dst, mode)
1383 && register_operand (src, mode))
1385 if (REGNO (src) == MACH_REG)
1386 return "sts mach,%S0\n\tsts macl,%R0";
1388 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1389 when mov.d r1,r0 do r1->r0 then r2->r1. */
1391 if (REGNO (src) + 1 == REGNO (dst))
1392 return "mov %T1,%T0\n\tmov %1,%0";
1393 else
1394 return "mov %1,%0\n\tmov %T1,%T0";
1396 else if (GET_CODE (src) == CONST_INT)
1398 if (INTVAL (src) < 0)
1399 output_asm_insn ("mov #-1,%S0", operands);
1400 else
1401 output_asm_insn ("mov #0,%S0", operands);
1403 return "mov %1,%R0";
1405 else if (GET_CODE (src) == MEM)
1407 int ptrreg = -1;
1408 int dreg = REGNO (dst);
1409 rtx inside = XEXP (src, 0);
1411 switch (GET_CODE (inside))
1413 case REG:
1414 ptrreg = REGNO (inside);
1415 break;
1417 case SUBREG:
1418 ptrreg = subreg_regno (inside);
1419 break;
1421 case PLUS:
1422 ptrreg = REGNO (XEXP (inside, 0));
1423 /* ??? A r0+REG address shouldn't be possible here, because it isn't
1424 an offsettable address. Unfortunately, offsettable addresses use
1425 QImode to check the offset, and a QImode offsettable address
1426 requires r0 for the other operand, which is not currently
1427 supported, so we can't use the 'o' constraint.
1428 Thus we must check for and handle r0+REG addresses here.
1429 We punt for now, since this is likely very rare. */
1430 gcc_assert (GET_CODE (XEXP (inside, 1)) != REG);
1431 break;
1433 case LABEL_REF:
1434 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1435 case POST_INC:
1436 return "mov.l %1,%0\n\tmov.l %1,%T0";
1437 default:
1438 gcc_unreachable ();
1441 /* Work out the safe way to copy. Copy into the second half first. */
1442 if (dreg == ptrreg)
1443 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1446 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1449 /* Print an instruction which would have gone into a delay slot after
1450 another instruction, but couldn't because the other instruction expanded
1451 into a sequence where putting the slot insn at the end wouldn't work. */
1453 static void
1454 print_slot (rtx insn)
1456 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
1458 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
1461 const char *
1462 output_far_jump (rtx insn, rtx op)
1464 struct { rtx lab, reg, op; } this;
1465 rtx braf_base_lab = NULL_RTX;
1466 const char *jump;
1467 int far;
1468 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1469 rtx prev;
1471 this.lab = gen_label_rtx ();
1473 if (TARGET_SH2
1474 && offset >= -32764
1475 && offset - get_attr_length (insn) <= 32766)
1477 far = 0;
1478 jump = "mov.w %O0,%1; braf %1";
1480 else
1482 far = 1;
1483 if (flag_pic)
1485 if (TARGET_SH2)
1486 jump = "mov.l %O0,%1; braf %1";
1487 else
1488 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1490 else
1491 jump = "mov.l %O0,%1; jmp @%1";
1493 /* If we have a scratch register available, use it. */
1494 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1495 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1497 this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1498 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1499 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1500 output_asm_insn (jump, &this.lab);
1501 if (dbr_sequence_length ())
1502 print_slot (final_sequence);
1503 else
1504 output_asm_insn ("nop", 0);
1506 else
1508 /* Output the delay slot insn first if any. */
1509 if (dbr_sequence_length ())
1510 print_slot (final_sequence);
1512 this.reg = gen_rtx_REG (SImode, 13);
1513 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1514 Fortunately, MACL is fixed and call-clobbered, and we never
1515 need its value across jumps, so save r13 in it instead of in
1516 the stack. */
1517 if (TARGET_SH5)
1518 output_asm_insn ("lds r13, macl", 0);
1519 else
1520 output_asm_insn ("mov.l r13,@-r15", 0);
1521 output_asm_insn (jump, &this.lab);
1522 if (TARGET_SH5)
1523 output_asm_insn ("sts macl, r13", 0);
1524 else
1525 output_asm_insn ("mov.l @r15+,r13", 0);
1527 if (far && flag_pic && TARGET_SH2)
1529 braf_base_lab = gen_label_rtx ();
1530 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1531 CODE_LABEL_NUMBER (braf_base_lab));
1533 if (far)
1534 output_asm_insn (".align 2", 0);
1535 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1536 this.op = op;
1537 if (far && flag_pic)
1539 if (TARGET_SH2)
1540 this.lab = braf_base_lab;
1541 output_asm_insn (".long %O2-%O0", &this.lab);
1543 else
1544 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
1545 return "";
1548 /* Local label counter, used for constants in the pool and inside
1549 pattern branches. */
1551 static int lf = 100;
1553 /* Output code for ordinary branches. */
1555 const char *
1556 output_branch (int logic, rtx insn, rtx *operands)
1558 switch (get_attr_length (insn))
1560 case 6:
1561 /* This can happen if filling the delay slot has caused a forward
1562 branch to exceed its range (we could reverse it, but only
1563 when we know we won't overextend other branches; this is
1564 best handled by relaxation).
1565 It can also happen when other condbranches hoist a delay slot insn
1566 from their destination, thus leading to a code size increase.
1567 But the branch will still be in the range -4092..+4098 bytes. */
1569 if (! TARGET_RELAX)
1571 int label = lf++;
1572 /* The call to print_slot will clobber the operands. */
1573 rtx op0 = operands[0];
1575 /* If the instruction in the delay slot is annulled (true), then
1576 there is no delay slot where we can put it now. The only safe
1577 place for it is after the label. final will do that by default. */
1579 if (final_sequence
1580 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1581 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1583 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
1584 ASSEMBLER_DIALECT ? "/" : ".", label);
1585 print_slot (final_sequence);
1587 else
1588 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
1590 output_asm_insn ("bra\t%l0", &op0);
1591 fprintf (asm_out_file, "\tnop\n");
1592 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1594 return "";
1596 /* When relaxing, handle this like a short branch. The linker
1597 will fix it up if it still doesn't fit after relaxation. */
1598 case 2:
1599 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
1601 /* These are for SH2e, in which we have to account for the
1602 extra nop because of the hardware bug in annulled branches. */
1603 case 8:
1604 if (! TARGET_RELAX)
1606 int label = lf++;
1608 gcc_assert (!final_sequence
1609 || !(INSN_ANNULLED_BRANCH_P
1610 (XVECEXP (final_sequence, 0, 0))));
1611 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
1612 logic ? "f" : "t",
1613 ASSEMBLER_DIALECT ? "/" : ".", label);
1614 fprintf (asm_out_file, "\tnop\n");
1615 output_asm_insn ("bra\t%l0", operands);
1616 fprintf (asm_out_file, "\tnop\n");
1617 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1619 return "";
1621 /* When relaxing, fall through. */
1622 case 4:
1624 char buffer[10];
1626 sprintf (buffer, "b%s%ss\t%%l0",
1627 logic ? "t" : "f",
1628 ASSEMBLER_DIALECT ? "/" : ".");
1629 output_asm_insn (buffer, &operands[0]);
1630 return "nop";
1633 default:
1634 /* There should be no longer branches now - that would
1635 indicate that something has destroyed the branches set
1636 up in machine_dependent_reorg. */
1637 gcc_unreachable ();
1641 const char *
1642 output_branchy_insn (enum rtx_code code, const char *template,
1643 rtx insn, rtx *operands)
1645 rtx next_insn = NEXT_INSN (insn);
1647 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
1649 rtx src = SET_SRC (PATTERN (next_insn));
1650 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
1652 /* Following branch not taken */
1653 operands[9] = gen_label_rtx ();
1654 emit_label_after (operands[9], next_insn);
1655 INSN_ADDRESSES_NEW (operands[9],
1656 INSN_ADDRESSES (INSN_UID (next_insn))
1657 + get_attr_length (next_insn));
1658 return template;
1660 else
1662 int offset = (branch_dest (next_insn)
1663 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
1664 if (offset >= -252 && offset <= 258)
1666 if (GET_CODE (src) == IF_THEN_ELSE)
1667 /* branch_true */
1668 src = XEXP (src, 1);
1669 operands[9] = src;
1670 return template;
1674 operands[9] = gen_label_rtx ();
1675 emit_label_after (operands[9], insn);
1676 INSN_ADDRESSES_NEW (operands[9],
1677 INSN_ADDRESSES (INSN_UID (insn))
1678 + get_attr_length (insn));
1679 return template;
1682 const char *
1683 output_ieee_ccmpeq (rtx insn, rtx *operands)
1685 return output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);
1688 /* Output the start of the assembler file. */
1690 static void
1691 sh_file_start (void)
1693 default_file_start ();
1695 #ifdef SYMBIAN
1696 /* Declare the .directive section before it is used. */
1697 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
1698 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
1699 #endif
1701 if (TARGET_ELF)
1702 /* We need to show the text section with the proper
1703 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
1704 emits it without attributes, else GAS
1705 will complain. We can teach GAS specifically about the
1706 default attributes for our choice of text section, but
1707 then we would have to change GAS again if/when we change
1708 the text section name. */
1709 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
1710 else
1711 /* Switch to the data section so that the coffsem symbol
1712 isn't in the text section. */
1713 data_section ();
1715 if (TARGET_LITTLE_ENDIAN)
1716 fputs ("\t.little\n", asm_out_file);
1718 if (!TARGET_ELF)
1720 if (TARGET_SHCOMPACT)
1721 fputs ("\t.mode\tSHcompact\n", asm_out_file);
1722 else if (TARGET_SHMEDIA)
1723 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1724 TARGET_SHMEDIA64 ? 64 : 32);
1728 /* Check if PAT includes an UNSPEC_CALLER unspec pattern.  */
1730 static bool
1731 unspec_caller_rtx_p (rtx pat)
1733 switch (GET_CODE (pat))
1735 case CONST:
1736 return unspec_caller_rtx_p (XEXP (pat, 0));
1737 case PLUS:
1738 case MINUS:
1739 if (unspec_caller_rtx_p (XEXP (pat, 0)))
1740 return true;
1741 return unspec_caller_rtx_p (XEXP (pat, 1));
1742 case UNSPEC:
1743 if (XINT (pat, 1) == UNSPEC_CALLER)
1744 return true;
1745 default:
1746 break;
1749 return false;
1752 /* Indicate that INSN cannot be duplicated.  This is true for insns
1753 that generate a unique label.  */
1755 static bool
1756 sh_cannot_copy_insn_p (rtx insn)
1758 rtx pat;
1760 if (!reload_completed || !flag_pic)
1761 return false;
1763 if (GET_CODE (insn) != INSN)
1764 return false;
1765 if (asm_noperands (insn) >= 0)
1766 return false;
1768 pat = PATTERN (insn);
1769 if (GET_CODE (pat) != SET)
1770 return false;
1771 pat = SET_SRC (pat);
1773 if (unspec_caller_rtx_p (pat))
1774 return true;
1776 return false;
1779 /* Actual number of instructions used to make a shift by N. */
1780 static const char ashiftrt_insns[] =
1781 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
1783 /* Left shift and logical right shift are the same. */
1784 static const char shift_insns[] =
1785 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1787 /* Individual shift amounts needed to get the above length sequences.
1788 One bit right shifts clobber the T bit, so when possible, put one bit
1789 shifts in the middle of the sequence, so the ends are eligible for
1790 branch delay slots. */
1791 static const short shift_amounts[32][5] = {
1792 {0}, {1}, {2}, {2, 1},
1793 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
1794 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1795 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
1796 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1797 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1798 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1799 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
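/* Worked example (for illustration): a shift by 7 uses shift_amounts[7]
   = {2, 2, 1, 2} (2 + 2 + 1 + 2 = 7), matching shift_insns[7] = 4
   instructions; the single-bit shift sits in the middle so that the first
   and last shifts stay eligible for branch delay slots, as noted above.  */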
1801 /* Likewise, but for shift amounts < 16, up to three highmost bits
1802 might be clobbered. This is typically used when combined with some
1803 kind of sign or zero extension. */
1805 static const char ext_shift_insns[] =
1806 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1808 static const short ext_shift_amounts[32][4] = {
1809 {0}, {1}, {2}, {2, 1},
1810 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
1811 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1812 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
1813 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1814 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1815 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1816 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
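/* For example, ext_shift_amounts[6] == {8, -2}: shifting by 8 and back by 2
   gives a net left shift of 6 in only ext_shift_insns[6] == 2 insns, at the
   cost of clearing the two highmost bits - harmless in the sign / zero
   extension contexts mentioned above.  */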
1818 /* Assuming we have a value that has been sign-extended by at least one bit,
1819 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
1820 to shift it by N without data loss, and quicker than by other means? */
1821 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
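/* ((n) | 8) == 15 holds exactly for n == 7 and n == 15 - the two counts
   whose ext_shift_amounts sequences, {8, -1} and {16, -1}, end in a single
   one-bit right shift that can be turned into an arithmetic shift.  */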
1823 /* This is used in length attributes in sh.md to help compute the length
1824 of arbitrary constant shift instructions. */
1827 shift_insns_rtx (rtx insn)
1829 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1830 int shift_count = INTVAL (XEXP (set_src, 1));
1831 enum rtx_code shift_code = GET_CODE (set_src);
1833 switch (shift_code)
1835 case ASHIFTRT:
1836 return ashiftrt_insns[shift_count];
1837 case LSHIFTRT:
1838 case ASHIFT:
1839 return shift_insns[shift_count];
1840 default:
1841 gcc_unreachable ();
1845 /* Return the cost of a shift. */
1847 static inline int
1848 shiftcosts (rtx x)
1850 int value;
1852 if (TARGET_SHMEDIA)
1853 return 1;
1855 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
1857 if (GET_MODE (x) == DImode
1858 && GET_CODE (XEXP (x, 1)) == CONST_INT
1859 && INTVAL (XEXP (x, 1)) == 1)
1860 return 2;
1862 /* Everything else is invalid, because there is no pattern for it. */
1863 return 10000;
1865 /* If shift by a non constant, then this will be expensive. */
1866 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1867 return SH_DYNAMIC_SHIFT_COST;
1869 value = INTVAL (XEXP (x, 1));
1871 /* Otherwise, return the true cost in instructions. */
1872 if (GET_CODE (x) == ASHIFTRT)
1874 int cost = ashiftrt_insns[value];
1875 /* If SH3, then we put the constant in a reg and use shad. */
1876 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1877 cost = 1 + SH_DYNAMIC_SHIFT_COST;
1878 return cost;
1880 else
1881 return shift_insns[value];
1884 /* Return the cost of an AND operation. */
1886 static inline int
1887 andcosts (rtx x)
1889 int i;
1891 /* Anding with a register is a single cycle and instruction. */
1892 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1893 return 1;
1895 i = INTVAL (XEXP (x, 1));
1897 if (TARGET_SHMEDIA)
1899 if ((GET_CODE (XEXP (x, 1)) == CONST_INT
1900 && CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1901 || EXTRA_CONSTRAINT_C16 (XEXP (x, 1)))
1902 return 1;
1903 else
1904 return 2;
1907 /* These constants are single cycle extu.[bw] instructions. */
1908 if (i == 0xff || i == 0xffff)
1909 return 1;
1910 /* Constants that can be used in an and immediate instruction in a single
1911 cycle, but this requires r0, so make it a little more expensive. */
1912 if (CONST_OK_FOR_K08 (i))
1913 return 2;
1914 /* Constants that can be loaded with a mov immediate and an and.
1915 This case is probably unnecessary. */
1916 if (CONST_OK_FOR_I08 (i))
1917 return 2;
1918 /* Any other constant requires a 2 cycle pc-relative load plus an and.
1919 This case is probably unnecessary. */
1920 return 3;
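/* So, for instance, anding with 0xff costs 1 via the extu case above, a
   mask such as 0x7f that only passes CONST_OK_FOR_K08 costs 2 because it
   ties up r0, and a mask like 0x1234 that passes neither check falls
   through to the final cost of 3 (constant pool load plus the and).  */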
1923 /* Return the cost of an addition or a subtraction. */
1925 static inline int
1926 addsubcosts (rtx x)
1928 /* Adding a register is a single cycle insn. */
1929 if (GET_CODE (XEXP (x, 1)) == REG
1930 || GET_CODE (XEXP (x, 1)) == SUBREG)
1931 return 1;
1933 /* Likewise for small constants. */
1934 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1935 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
1936 return 1;
1938 if (TARGET_SHMEDIA)
1939 switch (GET_CODE (XEXP (x, 1)))
1941 case CONST:
1942 case LABEL_REF:
1943 case SYMBOL_REF:
1944 return TARGET_SHMEDIA64 ? 5 : 3;
1946 case CONST_INT:
1947 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1948 return 2;
1949 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
1950 return 3;
1951 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
1952 return 4;
1954 /* Fall through. */
1955 default:
1956 return 5;
1959 /* Any other constant requires a 2 cycle pc-relative load plus an
1960 addition. */
1961 return 3;
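/* In short, for the non-SHmedia targets: adding a register, or a constant
   accepted by CONST_OK_FOR_ADD, is one insn; any other constant is charged
   3 to cover the pc-relative constant load plus the add itself.  */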
1964 /* Return the cost of a multiply. */
1965 static inline int
1966 multcosts (rtx x ATTRIBUTE_UNUSED)
1968 if (*sh_multcost_str)
1969 return atoi (sh_multcost_str);
1970 if (TARGET_SHMEDIA)
1971 /* ??? We have a mul insn, but it has a latency of three, and doesn't
1972 accept constants. Ideally, we would use a cost of one or two and
1973 add the cost of the operand, but disregard the latter when inside loops
1974 and loop invariant code motion is still to follow.
1975 Using a multiply first and splitting it later if it's a loss
1976 doesn't work because of different sign / zero extension semantics
1977 of multiplies vs. shifts. */
1978 return TARGET_SMALLCODE ? 2 : 3;
1980 if (TARGET_SH2)
1982 /* We have a mul insn, so we can never take more than the mul and the
1983 read of the mac reg, but count more because of the latency and extra
1984 reg usage. */
1985 if (TARGET_SMALLCODE)
1986 return 2;
1987 return 3;
1990 /* If we're aiming at small code, then just count the number of
1991 insns in a multiply call sequence. */
1992 if (TARGET_SMALLCODE)
1993 return 5;
1995 /* Otherwise count all the insns in the routine we'd be calling too. */
1996 return 20;
1999 /* Compute a (partial) cost for rtx X. Return true if the complete
2000 cost has been computed, and false if subexpressions should be
2001 scanned. In either case, *TOTAL contains the cost result. */
2003 static bool
2004 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
2006 switch (code)
2008 case CONST_INT:
2009 if (TARGET_SHMEDIA)
2011 if (INTVAL (x) == 0)
2012 *total = 0;
2013 else if (outer_code == AND && and_operand ((x), DImode))
2014 *total = 0;
2015 else if ((outer_code == IOR || outer_code == XOR
2016 || outer_code == PLUS)
2017 && CONST_OK_FOR_I10 (INTVAL (x)))
2018 *total = 0;
2019 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2020 *total = COSTS_N_INSNS (outer_code != SET);
2021 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2022 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2023 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2024 *total = COSTS_N_INSNS (3);
2025 else
2026 *total = COSTS_N_INSNS (4);
2027 return true;
2029 if (CONST_OK_FOR_I08 (INTVAL (x)))
2030 *total = 0;
2031 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2032 && CONST_OK_FOR_K08 (INTVAL (x)))
2033 *total = 1;
2034 else
2035 *total = 8;
2036 return true;
2038 case CONST:
2039 case LABEL_REF:
2040 case SYMBOL_REF:
2041 if (TARGET_SHMEDIA64)
2042 *total = COSTS_N_INSNS (4);
2043 else if (TARGET_SHMEDIA32)
2044 *total = COSTS_N_INSNS (2);
2045 else
2046 *total = 5;
2047 return true;
2049 case CONST_DOUBLE:
2050 if (TARGET_SHMEDIA)
2051 *total = COSTS_N_INSNS (4);
2052 else
2053 *total = 10;
2054 return true;
2055 case CONST_VECTOR:
2056 if (x == CONST0_RTX (GET_MODE (x)))
2057 *total = 0;
2058 else if (sh_1el_vec (x, VOIDmode))
2059 *total = outer_code != SET;
2060 else if (sh_rep_vec (x, VOIDmode))
2061 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2062 + (outer_code != SET));
2063 else *total = COSTS_N_INSNS (3) + (outer_code != SET);
2064 return true;
2066 case PLUS:
2067 case MINUS:
2068 *total = COSTS_N_INSNS (addsubcosts (x));
2069 return true;
2071 case AND:
2072 *total = COSTS_N_INSNS (andcosts (x));
2073 return true;
2075 case MULT:
2076 *total = COSTS_N_INSNS (multcosts (x));
2077 return true;
2079 case ASHIFT:
2080 case ASHIFTRT:
2081 case LSHIFTRT:
2082 *total = COSTS_N_INSNS (shiftcosts (x));
2083 return true;
2085 case DIV:
2086 case UDIV:
2087 case MOD:
2088 case UMOD:
2089 *total = COSTS_N_INSNS (20);
2090 return true;
2092 case PARALLEL:
2093 if (sh_1el_vec (x, VOIDmode))
2094 *total = outer_code != SET;
2095 else if (sh_rep_vec (x, VOIDmode))
2096 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2097 + (outer_code != SET));
2098 else *total = COSTS_N_INSNS (3) + (outer_code != SET);
2099 return true;
2101 case FLOAT:
2102 case FIX:
2103 *total = 100;
2104 return true;
2106 default:
2107 return false;
2111 /* Compute the cost of an address. For the SH, almost all addresses cost
2112 the same. Use a slightly higher cost for reg + reg addressing,
2113 since it increases pressure on r0. */
2115 static int
2116 sh_address_cost (rtx X)
2118 return (GET_CODE (X) == PLUS
2119 && ! CONSTANT_P (XEXP (X, 1))
2120 && ! TARGET_SHMEDIA ? 1 : 0);
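/* That is, on the non-SHmedia targets a reg + reg address such as @(r0,rn)
   is charged 1, while register-indirect and reg + displacement addresses
   (and everything on SHmedia) are charged 0.  */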
2123 /* Code to expand a shift. */
2125 void
2126 gen_ashift (int type, int n, rtx reg)
2128 /* Negative values here come from the shift_amounts array. */
2129 if (n < 0)
2131 if (type == ASHIFT)
2132 type = LSHIFTRT;
2133 else
2134 type = ASHIFT;
2135 n = -n;
2138 switch (type)
2140 case ASHIFTRT:
2141 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
2142 break;
2143 case LSHIFTRT:
2144 if (n == 1)
2145 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
2146 else
2147 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
2148 break;
2149 case ASHIFT:
2150 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
2151 break;
2155 /* Same for HImode */
2157 void
2158 gen_ashift_hi (int type, int n, rtx reg)
2160 /* Negative values here come from the shift_amounts array. */
2161 if (n < 0)
2163 if (type == ASHIFT)
2164 type = LSHIFTRT;
2165 else
2166 type = ASHIFT;
2167 n = -n;
2170 switch (type)
2172 case ASHIFTRT:
2173 case LSHIFTRT:
2174 /* We don't have HImode right shift operations because using the
2175 ordinary 32 bit shift instructions for that doesn't generate proper
2176 zero/sign extension.
2177 gen_ashift_hi is only called in contexts where we know that the
2178 sign extension works out correctly. */
2180 int offset = 0;
2181 if (GET_CODE (reg) == SUBREG)
2183 offset = SUBREG_BYTE (reg);
2184 reg = SUBREG_REG (reg);
2186 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
2187 break;
2189 case ASHIFT:
2190 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
2191 break;
2195 /* Output RTL to split a constant shift into its component SH constant
2196 shift instructions. */
2198 void
2199 gen_shifty_op (int code, rtx *operands)
2201 int value = INTVAL (operands[2]);
2202 int max, i;
2204 /* Truncate the shift count in case it is out of bounds. */
2205 value = value & 0x1f;
2207 if (value == 31)
2209 if (code == LSHIFTRT)
2211 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
2212 emit_insn (gen_movt (operands[0]));
2213 return;
2215 else if (code == ASHIFT)
2217 /* There is a two instruction sequence for 31 bit left shifts,
2218 but it requires r0. */
2219 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
2221 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
2222 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
2223 return;
2227 else if (value == 0)
2229 /* This can happen even when optimizing, if there were subregs before
2230 reload. Don't output a nop here, as this is never optimized away;
2231 use a no-op move instead. */
2232 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
2233 return;
2236 max = shift_insns[value];
2237 for (i = 0; i < max; i++)
2238 gen_ashift (code, shift_amounts[value][i], operands[0]);
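/* Thus a constant shift by 6 expands into the three gen_ashift calls given
   by shift_amounts[6] == {2, 2, 2}; the counts 31 and 0 are handled by the
   special cases above before the table is consulted.  */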
2241 /* Same as above, but optimized for values where the topmost bits don't
2242 matter. */
2244 void
2245 gen_shifty_hi_op (int code, rtx *operands)
2247 int value = INTVAL (operands[2]);
2248 int max, i;
2249 void (*gen_fun) (int, int, rtx);
2251 /* This operation is used by and_shl for SImode values with a few
2252 high bits known to be cleared. */
2253 value &= 31;
2254 if (value == 0)
2256 emit_insn (gen_nop ());
2257 return;
2260 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2261 if (code == ASHIFT)
2263 max = ext_shift_insns[value];
2264 for (i = 0; i < max; i++)
2265 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2267 else
2268 /* When shifting right, emit the shifts in reverse order, so that
2269 solitary negative values come first. */
2270 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2271 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2274 /* Output RTL for an arithmetic right shift. */
2276 /* ??? Rewrite to use super-optimizer sequences. */
2279 expand_ashiftrt (rtx *operands)
2281 rtx wrk;
2282 char func[18];
2283 int value;
2285 if (TARGET_SH3)
2287 if (GET_CODE (operands[2]) != CONST_INT)
2289 rtx count = copy_to_mode_reg (SImode, operands[2]);
2290 emit_insn (gen_negsi2 (count, count));
2291 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2292 return 1;
2294 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2295 > 1 + SH_DYNAMIC_SHIFT_COST)
2297 rtx count
2298 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2299 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2300 return 1;
2303 if (GET_CODE (operands[2]) != CONST_INT)
2304 return 0;
2306 value = INTVAL (operands[2]) & 31;
2308 if (value == 31)
2310 /* If we are called from abs expansion, arrange things so that we
2311 can use a single MT instruction that doesn't clobber the source,
2312 if LICM can hoist out the load of the constant zero. */
2313 if (currently_expanding_to_rtl)
2315 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
2316 operands[1]));
2317 emit_insn (gen_mov_neg_si_t (operands[0]));
2318 return 1;
2320 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2321 return 1;
2323 else if (value >= 16 && value <= 19)
2325 wrk = gen_reg_rtx (SImode);
2326 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2327 value -= 16;
2328 while (value--)
2329 gen_ashift (ASHIFTRT, 1, wrk);
2330 emit_move_insn (operands[0], wrk);
2331 return 1;
2333 /* Expand a short sequence inline; for longer shifts, call a magic routine. */
2334 else if (value <= 5)
2336 wrk = gen_reg_rtx (SImode);
2337 emit_move_insn (wrk, operands[1]);
2338 while (value--)
2339 gen_ashift (ASHIFTRT, 1, wrk);
2340 emit_move_insn (operands[0], wrk);
2341 return 1;
2344 wrk = gen_reg_rtx (Pmode);
2346 /* Load the value into an arg reg and call a helper. */
2347 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2348 sprintf (func, "__ashiftrt_r4_%d", value);
2349 function_symbol (wrk, func, SFUNC_STATIC);
2350 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2351 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
2352 return 1;
2356 sh_dynamicalize_shift_p (rtx count)
2358 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
2361 /* Try to find a good way to implement the combiner pattern
2362 [(set (match_operand:SI 0 "register_operand" "r")
2363 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2364 (match_operand:SI 2 "const_int_operand" "n"))
2365 (match_operand:SI 3 "const_int_operand" "n"))) .
2366 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2367 return 0 for simple right / left or left/right shift combination.
2368 return 1 for a combination of shifts with zero_extend.
2369 return 2 for a combination of shifts with an AND that needs r0.
2370 return 3 for a combination of shifts with an AND that needs an extra
2371 scratch register, when the three highmost bits of the AND mask are clear.
2372 return 4 for a combination of shifts with an AND that needs an extra
2373 scratch register, when any of the three highmost bits of the AND mask
2374 is set.
2375 If ATTRP is set, store an initial right shift width in ATTRP[0],
2376 and the instruction length in ATTRP[1]. These values are not valid
2377 when returning 0.
2378 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2379 shift_amounts for the last shift value that is to be used before the
2380 sign extend. */
2382 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2384 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2385 int left = INTVAL (left_rtx), right;
2386 int best = 0;
2387 int cost, best_cost = 10000;
2388 int best_right = 0, best_len = 0;
2389 int i;
2390 int can_ext;
2392 if (left < 0 || left > 31)
2393 return 0;
2394 if (GET_CODE (mask_rtx) == CONST_INT)
2395 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2396 else
2397 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2398 /* Can this be expressed as a right shift / left shift pair? */
2399 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2400 right = exact_log2 (lsb);
2401 mask2 = ~(mask + lsb - 1);
2402 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2403 /* mask has no zeroes except trailing zeroes <==> ! mask2 */
2404 if (! mask2)
2405 best_cost = shift_insns[right] + shift_insns[right + left];
2406 /* mask has no trailing zeroes <==> ! right */
2407 else if (! right && mask2 == ~(lsb2 - 1))
2409 int late_right = exact_log2 (lsb2);
2410 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2412 /* Try to use zero extend. */
2413 if (mask2 == ~(lsb2 - 1))
2415 int width, first;
2417 for (width = 8; width <= 16; width += 8)
2419 /* Can we zero-extend right away? */
2420 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2422 cost
2423 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2424 if (cost < best_cost)
2426 best = 1;
2427 best_cost = cost;
2428 best_right = right;
2429 best_len = cost;
2430 if (attrp)
2431 attrp[2] = -1;
2433 continue;
2435 /* ??? Could try to put zero extend into initial right shift,
2436 or even shift a bit left before the right shift. */
2437 /* Determine value of first part of left shift, to get to the
2438 zero extend cut-off point. */
2439 first = width - exact_log2 (lsb2) + right;
2440 if (first >= 0 && right + left - first >= 0)
2442 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2443 + ext_shift_insns[right + left - first];
2444 if (cost < best_cost)
2446 best = 1;
2447 best_cost = cost;
2448 best_right = right;
2449 best_len = cost;
2450 if (attrp)
2451 attrp[2] = first;
2456 /* Try to use r0 AND pattern */
2457 for (i = 0; i <= 2; i++)
2459 if (i > right)
2460 break;
2461 if (! CONST_OK_FOR_K08 (mask >> i))
2462 continue;
2463 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2464 if (cost < best_cost)
2466 best = 2;
2467 best_cost = cost;
2468 best_right = i;
2469 best_len = cost - 1;
2472 /* Try to use a scratch register to hold the AND operand. */
2473 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2474 for (i = 0; i <= 2; i++)
2476 if (i > right)
2477 break;
2478 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2479 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2480 if (cost < best_cost)
2482 best = 4 - can_ext;
2483 best_cost = cost;
2484 best_right = i;
2485 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2489 if (attrp)
2491 attrp[0] = best_right;
2492 attrp[1] = best_len;
2494 return best;
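/* As a worked example of the costs above: for (x << 1) & 0x1fe the plain
   shift / shift combination needs 3 insns, but the zero-extend path needs
   only 2 (an 8-bit zero extension plus one left shift), so this returns 1
   with ATTRP[1] set to 2.  */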
2497 /* This is used in length attributes of the unnamed instructions
2498 corresponding to shl_and_kind return values of 1 and 2. */
2500 shl_and_length (rtx insn)
2502 rtx set_src, left_rtx, mask_rtx;
2503 int attributes[3];
2505 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2506 left_rtx = XEXP (XEXP (set_src, 0), 1);
2507 mask_rtx = XEXP (set_src, 1);
2508 shl_and_kind (left_rtx, mask_rtx, attributes);
2509 return attributes[1];
2512 /* This is used in length attribute of the and_shl_scratch instruction. */
2515 shl_and_scr_length (rtx insn)
2517 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2518 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2519 rtx op = XEXP (set_src, 0);
2520 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2521 op = XEXP (XEXP (op, 0), 0);
2522 return len + shift_insns[INTVAL (XEXP (op, 1))];
2525 /* Generate rtl for instructions for which shl_and_kind advised a particular
2526 method of generating them, i.e. returned nonzero.
2529 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2531 int attributes[3];
2532 unsigned HOST_WIDE_INT mask;
2533 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2534 int right, total_shift;
2535 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2537 right = attributes[0];
2538 total_shift = INTVAL (left_rtx) + right;
2539 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2540 switch (kind)
2542 default:
2543 return -1;
2544 case 1:
2546 int first = attributes[2];
2547 rtx operands[3];
2549 if (first < 0)
2551 emit_insn ((mask << right) <= 0xff
2552 ? gen_zero_extendqisi2 (dest,
2553 gen_lowpart (QImode, source))
2554 : gen_zero_extendhisi2 (dest,
2555 gen_lowpart (HImode, source)));
2556 source = dest;
2558 if (source != dest)
2559 emit_insn (gen_movsi (dest, source));
2560 operands[0] = dest;
2561 if (right)
2563 operands[2] = GEN_INT (right);
2564 gen_shifty_hi_op (LSHIFTRT, operands);
2566 if (first > 0)
2568 operands[2] = GEN_INT (first);
2569 gen_shifty_hi_op (ASHIFT, operands);
2570 total_shift -= first;
2571 mask <<= first;
2573 if (first >= 0)
2574 emit_insn (mask <= 0xff
2575 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
2576 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2577 if (total_shift > 0)
2579 operands[2] = GEN_INT (total_shift);
2580 gen_shifty_hi_op (ASHIFT, operands);
2582 break;
2584 case 4:
2585 shift_gen_fun = gen_shifty_op;
2586 case 3:
2587 /* If the topmost bit that matters is set, set the topmost bits
2588 that don't matter. This way, we might be able to get a shorter
2589 signed constant. */
2590 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
2591 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
2592 case 2:
2593 /* Don't expand fine-grained when combining, because that will
2594 make the pattern fail. */
2595 if (currently_expanding_to_rtl
2596 || reload_in_progress || reload_completed)
2598 rtx operands[3];
2600 /* Cases 3 and 4 should be handled by this split
2601 only while combining */
2602 gcc_assert (kind <= 2);
2603 if (right)
2605 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2606 source = dest;
2608 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
2609 if (total_shift)
2611 operands[0] = dest;
2612 operands[1] = dest;
2613 operands[2] = GEN_INT (total_shift);
2614 shift_gen_fun (ASHIFT, operands);
2616 break;
2618 else
2620 int neg = 0;
2621 if (kind != 4 && total_shift < 16)
2623 neg = -ext_shift_amounts[total_shift][1];
2624 if (neg > 0)
2625 neg -= ext_shift_amounts[total_shift][2];
2626 else
2627 neg = 0;
2629 emit_insn (gen_and_shl_scratch (dest, source,
2630 GEN_INT (right),
2631 GEN_INT (mask),
2632 GEN_INT (total_shift + neg),
2633 GEN_INT (neg)));
2634 emit_insn (gen_movsi (dest, dest));
2635 break;
2638 return 0;
2641 /* Try to find a good way to implement the combiner pattern
2642 [(set (match_operand:SI 0 "register_operand" "=r")
2643 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2644 (match_operand:SI 2 "const_int_operand" "n")
2645 (match_operand:SI 3 "const_int_operand" "n")
2646 (const_int 0)))
2647 (clobber (reg:SI T_REG))]
2648 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2649 return 0 for simple left / right shift combination.
2650 return 1 for left shift / 8 bit sign extend / left shift.
2651 return 2 for left shift / 16 bit sign extend / left shift.
2652 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2653 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2654 return 5 for left shift / 16 bit sign extend / right shift.
2655 return 6 for < 8 bit sign extend / left shift.
2656 return 7 for < 8 bit sign extend / left shift / single right shift.
2657 If COSTP is nonzero, assign the calculated cost to *COSTP. */
2660 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
2662 int left, size, insize, ext;
2663 int cost = 0, best_cost;
2664 int kind;
2666 left = INTVAL (left_rtx);
2667 size = INTVAL (size_rtx);
2668 insize = size - left;
2669 gcc_assert (insize > 0);
2670 /* Default to left / right shift. */
2671 kind = 0;
2672 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
2673 if (size <= 16)
2675 /* 16 bit shift / sign extend / 16 bit shift */
2676 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2677 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2678 below, by alternative 3 or something even better. */
2679 if (cost < best_cost)
2681 kind = 5;
2682 best_cost = cost;
2685 /* Try a plain sign extend between two shifts. */
2686 for (ext = 16; ext >= insize; ext -= 8)
2688 if (ext <= size)
2690 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2691 if (cost < best_cost)
2693 kind = ext / (unsigned) 8;
2694 best_cost = cost;
2697 /* Check if we can do a sloppy shift with a final signed shift
2698 restoring the sign. */
2699 if (EXT_SHIFT_SIGNED (size - ext))
2700 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2701 /* If not, maybe it's still cheaper to do the second shift sloppy,
2702 and do a final sign extend? */
2703 else if (size <= 16)
2704 cost = ext_shift_insns[ext - insize] + 1
2705 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2706 else
2707 continue;
2708 if (cost < best_cost)
2710 kind = ext / (unsigned) 8 + 2;
2711 best_cost = cost;
2714 /* Check if we can sign extend in r0 */
2715 if (insize < 8)
2717 cost = 3 + shift_insns[left];
2718 if (cost < best_cost)
2720 kind = 6;
2721 best_cost = cost;
2723 /* Try the same with a final signed shift. */
2724 if (left < 31)
2726 cost = 3 + ext_shift_insns[left + 1] + 1;
2727 if (cost < best_cost)
2729 kind = 7;
2730 best_cost = cost;
2734 if (TARGET_SH3)
2736 /* Try to use a dynamic shift. */
2737 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2738 if (cost < best_cost)
2740 kind = 0;
2741 best_cost = cost;
2744 if (costp)
2745 *costp = cost;
2746 return kind;
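/* For example, sign-extracting the low 8 bits of a value shifted left by
   one (LEFT == 1, SIZE == 8) would take 11 insns as a plain left /
   arithmetic right shift pair, but only 2 insns as kind 1: one left shift
   followed by an 8-bit sign extension, the final shift width being 0.  */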
2749 /* Function to be used in the length attribute of the instructions
2750 implementing this pattern. */
2753 shl_sext_length (rtx insn)
2755 rtx set_src, left_rtx, size_rtx;
2756 int cost;
2758 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2759 left_rtx = XEXP (XEXP (set_src, 0), 1);
2760 size_rtx = XEXP (set_src, 1);
2761 shl_sext_kind (left_rtx, size_rtx, &cost);
2762 return cost;
2765 /* Generate rtl for this pattern */
2768 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
2770 int kind;
2771 int left, size, insize, cost;
2772 rtx operands[3];
2774 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2775 left = INTVAL (left_rtx);
2776 size = INTVAL (size_rtx);
2777 insize = size - left;
2778 switch (kind)
2780 case 1:
2781 case 2:
2782 case 3:
2783 case 4:
2785 int ext = kind & 1 ? 8 : 16;
2786 int shift2 = size - ext;
2788 /* Don't expand fine-grained when combining, because that will
2789 make the pattern fail. */
2790 if (! currently_expanding_to_rtl
2791 && ! reload_in_progress && ! reload_completed)
2793 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2794 emit_insn (gen_movsi (dest, source));
2795 break;
2797 if (dest != source)
2798 emit_insn (gen_movsi (dest, source));
2799 operands[0] = dest;
2800 if (ext - insize)
2802 operands[2] = GEN_INT (ext - insize);
2803 gen_shifty_hi_op (ASHIFT, operands);
2805 emit_insn (kind & 1
2806 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2807 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2808 if (kind <= 2)
2810 if (shift2)
2812 operands[2] = GEN_INT (shift2);
2813 gen_shifty_op (ASHIFT, operands);
2816 else
2818 if (shift2 > 0)
2820 if (EXT_SHIFT_SIGNED (shift2))
2822 operands[2] = GEN_INT (shift2 + 1);
2823 gen_shifty_op (ASHIFT, operands);
2824 operands[2] = const1_rtx;
2825 gen_shifty_op (ASHIFTRT, operands);
2826 break;
2828 operands[2] = GEN_INT (shift2);
2829 gen_shifty_hi_op (ASHIFT, operands);
2831 else if (shift2)
2833 operands[2] = GEN_INT (-shift2);
2834 gen_shifty_hi_op (LSHIFTRT, operands);
2836 emit_insn (size <= 8
2837 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2838 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2840 break;
2842 case 5:
2844 int i = 16 - size;
2845 if (! currently_expanding_to_rtl
2846 && ! reload_in_progress && ! reload_completed)
2847 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2848 else
2850 operands[0] = dest;
2851 operands[2] = GEN_INT (16 - insize);
2852 gen_shifty_hi_op (ASHIFT, operands);
2853 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2855 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2856 while (--i >= 0)
2857 gen_ashift (ASHIFTRT, 1, dest);
2858 break;
2860 case 6:
2861 case 7:
2862 /* Don't expand fine-grained when combining, because that will
2863 make the pattern fail. */
2864 if (! currently_expanding_to_rtl
2865 && ! reload_in_progress && ! reload_completed)
2867 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2868 emit_insn (gen_movsi (dest, source));
2869 break;
2871 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2872 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2873 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2874 operands[0] = dest;
2875 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2876 gen_shifty_op (ASHIFT, operands);
2877 if (kind == 7)
2878 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
2879 break;
2880 default:
2881 return -1;
2883 return 0;
2886 /* Prefix a symbol_ref name with "datalabel". */
2889 gen_datalabel_ref (rtx sym)
2891 const char *str;
2893 if (GET_CODE (sym) == LABEL_REF)
2894 return gen_rtx_CONST (GET_MODE (sym),
2895 gen_rtx_UNSPEC (GET_MODE (sym),
2896 gen_rtvec (1, sym),
2897 UNSPEC_DATALABEL));
2899 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
2901 str = XSTR (sym, 0);
2902 /* Share all SYMBOL_REF strings with the same value - that is important
2903 for cse. */
2904 str = IDENTIFIER_POINTER (get_identifier (str));
2905 XSTR (sym, 0) = str;
2907 return sym;
2911 /* The SH cannot load a large constant into a register; constants have to
2912 come from a pc relative load. The reference of a pc relative load
2913 instruction must be less than 1k in front of the instruction. This
2914 means that we often have to dump a constant inside a function, and
2915 generate code to branch around it.
2917 It is important to minimize this, since the branches will slow things
2918 down and make things bigger.
2920 Worst case code looks like:
2922 mov.l L1,rn
2923 bra L2
2925 align
2926 L1: .long value
2930 mov.l L3,rn
2931 bra L4
2933 align
2934 L3: .long value
2938 We fix this by performing a scan before scheduling, which notices which
2939 instructions need to have their operands fetched from the constant table
2940 and builds the table.
2942 The algorithm is:
2944 scan, find an instruction which needs a pcrel move. Look forward, find the
2945 last barrier which is within MAX_COUNT bytes of the requirement.
2946 If there isn't one, make one. Process all the instructions between
2947 the find and the barrier.
2949 In the above example, we can tell that L3 is within 1k of L1, so
2950 the first move can be shrunk from the 3 insn+constant sequence into
2951 just 1 insn, and the constant moved to L3 to make:
2953 mov.l L1,rn
2955 mov.l L3,rn
2956 bra L4
2958 align
2959 L3:.long value
2960 L4:.long value
2962 Then the second move becomes the target for the shortening process. */
2964 typedef struct
2966 rtx value; /* Value in table. */
2967 rtx label; /* Label of value. */
2968 rtx wend; /* End of window. */
2969 enum machine_mode mode; /* Mode of value. */
2971 /* True if this constant is accessed as part of a post-increment
2972 sequence. Note that HImode constants are never accessed in this way. */
2973 bool part_of_sequence_p;
2974 } pool_node;
2976 /* The maximum number of constants that can fit into one pool, since
2977 constants in the range 0..510 are at least 2 bytes long, and in the
2978 range from there to 1018 at least 4 bytes. */
2980 #define MAX_POOL_SIZE 372
2981 static pool_node pool_vector[MAX_POOL_SIZE];
2982 static int pool_size;
2983 static rtx pool_window_label;
2984 static int pool_window_last;
2986 /* ??? If we need a constant in HImode which is the truncated value of a
2987 constant we need in SImode, we could combine the two entries thus saving
2988 two bytes. Is this common enough to be worth the effort of implementing
2989 it? */
2991 /* ??? This stuff should be done at the same time that we shorten branches.
2992 As it is now, we must assume that all branches are the maximum size, and
2993 this causes us to almost always output constant pools sooner than
2994 necessary. */
2996 /* Add a constant to the pool and return its label. */
2998 static rtx
2999 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3001 int i;
3002 rtx lab, new, ref, newref;
3004 /* First see if we've already got it. */
3005 for (i = 0; i < pool_size; i++)
3007 if (x->code == pool_vector[i].value->code
3008 && mode == pool_vector[i].mode)
3010 if (x->code == CODE_LABEL)
3012 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3013 continue;
3015 if (rtx_equal_p (x, pool_vector[i].value))
3017 lab = new = 0;
3018 if (! last_value
3019 || ! i
3020 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3022 new = gen_label_rtx ();
3023 LABEL_REFS (new) = pool_vector[i].label;
3024 pool_vector[i].label = lab = new;
3026 if (lab && pool_window_label)
3028 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
3029 ref = pool_vector[pool_window_last].wend;
3030 LABEL_NEXTREF (newref) = ref;
3031 pool_vector[pool_window_last].wend = newref;
3033 if (new)
3034 pool_window_label = new;
3035 pool_window_last = i;
3036 return lab;
3041 /* Need a new one. */
3042 pool_vector[pool_size].value = x;
3043 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
3045 lab = 0;
3046 pool_vector[pool_size - 1].part_of_sequence_p = true;
3048 else
3049 lab = gen_label_rtx ();
3050 pool_vector[pool_size].mode = mode;
3051 pool_vector[pool_size].label = lab;
3052 pool_vector[pool_size].wend = NULL_RTX;
3053 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
3054 if (lab && pool_window_label)
3056 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
3057 ref = pool_vector[pool_window_last].wend;
3058 LABEL_NEXTREF (newref) = ref;
3059 pool_vector[pool_window_last].wend = newref;
3061 if (lab)
3062 pool_window_label = lab;
3063 pool_window_last = pool_size;
3064 pool_size++;
3065 return lab;
3068 /* Output the literal table. START, if nonzero, is the first instruction
3069 this table is needed for, and also indicates that there is at least one
3070 casesi_worker_2 instruction; we have to emit the operand3 labels from
3071 these insns at a 4-byte aligned position. BARRIER is the barrier
3072 after which we are to place the table. */
3074 static void
3075 dump_table (rtx start, rtx barrier)
3077 rtx scan = barrier;
3078 int i;
3079 int need_align = 1;
3080 rtx lab, ref;
3081 int have_df = 0;
3083 /* Do two passes, first time dump out the HI sized constants. */
3085 for (i = 0; i < pool_size; i++)
3087 pool_node *p = &pool_vector[i];
3089 if (p->mode == HImode)
3091 if (need_align)
3093 scan = emit_insn_after (gen_align_2 (), scan);
3094 need_align = 0;
3096 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3097 scan = emit_label_after (lab, scan);
3098 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
3099 scan);
3100 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3102 lab = XEXP (ref, 0);
3103 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3106 else if (p->mode == DFmode)
3107 have_df = 1;
3110 need_align = 1;
3112 if (start)
3114 scan = emit_insn_after (gen_align_4 (), scan);
3115 need_align = 0;
3116 for (; start != barrier; start = NEXT_INSN (start))
3117 if (GET_CODE (start) == INSN
3118 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
3120 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
3121 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
3123 scan = emit_label_after (lab, scan);
3126 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
3128 rtx align_insn = NULL_RTX;
3130 scan = emit_label_after (gen_label_rtx (), scan);
3131 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3132 need_align = 0;
3134 for (i = 0; i < pool_size; i++)
3136 pool_node *p = &pool_vector[i];
3138 switch (p->mode)
3140 case HImode:
3141 break;
3142 case SImode:
3143 case SFmode:
3144 if (align_insn && !p->part_of_sequence_p)
3146 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3147 emit_label_before (lab, align_insn);
3148 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
3149 align_insn);
3150 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3152 lab = XEXP (ref, 0);
3153 emit_insn_before (gen_consttable_window_end (lab),
3154 align_insn);
3156 delete_insn (align_insn);
3157 align_insn = NULL_RTX;
3158 continue;
3160 else
3162 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3163 scan = emit_label_after (lab, scan);
3164 scan = emit_insn_after (gen_consttable_4 (p->value,
3165 const0_rtx), scan);
3166 need_align = ! need_align;
3168 break;
3169 case DFmode:
3170 if (need_align)
3172 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3173 align_insn = scan;
3174 need_align = 0;
3176 case DImode:
3177 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3178 scan = emit_label_after (lab, scan);
3179 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3180 scan);
3181 break;
3182 default:
3183 gcc_unreachable ();
3186 if (p->mode != HImode)
3188 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3190 lab = XEXP (ref, 0);
3191 scan = emit_insn_after (gen_consttable_window_end (lab),
3192 scan);
3197 pool_size = 0;
3200 for (i = 0; i < pool_size; i++)
3202 pool_node *p = &pool_vector[i];
3204 switch (p->mode)
3206 case HImode:
3207 break;
3208 case SImode:
3209 case SFmode:
3210 if (need_align)
3212 need_align = 0;
3213 scan = emit_label_after (gen_label_rtx (), scan);
3214 scan = emit_insn_after (gen_align_4 (), scan);
3216 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3217 scan = emit_label_after (lab, scan);
3218 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
3219 scan);
3220 break;
3221 case DFmode:
3222 case DImode:
3223 if (need_align)
3225 need_align = 0;
3226 scan = emit_label_after (gen_label_rtx (), scan);
3227 scan = emit_insn_after (gen_align_4 (), scan);
3229 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3230 scan = emit_label_after (lab, scan);
3231 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3232 scan);
3233 break;
3234 default:
3235 gcc_unreachable ();
3238 if (p->mode != HImode)
3240 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3242 lab = XEXP (ref, 0);
3243 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3248 scan = emit_insn_after (gen_consttable_end (), scan);
3249 scan = emit_barrier_after (scan);
3250 pool_size = 0;
3251 pool_window_label = NULL_RTX;
3252 pool_window_last = 0;
3255 /* Return nonzero if the constant would be an OK source for a
3256 mov.w instead of a mov.l. */
3258 static int
3259 hi_const (rtx src)
3261 return (GET_CODE (src) == CONST_INT
3262 && INTVAL (src) >= -32768
3263 && INTVAL (src) <= 32767);
3266 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3268 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
3269 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
3270 need to fix it if the input value is CONST_OK_FOR_I08. */
3272 static int
3273 broken_move (rtx insn)
3275 if (GET_CODE (insn) == INSN)
3277 rtx pat = PATTERN (insn);
3278 if (GET_CODE (pat) == PARALLEL)
3279 pat = XVECEXP (pat, 0, 0);
3280 if (GET_CODE (pat) == SET
3281 /* We can load any 8 bit value if we don't care what the high
3282 order bits end up as. */
3283 && GET_MODE (SET_DEST (pat)) != QImode
3284 && (CONSTANT_P (SET_SRC (pat))
3285 /* Match mova_const. */
3286 || (GET_CODE (SET_SRC (pat)) == UNSPEC
3287 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3288 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
3289 && ! (TARGET_SH2E
3290 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3291 && (fp_zero_operand (SET_SRC (pat))
3292 || fp_one_operand (SET_SRC (pat)))
3293 /* ??? If this is a -m4 or -m4-single compilation, in general
3294 we don't know the current setting of fpscr, so disable fldi.
3295 There is an exception if this was a register-register move
3296 before reload - and hence it was ascertained that we have
3297 single precision setting - and in a post-reload optimization
3298 we changed this to do a constant load. In that case
3299 we don't have an r0 clobber, hence we must use fldi. */
3300 && (! TARGET_SH4 || TARGET_FMOVD
3301 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3302 == SCRATCH))
3303 && GET_CODE (SET_DEST (pat)) == REG
3304 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3305 && ! (TARGET_SH2A
3306 && GET_MODE (SET_DEST (pat)) == SImode
3307 && GET_CODE (SET_SRC (pat)) == CONST_INT
3308 && CONST_OK_FOR_I20 (INTVAL (SET_SRC (pat))))
3309 && (GET_CODE (SET_SRC (pat)) != CONST_INT
3310 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
3311 return 1;
3314 return 0;
3317 static int
3318 mova_p (rtx insn)
3320 return (GET_CODE (insn) == INSN
3321 && GET_CODE (PATTERN (insn)) == SET
3322 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3323 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3324 /* Don't match mova_const. */
3325 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
3328 /* Fix up a mova from a switch that went out of range. */
3329 static void
3330 fixup_mova (rtx mova)
3332 if (! flag_pic)
3334 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3335 INSN_CODE (mova) = -1;
3337 else
3339 rtx worker = mova;
3340 rtx lab = gen_label_rtx ();
3341 rtx wpat, wpat0, wpat1, wsrc, diff;
3345 worker = NEXT_INSN (worker);
3346 gcc_assert (worker
3347 && GET_CODE (worker) != CODE_LABEL
3348 && GET_CODE (worker) != JUMP_INSN);
3349 } while (recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3350 wpat = PATTERN (worker);
3351 wpat0 = XVECEXP (wpat, 0, 0);
3352 wpat1 = XVECEXP (wpat, 0, 1);
3353 wsrc = SET_SRC (wpat0);
3354 PATTERN (worker) = (gen_casesi_worker_2
3355 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3356 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3357 XEXP (wpat1, 0)));
3358 INSN_CODE (worker) = -1;
3359 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3360 gen_rtx_LABEL_REF (Pmode, lab));
3361 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3362 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3363 INSN_CODE (mova) = -1;
3367 /* Find the last barrier from insn FROM which is close enough to hold the
3368 constant pool. If we can't find one, then create one near the end of
3369 the range. */
3371 static rtx
3372 find_barrier (int num_mova, rtx mova, rtx from)
3374 int count_si = 0;
3375 int count_hi = 0;
3376 int found_hi = 0;
3377 int found_si = 0;
3378 int found_di = 0;
3379 int hi_align = 2;
3380 int si_align = 2;
3381 int leading_mova = num_mova;
3382 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3383 int si_limit;
3384 int hi_limit;
3386 /* For HImode: range is 510, add 4 because pc counts from address of
3387 second instruction after this one, subtract 2 for the jump instruction
3388 that we may need to emit before the table, subtract 2 for the instruction
3389 that fills the jump delay slot (in very rare cases, reorg will take an
3390 instruction from after the constant pool or will leave the delay slot
3391 empty). This gives 510.
3392 For SImode: range is 1020, add 4 because pc counts from address of
3393 second instruction after this one, subtract 2 in case pc is 2 byte
3394 aligned, subtract 2 for the jump instruction that we may need to emit
3395 before the table, subtract 2 for the instruction that fills the jump
3396 delay slot. This gives 1018. */
3398 /* The branch will always be shortened now that the reference address for
3399 forward branches is the successor address, thus we need no longer make
3400 adjustments to the [sh]i_limit for -O0. */
3402 si_limit = 1018;
3403 hi_limit = 510;
3405 while (from && count_si < si_limit && count_hi < hi_limit)
3407 int inc = get_attr_length (from);
3408 int new_align = 1;
3410 if (GET_CODE (from) == CODE_LABEL)
3412 if (optimize)
3413 new_align = 1 << label_to_alignment (from);
3414 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3415 new_align = 1 << barrier_align (from);
3416 else
3417 new_align = 1;
3418 inc = 0;
3421 if (GET_CODE (from) == BARRIER)
3424 found_barrier = from;
3426 /* If we are at the end of the function, or in front of an alignment
3427 instruction, we need not insert an extra alignment. We prefer
3428 this kind of barrier. */
3429 if (barrier_align (from) > 2)
3430 good_barrier = from;
3433 if (broken_move (from))
3435 rtx pat, src, dst;
3436 enum machine_mode mode;
3438 pat = PATTERN (from);
3439 if (GET_CODE (pat) == PARALLEL)
3440 pat = XVECEXP (pat, 0, 0);
3441 src = SET_SRC (pat);
3442 dst = SET_DEST (pat);
3443 mode = GET_MODE (dst);
3445 /* We must explicitly check the mode, because sometimes the
3446 front end will generate code to load unsigned constants into
3447 HImode targets without properly sign extending them. */
3448 if (mode == HImode
3449 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3451 found_hi += 2;
3452 /* We put the short constants before the long constants, so
3453 we must count the length of short constants in the range
3454 for the long constants. */
3455 /* ??? This isn't optimal, but is easy to do. */
3456 si_limit -= 2;
3458 else
3460 /* We dump DF/DI constants before SF/SI ones, because
3461 the limit is the same, but the alignment requirements
3462 are higher. We may waste up to 4 additional bytes
3463 for alignment, and the DF/DI constant may have
3464 another SF/SI constant placed before it. */
3465 if (TARGET_SHCOMPACT
3466 && ! found_di
3467 && (mode == DFmode || mode == DImode))
3469 found_di = 1;
3470 si_limit -= 8;
3472 while (si_align > 2 && found_si + si_align - 2 > count_si)
3473 si_align >>= 1;
3474 if (found_si > count_si)
3475 count_si = found_si;
3476 found_si += GET_MODE_SIZE (mode);
3477 if (num_mova)
3478 si_limit -= GET_MODE_SIZE (mode);
3482 if (mova_p (from))
3484 if (! num_mova++)
3486 leading_mova = 0;
3487 mova = from;
3488 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
3490 if (found_si > count_si)
3491 count_si = found_si;
3493 else if (GET_CODE (from) == JUMP_INSN
3494 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3495 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3497 if (num_mova)
3498 num_mova--;
3499 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3501 /* We have just passed the barrier in front of the
3502 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3503 the ADDR_DIFF_VEC is accessed as data, just like our pool
3504 constants, this is a good opportunity to accommodate what
3505 we have gathered so far.
3506 If we waited any longer, we could end up at a barrier in
3507 front of code, which gives worse cache usage for separated
3508 instruction / data caches. */
3509 good_barrier = found_barrier;
3510 break;
3512 else
3514 rtx body = PATTERN (from);
3515 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
3518 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3519 else if (GET_CODE (from) == JUMP_INSN
3520 && ! TARGET_SH2
3521 && ! TARGET_SMALLCODE)
3522 new_align = 4;
3524 if (found_si)
3526 count_si += inc;
3527 if (new_align > si_align)
3529 si_limit -= (count_si - 1) & (new_align - si_align);
3530 si_align = new_align;
3532 count_si = (count_si + new_align - 1) & -new_align;
3534 if (found_hi)
3536 count_hi += inc;
3537 if (new_align > hi_align)
3539 hi_limit -= (count_hi - 1) & (new_align - hi_align);
3540 hi_align = new_align;
3542 count_hi = (count_hi + new_align - 1) & -new_align;
3544 from = NEXT_INSN (from);
3547 if (num_mova)
3549 if (leading_mova)
3551 /* Try as we might, the leading mova is out of range. Change
3552 it into a load (which will become a pcload) and retry. */
3553 fixup_mova (mova);
3554 return find_barrier (0, 0, mova);
3556 else
3558 /* Insert the constant pool table before the mova instruction,
3559 to prevent the mova label reference from going out of range. */
3560 from = mova;
3561 good_barrier = found_barrier = barrier_before_mova;
3565 if (found_barrier)
3567 if (good_barrier && next_real_insn (found_barrier))
3568 found_barrier = good_barrier;
3570 else
3572 /* We didn't find a barrier in time to dump our stuff,
3573 so we'll make one. */
3574 rtx label = gen_label_rtx ();
3576 /* If we exceeded the range, then we must back up over the last
3577 instruction we looked at. Otherwise, we just need to undo the
3578 NEXT_INSN at the end of the loop. */
3579 if (count_hi > hi_limit || count_si > si_limit)
3580 from = PREV_INSN (PREV_INSN (from));
3581 else
3582 from = PREV_INSN (from);
3584 /* Walk back to be just before any jump or label.
3585 Putting it before a label reduces the number of times the branch
3586 around the constant pool table will be hit. Putting it before
3587 a jump makes it more likely that the bra delay slot will be
3588 filled. */
3589 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
3590 || GET_CODE (from) == CODE_LABEL)
3591 from = PREV_INSN (from);
3593 from = emit_jump_insn_after (gen_jump (label), from);
3594 JUMP_LABEL (from) = label;
3595 LABEL_NUSES (label) = 1;
3596 found_barrier = emit_barrier_after (from);
3597 emit_label_after (label, found_barrier);
3600 return found_barrier;
3603 /* If the instruction INSN is implemented by a special function, and we can
3604 positively find the register that is used to call the sfunc, and this
3605 register is not used anywhere else in this instruction - except as the
3606 destination of a set, return this register; else, return 0. */
3608 sfunc_uses_reg (rtx insn)
3610 int i;
3611 rtx pattern, part, reg_part, reg;
3613 if (GET_CODE (insn) != INSN)
3614 return 0;
3615 pattern = PATTERN (insn);
3616 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
3617 return 0;
3619 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3621 part = XVECEXP (pattern, 0, i);
3622 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
3623 reg_part = part;
3625 if (! reg_part)
3626 return 0;
3627 reg = XEXP (reg_part, 0);
3628 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
3630 part = XVECEXP (pattern, 0, i);
3631 if (part == reg_part || GET_CODE (part) == CLOBBER)
3632 continue;
3633 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
3634 && GET_CODE (SET_DEST (part)) == REG)
3635 ? SET_SRC (part) : part)))
3636 return 0;
3638 return reg;
3641 /* See if the only way in which INSN uses REG is by calling it, or by
3642 setting it while calling it. Set *SET to a SET rtx if the register
3643 is set by INSN. */
3645 static int
3646 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
3648 rtx pattern, reg2;
3650 *set = NULL_RTX;
3652 reg2 = sfunc_uses_reg (insn);
3653 if (reg2 && REGNO (reg2) == REGNO (reg))
3655 pattern = single_set (insn);
3656 if (pattern
3657 && GET_CODE (SET_DEST (pattern)) == REG
3658 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3659 *set = pattern;
3660 return 0;
3662 if (GET_CODE (insn) != CALL_INSN)
3664 /* We don't use rtx_equal_p because we don't care if the mode is
3665 different. */
3666 pattern = single_set (insn);
3667 if (pattern
3668 && GET_CODE (SET_DEST (pattern)) == REG
3669 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3671 rtx par, part;
3672 int i;
3674 *set = pattern;
3675 par = PATTERN (insn);
3676 if (GET_CODE (par) == PARALLEL)
3677 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
3679 part = XVECEXP (par, 0, i);
3680 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
3681 return 1;
3683 return reg_mentioned_p (reg, SET_SRC (pattern));
3686 return 1;
3689 pattern = PATTERN (insn);
3691 if (GET_CODE (pattern) == PARALLEL)
3693 int i;
3695 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3696 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
3697 return 1;
3698 pattern = XVECEXP (pattern, 0, 0);
3701 if (GET_CODE (pattern) == SET)
3703 if (reg_mentioned_p (reg, SET_DEST (pattern)))
3705 /* We don't use rtx_equal_p, because we don't care if the
3706 mode is different. */
3707 if (GET_CODE (SET_DEST (pattern)) != REG
3708 || REGNO (reg) != REGNO (SET_DEST (pattern)))
3709 return 1;
3711 *set = pattern;
3714 pattern = SET_SRC (pattern);
3717 if (GET_CODE (pattern) != CALL
3718 || GET_CODE (XEXP (pattern, 0)) != MEM
3719 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
3720 return 1;
3722 return 0;
3725 /* Given X, a pattern of an insn or a part of it, return a mask of used
3726 general registers. Bits 0..15 mean that the respective registers
3727 are used as inputs in the instruction. Bits 16..31 mean that the
3728 registers 0..15, respectively, are used as outputs, or are clobbered.
3729 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
3731 regs_used (rtx x, int is_dest)
3733 enum rtx_code code;
3734 const char *fmt;
3735 int i, used = 0;
3737 if (! x)
3738 return used;
3739 code = GET_CODE (x);
3740 switch (code)
3742 case REG:
3743 if (REGNO (x) < 16)
3744 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3745 << (REGNO (x) + is_dest));
3746 return 0;
3747 case SUBREG:
3749 rtx y = SUBREG_REG (x);
3751 if (GET_CODE (y) != REG)
3752 break;
3753 if (REGNO (y) < 16)
3754 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3755 << (REGNO (y) +
3756 subreg_regno_offset (REGNO (y),
3757 GET_MODE (y),
3758 SUBREG_BYTE (x),
3759 GET_MODE (x)) + is_dest));
3760 return 0;
3762 case SET:
3763 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
3764 case RETURN:
3765 /* If there was a return value, it must have been indicated with USE. */
3766 return 0x00ffff00;
3767 case CLOBBER:
3768 is_dest = 1;
3769 break;
3770 case MEM:
3771 is_dest = 0;
3772 break;
3773 case CALL:
3774 used |= 0x00ff00f0;
3775 break;
3776 default:
3777 break;
3780 fmt = GET_RTX_FORMAT (code);
3782 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3784 if (fmt[i] == 'E')
3786 register int j;
3787 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3788 used |= regs_used (XVECEXP (x, i, j), is_dest);
3790 else if (fmt[i] == 'e')
3791 used |= regs_used (XEXP (x, i), is_dest);
3793 return used;
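/* So, for instance, a SET of r3 to the sum of r1 and r2 yields 0x00080006:
   bits 1 and 2 for the inputs and bit 19 (3 + 16) for the output, assuming
   the mode occupies a single hard register.  */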
3796 /* Create an instruction that prevents redirection of a conditional branch
3797 to the destination of the JUMP with address ADDR.
3798 If the branch needs to be implemented as an indirect jump, try to find
3799 a scratch register for it.
3800 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3801 If any preceding insn that doesn't fit into a delay slot is good enough,
3802 pass 1. Pass 2 if a definite blocking insn is needed.
3803 -1 is used internally to avoid deep recursion.
3804 If a blocking instruction is made or recognized, return it. */
3806 static rtx
3807 gen_block_redirect (rtx jump, int addr, int need_block)
3809 int dead = 0;
3810 rtx prev = prev_nonnote_insn (jump);
3811 rtx dest;
3813 /* First, check if we already have an instruction that satisfies our need. */
3814 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3816 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3817 return prev;
3818 if (GET_CODE (PATTERN (prev)) == USE
3819 || GET_CODE (PATTERN (prev)) == CLOBBER
3820 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3821 prev = jump;
3822 else if ((need_block &= ~1) < 0)
3823 return prev;
3824 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
3825 need_block = 0;
3827 if (GET_CODE (PATTERN (jump)) == RETURN)
3829 if (! need_block)
3830 return prev;
3831 /* Reorg even does nasty things with return insns that cause branches
3832 to go out of range - see find_end_label and callers. */
3833 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
3835 /* We can't use JUMP_LABEL here because it might be undefined
3836 when not optimizing. */
3837 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
3838 /* If the branch is out of range, try to find a scratch register for it. */
3839 if (optimize
3840 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3841 > 4092 + 4098))
3843 rtx scan;
3844 /* Don't look for the stack pointer as a scratch register;
3845 it would cause trouble if an interrupt occurred. */
3846 unsigned try = 0x7fff, used;
3847 int jump_left = flag_expensive_optimizations + 1;
3849 /* It is likely that the most recent eligible instruction is wanted for
3850 the delay slot. Therefore, find out which registers it uses, and
3851 try to avoid using them. */
3853 for (scan = jump; (scan = PREV_INSN (scan)); )
3855 enum rtx_code code;
3857 if (INSN_DELETED_P (scan))
3858 continue;
3859 code = GET_CODE (scan);
3860 if (code == CODE_LABEL || code == JUMP_INSN)
3861 break;
3862 if (code == INSN
3863 && GET_CODE (PATTERN (scan)) != USE
3864 && GET_CODE (PATTERN (scan)) != CLOBBER
3865 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
3867 try &= ~regs_used (PATTERN (scan), 0);
3868 break;
3871 for (used = dead = 0, scan = JUMP_LABEL (jump);
3872 (scan = NEXT_INSN (scan)); )
3874 enum rtx_code code;
3876 if (INSN_DELETED_P (scan))
3877 continue;
3878 code = GET_CODE (scan);
3879 if (INSN_P (scan))
3881 used |= regs_used (PATTERN (scan), 0);
3882 if (code == CALL_INSN)
3883 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
3884 dead |= (used >> 16) & ~used;
3885 if (dead & try)
3887 dead &= try;
3888 break;
3890 if (code == JUMP_INSN)
3892 if (jump_left-- && simplejump_p (scan))
3893 scan = JUMP_LABEL (scan);
3894 else
3895 break;
3899 /* Mask out the stack pointer again, in case it was
3900 the only 'free' register we have found. */
3901 dead &= 0x7fff;
3903 /* If the immediate destination is still in range, check for possible
3904 threading with a jump beyond the delay slot insn.
3905 Don't check if we are called recursively; the jump has been or will be
3906 checked in a different invocation then. */
3908 else if (optimize && need_block >= 0)
3910 rtx next = next_active_insn (next_active_insn (dest));
3911 if (next && GET_CODE (next) == JUMP_INSN
3912 && GET_CODE (PATTERN (next)) == SET
3913 && recog_memoized (next) == CODE_FOR_jump_compact)
3915 dest = JUMP_LABEL (next);
3916 if (dest
3917 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3918 > 4092 + 4098))
3919 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
3923 if (dead)
3925 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
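/* DEAD & -DEAD isolates the lowest set bit, so exact_log2 yields the
   lowest-numbered dead register; e.g. dead == 0x28 (r3 and r5 dead)
   gives 0x08, and exact_log2 (0x08) == 3, i.e. r3 is chosen.  */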
3927 /* It would be nice if we could convert the jump into an indirect
3928 jump / far branch right now, thus exposing all constituent
3929 instructions to further optimization. However, reorg uses
3930 simplejump_p to determine if there is an unconditional jump where
3931 it should try to schedule instructions from the target of the
3932 branch; simplejump_p fails for indirect jumps even if they have
3933 a JUMP_LABEL. */
3934 rtx insn = emit_insn_before (gen_indirect_jump_scratch
3935 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
3936 , jump);
3937 /* ??? We would like this to have the scope of the jump, but that
3938 scope will change when a delay slot insn of an inner scope is added.
3939 Hence, after delay slot scheduling, we'll have to expect
3940 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
3941 the jump. */
3943 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
3944 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
3945 return insn;
3947 else if (need_block)
3948 /* We can't use JUMP_LABEL here because it might be undefined
3949 when not optimizing. */
3950 return emit_insn_before (gen_block_branch_redirect
3951 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
3952 , jump);
3953 return prev;
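/* Illustrative sketch, not part of the original source: the range test
   used above, (target - addr + (unsigned) 4092 > 4092 + 4098), folds the
   two checks "displacement < -4092 || displacement > 4098" into a single
   unsigned comparison by biasing the displacement so that the allowed
   window becomes [0, 4092 + 4098].  The helper below and its sample
   addresses are hypothetical; a similar biased comparison appears later
   for conditional branches (the 252 + 258 + 2 test in split_branches).  */
#if 0 /* example only */
static int
branch_out_of_range (int target_addr, int branch_addr)
{
  /* Allowed displacement window for the branch: [-4092, +4098].  */
  return (unsigned) (target_addr - branch_addr + 4092) > 4092 + 4098;
}
/* branch_out_of_range (0x1000, 0x0ffe) is 0: displacement 2, in range.
   branch_out_of_range (0x3000, 0x0ffe) is 1: displacement 8194, too far.  */
#endif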
3956 #define CONDJUMP_MIN -252
3957 #define CONDJUMP_MAX 262
3958 struct far_branch
3960 /* A label (to be placed) in front of the jump
3961 that jumps to our ultimate destination. */
3962 rtx near_label;
3963 /* Where we are going to insert it if we cannot move the jump any farther,
3964 or the jump itself if we have picked up an existing jump. */
3965 rtx insert_place;
3966 /* The ultimate destination. */
3967 rtx far_label;
3968 struct far_branch *prev;
3969 /* If the branch has already been created, its address;
3970 else the address of its first prospective user. */
3971 int address;
3974 static void gen_far_branch (struct far_branch *);
3975 enum mdep_reorg_phase_e mdep_reorg_phase;
3976 static void
3977 gen_far_branch (struct far_branch *bp)
3979 rtx insn = bp->insert_place;
3980 rtx jump;
3981 rtx label = gen_label_rtx ();
3982 int ok;
3984 emit_label_after (label, insn);
3985 if (bp->far_label)
3987 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
3988 LABEL_NUSES (bp->far_label)++;
3990 else
3991 jump = emit_jump_insn_after (gen_return (), insn);
3992 /* Emit a barrier so that reorg knows that any following instructions
3993 are not reachable via a fall-through path.
3994 But don't do this when not optimizing, since we wouldn't suppress the
3995 alignment for the barrier then, and could end up with out-of-range
3996 pc-relative loads. */
3997 if (optimize)
3998 emit_barrier_after (jump);
3999 emit_label_after (bp->near_label, insn);
4000 JUMP_LABEL (jump) = bp->far_label;
4001 ok = invert_jump (insn, label, 1);
4002 gcc_assert (ok);
4004 /* If we are branching around a jump (rather than a return), prevent
4005 reorg from using an insn from the jump target as the delay slot insn -
4006 when reorg did this, it pessimized code (we'd rather hide the delay slot)
4007 and it could cause branches to go out of range. */
4008 if (bp->far_label)
4009 (emit_insn_after
4010 (gen_stuff_delay_slot
4011 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
4012 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
4013 insn));
4014 /* Prevent reorg from undoing our splits. */
4015 gen_block_redirect (jump, bp->address += 2, 2);
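/* Summary of the transformation above (illustrative, not from the
   original source): an out-of-range conditional branch to FAR_LABEL is
   inverted so that it skips over a newly emitted unconditional jump (or
   return) to FAR_LABEL; NEAR_LABEL is placed in front of that jump so
   other out-of-range branches can reuse it, and the barrier keeps reorg
   from treating the code after the jump as reachable by fall-through.  */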
4018 /* Fix up ADDR_DIFF_VECs. */
4019 void
4020 fixup_addr_diff_vecs (rtx first)
4022 rtx insn;
4024 for (insn = first; insn; insn = NEXT_INSN (insn))
4026 rtx vec_lab, pat, prev, prevpat, x, braf_label;
4028 if (GET_CODE (insn) != JUMP_INSN
4029 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
4030 continue;
4031 pat = PATTERN (insn);
4032 vec_lab = XEXP (XEXP (pat, 0), 0);
4034 /* Search the matching casesi_jump_2. */
4035 for (prev = vec_lab; ; prev = PREV_INSN (prev))
4037 if (GET_CODE (prev) != JUMP_INSN)
4038 continue;
4039 prevpat = PATTERN (prev);
4040 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
4041 continue;
4042 x = XVECEXP (prevpat, 0, 1);
4043 if (GET_CODE (x) != USE)
4044 continue;
4045 x = XEXP (x, 0);
4046 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
4047 break;
4049 /* FIXME: This is a bug in the optimizer, but it seems harmless
4050 to just avoid panicking. */
4051 if (!prev)
4052 continue;
4054 /* Emit the reference label of the braf where it belongs, right after
4055 the casesi_jump_2 (i.e. braf). */
4056 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
4057 emit_label_after (braf_label, prev);
4059 /* Fix up the ADDR_DIFF_VEC to be relative
4060 to the reference address of the braf. */
4061 XEXP (XEXP (pat, 0), 0) = braf_label;
4065 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
4066 a barrier. Return the base 2 logarithm of the desired alignment. */
4067 int
4068 barrier_align (rtx barrier_or_label)
4070 rtx next = next_real_insn (barrier_or_label), pat, prev;
4071 int slot, credit, jump_to_next = 0;
4073 if (! next)
4074 return 0;
4076 pat = PATTERN (next);
4078 if (GET_CODE (pat) == ADDR_DIFF_VEC)
4079 return 2;
4081 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
4082 /* This is a barrier in front of a constant table. */
4083 return 0;
4085 prev = prev_real_insn (barrier_or_label);
4086 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
4088 pat = PATTERN (prev);
4089 /* If this is a very small table, we want to keep the alignment after
4090 the table to the minimum for proper code alignment. */
4091 return ((TARGET_SMALLCODE
4092 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
4093 <= (unsigned) 1 << (CACHE_LOG - 2)))
4094 ? 1 << TARGET_SHMEDIA : align_jumps_log);
4097 if (TARGET_SMALLCODE)
4098 return 0;
4100 if (! TARGET_SH2 || ! optimize)
4101 return align_jumps_log;
4103 /* When fixing up pcloads, a constant table might be inserted just before
4104 the basic block that ends with the barrier. Thus, we can't trust the
4105 instruction lengths before that. */
4106 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
4108 /* Check if there is an immediately preceding branch to the insn beyond
4109 the barrier. We must weigh the cost of discarding useful information
4110 from the current cache line when executing this branch and there is
4111 an alignment, against that of fetching unneeded insns in front of the
4112 branch target when there is no alignment. */
4114 /* There are two delay_slot cases to consider. One is the simple case
4115 where the preceding branch is to the insn beyond the barrier (simple
4116 delay slot filling), and the other is where the preceding branch has
4117 a delay slot that is a duplicate of the insn after the barrier
4118 (fill_eager_delay_slots) and the branch is to the insn after the insn
4119 after the barrier. */
4121 /* PREV is presumed to be the JUMP_INSN for the barrier under
4122 investigation. Skip to the insn before it. */
4123 prev = prev_real_insn (prev);
4125 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
4126 credit >= 0 && prev && GET_CODE (prev) == INSN;
4127 prev = prev_real_insn (prev))
4129 jump_to_next = 0;
4130 if (GET_CODE (PATTERN (prev)) == USE
4131 || GET_CODE (PATTERN (prev)) == CLOBBER)
4132 continue;
4133 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
4135 prev = XVECEXP (PATTERN (prev), 0, 1);
4136 if (INSN_UID (prev) == INSN_UID (next))
4138 /* Delay slot was filled with insn at jump target. */
4139 jump_to_next = 1;
4140 continue;
4144 if (slot &&
4145 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4146 slot = 0;
4147 credit -= get_attr_length (prev);
4149 if (prev
4150 && GET_CODE (prev) == JUMP_INSN
4151 && JUMP_LABEL (prev))
4153 rtx x;
4154 if (jump_to_next
4155 || next_real_insn (JUMP_LABEL (prev)) == next
4156 /* If relax_delay_slots() decides NEXT was redundant
4157 with some previous instruction, it will have
4158 redirected PREV's jump to the following insn. */
4159 || JUMP_LABEL (prev) == next_nonnote_insn (next)
4160 /* There is no upper bound on redundant instructions
4161 that might have been skipped, but we must not put an
4162 alignment where none had been before. */
4163 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
4164 (INSN_P (x)
4165 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
4166 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
4167 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
4169 rtx pat = PATTERN (prev);
4170 if (GET_CODE (pat) == PARALLEL)
4171 pat = XVECEXP (pat, 0, 0);
4172 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
4173 return 0;
4178 return align_jumps_log;
4181 /* If we are inside a phony loop, almost any kind of label can turn up as the
4182 first one in the loop. Aligning a braf label causes incorrect switch
4183 destination addresses; we can detect braf labels because they are
4184 followed by a BARRIER.
4185 Applying loop alignment to small constant or switch tables is a waste
4186 of space, so we suppress this too. */
4187 int
4188 sh_loop_align (rtx label)
4190 rtx next = label;
4192 do
4193 next = next_nonnote_insn (next);
4194 while (next && GET_CODE (next) == CODE_LABEL);
4196 if (! next
4197 || ! INSN_P (next)
4198 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
4199 || recog_memoized (next) == CODE_FOR_consttable_2)
4200 return 0;
4202 return align_loops_log;
4205 /* Do a final pass over the function, just before delayed branch
4206 scheduling. */
4208 static void
4209 sh_reorg (void)
4211 rtx first, insn, mova = NULL_RTX;
4212 int num_mova;
4213 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
4214 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
4216 first = get_insns ();
4218 /* We must split call insns before introducing `mova's. If we're
4219 optimizing, they'll have already been split. Otherwise, make
4220 sure we don't split them too late. */
4221 if (! optimize)
4222 split_all_insns_noflow ();
4224 if (TARGET_SHMEDIA)
4225 return;
4227 /* If relaxing, generate pseudo-ops to associate function calls with
4228 the symbols they call. It does no harm to not generate these
4229 pseudo-ops. However, when we can generate them, it enables the
4230 linker to potentially relax the jsr to a bsr, and eliminate the
4231 register load and, possibly, the constant pool entry. */
4233 mdep_reorg_phase = SH_INSERT_USES_LABELS;
4234 if (TARGET_RELAX)
4236 /* Remove all REG_LABEL notes. We want to use them for our own
4237 purposes. This works because none of the remaining passes
4238 need to look at them.
4240 ??? But it may break in the future. We should use a machine
4241 dependent REG_NOTE, or some other approach entirely. */
4242 for (insn = first; insn; insn = NEXT_INSN (insn))
4244 if (INSN_P (insn))
4246 rtx note;
4248 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
4249 remove_note (insn, note);
4253 for (insn = first; insn; insn = NEXT_INSN (insn))
4255 rtx pattern, reg, link, set, scan, dies, label;
4256 int rescan = 0, foundinsn = 0;
4258 if (GET_CODE (insn) == CALL_INSN)
4260 pattern = PATTERN (insn);
4262 if (GET_CODE (pattern) == PARALLEL)
4263 pattern = XVECEXP (pattern, 0, 0);
4264 if (GET_CODE (pattern) == SET)
4265 pattern = SET_SRC (pattern);
4267 if (GET_CODE (pattern) != CALL
4268 || GET_CODE (XEXP (pattern, 0)) != MEM)
4269 continue;
4271 reg = XEXP (XEXP (pattern, 0), 0);
4273 else
4275 reg = sfunc_uses_reg (insn);
4276 if (! reg)
4277 continue;
4280 if (GET_CODE (reg) != REG)
4281 continue;
4283 /* This is a function call via REG. If the only uses of REG
4284 between the time that it is set and the time that it dies
4285 are in function calls, then we can associate all the
4286 function calls with the setting of REG. */
4288 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
4290 if (REG_NOTE_KIND (link) != 0)
4291 continue;
4292 set = single_set (XEXP (link, 0));
4293 if (set && rtx_equal_p (reg, SET_DEST (set)))
4295 link = XEXP (link, 0);
4296 break;
4300 if (! link)
4302 /* ??? Sometimes global register allocation will have
4303 deleted the insn pointed to by LOG_LINKS. Try
4304 scanning backward to find where the register is set. */
4305 for (scan = PREV_INSN (insn);
4306 scan && GET_CODE (scan) != CODE_LABEL;
4307 scan = PREV_INSN (scan))
4309 if (! INSN_P (scan))
4310 continue;
4312 if (! reg_mentioned_p (reg, scan))
4313 continue;
4315 if (noncall_uses_reg (reg, scan, &set))
4316 break;
4318 if (set)
4320 link = scan;
4321 break;
4326 if (! link)
4327 continue;
4329 /* The register is set at LINK. */
4331 /* We can only optimize the function call if the register is
4332 being set to a symbol. In theory, we could sometimes
4333 optimize calls to a constant location, but the assembler
4334 and linker do not support that at present. */
4335 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4336 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4337 continue;
4339 /* Scan forward from LINK to the place where REG dies, and
4340 make sure that the only insns which use REG are
4341 themselves function calls. */
4343 /* ??? This doesn't work for call targets that were allocated
4344 by reload, since there may not be a REG_DEAD note for the
4345 register. */
4347 dies = NULL_RTX;
4348 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4350 rtx scanset;
4352 /* Don't try to trace forward past a CODE_LABEL if we haven't
4353 seen INSN yet. Ordinarily, we will only find the setting insn
4354 in LOG_LINKS if it is in the same basic block. However,
4355 cross-jumping can insert code labels in between the load and
4356 the call, and can result in situations where a single call
4357 insn may have two targets depending on where we came from. */
4359 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4360 break;
4362 if (! INSN_P (scan))
4363 continue;
4365 /* Don't try to trace forward past a JUMP. To optimize
4366 safely, we would have to check that all the
4367 instructions at the jump destination did not use REG. */
4369 if (GET_CODE (scan) == JUMP_INSN)
4370 break;
4372 if (! reg_mentioned_p (reg, scan))
4373 continue;
4375 if (noncall_uses_reg (reg, scan, &scanset))
4376 break;
4378 if (scan == insn)
4379 foundinsn = 1;
4381 if (scan != insn
4382 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4384 /* There is a function call to this register other
4385 than the one we are checking. If we optimize
4386 this call, we need to rescan again below. */
4387 rescan = 1;
4390 /* ??? We shouldn't have to worry about SCANSET here.
4391 We should just be able to check for a REG_DEAD note
4392 on a function call. However, the REG_DEAD notes are
4393 apparently not dependable around libcalls; c-torture
4394 execute/920501-2 is a test case. If SCANSET is set,
4395 then this insn sets the register, so it must have
4396 died earlier. Unfortunately, this will only handle
4397 the cases in which the register is, in fact, set in a
4398 later insn. */
4400 /* ??? We shouldn't have to use FOUNDINSN here.
4401 However, the LOG_LINKS fields are apparently not
4402 entirely reliable around libcalls;
4403 newlib/libm/math/e_pow.c is a test case. Sometimes
4404 an insn will appear in LOG_LINKS even though it is
4405 not the most recent insn which sets the register. */
4407 if (foundinsn
4408 && (scanset
4409 || find_reg_note (scan, REG_DEAD, reg)))
4411 dies = scan;
4412 break;
4416 if (! dies)
4418 /* Either there was a branch, or some insn used REG
4419 other than as a function call address. */
4420 continue;
4423 /* Create a code label, and put it in a REG_LABEL note on
4424 the insn which sets the register, and on each call insn
4425 which uses the register. In final_prescan_insn we look
4426 for the REG_LABEL notes, and output the appropriate label
4427 or pseudo-op. */
4429 label = gen_label_rtx ();
4430 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
4431 REG_NOTES (link));
4432 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
4433 REG_NOTES (insn));
4434 if (rescan)
4436 scan = link;
4439 rtx reg2;
4441 scan = NEXT_INSN (scan);
4442 if (scan != insn
4443 && ((GET_CODE (scan) == CALL_INSN
4444 && reg_mentioned_p (reg, scan))
4445 || ((reg2 = sfunc_uses_reg (scan))
4446 && REGNO (reg2) == REGNO (reg))))
4447 REG_NOTES (scan)
4448 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
4450 while (scan != dies);
4455 if (TARGET_SH2)
4456 fixup_addr_diff_vecs (first);
4458 if (optimize)
4460 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4461 shorten_branches (first);
4463 /* Scan the function looking for move instructions which have to be
4464 changed to pc-relative loads and insert the literal tables. */
4466 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4467 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4469 if (mova_p (insn))
4471 /* ??? basic block reordering can move a switch table dispatch
4472 below the switch table. Check if that has happened.
4473 We only have the addresses available when optimizing; but then,
4474 this check shouldn't be needed when not optimizing. */
4475 rtx label_ref = XVECEXP (SET_SRC (PATTERN (insn)), 0, 0);
4476 if (optimize
4477 && (INSN_ADDRESSES (INSN_UID (insn))
4478 > INSN_ADDRESSES (INSN_UID (XEXP (label_ref, 0)))))
4480 /* Change the mova into a load.
4481 broken_move will then return true for it. */
4482 fixup_mova (insn);
4484 else if (! num_mova++)
4485 mova = insn;
4487 else if (GET_CODE (insn) == JUMP_INSN
4488 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4489 && num_mova)
4491 rtx scan;
4492 int total;
4494 num_mova--;
4496 /* Some code might have been inserted between the mova and
4497 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4498 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4499 total += get_attr_length (scan);
4501 /* range of mova is 1020, add 4 because pc counts from address of
4502 second instruction after this one, subtract 2 in case pc is 2
4503 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
4504 cancels out with alignment effects of the mova itself. */
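/* Hence the threshold below: 1020 + 4 - 2 == 1022.  */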
4505 if (total > 1022)
4507 /* Change the mova into a load, and restart scanning
4508 there. broken_move will then return true for mova. */
4509 fixup_mova (mova);
4510 insn = mova;
4513 if (broken_move (insn)
4514 || (GET_CODE (insn) == INSN
4515 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
4517 rtx scan;
4518 /* Scan ahead looking for a barrier to stick the constant table
4519 behind. */
4520 rtx barrier = find_barrier (num_mova, mova, insn);
4521 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4522 int need_aligned_label = 0;
4524 if (num_mova && ! mova_p (mova))
4526 /* find_barrier had to change the first mova into a
4527 pcload; thus, we have to start with this new pcload. */
4528 insn = mova;
4529 num_mova = 0;
4531 /* Now find all the moves between the points and modify them. */
4532 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
4534 if (GET_CODE (scan) == CODE_LABEL)
4535 last_float = 0;
4536 if (GET_CODE (scan) == INSN
4537 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
4538 need_aligned_label = 1;
4539 if (broken_move (scan))
4541 rtx *patp = &PATTERN (scan), pat = *patp;
4542 rtx src, dst;
4543 rtx lab;
4544 rtx newsrc;
4545 enum machine_mode mode;
4547 if (GET_CODE (pat) == PARALLEL)
4548 patp = &XVECEXP (pat, 0, 0), pat = *patp;
4549 src = SET_SRC (pat);
4550 dst = SET_DEST (pat);
4551 mode = GET_MODE (dst);
4553 if (mode == SImode && hi_const (src)
4554 && REGNO (dst) != FPUL_REG)
4556 int offset = 0;
4558 mode = HImode;
4559 while (GET_CODE (dst) == SUBREG)
4561 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
4562 GET_MODE (SUBREG_REG (dst)),
4563 SUBREG_BYTE (dst),
4564 GET_MODE (dst));
4565 dst = SUBREG_REG (dst);
4567 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
4569 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
4571 /* This must be an insn that clobbers r0. */
4572 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
4573 XVECLEN (PATTERN (scan), 0)
4574 - 1);
4575 rtx clobber = *clobberp;
4577 gcc_assert (GET_CODE (clobber) == CLOBBER
4578 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
4580 if (last_float
4581 && reg_set_between_p (r0_rtx, last_float_move, scan))
4582 last_float = 0;
4583 if (last_float
4584 && TARGET_SHCOMPACT
4585 && GET_MODE_SIZE (mode) != 4
4586 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
4587 last_float = 0;
4588 lab = add_constant (src, mode, last_float);
4589 if (lab)
4590 emit_insn_before (gen_mova (lab), scan);
4591 else
4593 /* There will be a REG_UNUSED note for r0 on
4594 LAST_FLOAT_MOVE; we have to change it to REG_INC,
4595 otherwise reorg:mark_target_live_regs will not
4596 consider r0 to be used, and we would end up with a delay
4597 slot insn in front of SCAN that clobbers r0. */
4598 rtx note
4599 = find_regno_note (last_float_move, REG_UNUSED, 0);
4601 /* If we are not optimizing, then there may not be
4602 a note. */
4603 if (note)
4604 PUT_MODE (note, REG_INC);
4606 *last_float_addr = r0_inc_rtx;
4608 last_float_move = scan;
4609 last_float = src;
4610 newsrc = gen_rtx_MEM (mode,
4611 (((TARGET_SH4 && ! TARGET_FMOVD)
4612 || REGNO (dst) == FPUL_REG)
4613 ? r0_inc_rtx
4614 : r0_rtx));
4615 last_float_addr = &XEXP (newsrc, 0);
4617 /* Remove the clobber of r0. */
4618 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
4619 gen_rtx_SCRATCH (Pmode));
4621 /* This is a mova needing a label. Create it. */
4622 else if (GET_CODE (src) == UNSPEC
4623 && XINT (src, 1) == UNSPEC_MOVA
4624 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
4626 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
4627 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4628 newsrc = gen_rtx_UNSPEC (SImode,
4629 gen_rtvec (1, newsrc),
4630 UNSPEC_MOVA);
4632 else
4634 lab = add_constant (src, mode, 0);
4635 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4636 newsrc = gen_const_mem (mode, newsrc);
4638 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
4639 INSN_CODE (scan) = -1;
4642 dump_table (need_aligned_label ? insn : 0, barrier);
4643 insn = barrier;
4647 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
4648 INSN_ADDRESSES_FREE ();
4649 split_branches (first);
4651 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4652 also has an effect on the register that holds the address of the sfunc.
4653 Insert an extra dummy insn in front of each sfunc that pretends to
4654 use this register. */
4655 if (flag_delayed_branch)
4657 for (insn = first; insn; insn = NEXT_INSN (insn))
4659 rtx reg = sfunc_uses_reg (insn);
4661 if (! reg)
4662 continue;
4663 emit_insn_before (gen_use_sfunc_addr (reg), insn);
4666 #if 0
4667 /* fpscr is not actually a user variable, but we pretend it is for the
4668 sake of the previous optimization passes, since we want it handled like
4669 one. However, we don't have any debugging information for it, so turn
4670 it into a non-user variable now. */
4671 if (TARGET_SH4)
4672 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4673 #endif
4674 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
4677 static int
4678 get_dest_uid (rtx label, int max_uid)
4680 rtx dest = next_real_insn (label);
4681 int dest_uid;
4682 if (! dest)
4683 /* This can happen for an undefined label. */
4684 return 0;
4685 dest_uid = INSN_UID (dest);
4686 /* If this is a newly created branch redirection blocking instruction,
4687 we cannot index the branch_uid or insn_addresses arrays with its
4688 uid. But then, we won't need to, because the actual destination is
4689 the following branch. */
4690 while (dest_uid >= max_uid)
4692 dest = NEXT_INSN (dest);
4693 dest_uid = INSN_UID (dest);
4695 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
4696 return 0;
4697 return dest_uid;
4700 /* Split condbranches that are out of range. Also add clobbers for
4701 scratch registers that are needed in far jumps.
4702 We do this before delay slot scheduling, so that it can take our
4703 newly created instructions into account. It also allows us to
4704 find branches with common targets more easily. */
4706 static void
4707 split_branches (rtx first)
4709 rtx insn;
4710 struct far_branch **uid_branch, *far_branch_list = 0;
4711 int max_uid = get_max_uid ();
4712 int ok;
4714 /* Find out which branches are out of range. */
4715 shorten_branches (first);
4717 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
4718 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
4720 for (insn = first; insn; insn = NEXT_INSN (insn))
4721 if (! INSN_P (insn))
4722 continue;
4723 else if (INSN_DELETED_P (insn))
4725 /* Shorten_branches would split this instruction again,
4726 so transform it into a note. */
4727 PUT_CODE (insn, NOTE);
4728 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4729 NOTE_SOURCE_FILE (insn) = 0;
4731 else if (GET_CODE (insn) == JUMP_INSN
4732 /* Don't mess with ADDR_DIFF_VEC */
4733 && (GET_CODE (PATTERN (insn)) == SET
4734 || GET_CODE (PATTERN (insn)) == RETURN))
4736 enum attr_type type = get_attr_type (insn);
4737 if (type == TYPE_CBRANCH)
4739 rtx next, beyond;
4741 if (get_attr_length (insn) > 4)
4743 rtx src = SET_SRC (PATTERN (insn));
4744 rtx olabel = XEXP (XEXP (src, 1), 0);
4745 int addr = INSN_ADDRESSES (INSN_UID (insn));
4746 rtx label = 0;
4747 int dest_uid = get_dest_uid (olabel, max_uid);
4748 struct far_branch *bp = uid_branch[dest_uid];
4750 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4751 the label if the LABEL_NUSES count drops to zero. There is
4752 always a jump_optimize pass that sets these values, but it
4753 proceeds to delete unreferenced code, and then if not
4754 optimizing, to un-delete the deleted instructions, thus
4755 leaving labels with too low use counts. */
4756 if (! optimize)
4758 JUMP_LABEL (insn) = olabel;
4759 LABEL_NUSES (olabel)++;
4761 if (! bp)
4763 bp = (struct far_branch *) alloca (sizeof *bp);
4764 uid_branch[dest_uid] = bp;
4765 bp->prev = far_branch_list;
4766 far_branch_list = bp;
4767 bp->far_label
4768 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
4769 LABEL_NUSES (bp->far_label)++;
4771 else
4773 label = bp->near_label;
4774 if (! label && bp->address - addr >= CONDJUMP_MIN)
4776 rtx block = bp->insert_place;
4778 if (GET_CODE (PATTERN (block)) == RETURN)
4779 block = PREV_INSN (block);
4780 else
4781 block = gen_block_redirect (block,
4782 bp->address, 2);
4783 label = emit_label_after (gen_label_rtx (),
4784 PREV_INSN (block));
4785 bp->near_label = label;
4787 else if (label && ! NEXT_INSN (label))
4789 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4790 bp->insert_place = insn;
4791 else
4792 gen_far_branch (bp);
4795 if (! label
4796 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4798 bp->near_label = label = gen_label_rtx ();
4799 bp->insert_place = insn;
4800 bp->address = addr;
4802 ok = redirect_jump (insn, label, 1);
4803 gcc_assert (ok);
4805 else
4807 /* get_attr_length (insn) == 2 */
4808 /* Check if we have a pattern where reorg wants to redirect
4809 the branch to a label from an unconditional branch that
4810 is too far away. */
4811 /* We can't use JUMP_LABEL here because it might be undefined
4812 when not optimizing. */
4813 /* A syntax error might cause beyond to be NULL_RTX. */
4814 beyond
4815 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4816 0));
4818 if (beyond
4819 && (GET_CODE (beyond) == JUMP_INSN
4820 || ((beyond = next_active_insn (beyond))
4821 && GET_CODE (beyond) == JUMP_INSN))
4822 && GET_CODE (PATTERN (beyond)) == SET
4823 && recog_memoized (beyond) == CODE_FOR_jump_compact
4824 && ((INSN_ADDRESSES
4825 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4826 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4827 > 252 + 258 + 2))
4828 gen_block_redirect (beyond,
4829 INSN_ADDRESSES (INSN_UID (beyond)), 1);
4832 next = next_active_insn (insn);
4834 if ((GET_CODE (next) == JUMP_INSN
4835 || ((next = next_active_insn (next))
4836 && GET_CODE (next) == JUMP_INSN))
4837 && GET_CODE (PATTERN (next)) == SET
4838 && recog_memoized (next) == CODE_FOR_jump_compact
4839 && ((INSN_ADDRESSES
4840 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
4841 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4842 > 252 + 258 + 2))
4843 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
4845 else if (type == TYPE_JUMP || type == TYPE_RETURN)
4847 int addr = INSN_ADDRESSES (INSN_UID (insn));
4848 rtx far_label = 0;
4849 int dest_uid = 0;
4850 struct far_branch *bp;
4852 if (type == TYPE_JUMP)
4854 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
4855 dest_uid = get_dest_uid (far_label, max_uid);
4856 if (! dest_uid)
4858 /* Parse errors can lead to labels outside
4859 the insn stream. */
4860 if (! NEXT_INSN (far_label))
4861 continue;
4863 if (! optimize)
4865 JUMP_LABEL (insn) = far_label;
4866 LABEL_NUSES (far_label)++;
4868 redirect_jump (insn, NULL_RTX, 1);
4869 far_label = 0;
4872 bp = uid_branch[dest_uid];
4873 if (! bp)
4875 bp = (struct far_branch *) alloca (sizeof *bp);
4876 uid_branch[dest_uid] = bp;
4877 bp->prev = far_branch_list;
4878 far_branch_list = bp;
4879 bp->near_label = 0;
4880 bp->far_label = far_label;
4881 if (far_label)
4882 LABEL_NUSES (far_label)++;
4884 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
4885 if (addr - bp->address <= CONDJUMP_MAX)
4886 emit_label_after (bp->near_label, PREV_INSN (insn));
4887 else
4889 gen_far_branch (bp);
4890 bp->near_label = 0;
4892 else
4893 bp->near_label = 0;
4894 bp->address = addr;
4895 bp->insert_place = insn;
4896 if (! far_label)
4897 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
4898 else
4899 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
4902 /* Generate all pending far branches,
4903 and free our references to the far labels. */
4904 while (far_branch_list)
4906 if (far_branch_list->near_label
4907 && ! NEXT_INSN (far_branch_list->near_label))
4908 gen_far_branch (far_branch_list);
4909 if (optimize
4910 && far_branch_list->far_label
4911 && ! --LABEL_NUSES (far_branch_list->far_label))
4912 delete_insn (far_branch_list->far_label);
4913 far_branch_list = far_branch_list->prev;
4916 /* Instruction length information is no longer valid due to the new
4917 instructions that have been generated. */
4918 init_insn_lengths ();
4921 /* Dump out instruction addresses, which is useful for debugging the
4922 constant pool table stuff.
4924 If relaxing, output the label and pseudo-ops used to link together
4925 calls and the instruction which set the registers. */
4927 /* ??? The addresses printed by this routine for insns are nonsense for
4928 insns which are inside of a sequence where none of the inner insns have
4929 variable length. This is because the second pass of shorten_branches
4930 does not bother to update them. */
4932 void
4933 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
4934 int noperands ATTRIBUTE_UNUSED)
4936 if (TARGET_DUMPISIZE)
4937 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
4939 if (TARGET_RELAX)
4941 rtx note;
4943 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
4944 if (note)
4946 rtx pattern;
4948 pattern = PATTERN (insn);
4949 switch (GET_CODE (pattern))
4951 case PARALLEL:
4952 pattern = XVECEXP (pattern, 0, 0);
4953 break;
4955 case SET:
4956 if (GET_CODE (SET_SRC (pattern)) != CALL
4957 && get_attr_type (insn) != TYPE_SFUNC)
4959 targetm.asm_out.internal_label
4960 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
4961 break;
4963 /* else FALLTHROUGH */
4964 case CALL:
4965 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
4966 CODE_LABEL_NUMBER (XEXP (note, 0)));
4967 break;
4969 default:
4970 gcc_unreachable ();
4976 /* Dump out any constants accumulated in the final pass. These will
4977 only be labels. */
4979 const char *
4980 output_jump_label_table (void)
4982 int i;
4984 if (pool_size)
4986 fprintf (asm_out_file, "\t.align 2\n");
4987 for (i = 0; i < pool_size; i++)
4989 pool_node *p = &pool_vector[i];
4991 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4992 CODE_LABEL_NUMBER (p->label));
4993 output_asm_insn (".long %O0", &p->value);
4995 pool_size = 0;
4998 return "";
5001 /* A full frame looks like:
5003 arg-5
5004 arg-4
5005 [ if current_function_anonymous_args
5006 arg-3
5007 arg-2
5008 arg-1
5009 arg-0 ]
5010 saved-fp
5011 saved-r10
5012 saved-r11
5013 saved-r12
5014 saved-pr
5015 local-n
5017 local-1
5018 local-0 <- fp points here. */
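/* For instance (illustrative only, assuming 4-byte slots and no extra
   padding): with two locals and fp, r10, r11, r12 and pr saved, local-0
   is at fp+0, local-1 at fp+4, saved-pr at fp+8, saved-r12 at fp+12,
   saved-r11 at fp+16, saved-r10 at fp+20, saved-fp at fp+24, and the
   incoming stack arguments start above that.  */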
5020 /* Number of bytes pushed for anonymous args, used to pass information
5021 between expand_prologue and expand_epilogue. */
5023 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
5024 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
5025 for an epilogue and a negative value means that it's for a sibcall
5026 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
5027 all the registers that are about to be restored, and hence dead. */
5029 static void
5030 output_stack_adjust (int size, rtx reg, int epilogue_p,
5031 HARD_REG_SET *live_regs_mask)
5033 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
5034 if (size)
5036 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5038 /* This test is bogus, as output_stack_adjust is used to re-align the
5039 stack. */
5040 #if 0
5041 gcc_assert (!(size % align));
5042 #endif
5044 if (CONST_OK_FOR_ADD (size))
5045 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
5046 /* Try to do it with two partial adjustments; however, we must make
5047 sure that the stack is properly aligned at all times, in case
5048 an interrupt occurs between the two partial adjustments. */
5049 else if (CONST_OK_FOR_ADD (size / 2 & -align)
5050 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
5052 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
5053 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
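/* E.g. for SIZE == 192 with ALIGN == 8 and an 8-bit signed add
   immediate: 192 itself is out of range, but 192 / 2 & -8 == 96 and
   192 - 96 == 96 both fit, and the first step is a multiple of ALIGN,
   so the stack stays aligned between the two adds.  */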
5055 else
5057 rtx const_reg;
5058 rtx insn;
5059 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
5060 int i;
5062 /* If TEMP is invalid, we could temporarily save a general
5063 register to MACL. However, there is currently no need
5064 to handle this case, so just die when we see it. */
5065 if (epilogue_p < 0
5066 || current_function_interrupt
5067 || ! call_really_used_regs[temp] || fixed_regs[temp])
5068 temp = -1;
5069 if (temp < 0 && ! current_function_interrupt
5070 && (TARGET_SHMEDIA || epilogue_p >= 0))
5072 HARD_REG_SET temps;
5073 COPY_HARD_REG_SET (temps, call_used_reg_set);
5074 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
5075 if (epilogue_p > 0)
5077 int nreg = 0;
5078 if (current_function_return_rtx)
5080 enum machine_mode mode;
5081 mode = GET_MODE (current_function_return_rtx);
5082 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
5083 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
5085 for (i = 0; i < nreg; i++)
5086 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
5087 if (current_function_calls_eh_return)
5089 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
5090 for (i = 0; i <= 3; i++)
5091 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
5094 if (TARGET_SHMEDIA && epilogue_p < 0)
5095 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
5096 CLEAR_HARD_REG_BIT (temps, i);
5097 if (epilogue_p <= 0)
5099 for (i = FIRST_PARM_REG;
5100 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
5101 CLEAR_HARD_REG_BIT (temps, i);
5102 if (cfun->static_chain_decl != NULL)
5103 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
5105 temp = scavenge_reg (&temps);
5107 if (temp < 0 && live_regs_mask)
5108 temp = scavenge_reg (live_regs_mask);
5109 if (temp < 0)
5111 rtx adj_reg, tmp_reg, mem;
5113 /* If we reached here, the most likely case is the (sibcall)
5114 epilogue for non-SHmedia. Put a special push/pop sequence
5115 for such a case as a last resort. This looks lengthy but
5116 it would not be a problem because it seems to be very
5117 rare. */
5119 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
5122 /* ??? There is still the slight possibility that r4 or
5123 r5 have been reserved as fixed registers or assigned
5124 as global registers, and they change during an
5125 interrupt. There are possible ways to handle this:
5127 - If we are adjusting the frame pointer (r14), we can do
5128 with a single temp register and an ordinary push / pop
5129 on the stack.
5130 - Grab any call-used or call-saved registers (i.e. not
5131 fixed or globals) for the temps we need. We might
5132 also grab r14 if we are adjusting the stack pointer.
5133 If we can't find enough available registers, issue
5134 a diagnostic and die - the user must have reserved
5135 way too many registers.
5136 But since all this is rather unlikely to happen and
5137 would require extra testing, we just die if r4 / r5
5138 are not available. */
5139 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
5140 && !global_regs[4] && !global_regs[5]);
5142 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
5143 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
5144 emit_move_insn (gen_rtx_MEM (Pmode, reg), adj_reg);
5145 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
5146 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
5147 mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5148 emit_move_insn (mem, tmp_reg);
5149 emit_move_insn (tmp_reg, gen_rtx_MEM (Pmode, reg));
5150 mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5151 emit_move_insn (mem, tmp_reg);
5152 emit_move_insn (reg, adj_reg);
5153 mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
5154 emit_move_insn (adj_reg, mem);
5155 mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
5156 emit_move_insn (tmp_reg, mem);
5157 return;
5159 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
5161 /* If SIZE is negative, subtract the positive value.
5162 This sometimes allows a constant pool entry to be shared
5163 between prologue and epilogue code. */
5164 if (size < 0)
5166 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
5167 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
5169 else
5171 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
5172 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
5174 if (! epilogue_p)
5175 REG_NOTES (insn)
5176 = (gen_rtx_EXPR_LIST
5177 (REG_FRAME_RELATED_EXPR,
5178 gen_rtx_SET (VOIDmode, reg,
5179 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
5180 REG_NOTES (insn)));
5185 static rtx
5186 frame_insn (rtx x)
5188 x = emit_insn (x);
5189 RTX_FRAME_RELATED_P (x) = 1;
5190 return x;
5193 /* Output RTL to push register RN onto the stack. */
5195 static rtx
5196 push (int rn)
5198 rtx x;
5199 if (rn == FPUL_REG)
5200 x = gen_push_fpul ();
5201 else if (rn == FPSCR_REG)
5202 x = gen_push_fpscr ();
5203 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5204 && FP_OR_XD_REGISTER_P (rn))
5206 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5207 return NULL_RTX;
5208 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
5210 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5211 x = gen_push_e (gen_rtx_REG (SFmode, rn));
5212 else
5213 x = gen_push (gen_rtx_REG (SImode, rn));
5215 x = frame_insn (x);
5216 REG_NOTES (x)
5217 = gen_rtx_EXPR_LIST (REG_INC,
5218 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5219 return x;
5222 /* Output RTL to pop register RN from the stack. */
5224 static void
5225 pop (int rn)
5227 rtx x;
5228 if (rn == FPUL_REG)
5229 x = gen_pop_fpul ();
5230 else if (rn == FPSCR_REG)
5231 x = gen_pop_fpscr ();
5232 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5233 && FP_OR_XD_REGISTER_P (rn))
5235 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5236 return;
5237 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
5239 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5240 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
5241 else
5242 x = gen_pop (gen_rtx_REG (SImode, rn));
5244 x = emit_insn (x);
5245 REG_NOTES (x)
5246 = gen_rtx_EXPR_LIST (REG_INC,
5247 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5250 /* Generate code to push the regs specified in the mask. */
5252 static void
5253 push_regs (HARD_REG_SET *mask, int interrupt_handler)
5255 int i;
5256 int skip_fpscr = 0;
5258 /* Push PR last; this gives better latencies after the prologue, and
5259 candidates for the return delay slot when there are no general
5260 registers pushed. */
5261 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5263 /* If this is an interrupt handler, and the SZ bit varies,
5264 and we have to push any floating point register, we need
5265 to switch to the correct precision first. */
5266 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
5267 && hard_regs_intersect_p (mask, &reg_class_contents[DF_REGS]))
5269 HARD_REG_SET unsaved;
5271 push (FPSCR_REG);
5272 COMPL_HARD_REG_SET (unsaved, *mask);
5273 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
5274 skip_fpscr = 1;
5276 if (i != PR_REG
5277 && (i != FPSCR_REG || ! skip_fpscr)
5278 && TEST_HARD_REG_BIT (*mask, i))
5279 push (i);
5281 if (TEST_HARD_REG_BIT (*mask, PR_REG))
5282 push (PR_REG);
5285 /* Calculate how much extra space is needed to save all callee-saved
5286 target registers.
5287 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5289 static int
5290 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
5292 int reg;
5293 int stack_space = 0;
5294 int interrupt_handler = sh_cfun_interrupt_handler_p ();
5296 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5297 if ((! call_really_used_regs[reg] || interrupt_handler)
5298 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5299 /* Leave space to save this target register on the stack,
5300 in case target register allocation wants to use it. */
5301 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5302 return stack_space;
5305 /* Decide whether we should reserve space for callee-save target registers,
5306 in case target register allocation wants to use them. REGS_SAVED is
5307 the space, in bytes, that is already required for register saves.
5308 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5310 static int
5311 shmedia_reserve_space_for_target_registers_p (int regs_saved,
5312 HARD_REG_SET *live_regs_mask)
5314 if (optimize_size)
5315 return 0;
5316 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
5319 /* Decide how much space to reserve for callee-save target registers
5320 in case target register allocation wants to use them.
5321 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5323 static int
5324 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5326 if (shmedia_space_reserved_for_target_registers)
5327 return shmedia_target_regs_stack_space (live_regs_mask);
5328 else
5329 return 0;
5332 /* Work out the registers which need to be saved, both as a mask and a
5333 count of saved words. Return the count.
5335 If doing a pragma interrupt function, then push all regs used by the
5336 function, and if we call another function (we can tell by looking at PR),
5337 make sure that all the regs it clobbers are safe too. */
5339 static int
5340 calc_live_regs (HARD_REG_SET *live_regs_mask)
5342 unsigned int reg;
5343 int count;
5344 int interrupt_handler;
5345 int pr_live, has_call;
5347 interrupt_handler = sh_cfun_interrupt_handler_p ();
5349 CLEAR_HARD_REG_SET (*live_regs_mask);
5350 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
5351 && regs_ever_live[FPSCR_REG])
5352 target_flags &= ~MASK_FPU_SINGLE;
5353 /* If we can avoid a lot of saves by switching to double mode, do that. */
5354 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
5355 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5356 if (regs_ever_live[reg] && regs_ever_live[reg+1]
5357 && (! call_really_used_regs[reg]
5358 || (interrupt_handler && ! pragma_trapa))
5359 && ++count > 2)
5361 target_flags &= ~MASK_FPU_SINGLE;
5362 break;
5364 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5365 knows how to use it. That means the pseudo originally allocated for
5366 the initial value can become the PR_MEDIA_REG hard register, as seen for
5367 execute/20010122-1.c:test9. */
5368 if (TARGET_SHMEDIA)
5369 /* ??? this function is called from initial_elimination_offset, hence we
5370 can't use the result of sh_media_register_for_return here. */
5371 pr_live = sh_pr_n_sets ();
5372 else
5374 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5375 pr_live = (pr_initial
5376 ? (GET_CODE (pr_initial) != REG
5377 || REGNO (pr_initial) != (PR_REG))
5378 : regs_ever_live[PR_REG]);
5379 /* For Shcompact, if not optimizing, we end up with a memory reference
5380 using the return address pointer for __builtin_return_address even
5381 though there is no actual need to put the PR register on the stack. */
5382 pr_live |= regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM];
5384 /* Force PR to be live if the prologue has to call the SHmedia
5385 argument decoder or register saver. */
5386 if (TARGET_SHCOMPACT
5387 && ((current_function_args_info.call_cookie
5388 & ~ CALL_COOKIE_RET_TRAMP (1))
5389 || current_function_has_nonlocal_label))
5390 pr_live = 1;
5391 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
5392 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
5394 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5395 ? pr_live
5396 : (interrupt_handler && ! pragma_trapa)
5397 ? (/* Need to save all the regs ever live. */
5398 (regs_ever_live[reg]
5399 || (call_really_used_regs[reg]
5400 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
5401 || reg == PIC_OFFSET_TABLE_REGNUM)
5402 && has_call)
5403 || (has_call && REGISTER_NATURAL_MODE (reg) == SImode
5404 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5405 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5406 && reg != RETURN_ADDRESS_POINTER_REGNUM
5407 && reg != T_REG && reg != GBR_REG
5408 /* Push fpscr only on targets which have FPU */
5409 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5410 : (/* Only push those regs which are used and need to be saved. */
5411 (TARGET_SHCOMPACT
5412 && flag_pic
5413 && current_function_args_info.call_cookie
5414 && reg == PIC_OFFSET_TABLE_REGNUM)
5415 || (regs_ever_live[reg] && ! call_really_used_regs[reg])
5416 || (current_function_calls_eh_return
5417 && (reg == EH_RETURN_DATA_REGNO (0)
5418 || reg == EH_RETURN_DATA_REGNO (1)
5419 || reg == EH_RETURN_DATA_REGNO (2)
5420 || reg == EH_RETURN_DATA_REGNO (3)))
5421 || ((reg == MACL_REG || reg == MACH_REG)
5422 && regs_ever_live[reg]
5423 && sh_cfun_attr_renesas_p ())
5426 SET_HARD_REG_BIT (*live_regs_mask, reg);
5427 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5429 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
5430 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
5432 if (FP_REGISTER_P (reg))
5434 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
5436 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5437 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
5440 else if (XD_REGISTER_P (reg))
5442 /* Must switch to double mode to access these registers. */
5443 target_flags &= ~MASK_FPU_SINGLE;
5448 /* If we have a target register optimization pass after prologue / epilogue
5449 threading, we need to assume all target registers will be live even if
5450 they aren't now. */
5451 if (flag_branch_target_load_optimize2
5452 && TARGET_SAVE_ALL_TARGET_REGS
5453 && shmedia_space_reserved_for_target_registers)
5454 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5455 if ((! call_really_used_regs[reg] || interrupt_handler)
5456 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5458 SET_HARD_REG_BIT (*live_regs_mask, reg);
5459 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5461 /* If this is an interrupt handler, we don't have any call-clobbered
5462 registers we can conveniently use for target register save/restore.
5463 Make sure we save at least one general purpose register when we need
5464 to save target registers. */
5465 if (interrupt_handler
5466 && hard_regs_intersect_p (live_regs_mask,
5467 &reg_class_contents[TARGET_REGS])
5468 && ! hard_regs_intersect_p (live_regs_mask,
5469 &reg_class_contents[GENERAL_REGS]))
5471 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
5472 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
5475 return count;
5478 /* Code to generate prologue and epilogue sequences */
5480 /* PUSHED is the number of bytes that are being pushed on the
5481 stack for register saves. Return the frame size, padded
5482 appropriately so that the stack stays properly aligned. */
5483 static HOST_WIDE_INT
5484 rounded_frame_size (int pushed)
5486 HOST_WIDE_INT size = get_frame_size ();
5487 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5489 return ((size + pushed + align - 1) & -align) - pushed;
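/* E.g. with an 8-byte alignment (STACK_BOUNDARY / BITS_PER_UNIT), 18
   bytes of locals and 12 bytes of register pushes:
   ((18 + 12 + 7) & -8) - 12 == 32 - 12 == 20, i.e. the local frame is
   padded to 20 bytes so that pushes plus frame (32) stay a multiple of
   the boundary.  (Illustrative numbers only.)  */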
5492 /* Choose a call-clobbered target-branch register that remains
5493 unchanged along the whole function. We set it up as the return
5494 value in the prologue. */
5495 int
5496 sh_media_register_for_return (void)
5498 int regno;
5499 int tr0_used;
5501 if (! current_function_is_leaf)
5502 return -1;
5503 if (lookup_attribute ("interrupt_handler",
5504 DECL_ATTRIBUTES (current_function_decl)))
5505 return -1;
5506 if (sh_cfun_interrupt_handler_p ())
5507 return -1;
5509 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
5511 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
5512 if (call_really_used_regs[regno] && ! regs_ever_live[regno])
5513 return regno;
5515 return -1;
5518 /* The maximum registers we need to save are:
5519 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
5520 - 32 floating point registers (for each pair, we save none,
5521 one single precision value, or a double precision value).
5522 - 8 target registers
5523 - add 1 entry for a delimiter. */
5524 #define MAX_SAVED_REGS (62+32+8)
5526 typedef struct save_entry_s
5528 unsigned char reg;
5529 unsigned char mode;
5530 short offset;
5531 } save_entry;
5533 #define MAX_TEMPS 4
5535 /* There will be a delimiter entry with VOIDmode both at the start and the
5536 end of a filled in schedule. The end delimiter has the offset of the
5537 save with the smallest (i.e. most negative) offset. */
5538 typedef struct save_schedule_s
5540 save_entry entries[MAX_SAVED_REGS + 2];
5541 int temps[MAX_TEMPS+1];
5542 } save_schedule;
5544 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
5545 use reverse order. Returns the last entry written to (not counting
5546 the delimiter). OFFSET_BASE is a number to be added to all offset
5547 entries. */
5549 static save_entry *
5550 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
5551 int offset_base)
5553 int align, i;
5554 save_entry *entry = schedule->entries;
5555 int tmpx = 0;
5556 int offset;
5558 if (! current_function_interrupt)
5559 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
5560 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
5561 && ! FUNCTION_ARG_REGNO_P (i)
5562 && i != FIRST_RET_REG
5563 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
5564 && ! (current_function_calls_eh_return
5565 && (i == EH_RETURN_STACKADJ_REGNO
5566 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
5567 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
5568 schedule->temps[tmpx++] = i;
5569 entry->reg = -1;
5570 entry->mode = VOIDmode;
5571 entry->offset = offset_base;
5572 entry++;
5573 /* We loop twice: first, we save 8-byte aligned registers in the
5574 higher addresses, which are known to be aligned. Then, we
5575 proceed to saving 32-bit registers that don't need 8-byte
5576 alignment.
5577 If this is an interrupt function, all registers that need saving
5578 need to be saved in full. Moreover, we need to postpone saving
5579 target registers till we have saved some general purpose registers
5580 we can then use as scratch registers. */
5581 offset = offset_base;
5582 for (align = 1; align >= 0; align--)
5584 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
5585 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5587 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5588 int reg = i;
5590 if (current_function_interrupt)
5592 if (TARGET_REGISTER_P (i))
5593 continue;
5594 if (GENERAL_REGISTER_P (i))
5595 mode = DImode;
5597 if (mode == SFmode && (i % 2) == 1
5598 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5599 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
5601 mode = DFmode;
5602 i--;
5603 reg--;
5606 /* If we're doing the aligned pass and this is not aligned,
5607 or we're doing the unaligned pass and this is aligned,
5608 skip it. */
5609 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
5610 != align)
5611 continue;
5613 if (current_function_interrupt
5614 && GENERAL_REGISTER_P (i)
5615 && tmpx < MAX_TEMPS)
5616 schedule->temps[tmpx++] = i;
5618 offset -= GET_MODE_SIZE (mode);
5619 entry->reg = i;
5620 entry->mode = mode;
5621 entry->offset = offset;
5622 entry++;
5624 if (align && current_function_interrupt)
5625 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
5626 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5628 offset -= GET_MODE_SIZE (DImode);
5629 entry->reg = i;
5630 entry->mode = DImode;
5631 entry->offset = offset;
5632 entry++;
5635 entry->reg = -1;
5636 entry->mode = VOIDmode;
5637 entry->offset = offset;
5638 schedule->temps[tmpx] = -1;
5639 return entry - 1;
5642 void
5643 sh_expand_prologue (void)
5645 HARD_REG_SET live_regs_mask;
5646 int d, i;
5647 int d_rounding = 0;
5648 int save_flags = target_flags;
5649 int pretend_args;
5651 current_function_interrupt = sh_cfun_interrupt_handler_p ();
5653 /* We have pretend args if we had an object sent partially in registers
5654 and partially on the stack, e.g. a large structure. */
5655 pretend_args = current_function_pretend_args_size;
5656 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
5657 && (NPARM_REGS(SImode)
5658 > current_function_args_info.arg_count[(int) SH_ARG_INT]))
5659 pretend_args = 0;
5660 output_stack_adjust (-pretend_args
5661 - current_function_args_info.stack_regs * 8,
5662 stack_pointer_rtx, 0, NULL);
5664 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
5665 /* We're going to use the PIC register to load the address of the
5666 incoming-argument decoder and/or of the return trampoline from
5667 the GOT, so make sure the PIC register is preserved and
5668 initialized. */
5669 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5671 if (TARGET_SHCOMPACT
5672 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5674 int reg;
5676 /* First, make all registers with incoming arguments that will
5677 be pushed onto the stack live, so that register renaming
5678 doesn't overwrite them. */
5679 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
5680 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
5681 >= NPARM_REGS (SImode) - reg)
5682 for (; reg < NPARM_REGS (SImode); reg++)
5683 emit_insn (gen_shcompact_preserve_incoming_args
5684 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5685 else if (CALL_COOKIE_INT_REG_GET
5686 (current_function_args_info.call_cookie, reg) == 1)
5687 emit_insn (gen_shcompact_preserve_incoming_args
5688 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5690 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
5691 stack_pointer_rtx);
5692 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
5693 GEN_INT (current_function_args_info.call_cookie));
5694 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
5695 gen_rtx_REG (SImode, R0_REG));
5697 else if (TARGET_SHMEDIA)
5699 int tr = sh_media_register_for_return ();
5701 if (tr >= 0)
5703 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
5704 gen_rtx_REG (DImode, PR_MEDIA_REG));
5706 /* ??? We should suppress saving pr when we don't need it, but this
5707 is tricky because of builtin_return_address. */
5709 /* If this function only exits with sibcalls, this copy
5710 will be flagged as dead. */
5711 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5712 const0_rtx,
5713 REG_NOTES (insn));
5717 /* Emit the code for SETUP_VARARGS. */
5718 if (current_function_stdarg)
5720 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
5722 /* Push arg regs as if they'd been provided by caller in stack. */
5723 for (i = 0; i < NPARM_REGS(SImode); i++)
5725 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
5726 rtx insn;
5728 if (i >= (NPARM_REGS(SImode)
5729 - current_function_args_info.arg_count[(int) SH_ARG_INT]
5731 break;
5732 insn = push (rn);
5733 RTX_FRAME_RELATED_P (insn) = 0;
5738 /* If we're supposed to switch stacks at function entry, do so now. */
5739 if (sp_switch)
5740 emit_insn (gen_sp_switch_1 ());
5742 d = calc_live_regs (&live_regs_mask);
5743 /* ??? Maybe we could save some switching if we can move a mode switch
5744 that already happens to be at the function start into the prologue. */
5745 if (target_flags != save_flags && ! current_function_interrupt)
5746 emit_insn (gen_toggle_sz ());
5748 if (TARGET_SH5)
5750 int offset_base, offset;
5751 rtx r0 = NULL_RTX;
5752 int offset_in_r0 = -1;
5753 int sp_in_r0 = 0;
5754 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5755 int total_size, save_size;
5756 save_schedule schedule;
5757 save_entry *entry;
5758 int *tmp_pnt;
5760 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
5761 && ! current_function_interrupt)
5762 r0 = gen_rtx_REG (Pmode, R0_REG);
5764 /* D is the actual number of bytes that we need for saving registers,
5765 however, in initial_elimination_offset we have committed to using
5766 an additional TREGS_SPACE amount of bytes - in order to keep both
5767 addresses to arguments supplied by the caller and local variables
5768 valid, we must keep this gap. Place it between the incoming
5769 arguments and the actually saved registers in a bid to optimize
5770 locality of reference. */
5771 total_size = d + tregs_space;
5772 total_size += rounded_frame_size (total_size);
5773 save_size = total_size - rounded_frame_size (d);
5774 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
5775 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5776 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
5778 /* If adjusting the stack in a single step costs nothing extra, do so.
5779 I.e. either if a single addi is enough, or we need a movi anyway,
5780 and we don't exceed the maximum offset range (the test for the
5781 latter is conservative for simplicity). */
5782 if (TARGET_SHMEDIA
5783 && (CONST_OK_FOR_I10 (-total_size)
5784 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
5785 && total_size <= 2044)))
5786 d_rounding = total_size - save_size;
5788 offset_base = d + d_rounding;
5790 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
5791 0, NULL);
5793 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
5794 tmp_pnt = schedule.temps;
5795 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
5797 enum machine_mode mode = entry->mode;
5798 unsigned int reg = entry->reg;
5799 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
5800 rtx orig_reg_rtx;
5802 offset = entry->offset;
5804 reg_rtx = gen_rtx_REG (mode, reg);
5806 mem_rtx = gen_rtx_MEM (mode,
5807 gen_rtx_PLUS (Pmode,
5808 stack_pointer_rtx,
5809 GEN_INT (offset)));
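 /* If the SP + offset address just formed is legitimate for MODE, the
 GO_IF_LEGITIMATE_ADDRESS below jumps past the fallback and the save
 goes straight through it; otherwise mem_rtx is cleared and the value
 is saved through an r0-based address instead, using pre-decrement
 where possible. */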
5811 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
5813 gcc_assert (r0);
5814 mem_rtx = NULL_RTX;
5816 try_pre_dec:
5818 if (HAVE_PRE_DECREMENT
5819 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
5820 || mem_rtx == NULL_RTX
5821 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5823 pre_dec = gen_rtx_MEM (mode,
5824 gen_rtx_PRE_DEC (Pmode, r0));
5826 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
5827 pre_dec_ok);
5829 pre_dec = NULL_RTX;
5831 break;
5833 pre_dec_ok:
5834 mem_rtx = NULL_RTX;
5835 offset += GET_MODE_SIZE (mode);
5837 while (0);
5839 if (mem_rtx != NULL_RTX)
5840 goto addr_ok;
5842 if (offset_in_r0 == -1)
5844 emit_move_insn (r0, GEN_INT (offset));
5845 offset_in_r0 = offset;
5847 else if (offset != offset_in_r0)
5849 emit_move_insn (r0,
5850 gen_rtx_PLUS
5851 (Pmode, r0,
5852 GEN_INT (offset - offset_in_r0)));
5853 offset_in_r0 += offset - offset_in_r0;
5856 if (pre_dec != NULL_RTX)
5858 if (! sp_in_r0)
5860 emit_move_insn (r0,
5861 gen_rtx_PLUS
5862 (Pmode, r0, stack_pointer_rtx));
5863 sp_in_r0 = 1;
5866 offset -= GET_MODE_SIZE (mode);
5867 offset_in_r0 -= GET_MODE_SIZE (mode);
5869 mem_rtx = pre_dec;
5871 else if (sp_in_r0)
5872 mem_rtx = gen_rtx_MEM (mode, r0);
5873 else
5874 mem_rtx = gen_rtx_MEM (mode,
5875 gen_rtx_PLUS (Pmode,
5876 stack_pointer_rtx,
5877 r0));
5879 /* We must not use an r0-based address for target-branch
5880 registers or for special registers without pre-dec
5881 memory addresses, since we store their values in r0
5882 first. */
5883 gcc_assert (!TARGET_REGISTER_P (reg)
5884 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
5885 || mem_rtx == pre_dec));
5887 addr_ok:
5888 orig_reg_rtx = reg_rtx;
5889 if (TARGET_REGISTER_P (reg)
5890 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5891 && mem_rtx != pre_dec))
5893 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
5895 emit_move_insn (tmp_reg, reg_rtx);
5897 if (REGNO (tmp_reg) == R0_REG)
5899 offset_in_r0 = -1;
5900 sp_in_r0 = 0;
5901 gcc_assert (!refers_to_regno_p
5902 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
5905 if (*++tmp_pnt <= 0)
5906 tmp_pnt = schedule.temps;
5908 reg_rtx = tmp_reg;
5911 rtx insn;
5913 /* Mark as interesting for dwarf cfi generator */
5914 insn = emit_move_insn (mem_rtx, reg_rtx);
5915 RTX_FRAME_RELATED_P (insn) = 1;
5916 /* If we use an intermediate register for the save, we can't
5917 describe this exactly in cfi as a copy of the to-be-saved
5918 register into the temporary register and then the temporary
5919 register on the stack, because the temporary register can
5920 have a different natural size than the to-be-saved register.
5921 Thus, we gloss over the intermediate copy and pretend we do
5922 a direct save from the to-be-saved register. */
5923 if (REGNO (reg_rtx) != reg)
5925 rtx set, note_rtx;
5927 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
5928 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
5929 REG_NOTES (insn));
5930 REG_NOTES (insn) = note_rtx;
5933 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
5935 rtx reg_rtx = gen_rtx_REG (mode, reg);
5936 rtx set, note_rtx;
5937 rtx mem_rtx = gen_rtx_MEM (mode,
5938 gen_rtx_PLUS (Pmode,
5939 stack_pointer_rtx,
5940 GEN_INT (offset)));
5942 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
5943 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
5944 REG_NOTES (insn));
5945 REG_NOTES (insn) = note_rtx;
5950 gcc_assert (entry->offset == d_rounding);
5952 else
5953 push_regs (&live_regs_mask, current_function_interrupt);
5955 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
5957 rtx insn = get_last_insn ();
5958 rtx last = emit_insn (gen_GOTaddr2picreg ());
5960 /* Mark these insns as possibly dead. Sometimes, flow2 may
5961 delete all uses of the PIC register. In this case, let it
5962 delete the initialization too. */
5965 insn = NEXT_INSN (insn);
5967 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5968 const0_rtx,
5969 REG_NOTES (insn));
5971 while (insn != last);
5974 if (SHMEDIA_REGS_STACK_ADJUST ())
5976 /* This must NOT go through the PLT, otherwise mach and macl
5977 may be clobbered. */
5978 function_symbol (gen_rtx_REG (Pmode, R0_REG),
5979 (TARGET_FPU_ANY
5980 ? "__GCC_push_shmedia_regs"
5981 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
5982 emit_insn (gen_shmedia_save_restore_regs_compact
5983 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
5986 if (target_flags != save_flags && ! current_function_interrupt)
5988 rtx insn = emit_insn (gen_toggle_sz ());
5990 /* If we're lucky, a mode switch in the function body will
5991 overwrite fpscr, turning this insn dead. Tell flow this
5992 insn is ok to delete. */
5993 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5994 const0_rtx,
5995 REG_NOTES (insn));
5998 target_flags = save_flags;
6000 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
6001 stack_pointer_rtx, 0, NULL);
6003 if (frame_pointer_needed)
6004 frame_insn (GEN_MOV (frame_pointer_rtx, stack_pointer_rtx));
6006 if (TARGET_SHCOMPACT
6007 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6009 /* This must NOT go through the PLT, otherwise mach and macl
6010 may be clobbered. */
6011 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6012 "__GCC_shcompact_incoming_args", SFUNC_GOT);
6013 emit_insn (gen_shcompact_incoming_args ());
6017 void
6018 sh_expand_epilogue (bool sibcall_p)
6020 HARD_REG_SET live_regs_mask;
6021 int d, i;
6022 int d_rounding = 0;
6024 int save_flags = target_flags;
6025 int frame_size, save_size;
6026 int fpscr_deferred = 0;
6027 int e = sibcall_p ? -1 : 1;
6029 d = calc_live_regs (&live_regs_mask);
6031 save_size = d;
6032 frame_size = rounded_frame_size (d);
6034 if (TARGET_SH5)
6036 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6037 int total_size;
6038 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
6039 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6040 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
6042 total_size = d + tregs_space;
6043 total_size += rounded_frame_size (total_size);
6044 save_size = total_size - frame_size;
6046 /* If adjusting the stack in a single step costs nothing extra, do so.
6047 I.e. either if a single addi is enough, or we need a movi anyway,
6048 and we don't exceed the maximum offset range (the test for the
6049 latter is conservative for simplicity). */
6050 if (TARGET_SHMEDIA
6051 && ! frame_pointer_needed
6052 && (CONST_OK_FOR_I10 (total_size)
6053 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
6054 && total_size <= 2044)))
6055 d_rounding = frame_size;
6057 frame_size -= d_rounding;
6060 if (frame_pointer_needed)
6062 /* We must avoid scheduling the epilogue with previous basic blocks
6063 when exception handling is enabled. See PR/18032. */
6064 if (flag_exceptions)
6065 emit_insn (gen_blockage ());
6066 output_stack_adjust (frame_size, frame_pointer_rtx, e, &live_regs_mask);
6068 /* We must avoid moving the stack pointer adjustment past code
6069 which reads from the local frame, else an interrupt could
6070 occur after the SP adjustment and clobber data in the local
6071 frame. */
6072 emit_insn (gen_blockage ());
6073 emit_insn (GEN_MOV (stack_pointer_rtx, frame_pointer_rtx));
6075 else if (frame_size)
6077 /* We must avoid moving the stack pointer adjustment past code
6078 which reads from the local frame, else an interrupt could
6079 occur after the SP adjustment and clobber data in the local
6080 frame. */
6081 emit_insn (gen_blockage ());
6082 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
6085 if (SHMEDIA_REGS_STACK_ADJUST ())
6087 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6088 (TARGET_FPU_ANY
6089 ? "__GCC_pop_shmedia_regs"
6090 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
6091 /* This must NOT go through the PLT, otherwise mach and macl
6092 may be clobbered. */
6093 emit_insn (gen_shmedia_save_restore_regs_compact
6094 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
6097 /* Pop all the registers. */
6099 if (target_flags != save_flags && ! current_function_interrupt)
6100 emit_insn (gen_toggle_sz ());
6101 if (TARGET_SH5)
6103 int offset_base, offset;
6104 int offset_in_r0 = -1;
6105 int sp_in_r0 = 0;
6106 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
6107 save_schedule schedule;
6108 save_entry *entry;
6109 int *tmp_pnt;
6111 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
6112 offset_base = -entry[1].offset + d_rounding;
6113 tmp_pnt = schedule.temps;
6114 for (; entry->mode != VOIDmode; entry--)
6116 enum machine_mode mode = entry->mode;
6117 int reg = entry->reg;
6118 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
6120 offset = offset_base + entry->offset;
6121 reg_rtx = gen_rtx_REG (mode, reg);
6123 mem_rtx = gen_rtx_MEM (mode,
6124 gen_rtx_PLUS (Pmode,
6125 stack_pointer_rtx,
6126 GEN_INT (offset)));
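 /* As in the prologue: keep the SP + offset address if it is legitimate
 for MODE; otherwise mem_rtx is cleared and the value is reloaded
 through an r0-based address, using post-increment where possible. */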
6128 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
6130 mem_rtx = NULL_RTX;
6132 try_post_inc:
6134 if (HAVE_POST_INCREMENT
6135 && (offset == offset_in_r0
6136 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
6137 && mem_rtx == NULL_RTX)
6138 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6140 post_inc = gen_rtx_MEM (mode,
6141 gen_rtx_POST_INC (Pmode, r0));
6143 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
6144 post_inc_ok);
6146 post_inc = NULL_RTX;
6148 break;
6150 post_inc_ok:
6151 mem_rtx = NULL_RTX;
6153 while (0);
6155 if (mem_rtx != NULL_RTX)
6156 goto addr_ok;
6158 if (offset_in_r0 == -1)
6160 emit_move_insn (r0, GEN_INT (offset));
6161 offset_in_r0 = offset;
6163 else if (offset != offset_in_r0)
6165 emit_move_insn (r0,
6166 gen_rtx_PLUS
6167 (Pmode, r0,
6168 GEN_INT (offset - offset_in_r0)));
6169 offset_in_r0 += offset - offset_in_r0;
6172 if (post_inc != NULL_RTX)
6174 if (! sp_in_r0)
6176 emit_move_insn (r0,
6177 gen_rtx_PLUS
6178 (Pmode, r0, stack_pointer_rtx));
6179 sp_in_r0 = 1;
6182 mem_rtx = post_inc;
6184 offset_in_r0 += GET_MODE_SIZE (mode);
6186 else if (sp_in_r0)
6187 mem_rtx = gen_rtx_MEM (mode, r0);
6188 else
6189 mem_rtx = gen_rtx_MEM (mode,
6190 gen_rtx_PLUS (Pmode,
6191 stack_pointer_rtx,
6192 r0));
6194 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6195 || mem_rtx == post_inc);
6197 addr_ok:
6198 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6199 && mem_rtx != post_inc)
6201 insn = emit_move_insn (r0, mem_rtx);
6202 mem_rtx = r0;
6204 else if (TARGET_REGISTER_P (reg))
6206 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
6208 /* Give the scheduler a bit of freedom by using up to
6209 MAX_TEMPS registers in a round-robin fashion. */
6210 insn = emit_move_insn (tmp_reg, mem_rtx);
6211 mem_rtx = tmp_reg;
6212 if (*++tmp_pnt < 0)
6213 tmp_pnt = schedule.temps;
6216 insn = emit_move_insn (reg_rtx, mem_rtx);
6217 if (reg == PR_MEDIA_REG && sh_media_register_for_return () >= 0)
6218 /* This is dead, unless we return with a sibcall. */
6219 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6220 const0_rtx,
6221 REG_NOTES (insn));
6224 gcc_assert (entry->offset + offset_base == d + d_rounding);
6226 else /* ! TARGET_SH5 */
6228 save_size = 0;
6229 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6230 pop (PR_REG);
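 /* Pop the remaining saved registers, highest-numbered first. In an
 interrupt handler that also restores double-precision registers,
 restoring FPSCR is deferred until all FP registers have been popped,
 since the restored FPSCR setting would affect the remaining FP
 register moves. */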
6231 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
6233 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
6235 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
6236 && hard_regs_intersect_p (&live_regs_mask,
6237 &reg_class_contents[DF_REGS]))
6238 fpscr_deferred = 1;
6239 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
6240 pop (j);
6241 if (j == FIRST_FP_REG && fpscr_deferred)
6242 pop (FPSCR_REG);
6246 if (target_flags != save_flags && ! current_function_interrupt)
6247 emit_insn (gen_toggle_sz ());
6248 target_flags = save_flags;
6250 output_stack_adjust (current_function_pretend_args_size
6251 + save_size + d_rounding
6252 + current_function_args_info.stack_regs * 8,
6253 stack_pointer_rtx, e, NULL);
6255 if (current_function_calls_eh_return)
6256 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
6257 EH_RETURN_STACKADJ_RTX));
6259 /* Switch back to the normal stack if necessary. */
6260 if (sp_switch)
6261 emit_insn (gen_sp_switch_2 ());
6263 /* Tell flow the insn that pops PR isn't dead. */
6264 /* PR_REG will never be live in SHmedia mode, and we don't need to
6265 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6266 by the return pattern. */
6267 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6268 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
6271 static int sh_need_epilogue_known = 0;
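 /* Return nonzero if the current function needs an epilogue. We find out
 by generating the epilogue into a scratch insn sequence and checking
 whether it came out empty; the answer is cached in
 sh_need_epilogue_known. */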
6274 sh_need_epilogue (void)
6276 if (! sh_need_epilogue_known)
6278 rtx epilogue;
6280 start_sequence ();
6281 sh_expand_epilogue (0);
6282 epilogue = get_insns ();
6283 end_sequence ();
6284 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
6286 return sh_need_epilogue_known > 0;
6289 /* Emit code to change the current function's return address to RA.
6290 TEMP is available as a scratch register, if needed. */
6292 void
6293 sh_set_return_address (rtx ra, rtx tmp)
6295 HARD_REG_SET live_regs_mask;
6296 int d;
6297 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6298 int pr_offset;
6300 d = calc_live_regs (&live_regs_mask);
 6302 /* If pr_reg isn't live, we can set it (or the register given in
6303 sh_media_register_for_return) directly. */
6304 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6306 rtx rr;
6308 if (TARGET_SHMEDIA)
6310 int rr_regno = sh_media_register_for_return ();
6312 if (rr_regno < 0)
6313 rr_regno = pr_reg;
6315 rr = gen_rtx_REG (DImode, rr_regno);
6317 else
6318 rr = gen_rtx_REG (SImode, pr_reg);
6320 emit_insn (GEN_MOV (rr, ra));
6321 /* Tell flow the register for return isn't dead. */
6322 emit_insn (gen_rtx_USE (VOIDmode, rr));
6323 return;
6326 if (TARGET_SH5)
6328 int offset;
6329 save_schedule schedule;
6330 save_entry *entry;
6332 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
6333 offset = entry[1].offset;
6334 for (; entry->mode != VOIDmode; entry--)
6335 if (entry->reg == pr_reg)
6336 goto found;
 6338 /* We couldn't find the PR register. */
6339 gcc_unreachable ();
6341 found:
6342 offset = entry->offset - offset;
6343 pr_offset = (rounded_frame_size (d) + offset
6344 + SHMEDIA_REGS_STACK_ADJUST ());
6346 else
6347 pr_offset = rounded_frame_size (d);
6349 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6350 emit_insn (GEN_ADD3 (tmp, tmp, frame_pointer_rtx));
6352 tmp = gen_rtx_MEM (Pmode, tmp);
6353 emit_insn (GEN_MOV (tmp, ra));
6356 /* Clear variables at function end. */
6358 static void
6359 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6360 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6362 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
6363 sh_need_epilogue_known = 0;
6364 sp_switch = NULL_RTX;
6367 static rtx
6368 sh_builtin_saveregs (void)
6370 /* First unnamed integer register. */
6371 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
6372 /* Number of integer registers we need to save. */
6373 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6374 /* First unnamed SFmode float reg */
6375 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6376 /* Number of SFmode float regs to save. */
6377 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6378 rtx regbuf, fpregs;
6379 int bufsize, regno;
6380 HOST_WIDE_INT alias_set;
6382 if (TARGET_SH5)
6384 if (n_intregs)
6386 int pushregs = n_intregs;
6388 while (pushregs < NPARM_REGS (SImode) - 1
6389 && (CALL_COOKIE_INT_REG_GET
6390 (current_function_args_info.call_cookie,
6391 NPARM_REGS (SImode) - pushregs)
6392 == 1))
6394 current_function_args_info.call_cookie
6395 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6396 - pushregs, 1);
6397 pushregs++;
6400 if (pushregs == NPARM_REGS (SImode))
6401 current_function_args_info.call_cookie
6402 |= (CALL_COOKIE_INT_REG (0, 1)
6403 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6404 else
6405 current_function_args_info.call_cookie
6406 |= CALL_COOKIE_STACKSEQ (pushregs);
6408 current_function_pretend_args_size += 8 * n_intregs;
6410 if (TARGET_SHCOMPACT)
6411 return const0_rtx;
6414 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
6416 error ("__builtin_saveregs not supported by this subtarget");
6417 return const0_rtx;
6420 if (TARGET_SHMEDIA)
6421 n_floatregs = 0;
6423 /* Allocate block of memory for the regs. */
6424 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6425 Or can assign_stack_local accept a 0 SIZE argument? */
6426 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
6428 if (TARGET_SHMEDIA)
6429 regbuf = gen_rtx_MEM (BLKmode,
6430 gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
6431 else if (n_floatregs & 1)
6433 rtx addr;
6435 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6436 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
6437 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6438 regbuf = change_address (regbuf, BLKmode, addr);
6440 else
6441 regbuf = assign_stack_local (BLKmode, bufsize, 0);
6442 alias_set = get_varargs_alias_set ();
6443 set_mem_alias_set (regbuf, alias_set);
6445 /* Save int args.
6446 This is optimized to only save the regs that are necessary. Explicitly
6447 named args need not be saved. */
6448 if (n_intregs > 0)
6449 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
6450 adjust_address (regbuf, BLKmode,
6451 n_floatregs * UNITS_PER_WORD),
6452 n_intregs);
6454 if (TARGET_SHMEDIA)
6455 /* Return the address of the regbuf. */
6456 return XEXP (regbuf, 0);
6458 /* Save float args.
6459 This is optimized to only save the regs that are necessary. Explicitly
6460 named args need not be saved.
6461 We explicitly build a pointer to the buffer because it halves the insn
6462 count when not optimizing (otherwise the pointer is built for each reg
6463 saved).
6464 We emit the moves in reverse order so that we can use predecrement. */
6466 fpregs = copy_to_mode_reg (Pmode,
6467 plus_constant (XEXP (regbuf, 0),
6468 n_floatregs * UNITS_PER_WORD));
6469 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
6471 rtx mem;
6472 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
6474 emit_insn (gen_addsi3 (fpregs, fpregs,
6475 GEN_INT (-2 * UNITS_PER_WORD)));
6476 mem = gen_rtx_MEM (DFmode, fpregs);
6477 set_mem_alias_set (mem, alias_set);
6478 emit_move_insn (mem,
6479 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
6481 regno = first_floatreg;
6482 if (regno & 1)
6484 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6485 mem = gen_rtx_MEM (SFmode, fpregs);
6486 set_mem_alias_set (mem, alias_set);
6487 emit_move_insn (mem,
6488 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
6489 - (TARGET_LITTLE_ENDIAN != 0)));
6492 else
6493 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
6495 rtx mem;
6497 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6498 mem = gen_rtx_MEM (SFmode, fpregs);
6499 set_mem_alias_set (mem, alias_set);
6500 emit_move_insn (mem,
6501 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
6504 /* Return the address of the regbuf. */
6505 return XEXP (regbuf, 0);
6508 /* Define the `__builtin_va_list' type for the ABI. */
6510 static tree
6511 sh_build_builtin_va_list (void)
6513 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6514 tree record;
6516 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
6517 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6518 return ptr_type_node;
6520 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6522 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
6523 ptr_type_node);
6524 f_next_o_limit = build_decl (FIELD_DECL,
6525 get_identifier ("__va_next_o_limit"),
6526 ptr_type_node);
6527 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
6528 ptr_type_node);
6529 f_next_fp_limit = build_decl (FIELD_DECL,
6530 get_identifier ("__va_next_fp_limit"),
6531 ptr_type_node);
6532 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
6533 ptr_type_node);
6535 DECL_FIELD_CONTEXT (f_next_o) = record;
6536 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
6537 DECL_FIELD_CONTEXT (f_next_fp) = record;
6538 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
6539 DECL_FIELD_CONTEXT (f_next_stack) = record;
6541 TYPE_FIELDS (record) = f_next_o;
6542 TREE_CHAIN (f_next_o) = f_next_o_limit;
6543 TREE_CHAIN (f_next_o_limit) = f_next_fp;
6544 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
6545 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
6547 layout_type (record);
6549 return record;
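 /* For reference, the record built above corresponds roughly to the
 following user-level declaration (an illustrative sketch only; the
 actual type is the RECORD_TYPE constructed above):

 typedef struct {
 void *__va_next_o;          next integer arg in the register save area
 void *__va_next_o_limit;    end of the integer part of the save area
 void *__va_next_fp;         next FP arg in the register save area
 void *__va_next_fp_limit;   end of the FP part of the save area
 void *__va_next_stack;      next argument passed on the stack
 } __builtin_va_list; */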
6552 /* Implement `va_start' for varargs and stdarg. */
6554 void
6555 sh_va_start (tree valist, rtx nextarg)
6557 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6558 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6559 tree t, u;
6560 int nfp, nint;
6562 if (TARGET_SH5)
6564 expand_builtin_saveregs ();
6565 std_expand_builtin_va_start (valist, nextarg);
6566 return;
6569 if ((! TARGET_SH2E && ! TARGET_SH4)
6570 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6572 std_expand_builtin_va_start (valist, nextarg);
6573 return;
6576 f_next_o = TYPE_FIELDS (va_list_type_node);
6577 f_next_o_limit = TREE_CHAIN (f_next_o);
6578 f_next_fp = TREE_CHAIN (f_next_o_limit);
6579 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6580 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6582 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6583 NULL_TREE);
6584 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6585 valist, f_next_o_limit, NULL_TREE);
6586 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
6587 NULL_TREE);
6588 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6589 valist, f_next_fp_limit, NULL_TREE);
6590 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6591 valist, f_next_stack, NULL_TREE);
6593 /* Call __builtin_saveregs. */
6594 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
6595 t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
6596 TREE_SIDE_EFFECTS (t) = 1;
6597 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6599 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
6600 if (nfp < 8)
6601 nfp = 8 - nfp;
6602 else
6603 nfp = 0;
6604 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6605 build_int_cst (NULL_TREE, UNITS_PER_WORD * nfp)));
6606 t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
6607 TREE_SIDE_EFFECTS (t) = 1;
6608 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6610 t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
6611 TREE_SIDE_EFFECTS (t) = 1;
6612 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6614 nint = current_function_args_info.arg_count[SH_ARG_INT];
6615 if (nint < 4)
6616 nint = 4 - nint;
6617 else
6618 nint = 0;
6619 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6620 build_int_cst (NULL_TREE, UNITS_PER_WORD * nint)));
6621 t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
6622 TREE_SIDE_EFFECTS (t) = 1;
6623 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6625 u = make_tree (ptr_type_node, nextarg);
6626 t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
6627 TREE_SIDE_EFFECTS (t) = 1;
6628 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6631 /* Implement `va_arg'. */
6633 static tree
6634 sh_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p,
6635 tree *post_p ATTRIBUTE_UNUSED)
6637 HOST_WIDE_INT size, rsize;
6638 tree tmp, pptr_type_node;
6639 tree addr, lab_over = NULL, result = NULL;
6640 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
6642 if (pass_by_ref)
6643 type = build_pointer_type (type);
6645 size = int_size_in_bytes (type);
6646 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6647 pptr_type_node = build_pointer_type (ptr_type_node);
6649 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
6650 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
6652 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6653 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6654 int pass_as_float;
6655 tree lab_false;
6657 f_next_o = TYPE_FIELDS (va_list_type_node);
6658 f_next_o_limit = TREE_CHAIN (f_next_o);
6659 f_next_fp = TREE_CHAIN (f_next_o_limit);
6660 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6661 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6663 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6664 NULL_TREE);
6665 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6666 valist, f_next_o_limit, NULL_TREE);
6667 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
6668 valist, f_next_fp, NULL_TREE);
6669 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6670 valist, f_next_fp_limit, NULL_TREE);
6671 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6672 valist, f_next_stack, NULL_TREE);
6674 /* Structures with a single member with a distinct mode are passed
6675 like their member. This is relevant if the latter has a REAL_TYPE
6676 or COMPLEX_TYPE type. */
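 /* For example, struct { double d; } is treated from here on exactly
 like a bare double. */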
6677 if (TREE_CODE (type) == RECORD_TYPE
6678 && TYPE_FIELDS (type)
6679 && TREE_CODE (TYPE_FIELDS (type)) == FIELD_DECL
6680 && (TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == REAL_TYPE
6681 || TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == COMPLEX_TYPE)
6682 && TREE_CHAIN (TYPE_FIELDS (type)) == NULL_TREE)
6683 type = TREE_TYPE (TYPE_FIELDS (type));
6685 if (TARGET_SH4)
6687 pass_as_float = ((TREE_CODE (type) == REAL_TYPE && size <= 8)
6688 || (TREE_CODE (type) == COMPLEX_TYPE
6689 && TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
6690 && size <= 16));
6692 else
6694 pass_as_float = (TREE_CODE (type) == REAL_TYPE && size == 4);
6697 addr = create_tmp_var (pptr_type_node, NULL);
6698 lab_false = create_artificial_label ();
6699 lab_over = create_artificial_label ();
6701 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
6703 if (pass_as_float)
6705 int first_floatreg
6706 = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6707 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6709 tmp = build (GE_EXPR, boolean_type_node, next_fp, next_fp_limit);
6710 tmp = build (COND_EXPR, void_type_node, tmp,
6711 build (GOTO_EXPR, void_type_node, lab_false),
6712 NULL);
6713 gimplify_and_add (tmp, pre_p);
6715 if (TYPE_ALIGN (type) > BITS_PER_WORD
6716 || (((TREE_CODE (type) == REAL_TYPE && size == 8) || size == 16)
6717 && (n_floatregs & 1)))
6719 tmp = fold_convert (ptr_type_node, size_int (UNITS_PER_WORD));
6720 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp, tmp);
6721 tmp = build (PLUS_EXPR, ptr_type_node, next_fp, tmp);
6722 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp, tmp);
6723 gimplify_and_add (tmp, pre_p);
6726 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
6727 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6728 gimplify_and_add (tmp, pre_p);
6730 #ifdef FUNCTION_ARG_SCmode_WART
6731 if (TYPE_MODE (type) == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
6733 tree subtype = TREE_TYPE (type);
6734 tree real, imag;
6736 imag = std_gimplify_va_arg_expr (valist, subtype, pre_p, NULL);
6737 imag = get_initialized_tmp_var (imag, pre_p, NULL);
6739 real = std_gimplify_va_arg_expr (valist, subtype, pre_p, NULL);
6740 real = get_initialized_tmp_var (real, pre_p, NULL);
6742 result = build (COMPLEX_EXPR, type, real, imag);
6743 result = get_initialized_tmp_var (result, pre_p, NULL);
6745 #endif /* FUNCTION_ARG_SCmode_WART */
6747 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6748 gimplify_and_add (tmp, pre_p);
6750 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6751 gimplify_and_add (tmp, pre_p);
6753 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6754 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6755 gimplify_and_add (tmp, pre_p);
6757 else
6759 tmp = fold_convert (ptr_type_node, size_int (rsize));
6760 tmp = build (PLUS_EXPR, ptr_type_node, next_o, tmp);
6761 tmp = build (GT_EXPR, boolean_type_node, tmp, next_o_limit);
6762 tmp = build (COND_EXPR, void_type_node, tmp,
6763 build (GOTO_EXPR, void_type_node, lab_false),
6764 NULL);
6765 gimplify_and_add (tmp, pre_p);
6767 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
6768 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6769 gimplify_and_add (tmp, pre_p);
6771 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6772 gimplify_and_add (tmp, pre_p);
6774 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6775 gimplify_and_add (tmp, pre_p);
6777 if (size > 4 && ! TARGET_SH4)
6779 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
6780 gimplify_and_add (tmp, pre_p);
6783 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6784 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6785 gimplify_and_add (tmp, pre_p);
6788 if (!result)
6790 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6791 gimplify_and_add (tmp, pre_p);
6795 /* ??? In va-sh.h, there had been code to make values larger than
6796 size 8 indirect. This does not match the FUNCTION_ARG macros. */
6798 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
6799 if (result)
6801 tmp = build (MODIFY_EXPR, void_type_node, result, tmp);
6802 gimplify_and_add (tmp, pre_p);
6804 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6805 gimplify_and_add (tmp, pre_p);
6807 else
6808 result = tmp;
6810 if (pass_by_ref)
6811 result = build_fold_indirect_ref (result);
6813 return result;
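 /* Return nonzero if arguments should be promoted as specified by
 PROMOTE_PROTOTYPES; the Hitachi / Renesas conventions, or the renesas
 attribute on the function type, disable this. */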
6816 bool
6817 sh_promote_prototypes (tree type)
6819 if (TARGET_HITACHI)
6820 return 0;
6821 if (! type)
6822 return 1;
6823 return ! sh_attr_renesas_p (type);
6826 /* Whether an argument must be passed by reference. On SHcompact, we
6827 pretend arguments wider than 32-bits that would have been passed in
6828 registers are passed by reference, so that an SHmedia trampoline
6829 loads them into the full 64-bits registers. */
6831 static int
6832 shcompact_byref (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6833 tree type, bool named)
6835 unsigned HOST_WIDE_INT size;
6837 if (type)
6838 size = int_size_in_bytes (type);
6839 else
6840 size = GET_MODE_SIZE (mode);
6842 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
6843 && (!named
6844 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
6845 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
6846 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
6847 && size > 4
6848 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
6849 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
6850 return size;
6851 else
6852 return 0;
6855 static bool
6856 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6857 tree type, bool named)
6859 if (targetm.calls.must_pass_in_stack (mode, type))
6860 return true;
6862 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
6863 wants to know about pass-by-reference semantics for incoming
6864 arguments. */
6865 if (! cum)
6866 return false;
6868 if (TARGET_SHCOMPACT)
6870 cum->byref = shcompact_byref (cum, mode, type, named);
6871 return cum->byref != 0;
6874 return false;
6877 static bool
6878 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6879 tree type, bool named ATTRIBUTE_UNUSED)
6881 /* ??? How can it possibly be correct to return true only on the
6882 caller side of the equation? Is there someplace else in the
6883 sh backend that's magically producing the copies? */
6884 return (cum->outgoing
6885 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
6886 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
6889 static int
6890 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6891 tree type, bool named ATTRIBUTE_UNUSED)
6893 int words = 0;
6895 if (!TARGET_SH5
6896 && PASS_IN_REG_P (*cum, mode, type)
6897 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
6898 && (ROUND_REG (*cum, mode)
6899 + (mode != BLKmode
6900 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
6901 : ROUND_ADVANCE (int_size_in_bytes (type)))
6902 > NPARM_REGS (mode)))
6903 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
6905 else if (!TARGET_SHCOMPACT
6906 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
6907 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
6909 return words * UNITS_PER_WORD;
6913 /* Define where to put the arguments to a function.
6914 Value is zero to push the argument on the stack,
6915 or a hard register in which to store the argument.
6917 MODE is the argument's machine mode.
6918 TYPE is the data type of the argument (as a tree).
6919 This is null for libcalls where that information may
6920 not be available.
6921 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6922 the preceding args and about the function being called.
6923 NAMED is nonzero if this argument is a named parameter
6924 (otherwise it is an extra parameter matching an ellipsis).
6926 On SH the first args are normally in registers
6927 and the rest are pushed. Any arg that starts within the first
6928 NPARM_REGS words is at least partially passed in a register unless
6929 its data type forbids. */
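 /* For instance, on SH1..SH4 the first NPARM_REGS (SImode) == 4 integer
 arguments go in r4..r7 and the rest are pushed on the stack
 (illustrative summary of the code below). */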
6933 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
6934 tree type, int named)
6936 if (! TARGET_SH5 && mode == VOIDmode)
6937 return GEN_INT (ca->renesas_abi ? 1 : 0);
6939 if (! TARGET_SH5
6940 && PASS_IN_REG_P (*ca, mode, type)
6941 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
6943 int regno;
6945 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
6946 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
6948 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
6949 gen_rtx_REG (SFmode,
6950 BASE_ARG_REG (mode)
6951 + (ROUND_REG (*ca, mode) ^ 1)),
6952 const0_rtx);
6953 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
6954 gen_rtx_REG (SFmode,
6955 BASE_ARG_REG (mode)
6956 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
6957 GEN_INT (4));
6958 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
6961 /* If the alignment of a DF value causes an SF register to be
6962 skipped, we will use that skipped register for the next SF
6963 value. */
6964 if ((TARGET_HITACHI || ca->renesas_abi)
6965 && ca->free_single_fp_reg
6966 && mode == SFmode)
6967 return gen_rtx_REG (mode, ca->free_single_fp_reg);
6969 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
6970 ^ (mode == SFmode && TARGET_SH4
6971 && TARGET_LITTLE_ENDIAN != 0
6972 && ! TARGET_HITACHI && ! ca->renesas_abi);
6973 return gen_rtx_REG (mode, regno);
6977 if (TARGET_SH5)
6979 if (mode == VOIDmode && TARGET_SHCOMPACT)
6980 return GEN_INT (ca->call_cookie);
6982 /* The following test assumes unnamed arguments are promoted to
6983 DFmode. */
6984 if (mode == SFmode && ca->free_single_fp_reg)
6985 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
6987 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
6988 && (named || ! ca->prototype_p)
6989 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
6991 if (! ca->prototype_p && TARGET_SHMEDIA)
6992 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
6994 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
6995 FIRST_FP_PARM_REG
6996 + ca->arg_count[(int) SH_ARG_FLOAT]);
6999 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
7000 && (! TARGET_SHCOMPACT
7001 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
7002 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
7003 type, named))))
7005 return gen_rtx_REG (mode, (FIRST_PARM_REG
7006 + ca->arg_count[(int) SH_ARG_INT]));
7009 return 0;
7012 return 0;
7015 /* Update the data in CUM to advance over an argument
7016 of mode MODE and data type TYPE.
7017 (TYPE is null for libcalls where that information may not be
7018 available.) */
7020 void
7021 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7022 tree type, int named)
7024 if (ca->force_mem)
7025 ca->force_mem = 0;
7026 else if (TARGET_SH5)
7028 tree type2 = (ca->byref && type
7029 ? TREE_TYPE (type)
7030 : type);
7031 enum machine_mode mode2 = (ca->byref && type
7032 ? TYPE_MODE (type2)
7033 : mode);
7034 int dwords = ((ca->byref
7035 ? ca->byref
7036 : mode2 == BLKmode
7037 ? int_size_in_bytes (type2)
7038 : GET_MODE_SIZE (mode2)) + 7) / 8;
7039 int numregs = MIN (dwords, NPARM_REGS (SImode)
7040 - ca->arg_count[(int) SH_ARG_INT]);
7042 if (numregs)
7044 ca->arg_count[(int) SH_ARG_INT] += numregs;
7045 if (TARGET_SHCOMPACT
7046 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
7048 ca->call_cookie
7049 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7050 - numregs, 1);
7051 /* N.B. We want this also for outgoing. */
7052 ca->stack_regs += numregs;
7054 else if (ca->byref)
7056 if (! ca->outgoing)
7057 ca->stack_regs += numregs;
7058 ca->byref_regs += numregs;
7059 ca->byref = 0;
7061 ca->call_cookie
7062 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7063 - numregs, 2);
7064 while (--numregs);
7065 ca->call_cookie
7066 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7067 - 1, 1);
7069 else if (dwords > numregs)
7071 int pushregs = numregs;
7073 if (TARGET_SHCOMPACT)
7074 ca->stack_regs += numregs;
7075 while (pushregs < NPARM_REGS (SImode) - 1
7076 && (CALL_COOKIE_INT_REG_GET
7077 (ca->call_cookie,
7078 NPARM_REGS (SImode) - pushregs)
7079 == 1))
7081 ca->call_cookie
7082 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7083 - pushregs, 1);
7084 pushregs++;
7086 if (numregs == NPARM_REGS (SImode))
7087 ca->call_cookie
7088 |= CALL_COOKIE_INT_REG (0, 1)
7089 | CALL_COOKIE_STACKSEQ (numregs - 1);
7090 else
7091 ca->call_cookie
7092 |= CALL_COOKIE_STACKSEQ (numregs);
7095 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
7096 && (named || ! ca->prototype_p))
7098 if (mode2 == SFmode && ca->free_single_fp_reg)
7099 ca->free_single_fp_reg = 0;
7100 else if (ca->arg_count[(int) SH_ARG_FLOAT]
7101 < NPARM_REGS (SFmode))
7103 int numfpregs
7104 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
7105 NPARM_REGS (SFmode)
7106 - ca->arg_count[(int) SH_ARG_FLOAT]);
7108 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
7110 if (TARGET_SHCOMPACT && ! ca->prototype_p)
7112 if (ca->outgoing && numregs > 0)
7115 ca->call_cookie
7116 |= (CALL_COOKIE_INT_REG
7117 (ca->arg_count[(int) SH_ARG_INT]
7118 - numregs + ((numfpregs - 2) / 2),
7119 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
7120 - numfpregs) / 2));
7122 while (numfpregs -= 2);
7124 else if (mode2 == SFmode && (named)
7125 && (ca->arg_count[(int) SH_ARG_FLOAT]
7126 < NPARM_REGS (SFmode)))
7127 ca->free_single_fp_reg
7128 = FIRST_FP_PARM_REG - numfpregs
7129 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
7132 return;
7135 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
7137 /* Note that we've used the skipped register. */
7138 if (mode == SFmode && ca->free_single_fp_reg)
7140 ca->free_single_fp_reg = 0;
7141 return;
 7143 /* When we have a DF after an SF, there's an SF register that gets
7144 skipped in order to align the DF value. We note this skipped
7145 register, because the next SF value will use it, and not the
7146 SF that follows the DF. */
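 /* For example: a float, then a double, then another float. The double
 needs an aligned register pair, so one SF register is skipped; the
 second float then goes into that remembered register instead of the
 register following the double. */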
7147 if (mode == DFmode
7148 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
7150 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
7151 + BASE_ARG_REG (mode));
7155 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
7156 || PASS_IN_REG_P (*ca, mode, type))
7157 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
7158 = (ROUND_REG (*ca, mode)
7159 + (mode == BLKmode
7160 ? ROUND_ADVANCE (int_size_in_bytes (type))
7161 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
7164 /* The Renesas calling convention doesn't quite fit into this scheme since
7165 the address is passed like an invisible argument, but one that is always
7166 passed in memory. */
7167 static rtx
7168 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
7170 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7171 return 0;
7172 return gen_rtx_REG (Pmode, 2);
7175 /* Worker function for TARGET_RETURN_IN_MEMORY. */
7177 static bool
7178 sh_return_in_memory (tree type, tree fndecl)
7180 if (TARGET_SH5)
7182 if (TYPE_MODE (type) == BLKmode)
7183 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
7184 else
7185 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
7187 else
7189 return (TYPE_MODE (type) == BLKmode
7190 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7191 && TREE_CODE (type) == RECORD_TYPE));
7195 /* We actually emit the code in sh_expand_prologue. We used to use
7196 a static variable to flag that we need to emit this code, but that
 7197 doesn't work when inlining, when functions are deferred and then emitted
7198 later. Fortunately, we already have two flags that are part of struct
7199 function that tell if a function uses varargs or stdarg. */
7200 static void
7201 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
7202 enum machine_mode mode,
7203 tree type,
7204 int *pretend_arg_size,
7205 int second_time ATTRIBUTE_UNUSED)
7207 gcc_assert (current_function_stdarg);
7208 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7210 int named_parm_regs, anon_parm_regs;
7212 named_parm_regs = (ROUND_REG (*ca, mode)
7213 + (mode == BLKmode
7214 ? ROUND_ADVANCE (int_size_in_bytes (type))
7215 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
7216 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
7217 if (anon_parm_regs > 0)
7218 *pretend_arg_size = anon_parm_regs * 4;
7222 static bool
7223 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
7225 return TARGET_SH5;
7228 static bool
7229 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
7231 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
7235 /* Define the offset between two registers, one to be eliminated, and
7236 the other its replacement, at the start of a routine. */
7239 initial_elimination_offset (int from, int to)
7241 int regs_saved;
7242 int regs_saved_rounding = 0;
7243 int total_saved_regs_space;
7244 int total_auto_space;
7245 int save_flags = target_flags;
7246 int copy_flags;
7247 HARD_REG_SET live_regs_mask;
7249 shmedia_space_reserved_for_target_registers = false;
7250 regs_saved = calc_live_regs (&live_regs_mask);
7251 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
7253 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
7255 shmedia_space_reserved_for_target_registers = true;
7256 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
7259 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
7260 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7261 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
7263 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
7264 copy_flags = target_flags;
7265 target_flags = save_flags;
7267 total_saved_regs_space = regs_saved + regs_saved_rounding;
7269 if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
7270 return total_saved_regs_space + total_auto_space
7271 + current_function_args_info.byref_regs * 8;
7273 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7274 return total_saved_regs_space + total_auto_space
7275 + current_function_args_info.byref_regs * 8;
7277 /* Initial gap between fp and sp is 0. */
7278 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7279 return 0;
7281 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
7282 && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM));
7283 if (TARGET_SH5)
7285 int n = total_saved_regs_space;
7286 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7287 save_schedule schedule;
7288 save_entry *entry;
7290 n += total_auto_space;
7292 /* If it wasn't saved, there's not much we can do. */
7293 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7294 return n;
7296 target_flags = copy_flags;
7298 sh5_schedule_saves (&live_regs_mask, &schedule, n);
7299 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7300 if (entry->reg == pr_reg)
7302 target_flags = save_flags;
7303 return entry->offset;
7305 gcc_unreachable ();
7307 else
7308 return total_auto_space;
7311 /* Handle machine specific pragmas to be semi-compatible with Renesas
7312 compiler. */
7314 void
7315 sh_pr_interrupt (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7317 pragma_interrupt = 1;
7320 void
7321 sh_pr_trapa (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7323 pragma_interrupt = pragma_trapa = 1;
7326 void
7327 sh_pr_nosave_low_regs (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7329 pragma_nosave_low_regs = 1;
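 /* For example, sources written for the Renesas compiler may use these
 pragmas like this (an illustrative sketch only):

 #pragma interrupt
 void isr (void);
 */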
 7332 /* Generate an 'interrupt_handler' attribute for decls. */
7334 static void
7335 sh_insert_attributes (tree node, tree *attributes)
7337 if (! pragma_interrupt
7338 || TREE_CODE (node) != FUNCTION_DECL)
7339 return;
 7341 /* We are only interested in declarations. */
7342 if (!DECL_P (node))
7343 return;
 7345 /* Add an 'interrupt_handler' attribute. */
7346 * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
7348 return;
7351 /* Supported attributes:
7353 interrupt_handler -- specifies this function is an interrupt handler.
7355 sp_switch -- specifies an alternate stack for an interrupt handler
7356 to run on.
7358 trap_exit -- use a trapa to exit an interrupt function instead of
7359 an rte instruction.
7361 renesas -- use Renesas calling/layout conventions (functions and
7362 structures).
7366 const struct attribute_spec sh_attribute_table[] =
7368 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
7369 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7370 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
7371 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
7372 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
7373 #ifdef SYMBIAN
7374 /* Symbian support adds three new attributes:
7375 dllexport - for exporting a function/variable that will live in a dll
7376 dllimport - for importing a function/variable from a dll
7378 Microsoft allows multiple declspecs in one __declspec, separating
7379 them with spaces. We do NOT support this. Instead, use __declspec
7380 multiple times. */
7381 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7382 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7383 #endif
7384 { NULL, 0, 0, false, false, false, NULL }
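 /* For example (illustrative only; the stack variable and trap number are
 made up), an interrupt handler that runs on an alternate stack and
 exits via trapa could be written as:

 void *alt_stack;
 #pragma interrupt
 void handler (void)
 __attribute__ ((sp_switch ("alt_stack"), trap_exit (11)));
 */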
7387 /* Handle an "interrupt_handler" attribute; arguments as in
7388 struct attribute_spec.handler. */
7389 static tree
7390 sh_handle_interrupt_handler_attribute (tree *node, tree name,
7391 tree args ATTRIBUTE_UNUSED,
7392 int flags ATTRIBUTE_UNUSED,
7393 bool *no_add_attrs)
7395 if (TREE_CODE (*node) != FUNCTION_DECL)
7397 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7398 IDENTIFIER_POINTER (name));
7399 *no_add_attrs = true;
7401 else if (TARGET_SHCOMPACT)
7403 error ("attribute interrupt_handler is not compatible with -m5-compact");
7404 *no_add_attrs = true;
7407 return NULL_TREE;
7410 /* Handle an "sp_switch" attribute; arguments as in
7411 struct attribute_spec.handler. */
7412 static tree
7413 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
7414 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7416 if (TREE_CODE (*node) != FUNCTION_DECL)
7418 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7419 IDENTIFIER_POINTER (name));
7420 *no_add_attrs = true;
7422 else if (!pragma_interrupt)
7424 /* The sp_switch attribute only has meaning for interrupt functions. */
7425 warning (OPT_Wattributes, "%qs attribute only applies to "
7426 "interrupt functions", IDENTIFIER_POINTER (name));
7427 *no_add_attrs = true;
7429 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
7431 /* The argument must be a constant string. */
7432 warning (OPT_Wattributes, "%qs attribute argument not a string constant",
7433 IDENTIFIER_POINTER (name));
7434 *no_add_attrs = true;
7436 else
7438 const char *s = ggc_strdup (TREE_STRING_POINTER (TREE_VALUE (args)));
7439 sp_switch = gen_rtx_SYMBOL_REF (VOIDmode, s);
7442 return NULL_TREE;
 7445 /* Handle a "trap_exit" attribute; arguments as in
7446 struct attribute_spec.handler. */
7447 static tree
7448 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
7449 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7451 if (TREE_CODE (*node) != FUNCTION_DECL)
7453 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7454 IDENTIFIER_POINTER (name));
7455 *no_add_attrs = true;
7457 else if (!pragma_interrupt)
7459 /* The trap_exit attribute only has meaning for interrupt functions. */
7460 warning (OPT_Wattributes, "%qs attribute only applies to "
7461 "interrupt functions", IDENTIFIER_POINTER (name));
7462 *no_add_attrs = true;
7464 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
7466 /* The argument must be a constant integer. */
7467 warning (OPT_Wattributes, "%qs attribute argument not an "
7468 "integer constant", IDENTIFIER_POINTER (name));
7469 *no_add_attrs = true;
7471 else
7473 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
7476 return NULL_TREE;
7479 static tree
7480 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
7481 tree name ATTRIBUTE_UNUSED,
7482 tree args ATTRIBUTE_UNUSED,
7483 int flags ATTRIBUTE_UNUSED,
7484 bool *no_add_attrs ATTRIBUTE_UNUSED)
7486 return NULL_TREE;
7489 /* True if __attribute__((renesas)) or -mrenesas. */
7491 sh_attr_renesas_p (tree td)
7493 if (TARGET_HITACHI)
7494 return 1;
7495 if (td == 0)
7496 return 0;
7497 if (DECL_P (td))
7498 td = TREE_TYPE (td);
7499 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
7500 != NULL_TREE);
7503 /* True if __attribute__((renesas)) or -mrenesas, for the current
7504 function. */
7506 sh_cfun_attr_renesas_p (void)
7508 return sh_attr_renesas_p (current_function_decl);
7512 sh_cfun_interrupt_handler_p (void)
7514 return (lookup_attribute ("interrupt_handler",
7515 DECL_ATTRIBUTES (current_function_decl))
7516 != NULL_TREE);
7519 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
7521 static const char *
7522 sh_check_pch_target_flags (int old_flags)
7524 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
7525 | MASK_SH_E | MASK_HARD_SH4
7526 | MASK_FPU_SINGLE | MASK_SH4))
7527 return _("created and used with different architectures / ABIs");
7528 if ((old_flags ^ target_flags) & MASK_HITACHI)
7529 return _("created and used with different ABIs");
7530 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
7531 return _("created and used with different endianness");
7532 return NULL;
7535 /* Predicates used by the templates. */
7537 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
7538 Used only in general_movsrc_operand. */
7541 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7543 switch (REGNO (op))
7545 case PR_REG:
7546 case MACL_REG:
7547 case MACH_REG:
7548 return 1;
7550 return 0;
7553 /* Returns 1 if OP can be source of a simple move operation.
7554 Same as general_operand, but a LABEL_REF is valid, PRE_DEC is
7555 invalid as are subregs of system registers. */
7558 general_movsrc_operand (rtx op, enum machine_mode mode)
7560 if (GET_CODE (op) == MEM)
7562 rtx inside = XEXP (op, 0);
7563 if (GET_CODE (inside) == CONST)
7564 inside = XEXP (inside, 0);
7566 if (GET_CODE (inside) == LABEL_REF)
7567 return 1;
7569 if (GET_CODE (inside) == PLUS
7570 && GET_CODE (XEXP (inside, 0)) == LABEL_REF
7571 && GET_CODE (XEXP (inside, 1)) == CONST_INT)
7572 return 1;
7574 /* Only post inc allowed. */
7575 if (GET_CODE (inside) == PRE_DEC)
7576 return 0;
7579 if ((mode == QImode || mode == HImode)
7580 && (GET_CODE (op) == SUBREG
7581 && GET_CODE (XEXP (op, 0)) == REG
7582 && system_reg_operand (XEXP (op, 0), mode)))
7583 return 0;
7585 if (TARGET_SHMEDIA
7586 && (GET_CODE (op) == PARALLEL || GET_CODE (op) == CONST_VECTOR)
7587 && sh_rep_vec (op, mode))
7588 return 1;
7589 if (TARGET_SHMEDIA && 1
7590 && GET_CODE (op) == SUBREG && GET_MODE (op) == mode
7591 && SUBREG_REG (op) == const0_rtx && subreg_lowpart_p (op))
7592 /* FIXME */ abort (); /* return 1; */
7593 return general_operand (op, mode);
7596 /* Returns 1 if OP can be a destination of a move.
7597 Same as general_operand, but no preinc allowed. */
7600 general_movdst_operand (rtx op, enum machine_mode mode)
7602 /* Only pre dec allowed. */
7603 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC)
7604 return 0;
7605 if (mode == DImode && TARGET_SHMEDIA && GET_CODE (op) == SUBREG
7606 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8
7607 && ! (high_life_started || reload_completed))
7608 return 0;
7610 return general_operand (op, mode);
7613 /* Returns 1 if OP is a normal arithmetic register. */
7616 arith_reg_operand (rtx op, enum machine_mode mode)
7618 if (register_operand (op, mode))
7620 int regno;
7622 if (GET_CODE (op) == REG)
7623 regno = REGNO (op);
7624 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7625 regno = REGNO (SUBREG_REG (op));
7626 else
7627 return 1;
7629 return (regno != T_REG && regno != PR_REG
7630 && ! TARGET_REGISTER_P (regno)
7631 && (regno != FPUL_REG || TARGET_SH4)
7632 && regno != MACH_REG && regno != MACL_REG);
7634 /* Allow a no-op sign extension - compare LOAD_EXTEND_OP.
7635 We allow SImode here, as not using an FP register is just a matter of
7636 proper register allocation. */
7637 if (TARGET_SHMEDIA
7638 && GET_MODE (op) == DImode && GET_CODE (op) == SIGN_EXTEND
7639 && GET_MODE (XEXP (op, 0)) == SImode
7640 && GET_CODE (XEXP (op, 0)) != SUBREG)
7641 return register_operand (XEXP (op, 0), VOIDmode);
7642 #if 0 /* Can't do this because of PROMOTE_MODE for unsigned vars. */
7643 if (GET_MODE (op) == SImode && GET_CODE (op) == SIGN_EXTEND
7644 && GET_MODE (XEXP (op, 0)) == HImode
7645 && GET_CODE (XEXP (op, 0)) == REG
7646 && REGNO (XEXP (op, 0)) <= LAST_GENERAL_REG)
7647 return register_operand (XEXP (op, 0), VOIDmode);
7648 #endif
7649 if (GET_MODE_CLASS (GET_MODE (op)) == MODE_VECTOR_INT
7650 && GET_CODE (op) == SUBREG
7651 && GET_MODE (SUBREG_REG (op)) == DImode
7652 && GET_CODE (SUBREG_REG (op)) == SIGN_EXTEND
7653 && GET_MODE (XEXP (SUBREG_REG (op), 0)) == SImode
7654 && GET_CODE (XEXP (SUBREG_REG (op), 0)) != SUBREG)
7655 return register_operand (XEXP (SUBREG_REG (op), 0), VOIDmode);
7656 return 0;
7659 /* Like above, but for DImode destinations: forbid paradoxical DImode subregs,
7660 because this would lead to missing sign extensions when truncating from
7661 DImode to SImode. */
7663 arith_reg_dest (rtx op, enum machine_mode mode)
7665 if (mode == DImode && GET_CODE (op) == SUBREG
7666 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8
7667 && TARGET_SHMEDIA)
7668 return 0;
7669 return arith_reg_operand (op, mode);
7672 /* Like arith_reg_operand, but for register source operands of narrow
7673 logical SHMEDIA operations: forbid subregs of DImode / TImode regs. */
7675 logical_reg_operand (rtx op, enum machine_mode mode)
7677 if (TARGET_SHMEDIA
7678 && GET_CODE (op) == SUBREG
7679 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) > 4
7680 && mode != DImode)
7681 return 0;
7682 return arith_reg_operand (op, mode);
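/* Return nonzero if OP is an integer value narrower than a word that,
   after reload, lives in a general purpose register.  */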
7686 int_gpr_dest (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7688 enum machine_mode op_mode = GET_MODE (op);
7690 if (GET_MODE_CLASS (op_mode) != MODE_INT
7691 || GET_MODE_SIZE (op_mode) >= UNITS_PER_WORD)
7692 return 0;
7693 if (! reload_completed)
7694 return 0;
7695 return true_regnum (op) <= LAST_GENERAL_REG;
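/* Return nonzero if OP is a register suitable for floating point
   arithmetic, i.e. a pseudo register or a hard floating point register.  */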
7699 fp_arith_reg_operand (rtx op, enum machine_mode mode)
7701 if (register_operand (op, mode))
7703 int regno;
7705 if (GET_CODE (op) == REG)
7706 regno = REGNO (op);
7707 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7708 regno = REGNO (SUBREG_REG (op));
7709 else
7710 return 1;
7712 return (regno >= FIRST_PSEUDO_REGISTER
7713 || FP_REGISTER_P (regno));
7715 return 0;
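/* Like fp_arith_reg_operand, but for DImode destinations: forbid
   paradoxical DImode subregs.  */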
7719 fp_arith_reg_dest (rtx op, enum machine_mode mode)
7721 if (mode == DImode && GET_CODE (op) == SUBREG
7722 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8)
7723 return 0;
7724 return fp_arith_reg_operand (op, mode);
7727 /* Returns 1 if OP is a valid source operand for an arithmetic insn. */
7730 arith_operand (rtx op, enum machine_mode mode)
7732 if (arith_reg_operand (op, mode))
7733 return 1;
7735 if (TARGET_SHMEDIA)
7737 /* FIXME: We should be checking whether the CONST_INT fits in a
7738 CONST_OK_FOR_I16 here, but this causes reload_cse to crash when
7739 attempting to transform a sequence of two 64-bit sets of the
7740 same register from literal constants into a set and an add,
7741 when the difference is too wide for an add. */
7742 if (GET_CODE (op) == CONST_INT
7743 || EXTRA_CONSTRAINT_C16 (op))
7744 return 1;
7745 else if (GET_CODE (op) == TRUNCATE
7746 && ! system_reg_operand (XEXP (op, 0), VOIDmode)
7747 && (mode == VOIDmode || mode == GET_MODE (op))
7748 && (GET_MODE_SIZE (GET_MODE (op))
7749 < GET_MODE_SIZE (GET_MODE (XEXP (op, 0))))
7750 && (! FP_REGISTER_P (REGNO (XEXP (op, 0)))
7751 || GET_MODE_SIZE (GET_MODE (op)) == 4))
7752 return register_operand (XEXP (op, 0), VOIDmode);
7753 else
7754 return 0;
7756 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I08 (INTVAL (op)))
7757 return 1;
7759 return 0;
7762 /* Returns 1 if OP is a valid source operand for a compare insn. */
7765 arith_reg_or_0_operand (rtx op, enum machine_mode mode)
7767 if (arith_reg_operand (op, mode))
7768 return 1;
7770 if (EXTRA_CONSTRAINT_Z (op))
7771 return 1;
7773 return 0;
7776 /* Return 1 if OP is a valid source operand for xor. */
7779 xor_operand (rtx op, enum machine_mode mode)
7781 if (GET_CODE (op) == CONST_INT)
7782 return (TARGET_SHMEDIA
7783 ? (CONST_OK_FOR_I06 (INTVAL (op))
7784 || (no_new_pseudos && INTVAL (op) == 0xff))
7785 : CONST_OK_FOR_K08 (INTVAL (op)));
7786 if (TARGET_SHMEDIA
7787 && mode != DImode && GET_CODE (op) == SUBREG
7788 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) > 4)
7789 return 0;
7790 return arith_reg_operand (op, mode);
7793 /* Return 1 if OP is a valid source operand for shmedia cmpgt / cmpgtu. */
7795 cmp_operand (rtx op, enum machine_mode mode)
7797 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_N (INTVAL (op)))
7798 return 1;
7799 if (TARGET_SHMEDIA
7800 && mode != DImode && GET_CODE (op) == SUBREG
7801 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) > 4)
7802 return 0;
7803 return arith_reg_operand (op, mode);
7806 /* Returns 1 if OP is a valid source operand for a logical operation. */
7809 logical_operand (rtx op, enum machine_mode mode)
7811 if (TARGET_SHMEDIA
7812 && mode != DImode && GET_CODE (op) == SUBREG
7813 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) > 4)
7814 return 0;
7816 if (arith_reg_operand (op, mode))
7817 return 1;
7819 if (TARGET_SHMEDIA)
7821 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I10 (INTVAL (op)))
7822 return 1;
7823 else
7824 return 0;
7826 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_K08 (INTVAL (op)))
7827 return 1;
7829 return 0;
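/* Return nonzero if OP is a valid second operand for an AND: any
   logical_operand, or, on SHmedia, a DImode constant satisfying
   CONST_OK_FOR_J16 (see the mshflo.l / mshfhi.l check below).  */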
7833 and_operand (rtx op, enum machine_mode mode)
7835 if (logical_operand (op, mode))
7836 return 1;
7838 /* Check mshflo.l / mshfhi.l opportunities. */
7839 if (TARGET_SHMEDIA
7840 && mode == DImode
7841 && GET_CODE (op) == CONST_INT
7842 && CONST_OK_FOR_J16 (INTVAL (op)))
7843 return 1;
7845 return 0;
7848 /* Nonzero if OP is a floating point value with value 0.0. */
7851 fp_zero_operand (rtx op)
7853 REAL_VALUE_TYPE r;
7855 if (GET_MODE (op) != SFmode)
7856 return 0;
7858 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7859 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
7862 /* Nonzero if OP is a floating point value with value 1.0. */
7865 fp_one_operand (rtx op)
7867 REAL_VALUE_TYPE r;
7869 if (GET_MODE (op) != SFmode)
7870 return 0;
7872 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7873 return REAL_VALUES_EQUAL (r, dconst1);
7876 /* For -m4 and -m4-single-only, mode switching is used. If we are
7877 compiling without -mfmovd, movsf_ie isn't taken into account for
7878 mode switching. We could check in machine_dependent_reorg for
7879 cases where we know we are in single precision mode, but there is no
7880 interface to find that out during reload, so we must avoid
7881 choosing an fldi alternative during reload and thus failing to
7882 allocate a scratch register for the constant loading. */
7884 fldi_ok (void)
7886 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
7890 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7892 enum rtx_code code = GET_CODE (op);
7893 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
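/* Return nonzero if OP is the FPSCR hard register in PSImode, or, before
   reload has started or finished, a PSImode pseudo register.  */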
7897 fpscr_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7899 return (GET_CODE (op) == REG
7900 && (REGNO (op) == FPSCR_REG
7901 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
7902 && !(reload_in_progress || reload_completed)))
7903 && GET_MODE (op) == PSImode);
7907 fpul_operand (rtx op, enum machine_mode mode)
7909 if (TARGET_SHMEDIA)
7910 return fp_arith_reg_operand (op, mode);
7912 return (GET_CODE (op) == REG
7913 && (REGNO (op) == FPUL_REG || REGNO (op) >= FIRST_PSEUDO_REGISTER)
7914 && GET_MODE (op) == mode);
7918 symbol_ref_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7920 return (GET_CODE (op) == SYMBOL_REF);
7923 /* Return the TLS type for TLS symbols, 0 otherwise. */
7925 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7927 if (GET_CODE (op) != SYMBOL_REF)
7928 return 0;
7929 return SYMBOL_REF_TLS_MODEL (op);
7933 commutative_float_operator (rtx op, enum machine_mode mode)
7935 if (GET_MODE (op) != mode)
7936 return 0;
7937 switch (GET_CODE (op))
7939 case PLUS:
7940 case MULT:
7941 return 1;
7942 default:
7943 break;
7945 return 0;
7949 noncommutative_float_operator (rtx op, enum machine_mode mode)
7951 if (GET_MODE (op) != mode)
7952 return 0;
7953 switch (GET_CODE (op))
7955 case MINUS:
7956 case DIV:
7957 return 1;
7958 default:
7959 break;
7961 return 0;
7965 unary_float_operator (rtx op, enum machine_mode mode)
7967 if (GET_MODE (op) != mode)
7968 return 0;
7969 switch (GET_CODE (op))
7971 case ABS:
7972 case NEG:
7973 case SQRT:
7974 return 1;
7975 default:
7976 break;
7978 return 0;
7982 binary_float_operator (rtx op, enum machine_mode mode)
7984 if (GET_MODE (op) != mode)
7985 return 0;
7986 switch (GET_CODE (op))
7988 case PLUS:
7989 case MINUS:
7990 case MULT:
7991 case DIV:
7992 return 1;
7993 default:
7994 break;
7996 return 0;
8000 binary_logical_operator (rtx op, enum machine_mode mode)
8002 if (GET_MODE (op) != mode)
8003 return 0;
8004 switch (GET_CODE (op))
8006 case IOR:
8007 case AND:
8008 case XOR:
8009 return 1;
8010 default:
8011 break;
8013 return 0;
8017 equality_comparison_operator (rtx op, enum machine_mode mode)
8019 return ((mode == VOIDmode || GET_MODE (op) == mode)
8020 && (GET_CODE (op) == EQ || GET_CODE (op) == NE));
8024 greater_comparison_operator (rtx op, enum machine_mode mode)
8026 if (mode != VOIDmode && GET_MODE (op) != mode)
8027 return 0;
8028 switch (GET_CODE (op))
8030 case GT:
8031 case GE:
8032 case GTU:
8033 case GEU:
8034 return 1;
8035 default:
8036 return 0;
8041 less_comparison_operator (rtx op, enum machine_mode mode)
8043 if (mode != VOIDmode && GET_MODE (op) != mode)
8044 return 0;
8045 switch (GET_CODE (op))
8047 case LT:
8048 case LE:
8049 case LTU:
8050 case LEU:
8051 return 1;
8052 default:
8053 return 0;
8058 shift_operator (rtx op, enum machine_mode mode)
8060 if (mode != VOIDmode && GET_MODE (op) != mode)
8061 return 0;
8062 switch (GET_CODE (op))
8064 case ASHIFT:
8065 case ASHIFTRT:
8066 case LSHIFTRT:
8067 return 1;
8068 default:
8069 return 0;
8074 logical_operator (rtx op, enum machine_mode mode)
8076 if (mode != VOIDmode && GET_MODE (op) != mode)
8077 return 0;
8078 switch (GET_CODE (op))
8080 case AND:
8081 case IOR:
8082 case XOR:
8083 return 1;
8084 default:
8085 return 0;
8089 /* Accept pseudos and branch target registers. */
8091 target_reg_operand (rtx op, enum machine_mode mode)
8093 if (mode == VOIDmode
8094 ? GET_MODE (op) != Pmode && GET_MODE (op) != PDImode
8095 : mode != GET_MODE (op))
8096 return 0;
8098 if (GET_CODE (op) == SUBREG)
8099 op = XEXP (op, 0);
8101 if (GET_CODE (op) != REG)
8102 return 0;
8104 /* We must protect ourselves from matching pseudos that are virtual
8105 register, because they will eventually be replaced with hardware
8106 registers that aren't branch-target registers. */
8107 if (REGNO (op) > LAST_VIRTUAL_REGISTER
8108 || TARGET_REGISTER_P (REGNO (op)))
8109 return 1;
8111 return 0;
8114 /* Same as target_reg_operand, except that label_refs and symbol_refs
8115 are accepted before reload. */
8117 target_operand (rtx op, enum machine_mode mode)
8119 if (mode != VOIDmode && mode != Pmode)
8120 return 0;
8122 if ((GET_MODE (op) == Pmode || GET_MODE (op) == VOIDmode)
8123 && EXTRA_CONSTRAINT_Csy (op))
8124 return ! reload_completed;
8126 return target_reg_operand (op, mode);
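/* Return nonzero if OP is a CONST_INT that is a multiple of 8 in the
   range 8..56, i.e. a byte-aligned bit offset for the SHmedia mextr
   patterns.  */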
8130 mextr_bit_offset (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8132 HOST_WIDE_INT i;
8134 if (GET_CODE (op) != CONST_INT)
8135 return 0;
8136 i = INTVAL (op);
8137 return i >= 1 * 8 && i <= 7 * 8 && (i & 7) == 0;
8141 extend_reg_operand (rtx op, enum machine_mode mode)
8143 return (GET_CODE (op) == TRUNCATE
8144 ? arith_operand
8145 : arith_reg_operand) (op, mode);
8149 trunc_hi_operand (rtx op, enum machine_mode mode)
8151 enum machine_mode op_mode = GET_MODE (op);
8153 if (op_mode != SImode && op_mode != DImode
8154 && op_mode != V4HImode && op_mode != V2SImode)
8155 return 0;
8156 return extend_reg_operand (op, mode);
8160 extend_reg_or_0_operand (rtx op, enum machine_mode mode)
8162 return (GET_CODE (op) == TRUNCATE
8163 ? arith_operand
8164 : arith_reg_or_0_operand) (op, mode);
8168 minuend_operand (rtx op, enum machine_mode mode)
8170 return op == constm1_rtx || extend_reg_or_0_operand (op, mode);
8174 general_extend_operand (rtx op, enum machine_mode mode)
8176 return (GET_CODE (op) == TRUNCATE
8177 ? arith_operand
8178 : nonimmediate_operand) (op, mode);
8182 ua_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8184 if (GET_CODE (op) == PLUS
8185 && (GET_CODE (XEXP (op, 1)) != CONST_INT
8186 || ! CONST_OK_FOR_I06 (INTVAL (XEXP (op, 1)))))
8187 return 0;
8188 return address_operand (op, QImode);
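/* Return nonzero if OP is an address of the form REG, or REG plus a
   CONST_INT that is a multiple of 32 - presumably so that the offset
   stays aligned to an SHmedia cache line.  */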
8192 cache_address_operand (rtx op, enum machine_mode mode)
8194 if (GET_CODE (op) == PLUS)
8196 if (GET_CODE (XEXP (op, 0)) != REG)
8197 return 0;
8198 if (GET_CODE (XEXP (op, 1)) != CONST_INT
8199 || (INTVAL (XEXP (op, 1)) & 31))
8200 return 0;
8202 else if (GET_CODE (op) != REG)
8203 return 0;
8204 return address_operand (op, mode);
8208 inqhi_operand (rtx op, enum machine_mode mode)
8210 if (GET_CODE (op) != TRUNCATE || mode != GET_MODE (op))
8211 return 0;
8212 op = XEXP (op, 0);
8213 /* Can't use true_regnum here because copy_cost wants to know about
8214 SECONDARY_INPUT_RELOAD_CLASS. */
8215 return GET_CODE (op) == REG && FP_REGISTER_P (REGNO (op));
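/* Return nonzero if V is a CONST_VECTOR or PARALLEL of mode MODE (or of
   any mode, if MODE is VOIDmode) whose elements are all the same; for
   vectors of byte-sized elements, a repeating pair of elements is also
   accepted.  */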
8219 sh_rep_vec (rtx v, enum machine_mode mode)
8221 int i;
8222 rtx x, y;
8224 if ((GET_CODE (v) != CONST_VECTOR && GET_CODE (v) != PARALLEL)
8225 || (GET_MODE (v) != mode && mode != VOIDmode))
8226 return 0;
8227 i = XVECLEN (v, 0) - 2;
8228 x = XVECEXP (v, 0, i + 1);
8229 if (GET_MODE_UNIT_SIZE (mode) == 1)
8231 y = XVECEXP (v, 0, i);
8232 for (i -= 2; i >= 0; i -= 2)
8233 if (! rtx_equal_p (XVECEXP (v, 0, i + 1), x)
8234 || ! rtx_equal_p (XVECEXP (v, 0, i), y))
8235 return 0;
8237 else
8238 for (; i >= 0; i--)
8239 if (XVECEXP (v, 0, i) != x)
8240 return 0;
8241 return 1;
8244 /* Determine if V is a constant vector matching MODE with only one element
8245 that is not a sign extension. Two byte-sized elements count as one. */
8247 sh_1el_vec (rtx v, enum machine_mode mode)
8249 int unit_size;
8250 int i, last, least, sign_ix;
8251 rtx sign;
8253 if (GET_CODE (v) != CONST_VECTOR
8254 || (GET_MODE (v) != mode && mode != VOIDmode))
8255 return 0;
8256 /* Determine numbers of last and of least significant elements. */
8257 last = XVECLEN (v, 0) - 1;
8258 least = TARGET_LITTLE_ENDIAN ? 0 : last;
8259 if (GET_CODE (XVECEXP (v, 0, least)) != CONST_INT)
8260 return 0;
8261 sign_ix = least;
8262 if (GET_MODE_UNIT_SIZE (mode) == 1)
8263 sign_ix = TARGET_LITTLE_ENDIAN ? 1 : last - 1;
8264 if (GET_CODE (XVECEXP (v, 0, sign_ix)) != CONST_INT)
8265 return 0;
8266 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (v));
8267 sign = (INTVAL (XVECEXP (v, 0, sign_ix)) >> (unit_size * BITS_PER_UNIT - 1)
8268 ? constm1_rtx : const0_rtx);
8269 i = XVECLEN (v, 0) - 1;
8271 if (i != least && i != sign_ix && XVECEXP (v, 0, i) != sign)
8272 return 0;
8273 while (--i);
8274 return 1;
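/* Return nonzero if V is a CONST_VECTOR of mode MODE whose elements are
   all CONST_INTs.  */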
8278 sh_const_vec (rtx v, enum machine_mode mode)
8280 int i;
8282 if (GET_CODE (v) != CONST_VECTOR
8283 || (GET_MODE (v) != mode && mode != VOIDmode))
8284 return 0;
8285 i = XVECLEN (v, 0) - 1;
8286 for (; i >= 0; i--)
8287 if (GET_CODE (XVECEXP (v, 0, i)) != CONST_INT)
8288 return 0;
8289 return 1;
8292 /* Return the destination address of a branch. */
8294 static int
8295 branch_dest (rtx branch)
8297 rtx dest = SET_SRC (PATTERN (branch));
8298 int dest_uid;
8300 if (GET_CODE (dest) == IF_THEN_ELSE)
8301 dest = XEXP (dest, 1);
8302 dest = XEXP (dest, 0);
8303 dest_uid = INSN_UID (dest);
8304 return INSN_ADDRESSES (dest_uid);
8307 /* Return nonzero if REG is not used after INSN.
8308 We assume REG is a reload reg, and therefore does
8309 not live past labels. It may live past calls or jumps though. */
8311 reg_unused_after (rtx reg, rtx insn)
8313 enum rtx_code code;
8314 rtx set;
8316 /* If the reg is set by this instruction, then it is safe for our
8317 case. Disregard the case where this is a store to memory, since
8318 we are checking a register used in the store address. */
8319 set = single_set (insn);
8320 if (set && GET_CODE (SET_DEST (set)) != MEM
8321 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8322 return 1;
8324 while ((insn = NEXT_INSN (insn)))
8326 rtx set;
8327 if (!INSN_P (insn))
8328 continue;
8330 code = GET_CODE (insn);
8332 #if 0
8333 /* If this is a label that existed before reload, then the register
8334 is dead here. However, if this is a label added by reorg, then
8335 the register may still be live here. We can't tell the difference,
8336 so we just ignore labels completely. */
8337 if (code == CODE_LABEL)
8338 return 1;
8339 /* else */
8340 #endif
8342 if (code == JUMP_INSN)
8343 return 0;
8345 /* If this is a sequence, we must handle them all at once.
8346 We could have for instance a call that sets the target register,
8347 and an insn in a delay slot that uses the register. In this case,
8348 we must return 0. */
8349 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
8351 int i;
8352 int retval = 0;
8354 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
8356 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
8357 rtx set = single_set (this_insn);
8359 if (GET_CODE (this_insn) == CALL_INSN)
8360 code = CALL_INSN;
8361 else if (GET_CODE (this_insn) == JUMP_INSN)
8363 if (INSN_ANNULLED_BRANCH_P (this_insn))
8364 return 0;
8365 code = JUMP_INSN;
8368 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8369 return 0;
8370 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8372 if (GET_CODE (SET_DEST (set)) != MEM)
8373 retval = 1;
8374 else
8375 return 0;
8377 if (set == 0
8378 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
8379 return 0;
8381 if (retval == 1)
8382 return 1;
8383 else if (code == JUMP_INSN)
8384 return 0;
8387 set = single_set (insn);
8388 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8389 return 0;
8390 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8391 return GET_CODE (SET_DEST (set)) != MEM;
8392 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
8393 return 0;
8395 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
8396 return 1;
8398 return 1;
8401 #include "ggc.h"
8403 static GTY(()) rtx fpscr_rtx;
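/* Return the REG rtx for FPSCR, creating it on first use and marking it
   as a user register.  */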
8405 get_fpscr_rtx (void)
8407 if (! fpscr_rtx)
8409 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
8410 REG_USERVAR_P (fpscr_rtx) = 1;
8411 mark_user_reg (fpscr_rtx);
8413 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
8414 mark_user_reg (fpscr_rtx);
8415 return fpscr_rtx;
8418 void
8419 emit_sf_insn (rtx pat)
8421 emit_insn (pat);
8424 void
8425 emit_df_insn (rtx pat)
8427 emit_insn (pat);
8430 void
8431 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8433 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8436 void
8437 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8439 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
8440 get_fpscr_rtx ()));
8443 void
8444 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8446 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8449 void
8450 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8452 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
8453 get_fpscr_rtx ()));
8456 /* ??? gcc does flow analysis strictly after common subexpression
8457 elimination. As a result, common subexpression elimination fails
8458 when there are some intervening statements setting the same register.
8459 If we did nothing about this, this would hurt the precision switching
8460 for SH4 badly. There is some cse after reload, but it is unable to
8461 undo the extra register pressure from the unused instructions, and
8462 it cannot remove auto-increment loads.
8464 A C code example that shows this flow/cse weakness for (at least) SH
8465 and sparc (as of gcc ss-970706) is this:
8467 double
8468 f(double a)
8470 double d;
8471 d = 0.1;
8472 a += d;
8473 d = 1.1;
8474 d = 0.1;
8475 a *= d;
8476 return a;
8479 So we add another pass before common subexpression elimination, to
8480 remove assignments that are dead due to a following assignment in the
8481 same basic block. */
8483 static void
8484 mark_use (rtx x, rtx *reg_set_block)
8486 enum rtx_code code;
8488 if (! x)
8489 return;
8490 code = GET_CODE (x);
8491 switch (code)
8493 case REG:
8495 int regno = REGNO (x);
8496 int nregs = (regno < FIRST_PSEUDO_REGISTER
8497 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
8498 : 1);
8501 reg_set_block[regno + nregs - 1] = 0;
8503 while (--nregs);
8504 break;
8506 case SET:
8508 rtx dest = SET_DEST (x);
8510 if (GET_CODE (dest) == SUBREG)
8511 dest = SUBREG_REG (dest);
8512 if (GET_CODE (dest) != REG)
8513 mark_use (dest, reg_set_block);
8514 mark_use (SET_SRC (x), reg_set_block);
8515 break;
8517 case CLOBBER:
8518 break;
8519 default:
8521 const char *fmt = GET_RTX_FORMAT (code);
8522 int i, j;
8523 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8525 if (fmt[i] == 'e')
8526 mark_use (XEXP (x, i), reg_set_block);
8527 else if (fmt[i] == 'E')
8528 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8529 mark_use (XVECEXP (x, i, j), reg_set_block);
8531 break;
8536 static rtx get_free_reg (HARD_REG_SET);
8538 /* This function returns a register to use for loading the address from
8539 which the fpscr is loaded. Currently it always returns r1 or r7, but when we are
8540 able to use pseudo registers after combine, or have a better mechanism
8541 for choosing a register, it should be done here. */
8542 /* REGS_LIVE is the liveness information for the point for which we
8543 need this allocation. In some bare-bones exit blocks, r1 is live at the
8544 start. We can even have all of r0..r3 being live:
8545 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8546 The INSN before which new insns are placed will clobber the register
8547 we return. If a basic block consists only of setting the return value
8548 register to a pseudo and using that register, the return value is not
8549 live before or after this block, yet we'll insert our insns right in
8550 the middle. */
8552 static rtx
8553 get_free_reg (HARD_REG_SET regs_live)
8555 if (! TEST_HARD_REG_BIT (regs_live, 1))
8556 return gen_rtx_REG (Pmode, 1);
8558 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8559 there shouldn't be anything but a jump before the function end. */
8560 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
8561 return gen_rtx_REG (Pmode, 7);
8564 /* This function will set the fpscr from memory.
8565 MODE is the mode we are setting it to. */
8566 void
8567 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8569 enum attr_fp_mode fp_mode = mode;
8570 rtx addr_reg = get_free_reg (regs_live);
8572 if (fp_mode == (enum attr_fp_mode) ACTUAL_NORMAL_MODE (FP_MODE))
8573 emit_insn (gen_fpu_switch1 (addr_reg));
8574 else
8575 emit_insn (gen_fpu_switch0 (addr_reg));
8578 /* Is the given character a logical line separator for the assembler? */
8579 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8580 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
8581 #endif
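/* Return the number of extra bytes that INSN takes beyond its normal
   length: two bytes when a needed delay slot is unfilled (including SH2e
   conditional branches); for asm insns, two bytes for each sh-dsp parallel
   processing insn and four for each repeat pseudo-insn in the template.  */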
8584 sh_insn_length_adjustment (rtx insn)
8586 /* Instructions with unfilled delay slots take up an extra two bytes for
8587 the nop in the delay slot. */
8588 if (((GET_CODE (insn) == INSN
8589 && GET_CODE (PATTERN (insn)) != USE
8590 && GET_CODE (PATTERN (insn)) != CLOBBER)
8591 || GET_CODE (insn) == CALL_INSN
8592 || (GET_CODE (insn) == JUMP_INSN
8593 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8594 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8595 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8596 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8597 return 2;
8599 /* SH2e has a bug that prevents the use of annulled branches, so if
8600 the delay slot is not filled, we'll have to put a NOP in it. */
8601 if (sh_cpu == CPU_SH2E
8602 && GET_CODE (insn) == JUMP_INSN
8603 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8604 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8605 && get_attr_type (insn) == TYPE_CBRANCH
8606 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8607 return 2;
8609 /* sh-dsp parallel processing insns take four bytes instead of two. */
8611 if (GET_CODE (insn) == INSN)
8613 int sum = 0;
8614 rtx body = PATTERN (insn);
8615 const char *template;
8616 char c;
8617 int maybe_label = 1;
8619 if (GET_CODE (body) == ASM_INPUT)
8620 template = XSTR (body, 0);
8621 else if (asm_noperands (body) >= 0)
8622 template
8623 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
8624 else
8625 return 0;
8628 int ppi_adjust = 0;
8631 c = *template++;
8632 while (c == ' ' || c == '\t');
8633 /* all sh-dsp parallel-processing insns start with p.
8634 The only non-ppi sh insn starting with p is pref.
8635 The only ppi starting with pr is prnd. */
8636 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
8637 ppi_adjust = 2;
8638 /* The repeat pseudo-insn expands to three insns, a total of
8639 six bytes in size. */
8640 else if ((c == 'r' || c == 'R')
8641 && ! strncasecmp ("epeat", template, 5))
8642 ppi_adjust = 4;
8643 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
8645 /* If this is a label, it is obviously not a ppi insn. */
8646 if (c == ':' && maybe_label)
8648 ppi_adjust = 0;
8649 break;
8651 else if (c == '\'' || c == '"')
8652 maybe_label = 0;
8653 c = *template++;
8655 sum += ppi_adjust;
8656 maybe_label = c != ':';
8658 while (c);
8659 return sum;
8661 return 0;
8664 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8665 isn't protected by a PIC unspec. */
8667 nonpic_symbol_mentioned_p (rtx x)
8669 register const char *fmt;
8670 register int i;
8672 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8673 || GET_CODE (x) == PC)
8674 return 1;
8676 /* We don't want to look into the possible MEM location of a
8677 CONST_DOUBLE, since we're not going to use it, in general. */
8678 if (GET_CODE (x) == CONST_DOUBLE)
8679 return 0;
8681 if (GET_CODE (x) == UNSPEC
8682 && (XINT (x, 1) == UNSPEC_PIC
8683 || XINT (x, 1) == UNSPEC_GOT
8684 || XINT (x, 1) == UNSPEC_GOTOFF
8685 || XINT (x, 1) == UNSPEC_GOTPLT
8686 || XINT (x, 1) == UNSPEC_GOTTPOFF
8687 || XINT (x, 1) == UNSPEC_DTPOFF
8688 || XINT (x, 1) == UNSPEC_PLT))
8689 return 0;
8691 fmt = GET_RTX_FORMAT (GET_CODE (x));
8692 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8694 if (fmt[i] == 'E')
8696 register int j;
8698 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8699 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8700 return 1;
8702 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8703 return 1;
8706 return 0;
8709 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8710 @GOTOFF in `reg'. */
8712 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8713 rtx reg)
8715 if (tls_symbolic_operand (orig, Pmode))
8716 return orig;
8718 if (GET_CODE (orig) == LABEL_REF
8719 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8721 if (reg == 0)
8722 reg = gen_reg_rtx (Pmode);
8724 emit_insn (gen_symGOTOFF2reg (reg, orig));
8725 return reg;
8727 else if (GET_CODE (orig) == SYMBOL_REF)
8729 if (reg == 0)
8730 reg = gen_reg_rtx (Pmode);
8732 emit_insn (gen_symGOT2reg (reg, orig));
8733 return reg;
8735 return orig;
8738 /* Mark the use of a constant in the literal table. If the constant
8739 has multiple labels, make it unique. */
8740 static rtx
8741 mark_constant_pool_use (rtx x)
8743 rtx insn, lab, pattern;
8745 if (x == NULL)
8746 return x;
8748 switch (GET_CODE (x))
8750 case LABEL_REF:
8751 x = XEXP (x, 0);
8752 case CODE_LABEL:
8753 break;
8754 default:
8755 return x;
8758 /* Get the first label in the list of labels for the same constant
8759 and delete the other labels in the list. */
8760 lab = x;
8761 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8763 if (GET_CODE (insn) != CODE_LABEL
8764 || LABEL_REFS (insn) != NEXT_INSN (insn))
8765 break;
8766 lab = insn;
8769 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8770 INSN_DELETED_P (insn) = 1;
8772 /* Mark constants in a window. */
8773 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8775 if (GET_CODE (insn) != INSN)
8776 continue;
8778 pattern = PATTERN (insn);
8779 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8780 continue;
8782 switch (XINT (pattern, 1))
8784 case UNSPECV_CONST2:
8785 case UNSPECV_CONST4:
8786 case UNSPECV_CONST8:
8787 XVECEXP (pattern, 0, 1) = const1_rtx;
8788 break;
8789 case UNSPECV_WINDOW_END:
8790 if (XVECEXP (pattern, 0, 0) == x)
8791 return lab;
8792 break;
8793 case UNSPECV_CONST_END:
8794 return lab;
8795 default:
8796 break;
8800 return lab;
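/* Return nonzero if C is a CONST_INT satisfying the I06 constraint,
   i.e. a constant usable as the displacement of an unaligned access
   (compare ua_address_operand above).  */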
8804 ua_offset (rtx c, enum machine_mode mode ATTRIBUTE_UNUSED)
8806 return GET_CODE (c) == CONST_INT && CONST_OK_FOR_I06 (INTVAL (c));
8809 /* Return true if it's possible to redirect BRANCH1 to the destination
8810 of an unconditional jump BRANCH2. We only want to do this if the
8811 resulting branch will have a short displacement. */
8813 sh_can_redirect_branch (rtx branch1, rtx branch2)
8815 if (flag_expensive_optimizations && simplejump_p (branch2))
8817 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8818 rtx insn;
8819 int distance;
8821 for (distance = 0, insn = NEXT_INSN (branch1);
8822 insn && distance < 256;
8823 insn = PREV_INSN (insn))
8825 if (insn == dest)
8826 return 1;
8827 else
8828 distance += get_attr_length (insn);
8830 for (distance = 0, insn = NEXT_INSN (branch1);
8831 insn && distance < 256;
8832 insn = NEXT_INSN (insn))
8834 if (insn == dest)
8835 return 1;
8836 else
8837 distance += get_attr_length (insn);
8840 return 0;
8843 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8845 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8846 unsigned int new_reg)
8848 /* Interrupt functions can only use registers that have already been
8849 saved by the prologue, even if they would normally be
8850 call-clobbered. */
8852 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
8853 return 0;
8855 return 1;
8858 /* Function to update the integer COST
8859 based on the relationship between INSN that is dependent on
8860 DEP_INSN through the dependence LINK. The default is to make no
8861 adjustment to COST. This can be used for example to specify to
8862 the scheduler that an output- or anti-dependence does not incur
8863 the same cost as a data-dependence. The return value should be
8864 the new value for COST. */
8865 static int
8866 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8868 rtx reg, use_pat;
8870 if (TARGET_SHMEDIA)
8872 /* On SHmedia, if the dependence is an anti-dependence or
8873 output-dependence, there is no cost. */
8874 if (REG_NOTE_KIND (link) != 0)
8876 /* However, dependencies between target register loads and
8877 uses of the register in a subsequent block that are separated
8878 by a conditional branch are not modelled - we have to make do with
8879 the anti-dependency between the target register load and the
8880 conditional branch that ends the current block. */
8881 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8882 && GET_CODE (PATTERN (dep_insn)) == SET
8883 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
8884 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
8885 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
8887 int orig_cost = cost;
8888 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
8889 rtx target = ((! note
8890 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
8891 ? insn : JUMP_LABEL (insn));
8892 /* On the likely path, the branch costs 1, on the unlikely path,
8893 it costs 3. */
8894 cost--;
8896 target = next_active_insn (target);
8897 while (target && ! flow_dependent_p (target, dep_insn)
8898 && --cost > 0);
8899 /* If two branches are executed in immediate succession, with the
8900 first branch properly predicted, this causes a stall at the
8901 second branch, hence we won't need the target for the
8902 second branch for two cycles after the launch of the first
8903 branch. */
8904 if (cost > orig_cost - 2)
8905 cost = orig_cost - 2;
8907 else
8908 cost = 0;
8911 else if (get_attr_is_mac_media (insn)
8912 && get_attr_is_mac_media (dep_insn))
8913 cost = 1;
8915 else if (! reload_completed
8916 && GET_CODE (PATTERN (insn)) == SET
8917 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
8918 && GET_CODE (PATTERN (dep_insn)) == SET
8919 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
8920 && cost < 4)
8921 cost = 4;
8922 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
8923 that is needed at the target. */
8924 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
8925 && ! flow_dependent_p (insn, dep_insn))
8926 cost--;
8928 else if (REG_NOTE_KIND (link) == 0)
8930 enum attr_type dep_type, type;
8932 if (recog_memoized (insn) < 0
8933 || recog_memoized (dep_insn) < 0)
8934 return cost;
8936 dep_type = get_attr_type (dep_insn);
8937 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
8938 cost--;
8939 if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
8940 && (type = get_attr_type (insn)) != TYPE_CALL
8941 && type != TYPE_SFUNC)
8942 cost--;
8944 /* The only input for a call that is timing-critical is the
8945 function's address. */
8946 if (GET_CODE(insn) == CALL_INSN)
8948 rtx call = PATTERN (insn);
8950 if (GET_CODE (call) == PARALLEL)
8951 call = XVECEXP (call, 0 ,0);
8952 if (GET_CODE (call) == SET)
8953 call = SET_SRC (call);
8954 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
8955 /* sibcalli_thunk uses a symbol_ref in an unspec. */
8956 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
8957 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
8958 cost = 0;
8960 /* Likewise, the most timing critical input for an sfunc call
8961 is the function address. However, sfuncs typically start
8962 using their arguments pretty quickly.
8963 Assume a four cycle delay before they are needed. */
8964 /* All sfunc calls are parallels with at least four components.
8965 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8966 else if (GET_CODE (PATTERN (insn)) == PARALLEL
8967 && XVECLEN (PATTERN (insn), 0) >= 4
8968 && (reg = sfunc_uses_reg (insn)))
8970 if (! reg_set_p (reg, dep_insn))
8971 cost -= 4;
8973 /* When the preceding instruction loads the shift amount of
8974 the following SHAD/SHLD, the latency of the load is increased
8975 by 1 cycle. */
8976 else if (TARGET_SH4
8977 && get_attr_type (insn) == TYPE_DYN_SHIFT
8978 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
8979 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
8980 XEXP (SET_SRC (single_set (insn)),
8981 1)))
8982 cost++;
8983 /* When an LS group instruction with a latency of less than
8984 3 cycles is followed by a double-precision floating-point
8985 instruction, FIPR, or FTRV, the latency of the first
8986 instruction is increased to 3 cycles. */
8987 else if (cost < 3
8988 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
8989 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
8990 cost = 3;
8991 /* The lsw register of a double-precision computation is ready one
8992 cycle earlier. */
8993 else if (reload_completed
8994 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
8995 && (use_pat = single_set (insn))
8996 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
8997 SET_SRC (use_pat)))
8998 cost -= 1;
9000 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
9001 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
9002 cost -= 1;
9004 /* An anti-dependence penalty of two applies if the first insn is a double
9005 precision fadd / fsub / fmul. */
9006 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
9007 && recog_memoized (dep_insn) >= 0
9008 && get_attr_type (dep_insn) == TYPE_DFP_ARITH
9009 /* A lot of alleged anti-flow dependences are fake,
9010 so check this one is real. */
9011 && flow_dependent_p (dep_insn, insn))
9012 cost = 2;
9015 return cost;
9018 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
9019 if DEP_INSN is anti-flow dependent on INSN. */
9020 static int
9021 flow_dependent_p (rtx insn, rtx dep_insn)
9023 rtx tmp = PATTERN (insn);
9025 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
9026 return tmp == NULL_RTX;
9029 /* A helper function for flow_dependent_p called through note_stores. */
9030 static void
9031 flow_dependent_p_1 (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
9033 rtx * pinsn = (rtx *) data;
9035 if (*pinsn && reg_referenced_p (x, *pinsn))
9036 *pinsn = NULL_RTX;
9039 /* For use by ALLOCATE_INITIAL_VALUE. Note that sh.md contains some
9040 'special function' patterns (type sfunc) that clobber pr, but that
9041 do not look like function calls to leaf_function_p. Hence we must
9042 do this extra check. */
9044 sh_pr_n_sets (void)
9046 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
9049 /* This function returns "2" to indicate dual issue for the SH4
9050 processor. To be used by the DFA pipeline description. */
9051 static int
9052 sh_issue_rate (void)
9054 if (TARGET_SUPERSCALAR)
9055 return 2;
9056 else
9057 return 1;
9060 /* Functions for ready queue reordering for sched1. */
9062 /* Get weight for mode for a set x. */
9063 static short
9064 find_set_regmode_weight (rtx x, enum machine_mode mode)
9066 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
9067 return 1;
9068 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
9070 if (GET_CODE (SET_DEST (x)) == REG)
9072 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
9073 return 1;
9074 else
9075 return 0;
9077 return 1;
9079 return 0;
9082 /* Get regmode weight for insn. */
9083 static short
9084 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
9086 short reg_weight = 0;
9087 rtx x;
9089 /* Increment weight for each register born here. */
9090 x = PATTERN (insn);
9091 reg_weight += find_set_regmode_weight (x, mode);
9092 if (GET_CODE (x) == PARALLEL)
9094 int j;
9095 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
9097 x = XVECEXP (PATTERN (insn), 0, j);
9098 reg_weight += find_set_regmode_weight (x, mode);
9101 /* Decrement weight for each register that dies here. */
9102 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
9104 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
9106 rtx note = XEXP (x, 0);
9107 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
9108 reg_weight--;
9111 return reg_weight;
9114 /* Calculate regmode weights for all insns of a basic block. */
9115 static void
9116 find_regmode_weight (int b, enum machine_mode mode)
9118 rtx insn, next_tail, head, tail;
9120 get_block_head_tail (b, &head, &tail);
9121 next_tail = NEXT_INSN (tail);
9123 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
9125 /* Handle register life information. */
9126 if (!INSN_P (insn))
9127 continue;
9129 if (mode == SFmode)
9130 INSN_REGMODE_WEIGHT (insn, mode) =
9131 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
9132 else if (mode == SImode)
9133 INSN_REGMODE_WEIGHT (insn, mode) =
9134 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
9138 /* Comparison function for ready queue sorting. */
9139 static int
9140 rank_for_reorder (const void *x, const void *y)
9142 rtx tmp = *(const rtx *) y;
9143 rtx tmp2 = *(const rtx *) x;
9145 /* The insn in a schedule group should be issued first. */
9146 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
9147 return SCHED_GROUP_P (tmp2) ? 1 : -1;
9149 /* If insns are equally good, sort by INSN_LUID (original insn order). This
9150 minimizes instruction movement, thus minimizing sched's effect on
9151 register pressure. */
9152 return INSN_LUID (tmp) - INSN_LUID (tmp2);
9155 /* Resort the array A in which only the element at index N may be out of order. */
9156 static void
9157 swap_reorder (rtx *a, int n)
9159 rtx insn = a[n - 1];
9160 int i = n - 2;
9162 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
9164 a[i + 1] = a[i];
9165 i -= 1;
9167 a[i + 1] = insn;
9170 #define SCHED_REORDER(READY, N_READY) \
9171 do \
9173 if ((N_READY) == 2) \
9174 swap_reorder (READY, N_READY); \
9175 else if ((N_READY) > 2) \
9176 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
9178 while (0)
9180 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
9181 macro. */
9182 static void
9183 ready_reorder (rtx *ready, int nready)
9185 SCHED_REORDER (ready, nready);
9188 /* Calculate regmode weights for all insns of all basic blocks. */
9189 static void
9190 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
9191 int verbose ATTRIBUTE_UNUSED,
9192 int old_max_uid)
9194 basic_block b;
9196 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
9197 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
9199 FOR_EACH_BB_REVERSE (b)
9201 find_regmode_weight (b->index, SImode);
9202 find_regmode_weight (b->index, SFmode);
9205 CURR_REGMODE_PRESSURE (SImode) = 0;
9206 CURR_REGMODE_PRESSURE (SFmode) = 0;
9210 /* Cleanup. */
9211 static void
9212 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
9213 int verbose ATTRIBUTE_UNUSED)
9215 if (regmode_weight[0])
9217 free (regmode_weight[0]);
9218 regmode_weight[0] = NULL;
9220 if (regmode_weight[1])
9222 free (regmode_weight[1]);
9223 regmode_weight[1] = NULL;
9227 /* Cache the can_issue_more so that we can return it from reorder2. Also,
9228 keep count of register pressure for SImode and SFmode. */
9229 static int
9230 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
9231 int sched_verbose ATTRIBUTE_UNUSED,
9232 rtx insn,
9233 int can_issue_more)
9235 if (GET_CODE (PATTERN (insn)) != USE
9236 && GET_CODE (PATTERN (insn)) != CLOBBER)
9237 cached_can_issue_more = can_issue_more - 1;
9238 else
9239 cached_can_issue_more = can_issue_more;
9241 if (reload_completed)
9242 return cached_can_issue_more;
9244 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
9245 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
9247 return cached_can_issue_more;
9250 static void
9251 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
9252 int verbose ATTRIBUTE_UNUSED,
9253 int veclen ATTRIBUTE_UNUSED)
9255 CURR_REGMODE_PRESSURE (SImode) = 0;
9256 CURR_REGMODE_PRESSURE (SFmode) = 0;
9259 /* Some magic numbers. */
9260 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9261 functions that already have high pressure on r0. */
9262 #define R0_MAX_LIFE_REGIONS 2
9263 #define R0_MAX_LIVE_LENGTH 12
9264 /* Register Pressure thresholds for SImode and SFmode registers. */
9265 #define SIMODE_MAX_WEIGHT 5
9266 #define SFMODE_MAX_WEIGHT 10
9268 /* Return true if the pressure is high for MODE. */
9269 static short
9270 high_pressure (enum machine_mode mode)
9272 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9273 functions that already have high pressure on r0. */
9274 if ((REG_N_SETS (0) - REG_N_DEATHS (0)) >= R0_MAX_LIFE_REGIONS
9275 && REG_LIVE_LENGTH (0) >= R0_MAX_LIVE_LENGTH)
9276 return 1;
9278 if (mode == SFmode)
9279 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
9280 else
9281 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
9284 /* Reorder ready queue if register pressure is high. */
9285 static int
9286 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
9287 int sched_verbose ATTRIBUTE_UNUSED,
9288 rtx *ready,
9289 int *n_readyp,
9290 int clock_var ATTRIBUTE_UNUSED)
9292 if (reload_completed)
9293 return sh_issue_rate ();
9295 if (high_pressure (SFmode) || high_pressure (SImode))
9297 ready_reorder (ready, *n_readyp);
9300 return sh_issue_rate ();
9303 /* Skip cycles if the current register pressure is high. */
9304 static int
9305 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
9306 int sched_verbose ATTRIBUTE_UNUSED,
9307 rtx *ready ATTRIBUTE_UNUSED,
9308 int *n_readyp ATTRIBUTE_UNUSED,
9309 int clock_var ATTRIBUTE_UNUSED)
9311 if (reload_completed)
9312 return cached_can_issue_more;
9314 if (high_pressure(SFmode) || high_pressure (SImode))
9315 skip_cycles = 1;
9317 return cached_can_issue_more;
9320 /* Skip cycles without sorting the ready queue. This will move insns from
9321 Q -> R. If this is the last cycle we are skipping, allow sorting of the
9322 ready queue by sh_reorder. */
9324 /* Generally, skipping this many cycles is sufficient for all insns to move
9325 from Q -> R. */
9326 #define MAX_SKIPS 8
9328 static int
9329 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
9330 int sched_verbose ATTRIBUTE_UNUSED,
9331 rtx insn ATTRIBUTE_UNUSED,
9332 int last_clock_var,
9333 int clock_var,
9334 int *sort_p)
9336 if (reload_completed)
9337 return 0;
9339 if (skip_cycles)
9341 if ((clock_var - last_clock_var) < MAX_SKIPS)
9343 *sort_p = 0;
9344 return 1;
9346 /* If this is the last cycle we are skipping, allow reordering of R. */
9347 if ((clock_var - last_clock_var) == MAX_SKIPS)
9349 *sort_p = 1;
9350 return 1;
9354 skip_cycles = 0;
9356 return 0;
9359 /* SHmedia requires registers for branches, so we can't generate new
9360 branches past reload. */
9361 static bool
9362 sh_cannot_modify_jumps_p (void)
9364 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
9367 static int
9368 sh_target_reg_class (void)
9370 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
9373 static bool
9374 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
9376 HARD_REG_SET dummy;
9377 rtx insn;
9379 if (! shmedia_space_reserved_for_target_registers)
9380 return 0;
9381 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
9382 return 0;
9383 if (calc_live_regs (&dummy) >= 6 * 8)
9384 return 1;
9385 /* This is a borderline case. See if we have a nested loop, or a loop
9386 with a call, or with more than 4 labels inside. */
9387 for (insn = get_insns(); insn; insn = NEXT_INSN (insn))
9389 if (GET_CODE (insn) == NOTE
9390 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
9392 int labels = 0;
9396 insn = NEXT_INSN (insn);
9397 if ((GET_CODE (insn) == NOTE
9398 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
9399 || GET_CODE (insn) == CALL_INSN
9400 || (GET_CODE (insn) == CODE_LABEL && ++labels > 4))
9401 return 1;
9403 while (GET_CODE (insn) != NOTE
9404 || NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_END);
9407 return 0;
9410 static bool
9411 sh_ms_bitfield_layout_p (tree record_type ATTRIBUTE_UNUSED)
9413 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
9417 On the SH1..SH4, the trampoline looks like
9418 2 0002 D202 mov.l l2,r2
9419 1 0000 D301 mov.l l1,r3
9420 3 0004 422B jmp @r2
9421 4 0006 0009 nop
9422 5 0008 00000000 l1: .long area
9423 6 000c 00000000 l2: .long function
9425 SH5 (compact) uses r1 instead of r3 for the static chain. */
9428 /* Emit RTL insns to initialize the variable parts of a trampoline.
9429 FNADDR is an RTX for the address of the function's pure code.
9430 CXT is an RTX for the static chain value for the function. */
9432 void
9433 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
9435 if (TARGET_SHMEDIA64)
9437 rtx tramp_templ;
9438 int fixed_len;
9440 rtx movi1 = GEN_INT (0xcc000010);
9441 rtx shori1 = GEN_INT (0xc8000010);
9442 rtx src, dst;
9444 /* The following trampoline works within a +- 128 KB range for cxt:
9445 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
9446 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
9447 gettr tr1,r1; blink tr0,r63 */
9448 /* Address rounding makes it hard to compute the exact bounds of the
9449 offset for this trampoline, but we have a rather generous offset
9450 range, so frame_offset should do fine as an upper bound. */
9451 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
9453 /* ??? could optimize this trampoline initialization
9454 by writing DImode words with two insns each. */
9455 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
9456 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
9457 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
9458 insn = gen_rtx_AND (DImode, insn, mask);
9459 /* Or in ptb/u .,tr1 pattern */
9460 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
9461 insn = force_operand (insn, NULL_RTX);
9462 insn = gen_lowpart (SImode, insn);
9463 emit_move_insn (gen_rtx_MEM (SImode, tramp), insn);
9464 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
9465 insn = gen_rtx_AND (DImode, insn, mask);
9466 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
9467 insn = gen_lowpart (SImode, insn);
9468 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)), insn);
9469 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
9470 insn = gen_rtx_AND (DImode, insn, mask);
9471 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9472 insn = gen_lowpart (SImode, insn);
9473 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)), insn);
9474 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
9475 insn = gen_rtx_AND (DImode, insn, mask);
9476 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9477 insn = gen_lowpart (SImode, insn);
9478 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
9479 insn);
9480 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
9481 insn = gen_rtx_AND (DImode, insn, mask);
9482 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9483 insn = gen_lowpart (SImode, insn);
9484 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 16)),
9485 insn);
9486 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 20)),
9487 GEN_INT (0x6bf10600));
9488 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 24)),
9489 GEN_INT (0x4415fc10));
9490 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 28)),
9491 GEN_INT (0x4401fff0));
9492 emit_insn (gen_ic_invalidate_line (tramp));
9493 return;
9495 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
9496 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
9498 tramp_templ = gen_datalabel_ref (tramp_templ);
9499 dst = gen_rtx_MEM (BLKmode, tramp);
9500 src = gen_rtx_MEM (BLKmode, tramp_templ);
9501 set_mem_align (dst, 256);
9502 set_mem_align (src, 64);
9503 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
9505 emit_move_insn (gen_rtx_MEM (Pmode, plus_constant (tramp, fixed_len)),
9506 fnaddr);
9507 emit_move_insn (gen_rtx_MEM (Pmode,
9508 plus_constant (tramp,
9509 fixed_len
9510 + GET_MODE_SIZE (Pmode))),
9511 cxt);
9512 emit_insn (gen_ic_invalidate_line (tramp));
9513 return;
9515 else if (TARGET_SHMEDIA)
9517 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
9518 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
9519 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
9520 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
9521 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
9522 rotated 10 right, and the higher 16 bits of every 32 selected. */
9523 rtx movishori
9524 = force_reg (V2HImode, (simplify_gen_subreg
9525 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
9526 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
9527 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
9529 tramp = force_reg (Pmode, tramp);
9530 fnaddr = force_reg (SImode, fnaddr);
9531 cxt = force_reg (SImode, cxt);
9532 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
9533 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
9534 movishori));
9535 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
9536 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9537 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
9538 emit_move_insn (gen_rtx_MEM (DImode, tramp), quad0);
9539 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
9540 gen_rtx_SUBREG (V2HImode, cxt, 0),
9541 movishori));
9542 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
9543 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9544 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
9545 if (TARGET_LITTLE_ENDIAN)
9547 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
9548 emit_insn (gen_mextr4 (quad2, cxtload, blink));
9550 else
9552 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
9553 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
9555 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 8)), quad1);
9556 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), quad2);
9557 emit_insn (gen_ic_invalidate_line (tramp));
9558 return;
9560 else if (TARGET_SHCOMPACT)
9562 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
9563 return;
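/* The two SImode stores below lay down the four 16-bit opcodes of the
   trampoline shown above (the two mov.l insns, jmp @r2 and nop), two per
   word, with the halves swapped between the big and little endian
   constants so that the same instruction stream results either way; the
   cxt and fnaddr words that follow fill the l1 and l2 slots.  */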
9565 emit_move_insn (gen_rtx_MEM (SImode, tramp),
9566 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
9567 SImode));
9568 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
9569 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
9570 SImode));
9571 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
9572 cxt);
9573 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
9574 fnaddr);
9575 if (TARGET_HARVARD)
9577 if (TARGET_USERMODE)
9578 emit_library_call (function_symbol (NULL, "__ic_invalidate",
9579 FUNCTION_ORDINARY),
9580 0, VOIDmode, 1, tramp, SImode);
9581 else
9582 emit_insn (gen_ic_invalidate_line (tramp));
9586 /* FIXME: This is overly conservative. A SHcompact function that
9587 receives arguments ``by reference'' will have them stored in its
9588 own stack frame, so it must not pass pointers or references to
9589 these arguments to other functions by means of sibling calls. */
9590 /* If PIC, we cannot make sibling calls to global functions
9591 because the PLT requires r12 to be live. */
9592 static bool
9593 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9595 return (1
9596 && (! TARGET_SHCOMPACT
9597 || current_function_args_info.stack_regs == 0)
9598 && ! sh_cfun_interrupt_handler_p ()
9599 && (! flag_pic
9600 || (decl && ! TREE_PUBLIC (decl))
9601 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
9604 /* Machine specific built-in functions. */
9606 struct builtin_description
9608 const enum insn_code icode;
9609 const char *const name;
9610 int signature;
9613 /* Describe the number and signedness of arguments; arg[0] == result
9614 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
9615 /* 9: 64 bit pointer, 10: 32 bit pointer */
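/* For example, SH_BLTIN_LDUA_L, { 2, 10 }, denotes a builtin that returns
   a signed value and takes a single 32 bit pointer argument, while
   SH_BLTIN_SH_HI, { 4, 4, 1 }, takes a don't-care first operand and an
   unsigned shift count (see the notes on shift counts below).  */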
9616 static const char signature_args[][4] =
9618 #define SH_BLTIN_V2SI2 0
9619 { 4, 4 },
9620 #define SH_BLTIN_V4HI2 1
9621 { 4, 4 },
9622 #define SH_BLTIN_V2SI3 2
9623 { 4, 4, 4 },
9624 #define SH_BLTIN_V4HI3 3
9625 { 4, 4, 4 },
9626 #define SH_BLTIN_V8QI3 4
9627 { 4, 4, 4 },
9628 #define SH_BLTIN_MAC_HISI 5
9629 { 1, 4, 4, 1 },
9630 #define SH_BLTIN_SH_HI 6
9631 { 4, 4, 1 },
9632 #define SH_BLTIN_SH_SI 7
9633 { 4, 4, 1 },
9634 #define SH_BLTIN_V4HI2V2SI 8
9635 { 4, 4, 4 },
9636 #define SH_BLTIN_V4HI2V8QI 9
9637 { 4, 4, 4 },
9638 #define SH_BLTIN_SISF 10
9639 { 4, 2 },
9640 #define SH_BLTIN_LDUA_L 11
9641 { 2, 10 },
9642 #define SH_BLTIN_LDUA_Q 12
9643 { 1, 10 },
9644 #define SH_BLTIN_STUA_L 13
9645 { 0, 10, 2 },
9646 #define SH_BLTIN_STUA_Q 14
9647 { 0, 10, 1 },
9648 #define SH_BLTIN_LDUA_L64 15
9649 { 2, 9 },
9650 #define SH_BLTIN_LDUA_Q64 16
9651 { 1, 9 },
9652 #define SH_BLTIN_STUA_L64 17
9653 { 0, 9, 2 },
9654 #define SH_BLTIN_STUA_Q64 18
9655 { 0, 9, 1 },
9656 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
9657 #define SH_BLTIN_2 19
9658 #define SH_BLTIN_SU 19
9659 { 1, 2 },
9660 #define SH_BLTIN_3 20
9661 #define SH_BLTIN_SUS 20
9662 { 2, 2, 1 },
9663 #define SH_BLTIN_PSSV 21
9664 { 0, 8, 2, 2 },
9665 #define SH_BLTIN_XXUU 22
9666 #define SH_BLTIN_UUUU 22
9667 { 1, 1, 1, 1 },
9668 #define SH_BLTIN_PV 23
9669 { 0, 8 },
9671 /* mcmv: operands considered unsigned. */
9672 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9673 /* mperm: control value considered unsigned int. */
9674 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9675 /* mshards_q: returns signed short. */
9676 /* nsb: takes long long arg, returns unsigned char. */
9677 static const struct builtin_description bdesc[] =
9679 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
9680 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
9681 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
9682 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
9683 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
9684 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
9685 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
9686 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9687 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
9688 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
9689 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
9690 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
9691 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
9692 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
9693 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
9694 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
9695 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
9696 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
9697 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3 },
9698 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3 },
9699 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3 },
9700 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3 },
9701 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3 },
9702 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3 },
9703 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3 },
9704 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
9705 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
9706 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
9707 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
9708 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
9709 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
9710 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
9711 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
9712 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
9713 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
9714 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
9715 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
9716 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
9717 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
9718 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
9719 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
9720 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
9721 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
9722 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
9723 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
9724 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
9725 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
9726 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
9727 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
9728 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
9729 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
9730 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
9731 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
9732 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
9733 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
9734 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
9735 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
9736 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
9737 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
9738 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
9739 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
9740 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3 },
9741 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2 },
9742 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2 },
9743 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
9744 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9745 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9746 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9747 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9748 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9749 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9750 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9751 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9752 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64 },
9753 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64 },
9754 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64 },
9755 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64 },
9756 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64 },
9757 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64 },
9758 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64 },
9759 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64 },
9760 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
9761 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
9762 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
9765 static void
9766 sh_media_init_builtins (void)
9768 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
9769 const struct builtin_description *d;
9771 memset (shared, 0, sizeof shared);
9772 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
9774 tree type, arg_type = 0;
9775 int signature = d->signature;
9776 int i;
9778 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
9779 type = shared[signature];
9780 else
9782 int has_result = signature_args[signature][0] != 0;
9784 if ((signature_args[signature][1] & 8)
9785 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
9786 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
9787 continue;
9788 if (! TARGET_FPU_ANY
9789 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
9790 continue;
9791 type = void_list_node;
9792 for (i = 3; ; i--)
9794 int arg = signature_args[signature][i];
9795 int opno = i - 1 + has_result;
9797 if (arg & 8)
9798 arg_type = ptr_type_node;
9799 else if (arg)
9800 arg_type = (*lang_hooks.types.type_for_mode)
9801 (insn_data[d->icode].operand[opno].mode,
9802 (arg & 1));
9803 else if (i)
9804 continue;
9805 else
9806 arg_type = void_type_node;
9807 if (i == 0)
9808 break;
9809 type = tree_cons (NULL_TREE, arg_type, type);
9811 type = build_function_type (arg_type, type);
9812 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
9813 shared[signature] = type;
9815 lang_hooks.builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
9816 NULL, NULL_TREE);
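/* Illustrative note: assuming an SHmedia target and a vector typedef that
   matches the V2SImode operands of the addv2si3 pattern, user code could
   call one of the builtins registered above roughly like this:

     typedef int v2si __attribute__ ((vector_size (8)));

     v2si
     add_pairs (v2si a, v2si b)
     {
       return __builtin_addv2si3 (a, b);
     }

   The exact prototypes are derived from signature_args and the insn
   operand modes, so they may differ slightly from this sketch.  */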
9820 /* Implements target hook vector_mode_supported_p. */
9821 bool
9822 sh_vector_mode_supported_p (enum machine_mode mode)
9824 if (TARGET_FPU_ANY
9825 && ((mode == V2SFmode)
9826 || (mode == V4SFmode)
9827 || (mode == V16SFmode)))
9828 return true;
9830 else if (TARGET_SHMEDIA
9831 && ((mode == V8QImode)
9832 || (mode == V2HImode)
9833 || (mode == V4HImode)
9834 || (mode == V2SImode)))
9835 return true;
9837 return false;
9840 /* Implements target hook dwarf_calling_convention. Return an enum
9841 of dwarf_calling_convention. */
9843 sh_dwarf_calling_convention (tree func)
9845 if (sh_attr_renesas_p (func))
9846 return DW_CC_GNU_renesas_sh;
9848 return DW_CC_normal;
9851 static void
9852 sh_init_builtins (void)
9854 if (TARGET_SHMEDIA)
9855 sh_media_init_builtins ();
9858 /* Expand an expression EXP that calls a built-in function,
9859 with result going to TARGET if that's convenient
9860 (and in mode MODE if that's convenient).
9861 SUBTARGET may be used as the target for computing one of EXP's operands.
9862 IGNORE is nonzero if the value is to be ignored. */
9864 static rtx
9865 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9866 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
9868 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
9869 tree arglist = TREE_OPERAND (exp, 1);
9870 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9871 const struct builtin_description *d = &bdesc[fcode];
9872 enum insn_code icode = d->icode;
9873 int signature = d->signature;
9874 enum machine_mode tmode = VOIDmode;
9875 int nop = 0, i;
9876 rtx op[4];
9877 rtx pat = 0;
9879 if (signature_args[signature][0])
9881 if (ignore)
9882 return 0;
9884 tmode = insn_data[icode].operand[0].mode;
9885 if (! target
9886 || GET_MODE (target) != tmode
9887 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9888 target = gen_reg_rtx (tmode);
9889 op[nop++] = target;
9891 else
9892 target = 0;
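/* Note (added comment): the loop below collects up to three arguments,
   converting each to the mode the insn pattern expects (ptr_mode for
   pointer arguments) and copying it into a register whenever the operand
   predicate rejects the expanded form.  */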
9894 for (i = 1; i <= 3; i++, nop++)
9896 tree arg;
9897 enum machine_mode opmode, argmode;
9898 tree optype;
9900 if (! signature_args[signature][i])
9901 break;
9902 arg = TREE_VALUE (arglist);
9903 if (arg == error_mark_node)
9904 return const0_rtx;
9905 arglist = TREE_CHAIN (arglist);
9906 if (signature_args[signature][i] & 8)
9908 opmode = ptr_mode;
9909 optype = ptr_type_node;
9911 else
9913 opmode = insn_data[icode].operand[nop].mode;
9914 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
9916 argmode = TYPE_MODE (TREE_TYPE (arg));
9917 if (argmode != opmode)
9918 arg = build1 (NOP_EXPR, optype, arg);
9919 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
9920 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
9921 op[nop] = copy_to_mode_reg (opmode, op[nop]);
9924 switch (nop)
9926 case 1:
9927 pat = (*insn_data[d->icode].genfun) (op[0]);
9928 break;
9929 case 2:
9930 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
9931 break;
9932 case 3:
9933 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
9934 break;
9935 case 4:
9936 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
9937 break;
9938 default:
9939 gcc_unreachable ();
9941 if (! pat)
9942 return 0;
9943 emit_insn (pat);
9944 return target;
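/* Note (added comment): the two V2SF expanders that follow have no single
   vector instruction to emit; each vector operation is expanded into two
   scalar SFmode operations, one acting on element 0 and one on element 1
   of the operand vectors, using the sel0/sel1 lane selectors.  */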
9947 void
9948 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
9950 rtx sel0 = const0_rtx;
9951 rtx sel1 = const1_rtx;
9952 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
9953 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
9955 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
9956 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
9959 void
9960 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
9962 rtx sel0 = const0_rtx;
9963 rtx sel1 = const1_rtx;
9964 rtx (*fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
9965 = gen_binary_sf_op;
9966 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
9968 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
9969 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
9972 /* Return the class of registers for which a mode change from FROM to TO
9973 is invalid. */
9974 bool
9975 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9976 enum reg_class class)
9978 /* We want to enable the use of SUBREGs as a means to
9979 VEC_SELECT a single element of a vector. */
9980 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
9981 return (reg_classes_intersect_p (GENERAL_REGS, class));
9983 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
9985 if (TARGET_LITTLE_ENDIAN)
9987 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
9988 return reg_classes_intersect_p (DF_REGS, class);
9990 else
9992 if (GET_MODE_SIZE (from) < 8)
9993 return reg_classes_intersect_p (DF_HI_REGS, class);
9996 return 0;
10000 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
10001 that label is used. */
10003 void
10004 sh_mark_label (rtx address, int nuses)
10006 if (GOTOFF_P (address))
10008 /* Extract the label or symbol. */
10009 address = XEXP (address, 0);
10010 if (GET_CODE (address) == PLUS)
10011 address = XEXP (address, 0);
10012 address = XVECEXP (address, 0, 0);
10014 if (GET_CODE (address) == LABEL_REF
10015 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
10016 LABEL_NUSES (XEXP (address, 0)) += nuses;
10019 /* Compute extra cost of moving data between one register class
10020 and another. */
10022 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
10023 uses this information. Hence, the general register <-> floating point
10024 register information here is not used for SFmode. */
10027 sh_register_move_cost (enum machine_mode mode,
10028 enum reg_class srcclass, enum reg_class dstclass)
10030 if (dstclass == T_REGS || dstclass == PR_REGS)
10031 return 10;
10033 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
10034 return 4;
10036 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
10037 && REGCLASS_HAS_FP_REG (srcclass)
10038 && REGCLASS_HAS_FP_REG (dstclass))
10039 return 4;
10041 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
10042 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
10043 return 9;
10045 if ((REGCLASS_HAS_FP_REG (dstclass)
10046 && REGCLASS_HAS_GENERAL_REG (srcclass))
10047 || (REGCLASS_HAS_GENERAL_REG (dstclass)
10048 && REGCLASS_HAS_FP_REG (srcclass)))
10049 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
10050 * ((GET_MODE_SIZE (mode) + 7) / 8U));
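  /* Worked example (added note) for the cost just above: without SHMEDIA
     and without -mfmovd, a DFmode move between general and FP registers
     costs 12 * ((8 + 7) / 8) = 12, and an SImode move likewise costs
     12 * ((4 + 7) / 8) = 12, since the size is rounded up to whole
     8-byte chunks.  */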
10052 if ((dstclass == FPUL_REGS
10053 && REGCLASS_HAS_GENERAL_REG (srcclass))
10054 || (srcclass == FPUL_REGS
10055 && REGCLASS_HAS_GENERAL_REG (dstclass)))
10056 return 5;
10058 if ((dstclass == FPUL_REGS
10059 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
10060 || (srcclass == FPUL_REGS
10061 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
10062 return 7;
10064 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10065 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
10066 return 20;
10068 /* ??? ptabs faults on (value & 0x3) == 0x3 */
10069 if (TARGET_SHMEDIA
10070 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
10072 if (*sh_gettrcost_str)
10073 return atoi (sh_gettrcost_str);
10074 else if (!TARGET_PT_FIXED)
10075 return 100;
10078 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10079 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
10080 return 4;
10082 if (TARGET_SHMEDIA
10083 || (TARGET_FMOVD
10084 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
10085 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
10086 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
10088 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
10091 /* Like register_operand, but take into account that SHMEDIA can use
10092 the constant zero like a general register. */
10094 sh_register_operand (rtx op, enum machine_mode mode)
10096 if (op == CONST0_RTX (mode) && TARGET_SHMEDIA)
10097 return 1;
10098 return register_operand (op, mode);
10102 cmpsi_operand (rtx op, enum machine_mode mode)
10104 if (GET_CODE (op) == REG && REGNO (op) == T_REG
10105 && GET_MODE (op) == SImode
10106 && TARGET_SH1)
10107 return 1;
10108 return arith_operand (op, mode);
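/* Descriptive note added for clarity: shift_count_reg_operand below accepts
   a shift count kept in a register even when it is wrapped in sign/zero
   extensions, truncations, or a lowpart SUBREG, provided every intermediate
   value is an integer mode of at least six bits (six bits suffice for shift
   counts up to 63).  */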
10112 shift_count_reg_operand (rtx op, enum machine_mode mode)
10114 if ((GET_CODE (op) == ZERO_EXTEND || GET_CODE (op) == SIGN_EXTEND
10115 || (GET_CODE (op) == SUBREG && SUBREG_BYTE (op) == 0))
10116 && (mode == VOIDmode || mode == GET_MODE (op))
10117 && GET_MODE_BITSIZE (GET_MODE (XEXP (op, 0))) >= 6
10118 && GET_MODE_CLASS (GET_MODE (XEXP (op, 0))) == MODE_INT)
10120 mode = VOIDmode;
10122 op = XEXP (op, 0);
10123 while ((GET_CODE (op) == ZERO_EXTEND || GET_CODE (op) == SIGN_EXTEND
10124 || GET_CODE (op) == TRUNCATE)
10125 && GET_MODE_BITSIZE (GET_MODE (XEXP (op, 0))) >= 6
10126 && GET_MODE_CLASS (GET_MODE (XEXP (op, 0))) == MODE_INT);
10129 return arith_reg_operand (op, mode);
10133 shift_count_operand (rtx op, enum machine_mode mode)
10135 return (CONSTANT_P (op)
10136 ? (GET_CODE (op) == CONST_INT
10137 ? (unsigned) INTVAL (op) < GET_MODE_BITSIZE (mode)
10138 : nonmemory_operand (op, mode))
10139 : shift_count_reg_operand (op, mode));
10142 static rtx emit_load_ptr (rtx, rtx);
10144 static rtx
10145 emit_load_ptr (rtx reg, rtx addr)
10147 rtx mem = gen_rtx_MEM (ptr_mode, addr);
10149 if (Pmode != ptr_mode)
10150 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
10151 return emit_move_insn (reg, mem);
10154 static void
10155 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
10156 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10157 tree function)
10159 CUMULATIVE_ARGS cum;
10160 int structure_value_byref = 0;
10161 rtx this, this_value, sibcall, insns, funexp;
10162 tree funtype = TREE_TYPE (function);
10163 int simple_add = CONST_OK_FOR_ADD (delta);
10164 int did_load = 0;
10165 rtx scratch0, scratch1, scratch2;
10166 unsigned i;
10168 reload_completed = 1;
10169 epilogue_completed = 1;
10170 no_new_pseudos = 1;
10171 current_function_uses_only_leaf_regs = 1;
10172 reset_block_changes ();
10174 emit_note (NOTE_INSN_PROLOGUE_END);
10176 /* Find the "this" pointer. We have such a wide range of ABIs for the
10177 SH that it's best to do this completely machine independently.
10178 "this" is passed as first argument, unless a structure return pointer
10179 comes first, in which case "this" comes second. */
10180 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
10181 #ifndef PCC_STATIC_STRUCT_RETURN
10182 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
10183 structure_value_byref = 1;
10184 #endif /* not PCC_STATIC_STRUCT_RETURN */
10185 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
10187 tree ptype = build_pointer_type (TREE_TYPE (funtype));
10189 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
10191 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
10193 /* For SHcompact, we only have r0 for a scratch register: r1 is the
10194 static chain pointer (even if you can't have nested virtual functions
10195 right now, someone might implement them sometime), and the rest of the
10196 registers are used for argument passing, are callee-saved, or reserved. */
10197 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
10198 -ffixed-reg has been used. */
10199 if (! call_used_regs[0] || fixed_regs[0])
10200 error ("r0 needs to be available as a call-clobbered register");
10201 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
10202 if (! TARGET_SH5)
10204 if (call_used_regs[1] && ! fixed_regs[1])
10205 scratch1 = gen_rtx_REG (ptr_mode, 1);
10206 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
10207 to where struct return values are to be stored. */
10208 if (call_used_regs[3] && ! fixed_regs[3])
10209 scratch2 = gen_rtx_REG (Pmode, 3);
10211 else if (TARGET_SHMEDIA)
10213 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
10214 if (i != REGNO (scratch0) &&
10215 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
10217 scratch1 = gen_rtx_REG (ptr_mode, i);
10218 break;
10220 if (scratch1 == scratch0)
10221 error ("Need a second call-clobbered general purpose register");
10222 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
10223 if (call_used_regs[i] && ! fixed_regs[i])
10225 scratch2 = gen_rtx_REG (Pmode, i);
10226 break;
10228 if (scratch2 == scratch0)
10229 error ("Need a call-clobbered target register");
10232 this_value = plus_constant (this, delta);
10233 if (vcall_offset
10234 && (simple_add || scratch0 != scratch1)
10235 && strict_memory_address_p (ptr_mode, this_value))
10237 emit_load_ptr (scratch0, this_value);
10238 did_load = 1;
10241 if (!delta)
10242 ; /* Do nothing. */
10243 else if (simple_add)
10244 emit_move_insn (this, this_value);
10245 else
10247 emit_move_insn (scratch1, GEN_INT (delta));
10248 emit_insn (gen_add2_insn (this, scratch1));
10251 if (vcall_offset)
10253 rtx offset_addr;
10255 if (!did_load)
10256 emit_load_ptr (scratch0, this);
10258 offset_addr = plus_constant (scratch0, vcall_offset);
10259 if (strict_memory_address_p (ptr_mode, offset_addr))
10260 ; /* Do nothing. */
10261 else if (! TARGET_SH5 && scratch0 != scratch1)
10263 /* scratch0 != scratch1, and we have indexed loads. Get a better
10264 schedule by loading the offset into r1 and using an indexed
10265 load - then the load of r1 can issue before the load from
10266 (this + delta) finishes. */
10267 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10268 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
10270 else if (CONST_OK_FOR_ADD (vcall_offset))
10272 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
10273 offset_addr = scratch0;
10275 else if (scratch0 != scratch1)
10277 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10278 emit_insn (gen_add2_insn (scratch0, scratch1));
10279 offset_addr = scratch0;
10281 else
10282 gcc_unreachable (); /* FIXME */
10283 emit_load_ptr (scratch0, offset_addr);
10285 if (Pmode != ptr_mode)
10286 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
10287 emit_insn (gen_add2_insn (this, scratch0));
10290 /* Generate a tail call to the target function. */
10291 if (! TREE_USED (function))
10293 assemble_external (function);
10294 TREE_USED (function) = 1;
10296 funexp = XEXP (DECL_RTL (function), 0);
10297 /* If the function is overridden, so is the thunk, hence we don't
10298 need GOT addressing even if this is a public symbol. */
10299 #if 0
10300 if (TARGET_SH1 && ! flag_weak)
10301 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
10302 else
10303 #endif
10304 if (TARGET_SH2 && flag_pic)
10306 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
10307 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
10309 else
10311 if (TARGET_SHMEDIA && flag_pic)
10313 funexp = gen_sym2PIC (funexp);
10314 PUT_MODE (funexp, Pmode);
10316 emit_move_insn (scratch2, funexp);
10317 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
10318 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
10320 sibcall = emit_call_insn (sibcall);
10321 SIBLING_CALL_P (sibcall) = 1;
10322 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
10323 emit_barrier ();
10325 /* Run just enough of rest_of_compilation to do scheduling and get
10326 the insns emitted. Note that use_thunk calls
10327 assemble_start_function and assemble_end_function. */
10329 insn_locators_initialize ();
10330 insns = get_insns ();
10332 if (optimize > 0 && flag_schedule_insns_after_reload)
10334 /* Initialize the bitmap obstacks. */
10335 bitmap_obstack_initialize (NULL);
10336 bitmap_obstack_initialize (&reg_obstack);
10337 if (! cfun->cfg)
10338 init_flow ();
10339 rtl_register_cfg_hooks ();
10340 find_basic_blocks (insns);
10341 life_analysis (dump_file, PROP_FINAL);
10343 split_all_insns (1);
10345 schedule_insns (dump_file);
10348 sh_reorg ();
10350 if (optimize > 0 && flag_delayed_branch)
10352 if (! cfun->cfg)
10354 init_flow ();
10355 find_basic_blocks (insns);
10357 dbr_schedule (insns, dump_file);
10359 shorten_branches (insns);
10360 final_start_function (insns, file, 1);
10361 final (insns, file, 1);
10362 final_end_function ();
10364 if (optimize > 0 && flag_schedule_insns_after_reload)
10366 /* Release all memory allocated by flow. */
10367 free_basic_block_vars ();
10369 /* Release the bitmap obstacks. */
10370 bitmap_obstack_release (&reg_obstack);
10371 bitmap_obstack_release (NULL);
10374 reload_completed = 0;
10375 epilogue_completed = 0;
10376 no_new_pseudos = 0;
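/* Descriptive note added for clarity: function_symbol below builds a
   SYMBOL_REF for NAME marked as a function.  For the special function kinds
   under PIC (SFUNC_GOT / SFUNC_STATIC) the address is loaded through the GOT
   or via a GOTOFF relocation into TARGET, or into a fresh pseudo when TARGET
   is NULL; the register or plain symbol is then returned (and also copied
   into TARGET if one was supplied and differs).  */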
10380 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
10382 rtx sym;
10384 /* If this is not an ordinary function, the name usually comes from a
10385 string literal or an sprintf buffer. Make sure we use the same
10386 string consistently, so that cse will be able to unify address loads. */
10387 if (kind != FUNCTION_ORDINARY)
10388 name = IDENTIFIER_POINTER (get_identifier (name));
10389 sym = gen_rtx_SYMBOL_REF (Pmode, name);
10390 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
10391 if (flag_pic)
10392 switch (kind)
10394 case FUNCTION_ORDINARY:
10395 break;
10396 case SFUNC_GOT:
10398 rtx reg = target ? target : gen_reg_rtx (Pmode);
10400 emit_insn (gen_symGOT2reg (reg, sym));
10401 sym = reg;
10402 break;
10404 case SFUNC_STATIC:
10406 /* ??? To allow cse to work, we use GOTOFF relocations.
10407 We could add combiner patterns to transform this into
10408 straight pc-relative calls with sym2PIC / bsrf when
10409 label load and function call are still 1:1 and in the
10410 same basic block during combine. */
10411 rtx reg = target ? target : gen_reg_rtx (Pmode);
10413 emit_insn (gen_symGOTOFF2reg (reg, sym));
10414 sym = reg;
10415 break;
10418 if (target && sym != target)
10420 emit_move_insn (target, sym);
10421 return target;
10423 return sym;
10426 /* Find the number of a general purpose register in S. */
10427 static int
10428 scavenge_reg (HARD_REG_SET *s)
10430 int r;
10431 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
10432 if (TEST_HARD_REG_BIT (*s, r))
10433 return r;
10434 return -1;
10438 sh_get_pr_initial_val (void)
10440 rtx val;
10442 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
10443 PR register on SHcompact, because it might be clobbered by the prologue.
10444 We check first if that is known to be the case. */
10445 if (TARGET_SHCOMPACT
10446 && ((current_function_args_info.call_cookie
10447 & ~ CALL_COOKIE_RET_TRAMP (1))
10448 || current_function_has_nonlocal_label))
10449 return gen_rtx_MEM (SImode, return_address_pointer_rtx);
10451 /* If we haven't finished rtl generation, there might be a nonlocal label
10452 that we haven't seen yet.
10453 ??? get_hard_reg_initial_val fails if it is called while no_new_pseudos
10454 is set, unless it has been called before for the same register. And even
10455 then, we end up in trouble if we didn't use the register in the same
10456 basic block before. So call get_hard_reg_initial_val now and wrap it
10457 in an unspec if we might need to replace it. */
10458 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
10459 combine can put the pseudo returned by get_hard_reg_initial_val into
10460 instructions that need a general purpose register, which will fail to
10461 be recognized when the pseudo becomes allocated to PR. */
10463 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10464 if (TARGET_SH1)
10465 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
10466 return val;
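/* Descriptive note added for clarity: sh_expand_t_scc below tries to expand
   an scc operation whose first operand is the T register and whose second is
   a CONST_INT, storing the 0/1 result of comparison CODE (EQ or NE) into
   TARGET.  It returns 1 on success and 0 when the operands do not fit this
   pattern, so the caller can fall back to another expansion.  */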
10470 sh_expand_t_scc (enum rtx_code code, rtx target)
10472 rtx result = target;
10473 HOST_WIDE_INT val;
10475 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
10476 || GET_CODE (sh_compare_op1) != CONST_INT)
10477 return 0;
10478 if (GET_CODE (result) != REG)
10479 result = gen_reg_rtx (SImode);
10480 val = INTVAL (sh_compare_op1);
10481 if ((code == EQ && val == 1) || (code == NE && val == 0))
10482 emit_insn (gen_movt (result));
10483 else if ((code == EQ && val == 0) || (code == NE && val == 1))
10485 emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
10486 emit_insn (gen_subc (result, result, result));
10487 emit_insn (gen_addsi3 (result, result, const1_rtx));
10489 else if (code == EQ || code == NE)
10490 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
10491 else
10492 return 0;
10493 if (result != target)
10494 emit_move_insn (target, result);
10495 return 1;
10498 /* INSN is an sfunc; return the rtx that describes the address used. */
10499 static rtx
10500 extract_sfunc_addr (rtx insn)
10502 rtx pattern, part = NULL_RTX;
10503 int len, i;
10505 pattern = PATTERN (insn);
10506 len = XVECLEN (pattern, 0);
10507 for (i = 0; i < len; i++)
10509 part = XVECEXP (pattern, 0, i);
10510 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
10511 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
10512 return XEXP (part, 0);
10514 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
10515 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
10518 /* Verify that the register in use_sfunc_addr still agrees with the address
10519 used in the sfunc. This prevents fill_slots_from_thread from changing
10520 use_sfunc_addr.
10521 INSN is the use_sfunc_addr instruction, and REG is the register it
10522 guards. */
10524 check_use_sfunc_addr (rtx insn, rtx reg)
10526 /* Search for the sfunc. It should really come right after INSN. */
10527 while ((insn = NEXT_INSN (insn)))
10529 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
10530 break;
10531 if (! INSN_P (insn))
10532 continue;
10534 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
10535 insn = XVECEXP (PATTERN (insn), 0, 0);
10536 if (GET_CODE (PATTERN (insn)) != PARALLEL
10537 || get_attr_type (insn) != TYPE_SFUNC)
10538 continue;
10539 return rtx_equal_p (extract_sfunc_addr (insn), reg);
10541 gcc_unreachable ();
10544 /* Returns 1 if OP is a MEM that can be the source of a simple move operation. */
10547 unaligned_load_operand (rtx op, enum machine_mode mode)
10549 rtx inside;
10551 if (GET_CODE (op) != MEM || GET_MODE (op) != mode)
10552 return 0;
10554 inside = XEXP (op, 0);
10556 if (GET_CODE (inside) == POST_INC)
10557 inside = XEXP (inside, 0);
10559 if (GET_CODE (inside) == REG)
10560 return 1;
10562 return 0;
10565 /* This function returns a constant rtx that represents 2**15 / pi in
10566 SFmode.  It's used to scale SFmode angles, in radians, to a
10567 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
10568 maps to 0x10000).  */
10570 static GTY(()) rtx sh_fsca_sf2int_rtx;
10573 sh_fsca_sf2int (void)
10575 if (! sh_fsca_sf2int_rtx)
10577 REAL_VALUE_TYPE rv;
10579 real_from_string (&rv, "10430.378350470453");
10580 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
10583 return sh_fsca_sf2int_rtx;
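/* Arithmetic check (added note): the fsca fixed-point angle format uses
   0x10000 units per full circle, so the radians-to-fixed factor is
   0x10000 / (2*pi) = 2**15 / pi = 32768 / 3.14159265...
   ~= 10430.378350470453, and the inverse factor used by sh_fsca_int2sf
   below is pi / 2**15 ~= 9.587379924285257e-5.  */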
10586 /* This function returns a constant rtx that represents 2**15 / pi in
10587 DFmode.  It's used to scale DFmode angles, in radians, to a
10588 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
10589 maps to 0x10000).  */
10591 static GTY(()) rtx sh_fsca_df2int_rtx;
10594 sh_fsca_df2int (void)
10596 if (! sh_fsca_df2int_rtx)
10598 REAL_VALUE_TYPE rv;
10600 real_from_string (&rv, "10430.378350470453");
10601 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
10604 return sh_fsca_df2int_rtx;
10607 /* This function returns a constant rtx that represents pi / 2**15 in
10608 SFmode.  It's used to scale a fixed-point signed 16.16-bit fraction
10609 of a full circle back to an SFmode value (i.e., 0x10000 maps to
10610 2*pi).  */
10612 static GTY(()) rtx sh_fsca_int2sf_rtx;
10615 sh_fsca_int2sf (void)
10617 if (! sh_fsca_int2sf_rtx)
10619 REAL_VALUE_TYPE rv;
10621 real_from_string (&rv, "9.587379924285257e-5");
10622 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
10625 return sh_fsca_int2sf_rtx;
10628 /* Initialize the CUMULATIVE_ARGS structure. */
10630 void
10631 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
10632 tree fntype,
10633 rtx libname ATTRIBUTE_UNUSED,
10634 tree fndecl,
10635 signed int n_named_args,
10636 enum machine_mode mode)
10638 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
10639 pcum->free_single_fp_reg = 0;
10640 pcum->stack_regs = 0;
10641 pcum->byref_regs = 0;
10642 pcum->byref = 0;
10643 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
10645 /* XXX - Should we check TARGET_HITACHI here ??? */
10646 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
10648 if (fntype)
10650 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
10651 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
10652 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
10653 pcum->arg_count [(int) SH_ARG_INT]
10654 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
10656 pcum->call_cookie
10657 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10658 && pcum->arg_count [(int) SH_ARG_INT] == 0
10659 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
10660 ? int_size_in_bytes (TREE_TYPE (fntype))
10661 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
10662 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
10663 == FIRST_RET_REG));
10665 else
10667 pcum->arg_count [(int) SH_ARG_INT] = 0;
10668 pcum->prototype_p = FALSE;
10669 if (mode != VOIDmode)
10671 pcum->call_cookie =
10672 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10673 && GET_MODE_SIZE (mode) > 4
10674 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
10676 /* If the default ABI is the Renesas ABI then all library
10677 calls must assume that the library will be using the
10678 Renesas ABI. So if the function would return its result
10679 in memory then we must force the address of this memory
10680 block onto the stack. Ideally we would like to call
10681 targetm.calls.return_in_memory() here but we do not have
10682 the TYPE or the FNDECL available so we synthesize the
10683 contents of that function as best we can. */
10684 pcum->force_mem =
10685 (TARGET_DEFAULT & MASK_HITACHI)
10686 && (mode == BLKmode
10687 || (GET_MODE_SIZE (mode) > 4
10688 && !(mode == DFmode
10689 && TARGET_FPU_DOUBLE)));
10691 else
10693 pcum->call_cookie = 0;
10694 pcum->force_mem = FALSE;
10699 /* Determine if two hard register sets intersect.
10700 Return 1 if they do. */
10702 static int
10703 hard_regs_intersect_p (HARD_REG_SET *a, HARD_REG_SET *b)
10705 HARD_REG_SET c;
10706 COPY_HARD_REG_SET (c, *a);
10707 AND_HARD_REG_SET (c, *b);
10708 GO_IF_HARD_REG_SUBSET (c, reg_class_contents[(int) NO_REGS], lose);
10709 return 1;
10710 lose:
10711 return 0;
10714 #ifdef TARGET_ADJUST_UNROLL_MAX
10715 static int
10716 sh_adjust_unroll_max (struct loop * loop, int insn_count,
10717 int max_unrolled_insns, int strength_reduce_p,
10718 int unroll_type)
10720 /* This doesn't work in 4.0 because the old unroller & loop.h are gone. */
10721 if (TARGET_ADJUST_UNROLL && TARGET_SHMEDIA)
10723 /* Throttle back loop unrolling so that the costs of using more
10724 targets than the eight target registers we have don't outweigh
10725 the benefits of unrolling. */
10726 rtx insn;
10727 int n_labels = 0, n_calls = 0, n_exit_dest = 0, n_inner_loops = -1;
10728 int n_barriers = 0;
10729 rtx dest;
10730 int i;
10731 rtx exit_dest[8];
10732 int threshold;
10733 int unroll_benefit = 0, mem_latency = 0;
10734 int base_cost, best_cost, cost;
10735 int factor, best_factor;
10736 int n_dest;
10737 unsigned max_iterations = 32767;
10738 int n_iterations;
10739 int need_precond = 0, precond = 0;
10740 basic_block * bbs = get_loop_body (loop);
10741 struct niter_desc *desc;
10743 /* Assume that all labels inside the loop are used from inside the
10744 loop. If the loop has multiple entry points, it is unlikely to
10745 be unrolled anyways.
10746 Also assume that all calls are to different functions. That is
10747 somewhat pessimistic, but if you have lots of calls, unrolling the
10748 loop is not likely to gain you much in the first place. */
10749 i = loop->num_nodes - 1;
10750 for (insn = BB_HEAD (bbs[i]); ; )
10752 if (GET_CODE (insn) == CODE_LABEL)
10753 n_labels++;
10754 else if (GET_CODE (insn) == CALL_INSN)
10755 n_calls++;
10756 else if (GET_CODE (insn) == NOTE
10757 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
10758 n_inner_loops++;
10759 else if (GET_CODE (insn) == BARRIER)
10760 n_barriers++;
10761 if (insn != BB_END (bbs[i]))
10762 insn = NEXT_INSN (insn);
10763 else if (--i >= 0)
10764 insn = BB_HEAD (bbs[i]);
10765 else
10766 break;
10768 free (bbs);
10769 /* One label for the loop top is normal, and it won't be duplicated by
10770 unrolling. */
10771 if (n_labels <= 1)
10772 return max_unrolled_insns;
10773 if (n_inner_loops > 0)
10774 return 0;
10775 for (dest = loop->exit_labels; dest && n_exit_dest < 8;
10776 dest = LABEL_NEXTREF (dest))
10778 for (i = n_exit_dest - 1;
10779 i >= 0 && XEXP (dest, 0) != XEXP (exit_dest[i], 0); i--);
10780 if (i < 0)
10781 exit_dest[n_exit_dest++] = dest;
10783 /* If the loop top and call and exit destinations are enough to fill up
10784 the target registers, we're unlikely to do any more damage by
10785 unrolling. */
10786 if (n_calls + n_exit_dest >= 7)
10787 return max_unrolled_insns;
10789 /* ??? In the new loop unroller, there is no longer any strength
10790 reduction information available. Thus, when it comes to unrolling,
10791 we know the cost of everything, but we know the value of nothing. */
10792 #if 0
10793 if (strength_reduce_p
10794 && (unroll_type == LPT_UNROLL_RUNTIME
10795 || unroll_type == LPT_UNROLL_CONSTANT
10796 || unroll_type == LPT_PEEL_COMPLETELY))
10798 struct loop_ivs *ivs = LOOP_IVS (loop);
10799 struct iv_class *bl;
10801 /* We'll save one compare-and-branch in each loop body copy
10802 but the last one. */
10803 unroll_benefit = 1;
10804 /* Assess the benefit of removing biv & giv updates. */
10805 for (bl = ivs->list; bl; bl = bl->next)
10807 rtx increment = biv_total_increment (bl);
10808 struct induction *v;
10810 if (increment && GET_CODE (increment) == CONST_INT)
10812 unroll_benefit++;
10813 for (v = bl->giv; v; v = v->next_iv)
10815 if (! v->ignore && v->same == 0
10816 && GET_CODE (v->mult_val) == CONST_INT)
10817 unroll_benefit++;
10818 /* If this giv uses an array, try to determine
10819 a maximum iteration count from the size of the
10820 array. This need not be correct all the time,
10821 but should not be too far off the mark too often. */
10822 while (v->giv_type == DEST_ADDR)
10824 rtx mem = PATTERN (v->insn);
10825 tree mem_expr, type, size_tree;
10827 if (GET_CODE (SET_SRC (mem)) == MEM)
10828 mem = SET_SRC (mem);
10829 else if (GET_CODE (SET_DEST (mem)) == MEM)
10830 mem = SET_DEST (mem);
10831 else
10832 break;
10833 mem_expr = MEM_EXPR (mem);
10834 if (! mem_expr)
10835 break;
10836 type = TREE_TYPE (mem_expr);
10837 if (TREE_CODE (type) != ARRAY_TYPE
10838 || ! TYPE_SIZE (type) || ! TYPE_SIZE_UNIT (type))
10839 break;
10840 size_tree = fold (build (TRUNC_DIV_EXPR,
10841 bitsizetype,
10842 TYPE_SIZE (type),
10843 TYPE_SIZE_UNIT (type)));
10844 if (TREE_CODE (size_tree) == INTEGER_CST
10845 && ! TREE_INT_CST_HIGH (size_tree)
10846 && TREE_INT_CST_LOW (size_tree) < max_iterations)
10847 max_iterations = TREE_INT_CST_LOW (size_tree);
10848 break;
10854 #else /* 0 */
10855 /* Assume there is at least some benefit. */
10856 unroll_benefit = 1;
10857 #endif /* 0 */
10859 desc = get_simple_loop_desc (loop);
10860 n_iterations = desc->const_iter ? desc->niter : 0;
10861 max_iterations
10862 = max_iterations < desc->niter_max ? max_iterations : desc->niter_max;
10864 if (! strength_reduce_p || ! n_iterations)
10865 need_precond = 1;
10866 if (! n_iterations)
10868 n_iterations
10869 = max_iterations < 3 ? max_iterations : max_iterations * 3 / 4;
10870 if (! n_iterations)
10871 return 0;
10873 #if 0 /* ??? See above - missing induction variable information. */
10874 while (unroll_benefit > 1) /* no loop */
10876 /* We include the benefit of biv/ giv updates. Check if some or
10877 all of these updates are likely to fit into a scheduling
10878 bubble of a load.
10879 We check for the following case:
10880 - All the insns leading to the first JUMP_INSN are in a strict
10881 dependency chain.
10882 - there is at least one memory reference in them.
10884 When we find such a pattern, we assume that we can hide as many
10885 updates as the total of the load latency is, if we have an
10886 unroll factor of at least two. We might or might not also do
10887 this without unrolling, so rather than considering this as an
10888 extra unroll benefit, discount it in the unroll benefits of unroll
10889 factors higher than two. */
10891 rtx set, last_set;
10893 insn = next_active_insn (loop->start);
10894 last_set = single_set (insn);
10895 if (! last_set)
10896 break;
10897 if (GET_CODE (SET_SRC (last_set)) == MEM)
10898 mem_latency += 2;
10899 for (insn = NEXT_INSN (insn); insn != end; insn = NEXT_INSN (insn))
10901 if (! INSN_P (insn))
10902 continue;
10903 if (GET_CODE (insn) == JUMP_INSN)
10904 break;
10905 if (! reg_referenced_p (SET_DEST (last_set), PATTERN (insn)))
10907 /* Check if this is a to-be-reduced giv insn. */
10908 struct loop_ivs *ivs = LOOP_IVS (loop);
10909 struct iv_class *bl;
10910 struct induction *v;
10911 for (bl = ivs->list; bl; bl = bl->next)
10913 if (bl->biv->insn == insn)
10914 goto is_biv;
10915 for (v = bl->giv; v; v = v->next_iv)
10916 if (v->insn == insn)
10917 goto is_giv;
10919 mem_latency--;
10920 is_biv:
10921 is_giv:
10922 continue;
10924 set = single_set (insn);
10925 if (! set)
10926 continue;
10927 if (GET_CODE (SET_SRC (set)) == MEM)
10928 mem_latency += 2;
10929 last_set = set;
10931 if (mem_latency < 0)
10932 mem_latency = 0;
10933 else if (mem_latency > unroll_benefit - 1)
10934 mem_latency = unroll_benefit - 1;
10935 break;
10937 #endif /* 0 */
10938 if (n_labels + (unroll_benefit + n_labels * 8) / n_iterations
10939 <= unroll_benefit)
10940 return max_unrolled_insns;
10942 n_dest = n_labels + n_calls + n_exit_dest;
10943 base_cost = n_dest <= 8 ? 0 : n_dest - 7;
10944 best_cost = 0;
10945 best_factor = 1;
10946 if (n_barriers * 2 > n_labels - 1)
10947 n_barriers = (n_labels - 1) / 2;
10948 for (factor = 2; factor <= 8; factor++)
10950 /* Bump up preconditioning cost for each power of two. */
10951 if (! (factor & (factor-1)))
10952 precond += 4;
10953 /* When preconditioning, only powers of two will be considered. */
10954 else if (need_precond)
10955 continue;
10956 n_dest = ((unroll_type != LPT_PEEL_COMPLETELY)
10957 + (n_labels - 1) * factor + n_calls + n_exit_dest
10958 - (n_barriers * factor >> 1)
10959 + need_precond);
10960 cost
10961 = ((n_dest <= 8 ? 0 : n_dest - 7)
10962 - base_cost * factor
10963 - ((factor > 2 ? unroll_benefit - mem_latency : unroll_benefit)
10964 * (factor - (unroll_type != LPT_PEEL_COMPLETELY)))
10965 + ((unroll_benefit + 1 + (n_labels - 1) * factor)
10966 / n_iterations));
10967 if (need_precond)
10968 cost += (precond + unroll_benefit * factor / 2) / n_iterations;
10969 if (cost < best_cost)
10971 best_cost = cost;
10972 best_factor = factor;
10975 threshold = best_factor * insn_count;
10976 if (max_unrolled_insns > threshold)
10977 max_unrolled_insns = threshold;
10979 return max_unrolled_insns;
10981 #endif /* TARGET_ADJUST_UNROLL_MAX */
10983 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
10984 not enter into CONST_DOUBLE for the replace.
10986 Note that copying is not done so X must not be shared unless all copies
10987 are to be modified.
10989 This is like replace_rtx, except that we operate on N_REPLACEMENTS
10990 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
10991 replacements[n*2+1] - and that we take mode changes into account.
10993 If a replacement is ambiguous, return NULL_RTX.
10995 If MODIFY is zero, don't modify any rtl in place,
10996 just return zero or nonzero for failure / success. */
10999 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
11001 int i, j;
11002 const char *fmt;
11004 /* The following prevents loops from occurring when we change a MEM in a
11005 CONST_DOUBLE into the same CONST_DOUBLE. */
11006 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
11007 return x;
11009 for (i = n_replacements - 1; i >= 0 ; i--)
11010 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
11011 return replacements[i*2+1];
11013 /* Allow this function to make replacements in EXPR_LISTs. */
11014 if (x == 0)
11015 return 0;
11017 if (GET_CODE (x) == SUBREG)
11019 rtx new = replace_n_hard_rtx (SUBREG_REG (x), replacements,
11020 n_replacements, modify);
11022 if (GET_CODE (new) == CONST_INT)
11024 x = simplify_subreg (GET_MODE (x), new,
11025 GET_MODE (SUBREG_REG (x)),
11026 SUBREG_BYTE (x));
11027 if (! x)
11028 abort ();
11030 else if (modify)
11031 SUBREG_REG (x) = new;
11033 return x;
11035 else if (GET_CODE (x) == REG)
11037 unsigned regno = REGNO (x);
11038 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
11039 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
11040 rtx result = NULL_RTX;
11042 for (i = n_replacements - 1; i >= 0; i--)
11044 rtx from = replacements[i*2];
11045 rtx to = replacements[i*2+1];
11046 unsigned from_regno, from_nregs, to_regno, new_regno;
11048 if (GET_CODE (from) != REG)
11049 continue;
11050 from_regno = REGNO (from);
11051 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
11052 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
11053 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
11055 if (regno < from_regno
11056 || regno + nregs > from_regno + nregs
11057 || GET_CODE (to) != REG
11058 || result)
11059 return NULL_RTX;
11060 to_regno = REGNO (to);
11061 if (to_regno < FIRST_PSEUDO_REGISTER)
11063 new_regno = regno + to_regno - from_regno;
11064 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
11065 != nregs)
11066 return NULL_RTX;
11067 result = gen_rtx_REG (GET_MODE (x), new_regno);
11069 else if (GET_MODE (x) <= GET_MODE (to))
11070 result = gen_lowpart_common (GET_MODE (x), to);
11071 else
11072 result = gen_lowpart_SUBREG (GET_MODE (x), to);
11075 return result ? result : x;
11077 else if (GET_CODE (x) == ZERO_EXTEND)
11079 rtx new = replace_n_hard_rtx (XEXP (x, 0), replacements,
11080 n_replacements, modify);
11082 if (GET_CODE (new) == CONST_INT)
11084 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
11085 new, GET_MODE (XEXP (x, 0)));
11086 if (! x)
11087 abort ();
11089 else if (modify)
11090 XEXP (x, 0) = new;
11092 return x;
11095 fmt = GET_RTX_FORMAT (GET_CODE (x));
11096 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
11098 rtx new;
11100 if (fmt[i] == 'e')
11102 new = replace_n_hard_rtx (XEXP (x, i), replacements,
11103 n_replacements, modify);
11104 if (!new)
11105 return NULL_RTX;
11106 if (modify)
11107 XEXP (x, i) = new;
11109 else if (fmt[i] == 'E')
11110 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11112 new = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
11113 n_replacements, modify);
11114 if (!new)
11115 return NULL_RTX;
11116 if (modify)
11117 XVECEXP (x, i, j) = new;
11121 return x;
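/* Descriptive note added for clarity: sh_gen_truncate below builds an rtx
   that converts X to MODE, normally as a TRUNCATE.  When X is itself a sign
   or zero extension, the redundant extension is stripped; an inner value
   narrower than MODE is re-extended with the original extension code, except
   that a zero extension is not reused when NEED_SIGN_EXT is set.  */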
11125 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
11127 enum rtx_code code = TRUNCATE;
11129 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
11131 rtx inner = XEXP (x, 0);
11132 enum machine_mode inner_mode = GET_MODE (inner);
11134 if (inner_mode == mode)
11135 return inner;
11136 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
11137 x = inner;
11138 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
11139 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
11141 code = GET_CODE (x);
11142 x = inner;
11145 return gen_rtx_fmt_e (code, mode, x);
11148 /* Called via for_each_rtx after reload, to clean up truncates of
11149 registers that span multiple actual hard registers. */
11151 shmedia_cleanup_truncate (rtx *p, void *n_changes)
11153 rtx x = *p, reg;
11155 if (GET_CODE (x) != TRUNCATE)
11156 return 0;
11157 reg = XEXP (x, 0);
11158 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && GET_CODE (reg) == REG)
11160 enum machine_mode reg_mode = GET_MODE (reg);
11161 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
11162 subreg_lowpart_offset (DImode, reg_mode));
11163 *(int*) n_changes += 1;
11164 return -1;
11166 return 0;
11169 /* Load and store depend on the highpart of the address. However,
11170 set_attr_alternative does not give well-defined results before reload,
11171 so we must look at the rtl ourselves to see if any of the feeding
11172 registers is used in a memref. */
11174 /* Called by sh_contains_memref_p via for_each_rtx. */
11175 static int
11176 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
11178 return (GET_CODE (*loc) == MEM);
11181 /* Return non-zero iff INSN contains a MEM. */
11183 sh_contains_memref_p (rtx insn)
11185 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
11188 /* FNADDR is the MEM expression from a call expander. Return an address
11189 to use in an SHmedia insn pattern. */
11191 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
11193 int is_sym;
11195 fnaddr = XEXP (fnaddr, 0);
11196 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
11197 if (flag_pic && is_sym)
11199 if (! SYMBOL_REF_LOCAL_P (fnaddr))
11201 rtx reg = gen_reg_rtx (Pmode);
11203 /* We must not use GOTPLT for sibcalls, because PIC_REG
11204 must be restored before the PLT code gets to run. */
11205 if (is_sibcall)
11206 emit_insn (gen_symGOT2reg (reg, fnaddr));
11207 else
11208 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
11209 fnaddr = reg;
11211 else
11213 fnaddr = gen_sym2PIC (fnaddr);
11214 PUT_MODE (fnaddr, Pmode);
11217 /* If ptabs might trap, make this visible to the rest of the compiler.
11218 We generally assume that symbols pertain to valid locations, but
11219 it is possible to generate invalid symbols with asm or linker tricks.
11220 In a list of functions where each returns its successor, an invalid
11221 symbol might denote an empty list. */
11222 if (!TARGET_PT_FIXED
11223 && (!is_sym || TARGET_INVALID_SYMBOLS)
11224 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
11226 rtx tr = gen_reg_rtx (PDImode);
11228 emit_insn (gen_ptabs (tr, fnaddr));
11229 fnaddr = tr;
11231 else if (! target_reg_operand (fnaddr, Pmode))
11232 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
11233 return fnaddr;
11236 const char *sh_multcost_str = "";
11237 const char *sh_gettrcost_str = "";
11238 const char *sh_div_str = "";
11239 const char *sh_divsi3_libfunc = "";
11240 const char *cut2_workaround_str = "";
11241 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
11243 /* This defines the storage for the variable part of a -mboard= option.
11244 It is only required when using the sh-superh-elf target. */
11245 #ifdef _SUPERH_H
11246 const char * boardtype = "7750p2";
11247 const char * osruntime = "bare";
11248 #endif
11250 #include "gt-sh.h"