1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "insn-config.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "flags.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "function.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "output.h"
38 #include "insn-attr.h"
39 #include "toplev.h"
40 #include "recog.h"
41 #include "c-pragma.h"
42 #include "integrate.h"
43 #include "dwarf2.h"
44 #include "tm_p.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "real.h"
48 #include "langhooks.h"
49 #include "basic-block.h"
50 #include "cfglayout.h"
51 #include "intl.h"
52 #include "sched-int.h"
53 #include "ggc.h"
54 #include "tree-gimple.h"
55 #include "cfgloop.h"
58 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
60 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
61 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
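/* For example, the 'R' (LSW) and 'S' (MSW) print_operand codes below print
   reg_names[REGNO (x) + LSW] and reg_names[REGNO (x) + MSW]; on a
   little-endian target the low word of an r0/r1 pair is thus r0 itself,
   while on a big-endian target it is r1.  */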
63 /* These are some macros to abstract register modes. */
64 #define CONST_OK_FOR_ADD(size) \
65 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
66 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
67 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
68 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
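/* For instance, GEN_ADD3 (dst, a, b) emits an adddi3 on SHmedia64 and an
   addsi3 on the 32-bit targets, so callers need not check the word size.  */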
70 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
71 int current_function_interrupt;
73 /* ??? The pragma interrupt support will not work for SH3. */
74 /* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
75 output code for the next function appropriate for an interrupt handler. */
76 int pragma_interrupt;
78 /* This is set by the trap_exit attribute for functions. It specifies
79 a trap number to be used in a trapa instruction at function exit
80 (instead of an rte instruction). */
81 int trap_exit;
83 /* This is used by the sp_switch attribute for functions. It specifies
84 a variable holding the address of the stack the interrupt function
85 should switch to/from at entry/exit. */
86 rtx sp_switch;
88 /* This is set by #pragma trapa, and is similar to the above, except that
89 the compiler doesn't emit code to preserve all registers. */
90 static int pragma_trapa;
92 /* This is set by #pragma nosave_low_regs. This is useful on the SH3,
93 which has a separate set of low regs for User and Supervisor modes.
94 This should only be used for the lowest level of interrupts. Higher levels
95 of interrupts must save the registers in case they themselves are
96 interrupted. */
97 int pragma_nosave_low_regs;
99 /* This is used for communication between TARGET_SETUP_INCOMING_VARARGS and
100 sh_expand_prologue. */
101 int current_function_anonymous_args;
103 /* Global variables for machine-dependent things. */
105 /* Which cpu are we scheduling for. */
106 enum processor_type sh_cpu;
108 /* Definitions used in ready queue reordering for first scheduling pass. */
110 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
111 static short *regmode_weight[2];
113 /* Total SFmode and SImode weights of scheduled insns. */
114 static int curr_regmode_pressure[2];
116 /* If true, skip cycles for Q -> R movement. */
117 static int skip_cycles = 0;
119 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
120 and returned from sh_reorder2. */
121 static short cached_can_issue_more;
123 /* Saved operands from the last compare to use when we generate an scc
124 or bcc insn. */
126 rtx sh_compare_op0;
127 rtx sh_compare_op1;
 129 /* Provides the class number of the smallest class containing
 130    the given reg number.  */
132 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
134 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
137 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
138 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
139 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
140 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
141 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
142 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
143 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
144 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
145 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
146 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
147 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
148 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
149 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
150 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
153 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
154 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
155 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
156 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
157 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
158 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
159 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
160 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
161 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
162 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
163 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
164 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
165 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
166 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
167 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
168 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
169 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
170 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
171 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
172 GENERAL_REGS,
175 char sh_register_names[FIRST_PSEUDO_REGISTER] \
176 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
178 char sh_additional_register_names[ADDREGNAMES_SIZE] \
179 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
180 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
182 /* Provide reg_class from a letter such as appears in the machine
183 description. *: target independently reserved letter.
184 reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */
186 enum reg_class reg_class_from_letter[] =
188 /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
189 /* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
190 /* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
191 /* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
192 /* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
193 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
194 /* y */ FPUL_REGS, /* z */ R0_REGS
197 int assembler_dialect;
199 static bool shmedia_space_reserved_for_target_registers;
201 static bool sh_handle_option (size_t, const char *, int);
202 static void split_branches (rtx);
203 static int branch_dest (rtx);
204 static void force_into (rtx, rtx);
205 static void print_slot (rtx);
206 static rtx add_constant (rtx, enum machine_mode, rtx);
207 static void dump_table (rtx, rtx);
208 static int hi_const (rtx);
209 static int broken_move (rtx);
210 static int mova_p (rtx);
211 static rtx find_barrier (int, rtx, rtx);
212 static int noncall_uses_reg (rtx, rtx, rtx *);
213 static rtx gen_block_redirect (rtx, int, int);
214 static void sh_reorg (void);
215 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
216 static rtx frame_insn (rtx);
217 static rtx push (int);
218 static void pop (int);
219 static void push_regs (HARD_REG_SET *, int);
220 static int calc_live_regs (HARD_REG_SET *);
221 static void mark_use (rtx, rtx *);
222 static HOST_WIDE_INT rounded_frame_size (int);
223 static rtx mark_constant_pool_use (rtx);
224 const struct attribute_spec sh_attribute_table[];
225 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
226 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
227 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
228 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
229 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
230 static void sh_insert_attributes (tree, tree *);
231 static int sh_adjust_cost (rtx, rtx, rtx, int);
232 static int sh_issue_rate (void);
233 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
234 static short find_set_regmode_weight (rtx, enum machine_mode);
235 static short find_insn_regmode_weight (rtx, enum machine_mode);
236 static void find_regmode_weight (int, enum machine_mode);
237 static void sh_md_init_global (FILE *, int, int);
238 static void sh_md_finish_global (FILE *, int);
239 static int rank_for_reorder (const void *, const void *);
240 static void swap_reorder (rtx *, int);
241 static void ready_reorder (rtx *, int);
242 static short high_pressure (enum machine_mode);
243 static int sh_reorder (FILE *, int, rtx *, int *, int);
244 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
245 static void sh_md_init (FILE *, int, int);
246 static int sh_variable_issue (FILE *, int, rtx, int);
248 static bool sh_function_ok_for_sibcall (tree, tree);
250 static bool sh_cannot_modify_jumps_p (void);
251 static int sh_target_reg_class (void);
252 static bool sh_optimize_target_register_callee_saved (bool);
253 static bool sh_ms_bitfield_layout_p (tree);
255 static void sh_init_builtins (void);
256 static void sh_media_init_builtins (void);
257 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
258 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
259 static void sh_file_start (void);
260 static int flow_dependent_p (rtx, rtx);
261 static void flow_dependent_p_1 (rtx, rtx, void *);
262 static int shiftcosts (rtx);
263 static int andcosts (rtx);
264 static int addsubcosts (rtx);
265 static int multcosts (rtx);
266 static bool unspec_caller_rtx_p (rtx);
267 static bool sh_cannot_copy_insn_p (rtx);
268 static bool sh_rtx_costs (rtx, int, int, int *);
269 static int sh_address_cost (rtx);
270 #ifdef TARGET_ADJUST_UNROLL_MAX
271 static int sh_adjust_unroll_max (struct loop *, int, int, int, int);
272 #endif
273 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
274 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
275 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
276 static int scavenge_reg (HARD_REG_SET *s);
277 struct save_schedule_s;
278 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
279 struct save_schedule_s *, int);
281 static rtx sh_struct_value_rtx (tree, int);
282 static bool sh_return_in_memory (tree, tree);
283 static rtx sh_builtin_saveregs (void);
284 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
285 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
286 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
287 static tree sh_build_builtin_va_list (void);
288 static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
289 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
290 tree, bool);
291 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
292 tree, bool);
293 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
294 tree, bool);
295 static int sh_dwarf_calling_convention (tree);
296 static int hard_regs_intersect_p (HARD_REG_SET *, HARD_REG_SET *);
299 /* Initialize the GCC target structure. */
300 #undef TARGET_ATTRIBUTE_TABLE
301 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
303 /* The next two are used for debug info when compiling with -gdwarf. */
304 #undef TARGET_ASM_UNALIGNED_HI_OP
305 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
306 #undef TARGET_ASM_UNALIGNED_SI_OP
307 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
309 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
310 #undef TARGET_ASM_UNALIGNED_DI_OP
311 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
312 #undef TARGET_ASM_ALIGNED_DI_OP
313 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
315 #undef TARGET_ASM_FUNCTION_EPILOGUE
316 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
318 #undef TARGET_ASM_OUTPUT_MI_THUNK
319 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
321 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
322 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
324 #undef TARGET_ASM_FILE_START
325 #define TARGET_ASM_FILE_START sh_file_start
326 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
327 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
329 #undef TARGET_DEFAULT_TARGET_FLAGS
330 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
331 #undef TARGET_HANDLE_OPTION
332 #define TARGET_HANDLE_OPTION sh_handle_option
334 #undef TARGET_INSERT_ATTRIBUTES
335 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
337 #undef TARGET_SCHED_ADJUST_COST
338 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
340 #undef TARGET_SCHED_ISSUE_RATE
341 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
 343 /* The next 5 hooks have been implemented for re-enabling sched1.  With the
 344    help of these macros we are limiting the movement of insns in sched1 to
 345    reduce register pressure.  The overall idea is to keep count of the SImode
 346    and SFmode regs required by already scheduled insns.  When these counts
 347    cross some threshold values, give priority to insns that free registers.
 348    The insn that frees registers is most likely to be the insn with the lowest
 349    LUID (original insn order), but such an insn might be sitting in the stalled
 350    queue (Q) instead of the ready queue (R).  To solve this, we skip cycles,
 351    up to a maximum of 8, so that such insns get a chance to move from Q -> R.
 353    The descriptions of the hooks are as follows:
 355    TARGET_SCHED_INIT_GLOBAL: A new target hook in the generic
 356    scheduler; it is called inside sched_init just after the
 357    find_insn_reg_weights call.  It is used to calculate the SImode
 358    and SFmode weights of the insns in each basic block, much as
 359    find_insn_reg_weights does.
360 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
362 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
363 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
364 (Q)->(R).
 366    TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
 367    high, reorder the ready queue so that the insn with the lowest LUID will
 368    be issued next.
370 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
371 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
373 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
374 can be returned from TARGET_SCHED_REORDER2.
376 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
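/* As a rough sketch (not the exact implementation), the reorder hooks
   behave like:

     if (high_pressure (SImode) || high_pressure (SFmode))
       ready_reorder (ready, n_ready);

   where READY/N_READY stand for the scheduler's ready-queue arguments; once
   the pressure counters cross their thresholds, the ready list is re-sorted
   so that low-LUID insns (the ones most likely to free registers) are issued
   first.  */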
378 #undef TARGET_SCHED_DFA_NEW_CYCLE
379 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
381 #undef TARGET_SCHED_INIT_GLOBAL
382 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
384 #undef TARGET_SCHED_FINISH_GLOBAL
385 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
387 #undef TARGET_SCHED_VARIABLE_ISSUE
388 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
390 #undef TARGET_SCHED_REORDER
391 #define TARGET_SCHED_REORDER sh_reorder
393 #undef TARGET_SCHED_REORDER2
394 #define TARGET_SCHED_REORDER2 sh_reorder2
396 #undef TARGET_SCHED_INIT
397 #define TARGET_SCHED_INIT sh_md_init
399 #undef TARGET_CANNOT_MODIFY_JUMPS_P
400 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
401 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
402 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
403 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
404 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
405 sh_optimize_target_register_callee_saved
407 #undef TARGET_MS_BITFIELD_LAYOUT_P
408 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
410 #undef TARGET_INIT_BUILTINS
411 #define TARGET_INIT_BUILTINS sh_init_builtins
412 #undef TARGET_EXPAND_BUILTIN
413 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
415 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
416 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
418 #undef TARGET_CANNOT_COPY_INSN_P
419 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
420 #undef TARGET_RTX_COSTS
421 #define TARGET_RTX_COSTS sh_rtx_costs
422 #undef TARGET_ADDRESS_COST
423 #define TARGET_ADDRESS_COST sh_address_cost
425 #undef TARGET_MACHINE_DEPENDENT_REORG
426 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
428 #ifdef HAVE_AS_TLS
429 #undef TARGET_HAVE_TLS
430 #define TARGET_HAVE_TLS true
431 #endif
433 #undef TARGET_PROMOTE_PROTOTYPES
434 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
435 #undef TARGET_PROMOTE_FUNCTION_ARGS
436 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
437 #undef TARGET_PROMOTE_FUNCTION_RETURN
438 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
440 #undef TARGET_STRUCT_VALUE_RTX
441 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
442 #undef TARGET_RETURN_IN_MEMORY
443 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
445 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
446 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
447 #undef TARGET_SETUP_INCOMING_VARARGS
448 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
449 #undef TARGET_STRICT_ARGUMENT_NAMING
450 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
451 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
452 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
453 #undef TARGET_MUST_PASS_IN_STACK
454 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
455 #undef TARGET_PASS_BY_REFERENCE
456 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
457 #undef TARGET_CALLEE_COPIES
458 #define TARGET_CALLEE_COPIES sh_callee_copies
459 #undef TARGET_ARG_PARTIAL_BYTES
460 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
462 #undef TARGET_BUILD_BUILTIN_VA_LIST
463 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
464 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
465 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
467 #undef TARGET_VECTOR_MODE_SUPPORTED_P
468 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
470 #undef TARGET_PCH_VALID_P
471 #define TARGET_PCH_VALID_P sh_pch_valid_p
473 #undef TARGET_DWARF_CALLING_CONVENTION
474 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
476 /* Return regmode weight for insn. */
477 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
479 /* Return current register pressure for regmode. */
480 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
482 #ifdef SYMBIAN
484 #undef TARGET_ENCODE_SECTION_INFO
485 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
486 #undef TARGET_STRIP_NAME_ENCODING
487 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
488 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
489 #define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class
491 #endif /* SYMBIAN */
493 #ifdef TARGET_ADJUST_UNROLL_MAX
494 #undef TARGET_ADJUST_UNROLL_MAX
495 #define TARGET_ADJUST_UNROLL_MAX sh_adjust_unroll_max
496 #endif
498 struct gcc_target targetm = TARGET_INITIALIZER;
500 /* Implement TARGET_HANDLE_OPTION. */
502 static bool
503 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
504 int value ATTRIBUTE_UNUSED)
506 switch (code)
508 case OPT_m1:
509 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
510 return true;
512 case OPT_m2:
513 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
514 return true;
516 case OPT_m2a:
517 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
518 return true;
520 case OPT_m2a_nofpu:
521 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
522 return true;
524 case OPT_m2a_single:
525 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
526 return true;
528 case OPT_m2a_single_only:
529 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
530 return true;
532 case OPT_m2e:
533 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
534 return true;
536 case OPT_m3:
537 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
538 return true;
540 case OPT_m3e:
541 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
542 return true;
544 case OPT_m4:
545 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
546 return true;
548 case OPT_m4_nofpu:
549 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
550 return true;
552 case OPT_m4_single:
553 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
554 return true;
556 case OPT_m4_single_only:
557 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
558 return true;
560 case OPT_m4a:
561 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
562 return true;
564 case OPT_m4a_nofpu:
565 case OPT_m4al:
566 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
567 return true;
569 case OPT_m4a_single:
570 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
571 return true;
573 case OPT_m4a_single_only:
574 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
575 return true;
577 case OPT_m5_32media:
578 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
579 return true;
581 case OPT_m5_32media_nofpu:
582 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
583 return true;
585 case OPT_m5_64media:
586 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
587 return true;
589 case OPT_m5_64media_nofpu:
590 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
591 return true;
593 case OPT_m5_compact:
594 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
595 return true;
597 case OPT_m5_compact_nofpu:
598 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
599 return true;
601 default:
602 return true;
606 /* Print the operand address in x to the stream. */
608 void
609 print_operand_address (FILE *stream, rtx x)
611 switch (GET_CODE (x))
613 case REG:
614 case SUBREG:
615 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
616 break;
618 case PLUS:
620 rtx base = XEXP (x, 0);
621 rtx index = XEXP (x, 1);
623 switch (GET_CODE (index))
625 case CONST_INT:
626 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
627 reg_names[true_regnum (base)]);
628 break;
630 case REG:
631 case SUBREG:
633 int base_num = true_regnum (base);
634 int index_num = true_regnum (index);
636 fprintf (stream, "@(r0,%s)",
637 reg_names[MAX (base_num, index_num)]);
638 break;
641 default:
642 gcc_unreachable ();
645 break;
647 case PRE_DEC:
648 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
649 break;
651 case POST_INC:
652 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
653 break;
655 default:
656 x = mark_constant_pool_use (x);
657 output_addr_const (stream, x);
658 break;
662 /* Print operand x (an rtx) in assembler syntax to file stream
663 according to modifier code.
665 '.' print a .s if insn needs delay slot
666 ',' print LOCAL_LABEL_PREFIX
 667    '@'  print trapa, rte or rts depending on whether the function uses
         trap_exit, is an interrupt handler, or returns normally
668 '#' output a nop if there is nothing to put in the delay slot
669 ''' print likelihood suffix (/u for unlikely).
670 '>' print branch target if -fverbose-asm
671 'O' print a constant without the #
672 'R' print the LSW of a dp value - changes if in little endian
673 'S' print the MSW of a dp value - changes if in little endian
674 'T' print the next word of a dp value - same as 'R' in big endian mode.
675 'M' print an `x' if `m' will print `base,index'.
676 'N' print 'r63' if the operand is (const_int 0).
677 'd' print a V2SF reg as dN instead of fpN.
678 'm' print a pair `base,offset' or `base,index', for LD and ST.
679 'U' Likewise for {LD,ST}{HI,LO}.
680 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
681 'o' output an operator. */
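/* E.g. for X == (plus:SI (reg:SI 4) (reg:SI 5)) the 'o' code prints "add",
   and the 'M' code prints an 'x' only for a base+index memory operand.  */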
683 void
684 print_operand (FILE *stream, rtx x, int code)
686 int regno;
687 enum machine_mode mode;
689 switch (code)
691 case '.':
692 if (final_sequence
693 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
694 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
695 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
696 break;
697 case ',':
698 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
699 break;
700 case '@':
701 if (trap_exit)
702 fprintf (stream, "trapa #%d", trap_exit);
703 else if (sh_cfun_interrupt_handler_p ())
704 fprintf (stream, "rte");
705 else
706 fprintf (stream, "rts");
707 break;
708 case '#':
709 /* Output a nop if there's nothing in the delay slot. */
710 if (dbr_sequence_length () == 0)
711 fprintf (stream, "\n\tnop");
712 break;
713 case '\'':
715 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
717 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
718 fputs ("/u", stream);
719 break;
721 case '>':
722 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
724 fputs ("\t! target: ", stream);
725 output_addr_const (stream, JUMP_LABEL (current_output_insn));
727 break;
728 case 'O':
729 x = mark_constant_pool_use (x);
730 output_addr_const (stream, x);
731 break;
732 case 'R':
733 fputs (reg_names[REGNO (x) + LSW], (stream));
734 break;
735 case 'S':
736 fputs (reg_names[REGNO (x) + MSW], (stream));
737 break;
738 case 'T':
739 /* Next word of a double. */
740 switch (GET_CODE (x))
742 case REG:
743 fputs (reg_names[REGNO (x) + 1], (stream));
744 break;
745 case MEM:
746 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
747 && GET_CODE (XEXP (x, 0)) != POST_INC)
748 x = adjust_address (x, SImode, 4);
749 print_operand_address (stream, XEXP (x, 0));
750 break;
751 default:
752 break;
754 break;
755 case 'o':
756 switch (GET_CODE (x))
758 case PLUS: fputs ("add", stream); break;
759 case MINUS: fputs ("sub", stream); break;
760 case MULT: fputs ("mul", stream); break;
761 case DIV: fputs ("div", stream); break;
762 case EQ: fputs ("eq", stream); break;
763 case NE: fputs ("ne", stream); break;
764 case GT: case LT: fputs ("gt", stream); break;
765 case GE: case LE: fputs ("ge", stream); break;
766 case GTU: case LTU: fputs ("gtu", stream); break;
767 case GEU: case LEU: fputs ("geu", stream); break;
768 default:
769 break;
771 break;
772 case 'M':
773 if (GET_CODE (x) == MEM
774 && GET_CODE (XEXP (x, 0)) == PLUS
775 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
776 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
777 fputc ('x', stream);
778 break;
780 case 'm':
781 gcc_assert (GET_CODE (x) == MEM);
782 x = XEXP (x, 0);
783 /* Fall through. */
784 case 'U':
785 switch (GET_CODE (x))
787 case REG:
788 case SUBREG:
789 print_operand (stream, x, 0);
790 fputs (", 0", stream);
791 break;
793 case PLUS:
794 print_operand (stream, XEXP (x, 0), 0);
795 fputs (", ", stream);
796 print_operand (stream, XEXP (x, 1), 0);
797 break;
799 default:
800 gcc_unreachable ();
802 break;
804 case 'd':
805 gcc_assert (GET_CODE (x) == REG && GET_MODE (x) == V2SFmode);
807 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
808 break;
810 case 'N':
811 if (x == CONST0_RTX (GET_MODE (x)))
813 fprintf ((stream), "r63");
814 break;
816 goto default_output;
817 case 'u':
818 if (GET_CODE (x) == CONST_INT)
820 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
821 break;
823 /* Fall through. */
825 default_output:
826 default:
827 regno = 0;
828 mode = GET_MODE (x);
830 switch (GET_CODE (x))
832 case TRUNCATE:
834 rtx inner = XEXP (x, 0);
835 int offset = 0;
836 enum machine_mode inner_mode;
838 /* We might see SUBREGs with vector mode registers inside. */
839 if (GET_CODE (inner) == SUBREG
840 && (GET_MODE_SIZE (GET_MODE (inner))
841 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
842 && subreg_lowpart_p (inner))
843 inner = SUBREG_REG (inner);
844 if (GET_CODE (inner) == CONST_INT)
846 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
847 goto default_output;
849 inner_mode = GET_MODE (inner);
850 if (GET_CODE (inner) == SUBREG
851 && (GET_MODE_SIZE (GET_MODE (inner))
852 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
853 && GET_CODE (SUBREG_REG (inner)) == REG)
855 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
856 GET_MODE (SUBREG_REG (inner)),
857 SUBREG_BYTE (inner),
858 GET_MODE (inner));
859 inner = SUBREG_REG (inner);
861 if (GET_CODE (inner) != REG || GET_MODE_SIZE (inner_mode) > 8)
862 abort ();
863 /* Floating point register pairs are always big endian;
864 general purpose registers are 64 bit wide. */
865 regno = REGNO (inner);
866 regno = (HARD_REGNO_NREGS (regno, inner_mode)
867 - HARD_REGNO_NREGS (regno, mode))
868 + offset;
869 x = inner;
870 goto reg;
872 case SIGN_EXTEND:
873 x = XEXP (x, 0);
874 goto reg;
875 /* FIXME: We need this on SHmedia32 because reload generates
876 some sign-extended HI or QI loads into DImode registers
877 but, because Pmode is SImode, the address ends up with a
878 subreg:SI of the DImode register. Maybe reload should be
879 fixed so as to apply alter_subreg to such loads? */
880 case IF_THEN_ELSE:
881 gcc_assert (trapping_target_operand (x, VOIDmode));
882 x = XEXP (XEXP (x, 2), 0);
883 goto default_output;
884 case SUBREG:
885 gcc_assert (SUBREG_BYTE (x) == 0
886 && GET_CODE (SUBREG_REG (x)) == REG);
888 x = SUBREG_REG (x);
889 /* Fall through. */
891 reg:
892 case REG:
893 regno += REGNO (x);
894 if (FP_REGISTER_P (regno)
895 && mode == V16SFmode)
896 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
897 else if (FP_REGISTER_P (REGNO (x))
898 && mode == V4SFmode)
899 fprintf ((stream), "fv%s", reg_names[regno] + 2);
900 else if (GET_CODE (x) == REG
901 && mode == V2SFmode)
902 fprintf ((stream), "fp%s", reg_names[regno] + 2);
903 else if (FP_REGISTER_P (REGNO (x))
904 && GET_MODE_SIZE (mode) > 4)
905 fprintf ((stream), "d%s", reg_names[regno] + 1);
906 else
907 fputs (reg_names[regno], (stream));
908 break;
910 case MEM:
911 output_address (XEXP (x, 0));
912 break;
914 case CONST:
915 if (TARGET_SHMEDIA
916 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
917 && (GET_MODE (XEXP (x, 0)) == DImode
918 || GET_MODE (XEXP (x, 0)) == SImode)
919 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
920 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
922 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
924 fputc ('(', stream);
925 if (GET_CODE (val) == ASHIFTRT)
927 fputc ('(', stream);
928 if (GET_CODE (XEXP (val, 0)) == CONST)
929 fputc ('(', stream);
930 output_addr_const (stream, XEXP (val, 0));
931 if (GET_CODE (XEXP (val, 0)) == CONST)
932 fputc (')', stream);
933 fputs (" >> ", stream);
934 output_addr_const (stream, XEXP (val, 1));
935 fputc (')', stream);
937 else
939 if (GET_CODE (val) == CONST)
940 fputc ('(', stream);
941 output_addr_const (stream, val);
942 if (GET_CODE (val) == CONST)
943 fputc (')', stream);
945 fputs (" & 65535)", stream);
946 break;
949 /* Fall through. */
950 default:
951 if (TARGET_SH1)
952 fputc ('#', stream);
953 output_addr_const (stream, x);
954 break;
956 break;
960 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
961 static void
962 force_into (rtx value, rtx target)
964 value = force_operand (value, target);
965 if (! rtx_equal_p (value, target))
966 emit_insn (gen_move_insn (target, value));
969 /* Emit code to perform a block move. Choose the best method.
971 OPERANDS[0] is the destination.
972 OPERANDS[1] is the source.
973 OPERANDS[2] is the size.
974 OPERANDS[3] is the alignment safe to use. */
 976 int
 977 expand_block_move (rtx *operands)
979 int align = INTVAL (operands[3]);
980 int constp = (GET_CODE (operands[2]) == CONST_INT);
981 int bytes = (constp ? INTVAL (operands[2]) : 0);
983 if (! constp)
984 return 0;
986 /* If we could use mov.l to move words and dest is word-aligned, we
987 can use movua.l for loads and still generate a relatively short
988 and efficient sequence. */
989 if (TARGET_SH4A_ARCH && align < 4
990 && MEM_ALIGN (operands[0]) >= 32
991 && can_move_by_pieces (bytes, 32))
993 rtx dest = copy_rtx (operands[0]);
994 rtx src = copy_rtx (operands[1]);
995 /* We could use different pseudos for each copied word, but
996 since movua can only load into r0, it's kind of
997 pointless. */
998 rtx temp = gen_reg_rtx (SImode);
999 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1000 int copied = 0;
1002 while (copied + 4 <= bytes)
1004 rtx to = adjust_address (dest, SImode, copied);
1005 rtx from = adjust_automodify_address (src, SImode, src_addr, copied);
1007 emit_insn (gen_movua (temp, from));
1008 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1009 emit_move_insn (to, temp);
1010 copied += 4;
1013 if (copied < bytes)
1014 move_by_pieces (adjust_address (dest, BLKmode, copied),
1015 adjust_automodify_address (src, BLKmode,
1016 src_addr, copied),
1017 bytes - copied, align, 0);
1019 return 1;
1022 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1023 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1024 if (align < 4 || (bytes % 4 != 0))
1025 return 0;
1027 if (TARGET_HARD_SH4)
1029 if (bytes < 12)
1030 return 0;
1031 else if (bytes == 12)
1033 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1034 rtx r4 = gen_rtx_REG (SImode, 4);
1035 rtx r5 = gen_rtx_REG (SImode, 5);
1037 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1038 force_into (XEXP (operands[0], 0), r4);
1039 force_into (XEXP (operands[1], 0), r5);
1040 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1041 return 1;
1043 else if (! TARGET_SMALLCODE)
1045 const char *entry_name;
1046 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1047 int dwords;
1048 rtx r4 = gen_rtx_REG (SImode, 4);
1049 rtx r5 = gen_rtx_REG (SImode, 5);
1050 rtx r6 = gen_rtx_REG (SImode, 6);
1052 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1053 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1054 force_into (XEXP (operands[0], 0), r4);
1055 force_into (XEXP (operands[1], 0), r5);
1057 dwords = bytes >> 3;
1058 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1059 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1060 return 1;
1062 else
1063 return 0;
1065 if (bytes < 64)
1067 char entry[30];
1068 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1069 rtx r4 = gen_rtx_REG (SImode, 4);
1070 rtx r5 = gen_rtx_REG (SImode, 5);
1072 sprintf (entry, "__movmemSI%d", bytes);
1073 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1074 force_into (XEXP (operands[0], 0), r4);
1075 force_into (XEXP (operands[1], 0), r5);
1076 emit_insn (gen_block_move_real (func_addr_rtx));
1077 return 1;
1080 /* This is the same number of bytes as a memcpy call, but to a different
1081 less common function name, so this will occasionally use more space. */
1082 if (! TARGET_SMALLCODE)
1084 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1085 int final_switch, while_loop;
1086 rtx r4 = gen_rtx_REG (SImode, 4);
1087 rtx r5 = gen_rtx_REG (SImode, 5);
1088 rtx r6 = gen_rtx_REG (SImode, 6);
1090 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1091 force_into (XEXP (operands[0], 0), r4);
1092 force_into (XEXP (operands[1], 0), r5);
1094 /* r6 controls the size of the move. 16 is decremented from it
1095 for each 64 bytes moved. Then the negative bit left over is used
1096 as an index into a list of move instructions. e.g., a 72 byte move
1097 would be set up with size(r6) = 14, for one iteration through the
1098 big while loop, and a switch of -2 for the last part. */
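/* Working through the 72 byte example above: bytes / 4 == 18, so
   final_switch == 16 - (18 % 16) == 14 and while_loop == (18 / 16 - 1) * 16
   == 0, giving r6 == 14 as described.  */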
1100 final_switch = 16 - ((bytes / 4) % 16);
1101 while_loop = ((bytes / 4) / 16 - 1) * 16;
1102 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1103 emit_insn (gen_block_lump_real (func_addr_rtx));
1104 return 1;
1107 return 0;
1110 /* Prepare operands for a move define_expand; specifically, one of the
1111 operands must be in a register. */
 1113 int
 1114 prepare_move_operands (rtx operands[], enum machine_mode mode)
1116 if ((mode == SImode || mode == DImode)
1117 && flag_pic
1118 && ! ((mode == Pmode || mode == ptr_mode)
1119 && tls_symbolic_operand (operands[1], Pmode) != 0))
1121 rtx temp;
1122 if (SYMBOLIC_CONST_P (operands[1]))
1124 if (GET_CODE (operands[0]) == MEM)
1125 operands[1] = force_reg (Pmode, operands[1]);
1126 else if (TARGET_SHMEDIA
1127 && GET_CODE (operands[1]) == LABEL_REF
1128 && target_reg_operand (operands[0], mode))
1129 /* It's ok. */;
1130 else
1132 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
1133 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1136 else if (GET_CODE (operands[1]) == CONST
1137 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1138 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1140 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
1141 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1142 mode, temp);
1143 operands[1] = expand_binop (mode, add_optab, temp,
1144 XEXP (XEXP (operands[1], 0), 1),
1145 no_new_pseudos ? temp
1146 : gen_reg_rtx (Pmode),
1147 0, OPTAB_LIB_WIDEN);
1151 if (! reload_in_progress && ! reload_completed)
1153 /* Copy the source to a register if both operands aren't registers. */
1154 if (! register_operand (operands[0], mode)
1155 && ! sh_register_operand (operands[1], mode))
1156 operands[1] = copy_to_mode_reg (mode, operands[1]);
1158 if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
1160 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
1161 except that we can't use that function because it is static. */
1162 rtx new = change_address (operands[0], mode, 0);
1163 MEM_COPY_ATTRIBUTES (new, operands[0]);
1164 operands[0] = new;
1167 /* This case can happen while generating code to move the result
1168 of a library call to the target. Reject `st r0,@(rX,rY)' because
1169 reload will fail to find a spill register for rX, since r0 is already
1170 being used for the source. */
1171 else if (TARGET_SH1
1172 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1173 && GET_CODE (operands[0]) == MEM
1174 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1175 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
1176 operands[1] = copy_to_mode_reg (mode, operands[1]);
1179 if (mode == Pmode || mode == ptr_mode)
1181 rtx op0, op1;
1182 enum tls_model tls_kind;
1184 op0 = operands[0];
1185 op1 = operands[1];
1186 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
1188 rtx tga_op1, tga_ret, tmp, tmp2;
1190 switch (tls_kind)
1192 case TLS_MODEL_GLOBAL_DYNAMIC:
1193 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1194 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1195 op1 = tga_ret;
1196 break;
1198 case TLS_MODEL_LOCAL_DYNAMIC:
1199 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1200 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1202 tmp = gen_reg_rtx (Pmode);
1203 emit_move_insn (tmp, tga_ret);
1205 if (register_operand (op0, Pmode))
1206 tmp2 = op0;
1207 else
1208 tmp2 = gen_reg_rtx (Pmode);
1210 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1211 op1 = tmp2;
1212 break;
1214 case TLS_MODEL_INITIAL_EXEC:
1215 if (! flag_pic)
1217 /* Don't schedule insns for getting GOT address when
1218 the first scheduling is enabled, to avoid spill
1219 failures for R0. */
1220 if (flag_schedule_insns)
1221 emit_insn (gen_blockage ());
1222 emit_insn (gen_GOTaddr2picreg ());
1223 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode,
1224 PIC_REG)));
1225 if (flag_schedule_insns)
1226 emit_insn (gen_blockage ());
1228 tga_op1 = no_new_pseudos ? op0 : gen_reg_rtx (Pmode);
1229 tmp = gen_sym2GOTTPOFF (op1);
1230 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1231 op1 = tga_op1;
1232 break;
1234 case TLS_MODEL_LOCAL_EXEC:
1235 tmp2 = gen_reg_rtx (Pmode);
1236 emit_insn (gen_load_gbr (tmp2));
1237 tmp = gen_reg_rtx (Pmode);
1238 emit_insn (gen_symTPOFF2reg (tmp, op1));
1240 if (register_operand (op0, Pmode))
1241 op1 = op0;
1242 else
1243 op1 = gen_reg_rtx (Pmode);
1245 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1246 break;
1248 default:
1249 gcc_unreachable ();
1251 operands[1] = op1;
1255 return 0;
1258 /* Prepare the operands for an scc instruction; make sure that the
1259 compare has been done. */
 1260 rtx
 1261 prepare_scc_operands (enum rtx_code code)
1263 rtx t_reg = gen_rtx_REG (SImode, T_REG);
1264 enum rtx_code oldcode = code;
1265 enum machine_mode mode;
1267 /* First need a compare insn. */
1268 switch (code)
1270 case NE:
1271 /* It isn't possible to handle this case. */
1272 gcc_unreachable ();
1273 case LT:
1274 code = GT;
1275 break;
1276 case LE:
1277 code = GE;
1278 break;
1279 case LTU:
1280 code = GTU;
1281 break;
1282 case LEU:
1283 code = GEU;
1284 break;
1285 default:
1286 break;
1288 if (code != oldcode)
1290 rtx tmp = sh_compare_op0;
1291 sh_compare_op0 = sh_compare_op1;
1292 sh_compare_op1 = tmp;
1295 mode = GET_MODE (sh_compare_op0);
1296 if (mode == VOIDmode)
1297 mode = GET_MODE (sh_compare_op1);
1299 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1300 if ((code != EQ && code != NE
1301 && (sh_compare_op1 != const0_rtx
1302 || code == GTU || code == GEU || code == LTU || code == LEU))
1303 || (mode == DImode && sh_compare_op1 != const0_rtx)
1304 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1305 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1307 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1308 (mode == SFmode ? emit_sf_insn : emit_df_insn)
1309 (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
1310 gen_rtx_SET (VOIDmode, t_reg,
1311 gen_rtx_fmt_ee (code, SImode,
1312 sh_compare_op0, sh_compare_op1)),
1313 gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
1314 else
1315 emit_insn (gen_rtx_SET (VOIDmode, t_reg,
1316 gen_rtx_fmt_ee (code, SImode,
1317 sh_compare_op0, sh_compare_op1)));
1319 return t_reg;
1322 /* Called from the md file, set up the operands of a compare instruction. */
1324 void
1325 from_compare (rtx *operands, int code)
1327 enum machine_mode mode = GET_MODE (sh_compare_op0);
1328 rtx insn;
1329 if (mode == VOIDmode)
1330 mode = GET_MODE (sh_compare_op1);
1331 if (code != EQ
1332 || mode == DImode
1333 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1335 /* Force args into regs, since we can't use constants here. */
1336 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1337 if (sh_compare_op1 != const0_rtx
1338 || code == GTU || code == GEU
1339 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1340 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1342 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
1344 from_compare (operands, GT);
1345 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
1347 else
1348 insn = gen_rtx_SET (VOIDmode,
1349 gen_rtx_REG (SImode, T_REG),
1350 gen_rtx_fmt_ee (code, SImode,
1351 sh_compare_op0, sh_compare_op1));
1352 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1354 insn = gen_rtx_PARALLEL (VOIDmode,
1355 gen_rtvec (2, insn,
1356 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
1357 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
1359 else
1360 emit_insn (insn);
1363 /* Functions to output assembly code. */
 1365 /* Return a sequence of instructions to perform a DI or DF move.
1367 Since the SH cannot move a DI or DF in one instruction, we have
1368 to take care when we see overlapping source and dest registers. */
1370 const char *
1371 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
1372 enum machine_mode mode)
1374 rtx dst = operands[0];
1375 rtx src = operands[1];
1377 if (GET_CODE (dst) == MEM
1378 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
1379 return "mov.l %T1,%0\n\tmov.l %1,%0";
1381 if (register_operand (dst, mode)
1382 && register_operand (src, mode))
1384 if (REGNO (src) == MACH_REG)
1385 return "sts mach,%S0\n\tsts macl,%R0";
1387 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1388 when mov.d r1,r0 do r1->r0 then r2->r1. */
1390 if (REGNO (src) + 1 == REGNO (dst))
1391 return "mov %T1,%T0\n\tmov %1,%0";
1392 else
1393 return "mov %1,%0\n\tmov %T1,%T0";
1395 else if (GET_CODE (src) == CONST_INT)
1397 if (INTVAL (src) < 0)
1398 output_asm_insn ("mov #-1,%S0", operands);
1399 else
1400 output_asm_insn ("mov #0,%S0", operands);
1402 return "mov %1,%R0";
1404 else if (GET_CODE (src) == MEM)
1406 int ptrreg = -1;
1407 int dreg = REGNO (dst);
1408 rtx inside = XEXP (src, 0);
1410 switch (GET_CODE (inside))
1412 case REG:
1413 ptrreg = REGNO (inside);
1414 break;
1416 case SUBREG:
1417 ptrreg = subreg_regno (inside);
1418 break;
1420 case PLUS:
1421 ptrreg = REGNO (XEXP (inside, 0));
 1422 /* ??? An r0+REG address shouldn't be possible here, because it isn't
1423 an offsettable address. Unfortunately, offsettable addresses use
1424 QImode to check the offset, and a QImode offsettable address
1425 requires r0 for the other operand, which is not currently
1426 supported, so we can't use the 'o' constraint.
1427 Thus we must check for and handle r0+REG addresses here.
1428 We punt for now, since this is likely very rare. */
1429 gcc_assert (GET_CODE (XEXP (inside, 1)) != REG);
1430 break;
1432 case LABEL_REF:
1433 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1434 case POST_INC:
1435 return "mov.l %1,%0\n\tmov.l %1,%T0";
1436 default:
1437 gcc_unreachable ();
1440 /* Work out the safe way to copy. Copy into the second half first. */
1441 if (dreg == ptrreg)
1442 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1445 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1448 /* Print an instruction which would have gone into a delay slot after
1449 another instruction, but couldn't because the other instruction expanded
1450 into a sequence where putting the slot insn at the end wouldn't work. */
1452 static void
1453 print_slot (rtx insn)
1455 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
1457 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
1460 const char *
1461 output_far_jump (rtx insn, rtx op)
1463 struct { rtx lab, reg, op; } this;
1464 rtx braf_base_lab = NULL_RTX;
1465 const char *jump;
1466 int far;
1467 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1468 rtx prev;
1470 this.lab = gen_label_rtx ();
1472 if (TARGET_SH2
1473 && offset >= -32764
1474 && offset - get_attr_length (insn) <= 32766)
1476 far = 0;
1477 jump = "mov.w %O0,%1; braf %1";
1479 else
1481 far = 1;
1482 if (flag_pic)
1484 if (TARGET_SH2)
1485 jump = "mov.l %O0,%1; braf %1";
1486 else
1487 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1489 else
1490 jump = "mov.l %O0,%1; jmp @%1";
1492 /* If we have a scratch register available, use it. */
1493 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1494 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1496 this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1497 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1498 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1499 output_asm_insn (jump, &this.lab);
1500 if (dbr_sequence_length ())
1501 print_slot (final_sequence);
1502 else
1503 output_asm_insn ("nop", 0);
1505 else
1507 /* Output the delay slot insn first if any. */
1508 if (dbr_sequence_length ())
1509 print_slot (final_sequence);
1511 this.reg = gen_rtx_REG (SImode, 13);
1512 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1513 Fortunately, MACL is fixed and call-clobbered, and we never
1514 need its value across jumps, so save r13 in it instead of in
1515 the stack. */
1516 if (TARGET_SH5)
1517 output_asm_insn ("lds r13, macl", 0);
1518 else
1519 output_asm_insn ("mov.l r13,@-r15", 0);
1520 output_asm_insn (jump, &this.lab);
1521 if (TARGET_SH5)
1522 output_asm_insn ("sts macl, r13", 0);
1523 else
1524 output_asm_insn ("mov.l @r15+,r13", 0);
1526 if (far && flag_pic && TARGET_SH2)
1528 braf_base_lab = gen_label_rtx ();
1529 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1530 CODE_LABEL_NUMBER (braf_base_lab));
1532 if (far)
1533 output_asm_insn (".align 2", 0);
1534 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1535 this.op = op;
1536 if (far && flag_pic)
1538 if (TARGET_SH2)
1539 this.lab = braf_base_lab;
1540 output_asm_insn (".long %O2-%O0", &this.lab);
1542 else
1543 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
1544 return "";
1547 /* Local label counter, used for constants in the pool and inside
1548 pattern branches. */
1550 static int lf = 100;
1552 /* Output code for ordinary branches. */
1554 const char *
1555 output_branch (int logic, rtx insn, rtx *operands)
1557 switch (get_attr_length (insn))
1559 case 6:
1560 /* This can happen if filling the delay slot has caused a forward
1561 branch to exceed its range (we could reverse it, but only
1562 when we know we won't overextend other branches; this should
1563 best be handled by relaxation).
 1564    It can also happen when other condbranches hoist delay slot insns
 1565    from their destinations, thus increasing code size.
1566 But the branch will still be in the range -4092..+4098 bytes. */
1568 if (! TARGET_RELAX)
1570 int label = lf++;
1571 /* The call to print_slot will clobber the operands. */
1572 rtx op0 = operands[0];
1574 /* If the instruction in the delay slot is annulled (true), then
1575 there is no delay slot where we can put it now. The only safe
1576 place for it is after the label. final will do that by default. */
1578 if (final_sequence
1579 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1580 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1582 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
1583 ASSEMBLER_DIALECT ? "/" : ".", label);
1584 print_slot (final_sequence);
1586 else
1587 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
1589 output_asm_insn ("bra\t%l0", &op0);
1590 fprintf (asm_out_file, "\tnop\n");
1591 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1593 return "";
1595 /* When relaxing, handle this like a short branch. The linker
1596 will fix it up if it still doesn't fit after relaxation. */
1597 case 2:
1598 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
1600 /* These are for SH2e, in which we have to account for the
1601 extra nop because of the hardware bug in annulled branches. */
1602 case 8:
1603 if (! TARGET_RELAX)
1605 int label = lf++;
1607 gcc_assert (!final_sequence
1608 || !(INSN_ANNULLED_BRANCH_P
1609 (XVECEXP (final_sequence, 0, 0))));
1610 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
1611 logic ? "f" : "t",
1612 ASSEMBLER_DIALECT ? "/" : ".", label);
1613 fprintf (asm_out_file, "\tnop\n");
1614 output_asm_insn ("bra\t%l0", operands);
1615 fprintf (asm_out_file, "\tnop\n");
1616 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1618 return "";
1620 /* When relaxing, fall through. */
1621 case 4:
1623 char buffer[10];
1625 sprintf (buffer, "b%s%ss\t%%l0",
1626 logic ? "t" : "f",
1627 ASSEMBLER_DIALECT ? "/" : ".");
1628 output_asm_insn (buffer, &operands[0]);
1629 return "nop";
1632 default:
1633 /* There should be no longer branches now - that would
1634 indicate that something has destroyed the branches set
1635 up in machine_dependent_reorg. */
1636 gcc_unreachable ();
1640 const char *
1641 output_branchy_insn (enum rtx_code code, const char *template,
1642 rtx insn, rtx *operands)
1644 rtx next_insn = NEXT_INSN (insn);
1646 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
1648 rtx src = SET_SRC (PATTERN (next_insn));
1649 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
1651 /* Following branch not taken */
1652 operands[9] = gen_label_rtx ();
1653 emit_label_after (operands[9], next_insn);
1654 INSN_ADDRESSES_NEW (operands[9],
1655 INSN_ADDRESSES (INSN_UID (next_insn))
1656 + get_attr_length (next_insn));
1657 return template;
1659 else
1661 int offset = (branch_dest (next_insn)
1662 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
1663 if (offset >= -252 && offset <= 258)
1665 if (GET_CODE (src) == IF_THEN_ELSE)
1666 /* branch_true */
1667 src = XEXP (src, 1);
1668 operands[9] = src;
1669 return template;
1673 operands[9] = gen_label_rtx ();
1674 emit_label_after (operands[9], insn);
1675 INSN_ADDRESSES_NEW (operands[9],
1676 INSN_ADDRESSES (INSN_UID (insn))
1677 + get_attr_length (insn));
1678 return template;
1681 const char *
1682 output_ieee_ccmpeq (rtx insn, rtx *operands)
1684 return output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);
1687 /* Output the start of the assembler file. */
1689 static void
1690 sh_file_start (void)
1692 default_file_start ();
1694 #ifdef SYMBIAN
1695 /* Declare the .directive section before it is used. */
1696 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
1697 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
1698 #endif
1700 if (TARGET_ELF)
1701 /* We need to show the text section with the proper
1702 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
1703 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
1704 will complain. We can teach GAS specifically about the
1705 default attributes for our choice of text section, but
1706 then we would have to change GAS again if/when we change
1707 the text section name. */
1708 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
1709 else
1710 /* Switch to the data section so that the coffsem symbol
1711 isn't in the text section. */
1712 data_section ();
1714 if (TARGET_LITTLE_ENDIAN)
1715 fputs ("\t.little\n", asm_out_file);
1717 if (!TARGET_ELF)
1719 if (TARGET_SHCOMPACT)
1720 fputs ("\t.mode\tSHcompact\n", asm_out_file);
1721 else if (TARGET_SHMEDIA)
1722 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1723 TARGET_SHMEDIA64 ? 64 : 32);
1727 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
1729 static bool
1730 unspec_caller_rtx_p (rtx pat)
1732 switch (GET_CODE (pat))
1734 case CONST:
1735 return unspec_caller_rtx_p (XEXP (pat, 0));
1736 case PLUS:
1737 case MINUS:
1738 if (unspec_caller_rtx_p (XEXP (pat, 0)))
1739 return true;
1740 return unspec_caller_rtx_p (XEXP (pat, 1));
1741 case UNSPEC:
1742 if (XINT (pat, 1) == UNSPEC_CALLER)
1743 return true;
1744 default:
1745 break;
1748 return false;
 1751 /* Indicate that INSN cannot be duplicated.  This is true for an insn
 1752    that generates a unique label.  */
1754 static bool
1755 sh_cannot_copy_insn_p (rtx insn)
1757 rtx pat;
1759 if (!reload_completed || !flag_pic)
1760 return false;
1762 if (GET_CODE (insn) != INSN)
1763 return false;
1764 if (asm_noperands (insn) >= 0)
1765 return false;
1767 pat = PATTERN (insn);
1768 if (GET_CODE (pat) != SET)
1769 return false;
1770 pat = SET_SRC (pat);
1772 if (unspec_caller_rtx_p (pat))
1773 return true;
1775 return false;
1778 /* Actual number of instructions used to make a shift by N. */
1779 static const char ashiftrt_insns[] =
1780 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
1782 /* Left shift and logical right shift are the same. */
1783 static const char shift_insns[] =
1784 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1786 /* Individual shift amounts needed to get the above length sequences.
1787 One bit right shifts clobber the T bit, so when possible, put one bit
1788 shifts in the middle of the sequence, so the ends are eligible for
1789 branch delay slots. */
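/* E.g. a shift by 7 uses the sequence {2, 2, 1, 2} -- four insns, matching
   shift_insns[7] -- with the single one-bit shift kept away from both ends.  */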
1790 static const short shift_amounts[32][5] = {
1791 {0}, {1}, {2}, {2, 1},
1792 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
1793 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1794 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
1795 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1796 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1797 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1798 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1800 /* Likewise, but for shift amounts < 16, up to three highmost bits
1801 might be clobbered. This is typically used when combined with some
1802 kind of sign or zero extension. */
1804 static const char ext_shift_insns[] =
1805 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1807 static const short ext_shift_amounts[32][4] = {
1808 {0}, {1}, {2}, {2, 1},
1809 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
1810 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1811 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
1812 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1813 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1814 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1815 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
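/* Illustration: ext_shift_insns[6] is 2 and ext_shift_amounts[6] is {8, -2},
   so a left shift by 6 is done as a shift left by 8 followed by a logical
   shift right by 2 (negative entries reverse the shift direction, see
   gen_ashift), which may clobber the two highmost bits of the result.  */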
1817 /* Assuming we have a value that has been sign-extended by at least one bit,
1818 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
1819 to shift it by N without data loss, and quicker than by other means? */
1820 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
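/* Note that (((n) | 8) == 15) holds exactly for N == 7 and N == 15.  */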
1822 /* This is used in length attributes in sh.md to help compute the length
1823 of arbitrary constant shift instructions. */
1826 shift_insns_rtx (rtx insn)
1828 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1829 int shift_count = INTVAL (XEXP (set_src, 1));
1830 enum rtx_code shift_code = GET_CODE (set_src);
1832 switch (shift_code)
1834 case ASHIFTRT:
1835 return ashiftrt_insns[shift_count];
1836 case LSHIFTRT:
1837 case ASHIFT:
1838 return shift_insns[shift_count];
1839 default:
1840 gcc_unreachable ();
1844 /* Return the cost of a shift. */
1846 static inline int
1847 shiftcosts (rtx x)
1849 int value;
1851 if (TARGET_SHMEDIA)
1852 return 1;
1854 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
1856 if (GET_MODE (x) == DImode
1857 && GET_CODE (XEXP (x, 1)) == CONST_INT
1858 && INTVAL (XEXP (x, 1)) == 1)
1859 return 2;
1861 /* Everything else is invalid, because there is no pattern for it. */
1862 return 10000;
1864 /* If the shift is by a non-constant, then this will be expensive. */
1865 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1866 return SH_DYNAMIC_SHIFT_COST;
1868 value = INTVAL (XEXP (x, 1));
1870 /* Otherwise, return the true cost in instructions. */
1871 if (GET_CODE (x) == ASHIFTRT)
1873 int cost = ashiftrt_insns[value];
1874 /* If SH3, then we put the constant in a reg and use shad. */
1875 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1876 cost = 1 + SH_DYNAMIC_SHIFT_COST;
1877 return cost;
1879 else
1880 return shift_insns[value];
1883 /* Return the cost of an AND operation. */
1885 static inline int
1886 andcosts (rtx x)
1888 int i;
1890 /* ANDing with a register is a single-cycle AND instruction. */
1891 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1892 return 1;
1894 i = INTVAL (XEXP (x, 1));
1896 if (TARGET_SHMEDIA)
1898 if ((GET_CODE (XEXP (x, 1)) == CONST_INT
1899 && CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1900 || EXTRA_CONSTRAINT_C16 (XEXP (x, 1)))
1901 return 1;
1902 else
1903 return 2;
1906 /* These constants can be handled by a single-cycle extu.[bw] instruction. */
1907 if (i == 0xff || i == 0xffff)
1908 return 1;
1909 /* Constants that can be used in an AND-immediate instruction in a single
1910 cycle; this requires r0, so make it a little more expensive. */
1911 if (CONST_OK_FOR_K08 (i))
1912 return 2;
1913 /* Constants that can be loaded with a mov immediate and an and.
1914 This case is probably unnecessary. */
1915 if (CONST_OK_FOR_I08 (i))
1916 return 2;
1917 /* Any other constant requires a 2-cycle pc-relative load plus an AND.
1918 This case is probably unnecessary. */
1919 return 3;
1922 /* Return the cost of an addition or a subtraction. */
1924 static inline int
1925 addsubcosts (rtx x)
1927 /* Adding a register is a single cycle insn. */
1928 if (GET_CODE (XEXP (x, 1)) == REG
1929 || GET_CODE (XEXP (x, 1)) == SUBREG)
1930 return 1;
1932 /* Likewise for small constants. */
1933 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1934 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
1935 return 1;
1937 if (TARGET_SHMEDIA)
1938 switch (GET_CODE (XEXP (x, 1)))
1940 case CONST:
1941 case LABEL_REF:
1942 case SYMBOL_REF:
1943 return TARGET_SHMEDIA64 ? 5 : 3;
1945 case CONST_INT:
1946 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1947 return 2;
1948 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
1949 return 3;
1950 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
1951 return 4;
1953 /* Fall through. */
1954 default:
1955 return 5;
1958 /* Any other constant requires a 2 cycle pc-relative load plus an
1959 addition. */
1960 return 3;
1963 /* Return the cost of a multiply. */
1964 static inline int
1965 multcosts (rtx x ATTRIBUTE_UNUSED)
1967 if (*sh_multcost_str)
1968 return atoi (sh_multcost_str);
1969 if (TARGET_SHMEDIA)
1970 /* ??? We have a mul insn, but it has a latency of three, and doesn't
1971 accept constants. Ideally, we would use a cost of one or two and
1972 add the cost of the operand, but disregard the latter when inside loops
1973 and loop invariant code motion is still to follow.
1974 Using a multiply first and splitting it later if it's a loss
1975 doesn't work because of different sign / zero extension semantics
1976 of multiplies vs. shifts. */
1977 return TARGET_SMALLCODE ? 2 : 3;
1979 if (TARGET_SH2)
1981 /* We have a mul insn, so we can never take more than the mul and the
1982 read of the mac reg, but count more because of the latency and extra
1983 reg usage. */
1984 if (TARGET_SMALLCODE)
1985 return 2;
1986 return 3;
1989 /* If we're aiming at small code, then just count the number of
1990 insns in a multiply call sequence. */
1991 if (TARGET_SMALLCODE)
1992 return 5;
1994 /* Otherwise count all the insns in the routine we'd be calling too. */
1995 return 20;
1998 /* Compute a (partial) cost for rtx X. Return true if the complete
1999 cost has been computed, and false if subexpressions should be
2000 scanned. In either case, *TOTAL contains the cost result. */
2002 static bool
2003 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
2005 switch (code)
2007 case CONST_INT:
2008 if (TARGET_SHMEDIA)
2010 if (INTVAL (x) == 0)
2011 *total = 0;
2012 else if (outer_code == AND && and_operand ((x), DImode))
2013 *total = 0;
2014 else if ((outer_code == IOR || outer_code == XOR
2015 || outer_code == PLUS)
2016 && CONST_OK_FOR_I10 (INTVAL (x)))
2017 *total = 0;
2018 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2019 *total = COSTS_N_INSNS (outer_code != SET);
2020 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2021 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2022 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2023 *total = COSTS_N_INSNS (3);
2024 else
2025 *total = COSTS_N_INSNS (4);
2026 return true;
2028 if (CONST_OK_FOR_I08 (INTVAL (x)))
2029 *total = 0;
2030 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2031 && CONST_OK_FOR_K08 (INTVAL (x)))
2032 *total = 1;
2033 else
2034 *total = 8;
2035 return true;
2037 case CONST:
2038 case LABEL_REF:
2039 case SYMBOL_REF:
2040 if (TARGET_SHMEDIA64)
2041 *total = COSTS_N_INSNS (4);
2042 else if (TARGET_SHMEDIA32)
2043 *total = COSTS_N_INSNS (2);
2044 else
2045 *total = 5;
2046 return true;
2048 case CONST_DOUBLE:
2049 if (TARGET_SHMEDIA)
2050 *total = COSTS_N_INSNS (4);
2051 else
2052 *total = 10;
2053 return true;
2054 case CONST_VECTOR:
2055 if (x == CONST0_RTX (GET_MODE (x)))
2056 *total = 0;
2057 else if (sh_1el_vec (x, VOIDmode))
2058 *total = outer_code != SET;
2059 else if (sh_rep_vec (x, VOIDmode))
2060 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2061 + (outer_code != SET));
2062 else *total = COSTS_N_INSNS (3) + (outer_code != SET);
2063 return true;
2065 case PLUS:
2066 case MINUS:
2067 *total = COSTS_N_INSNS (addsubcosts (x));
2068 return true;
2070 case AND:
2071 *total = COSTS_N_INSNS (andcosts (x));
2072 return true;
2074 case MULT:
2075 *total = COSTS_N_INSNS (multcosts (x));
2076 return true;
2078 case ASHIFT:
2079 case ASHIFTRT:
2080 case LSHIFTRT:
2081 *total = COSTS_N_INSNS (shiftcosts (x));
2082 return true;
2084 case DIV:
2085 case UDIV:
2086 case MOD:
2087 case UMOD:
2088 *total = COSTS_N_INSNS (20);
2089 return true;
2091 case PARALLEL:
2092 if (sh_1el_vec (x, VOIDmode))
2093 *total = outer_code != SET;
2094 else if (sh_rep_vec (x, VOIDmode))
2095 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2096 + (outer_code != SET));
2097 else *total = COSTS_N_INSNS (3) + (outer_code != SET);
2098 return true;
2100 case FLOAT:
2101 case FIX:
2102 *total = 100;
2103 return true;
2105 default:
2106 return false;
2110 /* Compute the cost of an address. For the SH, all valid addresses are
2111 the same cost. Use a slightly higher cost for reg + reg addressing,
2112 since it increases pressure on r0. */
2114 static int
2115 sh_address_cost (rtx X)
2117 return (GET_CODE (X) == PLUS
2118 && ! CONSTANT_P (XEXP (X, 1))
2119 && ! TARGET_SHMEDIA ? 1 : 0);
2122 /* Code to expand a shift. */
2124 void
2125 gen_ashift (int type, int n, rtx reg)
2127 /* Negative values here come from the shift_amounts array. */
2128 if (n < 0)
2130 if (type == ASHIFT)
2131 type = LSHIFTRT;
2132 else
2133 type = ASHIFT;
2134 n = -n;
2137 switch (type)
2139 case ASHIFTRT:
2140 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
2141 break;
2142 case LSHIFTRT:
2143 if (n == 1)
2144 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
2145 else
2146 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
2147 break;
2148 case ASHIFT:
2149 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
2150 break;
2154 /* Same for HImode */
2156 void
2157 gen_ashift_hi (int type, int n, rtx reg)
2159 /* Negative values here come from the shift_amounts array. */
2160 if (n < 0)
2162 if (type == ASHIFT)
2163 type = LSHIFTRT;
2164 else
2165 type = ASHIFT;
2166 n = -n;
2169 switch (type)
2171 case ASHIFTRT:
2172 case LSHIFTRT:
2173 /* We don't have HImode right shift operations because using the
2174 ordinary 32 bit shift instructions for that doesn't generate proper
2175 zero/sign extension.
2176 gen_ashift_hi is only called in contexts where we know that the
2177 sign extension works out correctly. */
2179 int offset = 0;
2180 if (GET_CODE (reg) == SUBREG)
2182 offset = SUBREG_BYTE (reg);
2183 reg = SUBREG_REG (reg);
2185 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
2186 break;
2188 case ASHIFT:
2189 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
2190 break;
2194 /* Output RTL to split a constant shift into its component SH constant
2195 shift instructions. */
2197 void
2198 gen_shifty_op (int code, rtx *operands)
2200 int value = INTVAL (operands[2]);
2201 int max, i;
2203 /* Truncate the shift count in case it is out of bounds. */
2204 value = value & 0x1f;
2206 if (value == 31)
2208 if (code == LSHIFTRT)
2210 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
2211 emit_insn (gen_movt (operands[0]));
2212 return;
2214 else if (code == ASHIFT)
2216 /* There is a two instruction sequence for 31 bit left shifts,
2217 but it requires r0. */
2218 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
2220 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
2221 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
2222 return;
2226 else if (value == 0)
2228 /* This can happen even when optimizing, if there were subregs before
2229 reload. Don't output a nop here, as this is never optimized away;
2230 use a no-op move instead. */
2231 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
2232 return;
2235 max = shift_insns[value];
2236 for (i = 0; i < max; i++)
2237 gen_ashift (code, shift_amounts[value][i], operands[0]);
2240 /* Same as above, but optimized for values where the topmost bits don't
2241 matter. */
2243 void
2244 gen_shifty_hi_op (int code, rtx *operands)
2246 int value = INTVAL (operands[2]);
2247 int max, i;
2248 void (*gen_fun) (int, int, rtx);
2250 /* This operation is used by and_shl for SImode values with a few
2251 high bits known to be cleared. */
2252 value &= 31;
2253 if (value == 0)
2255 emit_insn (gen_nop ());
2256 return;
2259 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2260 if (code == ASHIFT)
2262 max = ext_shift_insns[value];
2263 for (i = 0; i < max; i++)
2264 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2266 else
2267 /* When shifting right, emit the shifts in reverse order, so that
2268 solitary negative values come first. */
2269 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2270 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2273 /* Output RTL for an arithmetic right shift. */
2275 /* ??? Rewrite to use super-optimizer sequences. */
2278 expand_ashiftrt (rtx *operands)
2280 rtx wrk;
2281 char func[18];
2282 int value;
2284 if (TARGET_SH3)
2286 if (GET_CODE (operands[2]) != CONST_INT)
2288 rtx count = copy_to_mode_reg (SImode, operands[2]);
2289 emit_insn (gen_negsi2 (count, count));
2290 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2291 return 1;
2293 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2294 > 1 + SH_DYNAMIC_SHIFT_COST)
2296 rtx count
2297 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2298 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2299 return 1;
2302 if (GET_CODE (operands[2]) != CONST_INT)
2303 return 0;
2305 value = INTVAL (operands[2]) & 31;
2307 if (value == 31)
2309 /* If we are called from abs expansion, arrange things so that we
2310 can use a single MT instruction that doesn't clobber the source,
2311 if LICM can hoist out the load of the constant zero. */
2312 if (currently_expanding_to_rtl)
2314 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
2315 operands[1]));
2316 emit_insn (gen_mov_neg_si_t (operands[0]));
2317 return 1;
2319 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2320 return 1;
2322 else if (value >= 16 && value <= 19)
2324 wrk = gen_reg_rtx (SImode);
2325 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2326 value -= 16;
2327 while (value--)
2328 gen_ashift (ASHIFTRT, 1, wrk);
2329 emit_move_insn (operands[0], wrk);
2330 return 1;
2332 /* Expand a short sequence inline; for longer ones, call a magic routine. */
2333 else if (value <= 5)
2335 wrk = gen_reg_rtx (SImode);
2336 emit_move_insn (wrk, operands[1]);
2337 while (value--)
2338 gen_ashift (ASHIFTRT, 1, wrk);
2339 emit_move_insn (operands[0], wrk);
2340 return 1;
2343 wrk = gen_reg_rtx (Pmode);
2345 /* Load the value into an arg reg and call a helper. */
2346 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2347 sprintf (func, "__ashiftrt_r4_%d", value);
2348 function_symbol (wrk, func, SFUNC_STATIC);
2349 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2350 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
2351 return 1;
2355 sh_dynamicalize_shift_p (rtx count)
2357 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
2360 /* Try to find a good way to implement the combiner pattern
2361 [(set (match_operand:SI 0 "register_operand" "r")
2362 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2363 (match_operand:SI 2 "const_int_operand" "n"))
2364 (match_operand:SI 3 "const_int_operand" "n"))) .
2365 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2366 return 0 for simple right / left or left/right shift combination.
2367 return 1 for a combination of shifts with zero_extend.
2368 return 2 for a combination of shifts with an AND that needs r0.
2369 return 3 for a combination of shifts with an AND that needs an extra
2370 scratch register, when the three highmost bits of the AND mask are clear.
2371 return 4 for a combination of shifts with an AND that needs an extra
2372 scratch register, when any of the three highmost bits of the AND mask
2373 is set.
2374 If ATTRP is set, store an initial right shift width in ATTRP[0],
2375 and the instruction length in ATTRP[1]. These values are not valid
2376 when returning 0.
2377 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2378 shift_amounts for the last shift value that is to be used before the
2379 sign extend. */
2381 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2383 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2384 int left = INTVAL (left_rtx), right;
2385 int best = 0;
2386 int cost, best_cost = 10000;
2387 int best_right = 0, best_len = 0;
2388 int i;
2389 int can_ext;
2391 if (left < 0 || left > 31)
2392 return 0;
2393 if (GET_CODE (mask_rtx) == CONST_INT)
2394 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2395 else
2396 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2397 /* Can this be expressed as a right shift / left shift pair? */
2398 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2399 right = exact_log2 (lsb);
2400 mask2 = ~(mask + lsb - 1);
2401 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
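/* For nonzero x, (x ^ (x - 1)) sets all bits up to and including the lowest
   set bit of x, so LSB and LSB2 are the lowest set bits of MASK and MASK2
   respectively.  */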
2402 /* mask has no zeroes except trailing zeroes <==> ! mask2 */
2403 if (! mask2)
2404 best_cost = shift_insns[right] + shift_insns[right + left];
2405 /* mask has no trailing zeroes <==> ! right */
2406 else if (! right && mask2 == ~(lsb2 - 1))
2408 int late_right = exact_log2 (lsb2);
2409 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2411 /* Try to use zero extend. */
2412 if (mask2 == ~(lsb2 - 1))
2414 int width, first;
2416 for (width = 8; width <= 16; width += 8)
2418 /* Can we zero-extend right away? */
2419 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2421 cost
2422 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2423 if (cost < best_cost)
2425 best = 1;
2426 best_cost = cost;
2427 best_right = right;
2428 best_len = cost;
2429 if (attrp)
2430 attrp[2] = -1;
2432 continue;
2434 /* ??? Could try to put zero extend into initial right shift,
2435 or even shift a bit left before the right shift. */
2436 /* Determine value of first part of left shift, to get to the
2437 zero extend cut-off point. */
2438 first = width - exact_log2 (lsb2) + right;
2439 if (first >= 0 && right + left - first >= 0)
2441 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2442 + ext_shift_insns[right + left - first];
2443 if (cost < best_cost)
2445 best = 1;
2446 best_cost = cost;
2447 best_right = right;
2448 best_len = cost;
2449 if (attrp)
2450 attrp[2] = first;
2455 /* Try to use r0 AND pattern */
2456 for (i = 0; i <= 2; i++)
2458 if (i > right)
2459 break;
2460 if (! CONST_OK_FOR_K08 (mask >> i))
2461 continue;
2462 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2463 if (cost < best_cost)
2465 best = 2;
2466 best_cost = cost;
2467 best_right = i;
2468 best_len = cost - 1;
2471 /* Try to use a scratch register to hold the AND operand. */
2472 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2473 for (i = 0; i <= 2; i++)
2475 if (i > right)
2476 break;
2477 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2478 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2479 if (cost < best_cost)
2481 best = 4 - can_ext;
2482 best_cost = cost;
2483 best_right = i;
2484 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2488 if (attrp)
2490 attrp[0] = best_right;
2491 attrp[1] = best_len;
2493 return best;
2496 /* This is used in length attributes of the unnamed instructions
2497 corresponding to shl_and_kind return values of 1 and 2. */
2499 shl_and_length (rtx insn)
2501 rtx set_src, left_rtx, mask_rtx;
2502 int attributes[3];
2504 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2505 left_rtx = XEXP (XEXP (set_src, 0), 1);
2506 mask_rtx = XEXP (set_src, 1);
2507 shl_and_kind (left_rtx, mask_rtx, attributes);
2508 return attributes[1];
2511 /* This is used in length attribute of the and_shl_scratch instruction. */
2514 shl_and_scr_length (rtx insn)
2516 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2517 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2518 rtx op = XEXP (set_src, 0);
2519 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2520 op = XEXP (XEXP (op, 0), 0);
2521 return len + shift_insns[INTVAL (XEXP (op, 1))];
2524 /* Generate rtl for instructions for which shl_and_kind advised a particular
2525 method of generating them, i.e. returned a nonzero kind. */
2528 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2530 int attributes[3];
2531 unsigned HOST_WIDE_INT mask;
2532 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2533 int right, total_shift;
2534 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2536 right = attributes[0];
2537 total_shift = INTVAL (left_rtx) + right;
2538 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2539 switch (kind)
2541 default:
2542 return -1;
2543 case 1:
2545 int first = attributes[2];
2546 rtx operands[3];
2548 if (first < 0)
2550 emit_insn ((mask << right) <= 0xff
2551 ? gen_zero_extendqisi2 (dest,
2552 gen_lowpart (QImode, source))
2553 : gen_zero_extendhisi2 (dest,
2554 gen_lowpart (HImode, source)));
2555 source = dest;
2557 if (source != dest)
2558 emit_insn (gen_movsi (dest, source));
2559 operands[0] = dest;
2560 if (right)
2562 operands[2] = GEN_INT (right);
2563 gen_shifty_hi_op (LSHIFTRT, operands);
2565 if (first > 0)
2567 operands[2] = GEN_INT (first);
2568 gen_shifty_hi_op (ASHIFT, operands);
2569 total_shift -= first;
2570 mask <<= first;
2572 if (first >= 0)
2573 emit_insn (mask <= 0xff
2574 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
2575 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2576 if (total_shift > 0)
2578 operands[2] = GEN_INT (total_shift);
2579 gen_shifty_hi_op (ASHIFT, operands);
2581 break;
2583 case 4:
2584 shift_gen_fun = gen_shifty_op;
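/* Fall through.  */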
2585 case 3:
2586 /* If the topmost bit that matters is set, set the topmost bits
2587 that don't matter. This way, we might be able to get a shorter
2588 signed constant. */
2589 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
2590 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
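/* Fall through.  */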
2591 case 2:
2592 /* Don't expand fine-grained when combining, because that will
2593 make the pattern fail. */
2594 if (currently_expanding_to_rtl
2595 || reload_in_progress || reload_completed)
2597 rtx operands[3];
2599 /* Cases 3 and 4 should be handled by this split
2600 only while combining */
2601 gcc_assert (kind <= 2);
2602 if (right)
2604 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2605 source = dest;
2607 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
2608 if (total_shift)
2610 operands[0] = dest;
2611 operands[1] = dest;
2612 operands[2] = GEN_INT (total_shift);
2613 shift_gen_fun (ASHIFT, operands);
2615 break;
2617 else
2619 int neg = 0;
2620 if (kind != 4 && total_shift < 16)
2622 neg = -ext_shift_amounts[total_shift][1];
2623 if (neg > 0)
2624 neg -= ext_shift_amounts[total_shift][2];
2625 else
2626 neg = 0;
2628 emit_insn (gen_and_shl_scratch (dest, source,
2629 GEN_INT (right),
2630 GEN_INT (mask),
2631 GEN_INT (total_shift + neg),
2632 GEN_INT (neg)));
2633 emit_insn (gen_movsi (dest, dest));
2634 break;
2637 return 0;
2640 /* Try to find a good way to implement the combiner pattern
2641 [(set (match_operand:SI 0 "register_operand" "=r")
2642 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2643 (match_operand:SI 2 "const_int_operand" "n")
2644 (match_operand:SI 3 "const_int_operand" "n")
2645 (const_int 0)))
2646 (clobber (reg:SI T_REG))]
2647 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2648 return 0 for simple left / right shift combination.
2649 return 1 for left shift / 8 bit sign extend / left shift.
2650 return 2 for left shift / 16 bit sign extend / left shift.
2651 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2652 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2653 return 5 for left shift / 16 bit sign extend / right shift
2654 return 6 for < 8 bit sign extend / left shift.
2655 return 7 for < 8 bit sign extend / left shift / single right shift.
2656 If COSTP is nonzero, assign the calculated cost to *COSTP. */
2659 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
2661 int left, size, insize, ext;
2662 int cost = 0, best_cost;
2663 int kind;
2665 left = INTVAL (left_rtx);
2666 size = INTVAL (size_rtx);
2667 insize = size - left;
2668 gcc_assert (insize > 0);
2669 /* Default to left / right shift. */
2670 kind = 0;
2671 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
2672 if (size <= 16)
2674 /* 16 bit shift / sign extend / 16 bit shift */
2675 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2676 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2677 below, by alternative 3 or something even better. */
2678 if (cost < best_cost)
2680 kind = 5;
2681 best_cost = cost;
2684 /* Try a plain sign extend between two shifts. */
2685 for (ext = 16; ext >= insize; ext -= 8)
2687 if (ext <= size)
2689 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2690 if (cost < best_cost)
2692 kind = ext / (unsigned) 8;
2693 best_cost = cost;
2696 /* Check if we can do a sloppy shift with a final signed shift
2697 restoring the sign. */
2698 if (EXT_SHIFT_SIGNED (size - ext))
2699 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2700 /* If not, maybe it's still cheaper to do the second shift sloppy,
2701 and do a final sign extend? */
2702 else if (size <= 16)
2703 cost = ext_shift_insns[ext - insize] + 1
2704 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2705 else
2706 continue;
2707 if (cost < best_cost)
2709 kind = ext / (unsigned) 8 + 2;
2710 best_cost = cost;
2713 /* Check if we can sign extend in r0 */
2714 if (insize < 8)
2716 cost = 3 + shift_insns[left];
2717 if (cost < best_cost)
2719 kind = 6;
2720 best_cost = cost;
2722 /* Try the same with a final signed shift. */
2723 if (left < 31)
2725 cost = 3 + ext_shift_insns[left + 1] + 1;
2726 if (cost < best_cost)
2728 kind = 7;
2729 best_cost = cost;
2733 if (TARGET_SH3)
2735 /* Try to use a dynamic shift. */
2736 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2737 if (cost < best_cost)
2739 kind = 0;
2740 best_cost = cost;
2743 if (costp)
2744 *costp = cost;
2745 return kind;
2748 /* Function to be used in the length attribute of the instructions
2749 implementing this pattern. */
2752 shl_sext_length (rtx insn)
2754 rtx set_src, left_rtx, size_rtx;
2755 int cost;
2757 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2758 left_rtx = XEXP (XEXP (set_src, 0), 1);
2759 size_rtx = XEXP (set_src, 1);
2760 shl_sext_kind (left_rtx, size_rtx, &cost);
2761 return cost;
2764 /* Generate rtl for this pattern */
2767 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
2769 int kind;
2770 int left, size, insize, cost;
2771 rtx operands[3];
2773 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2774 left = INTVAL (left_rtx);
2775 size = INTVAL (size_rtx);
2776 insize = size - left;
2777 switch (kind)
2779 case 1:
2780 case 2:
2781 case 3:
2782 case 4:
2784 int ext = kind & 1 ? 8 : 16;
2785 int shift2 = size - ext;
2787 /* Don't expand fine-grained when combining, because that will
2788 make the pattern fail. */
2789 if (! currently_expanding_to_rtl
2790 && ! reload_in_progress && ! reload_completed)
2792 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2793 emit_insn (gen_movsi (dest, source));
2794 break;
2796 if (dest != source)
2797 emit_insn (gen_movsi (dest, source));
2798 operands[0] = dest;
2799 if (ext - insize)
2801 operands[2] = GEN_INT (ext - insize);
2802 gen_shifty_hi_op (ASHIFT, operands);
2804 emit_insn (kind & 1
2805 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2806 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2807 if (kind <= 2)
2809 if (shift2)
2811 operands[2] = GEN_INT (shift2);
2812 gen_shifty_op (ASHIFT, operands);
2815 else
2817 if (shift2 > 0)
2819 if (EXT_SHIFT_SIGNED (shift2))
2821 operands[2] = GEN_INT (shift2 + 1);
2822 gen_shifty_op (ASHIFT, operands);
2823 operands[2] = const1_rtx;
2824 gen_shifty_op (ASHIFTRT, operands);
2825 break;
2827 operands[2] = GEN_INT (shift2);
2828 gen_shifty_hi_op (ASHIFT, operands);
2830 else if (shift2)
2832 operands[2] = GEN_INT (-shift2);
2833 gen_shifty_hi_op (LSHIFTRT, operands);
2835 emit_insn (size <= 8
2836 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2837 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2839 break;
2841 case 5:
2843 int i = 16 - size;
2844 if (! currently_expanding_to_rtl
2845 && ! reload_in_progress && ! reload_completed)
2846 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2847 else
2849 operands[0] = dest;
2850 operands[2] = GEN_INT (16 - insize);
2851 gen_shifty_hi_op (ASHIFT, operands);
2852 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2854 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2855 while (--i >= 0)
2856 gen_ashift (ASHIFTRT, 1, dest);
2857 break;
2859 case 6:
2860 case 7:
2861 /* Don't expand fine-grained when combining, because that will
2862 make the pattern fail. */
2863 if (! currently_expanding_to_rtl
2864 && ! reload_in_progress && ! reload_completed)
2866 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2867 emit_insn (gen_movsi (dest, source));
2868 break;
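/* Sign-extend the low INSIZE bits by hand: mask them out, flip the sign bit
   of the field, then subtract it ((v ^ s) - s sign-extends v when s is the
   sign bit of the field).  */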
2870 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2871 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2872 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2873 operands[0] = dest;
2874 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2875 gen_shifty_op (ASHIFT, operands);
2876 if (kind == 7)
2877 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
2878 break;
2879 default:
2880 return -1;
2882 return 0;
2885 /* Prefix a symbol_ref name with "datalabel". */
2888 gen_datalabel_ref (rtx sym)
2890 const char *str;
2892 if (GET_CODE (sym) == LABEL_REF)
2893 return gen_rtx_CONST (GET_MODE (sym),
2894 gen_rtx_UNSPEC (GET_MODE (sym),
2895 gen_rtvec (1, sym),
2896 UNSPEC_DATALABEL));
2898 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
2900 str = XSTR (sym, 0);
2901 /* Share all SYMBOL_REF strings with the same value - that is important
2902 for cse. */
2903 str = IDENTIFIER_POINTER (get_identifier (str));
2904 XSTR (sym, 0) = str;
2906 return sym;
2910 /* The SH cannot load a large constant into a register, constants have to
2911 come from a pc relative load. The reference of a pc relative load
2912 instruction must be less than 1k in front of the instruction. This
2913 means that we often have to dump a constant inside a function, and
2914 generate code to branch around it.
2916 It is important to minimize this, since the branches will slow things
2917 down and make things bigger.
2919 Worst case code looks like:
2921 mov.l L1,rn
2922 bra L2
2924 align
2925 L1: .long value
2929 mov.l L3,rn
2930 bra L4
2932 align
2933 L3: .long value
2937 We fix this by performing a scan before scheduling, which notices which
2938 instructions need to have their operands fetched from the constant table
2939 and builds the table.
2941 The algorithm is:
2943 scan, find an instruction which needs a pcrel move. Look forward, find the
2944 last barrier which is within MAX_COUNT bytes of the requirement.
2945 If there isn't one, make one. Process all the instructions between
2946 the found instruction and the barrier.
2948 In the above example, we can tell that L3 is within 1k of L1, so
2949 the first move can be shrunk from the 3 insn+constant sequence into
2950 just 1 insn, and the constant moved to L3 to make:
2952 mov.l L1,rn
2954 mov.l L3,rn
2955 bra L4
2957 align
2958 L3:.long value
2959 L4:.long value
2961 Then the second move becomes the target for the shortening process. */
2963 typedef struct
2965 rtx value; /* Value in table. */
2966 rtx label; /* Label of value. */
2967 rtx wend; /* End of window. */
2968 enum machine_mode mode; /* Mode of value. */
2970 /* True if this constant is accessed as part of a post-increment
2971 sequence. Note that HImode constants are never accessed in this way. */
2972 bool part_of_sequence_p;
2973 } pool_node;
2975 /* The maximum number of constants that can fit into one pool, since
2976 constants in the range 0..510 are at least 2 bytes long, and in the
2977 range from there to 1018 at least 4 bytes. */
2979 #define MAX_POOL_SIZE 372
2980 static pool_node pool_vector[MAX_POOL_SIZE];
2981 static int pool_size;
2982 static rtx pool_window_label;
2983 static int pool_window_last;
2985 /* ??? If we need a constant in HImode which is the truncated value of a
2986 constant we need in SImode, we could combine the two entries thus saving
2987 two bytes. Is this common enough to be worth the effort of implementing
2988 it? */
2990 /* ??? This stuff should be done at the same time that we shorten branches.
2991 As it is now, we must assume that all branches are the maximum size, and
2992 this causes us to almost always output constant pools sooner than
2993 necessary. */
2995 /* Add a constant to the pool and return its label. */
2997 static rtx
2998 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3000 int i;
3001 rtx lab, new, ref, newref;
3003 /* First see if we've already got it. */
3004 for (i = 0; i < pool_size; i++)
3006 if (x->code == pool_vector[i].value->code
3007 && mode == pool_vector[i].mode)
3009 if (x->code == CODE_LABEL)
3011 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3012 continue;
3014 if (rtx_equal_p (x, pool_vector[i].value))
3016 lab = new = 0;
3017 if (! last_value
3018 || ! i
3019 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3021 new = gen_label_rtx ();
3022 LABEL_REFS (new) = pool_vector[i].label;
3023 pool_vector[i].label = lab = new;
3025 if (lab && pool_window_label)
3027 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
3028 ref = pool_vector[pool_window_last].wend;
3029 LABEL_NEXTREF (newref) = ref;
3030 pool_vector[pool_window_last].wend = newref;
3032 if (new)
3033 pool_window_label = new;
3034 pool_window_last = i;
3035 return lab;
3040 /* Need a new one. */
3041 pool_vector[pool_size].value = x;
3042 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
3044 lab = 0;
3045 pool_vector[pool_size - 1].part_of_sequence_p = true;
3047 else
3048 lab = gen_label_rtx ();
3049 pool_vector[pool_size].mode = mode;
3050 pool_vector[pool_size].label = lab;
3051 pool_vector[pool_size].wend = NULL_RTX;
3052 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
3053 if (lab && pool_window_label)
3055 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
3056 ref = pool_vector[pool_window_last].wend;
3057 LABEL_NEXTREF (newref) = ref;
3058 pool_vector[pool_window_last].wend = newref;
3060 if (lab)
3061 pool_window_label = lab;
3062 pool_window_last = pool_size;
3063 pool_size++;
3064 return lab;
3067 /* Output the literal table. START, if nonzero, is the first instruction
3068 this table is needed for, and also indicates that there is at least one
3069 casesi_worker_2 instruction; we have to emit the operand3 labels from
3070 these insns at a 4-byte aligned position. BARRIER is the barrier
3071 after which we are to place the table. */
3073 static void
3074 dump_table (rtx start, rtx barrier)
3076 rtx scan = barrier;
3077 int i;
3078 int need_align = 1;
3079 rtx lab, ref;
3080 int have_df = 0;
3082 /* Do two passes, first time dump out the HI sized constants. */
3084 for (i = 0; i < pool_size; i++)
3086 pool_node *p = &pool_vector[i];
3088 if (p->mode == HImode)
3090 if (need_align)
3092 scan = emit_insn_after (gen_align_2 (), scan);
3093 need_align = 0;
3095 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3096 scan = emit_label_after (lab, scan);
3097 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
3098 scan);
3099 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3101 lab = XEXP (ref, 0);
3102 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3105 else if (p->mode == DFmode)
3106 have_df = 1;
3109 need_align = 1;
3111 if (start)
3113 scan = emit_insn_after (gen_align_4 (), scan);
3114 need_align = 0;
3115 for (; start != barrier; start = NEXT_INSN (start))
3116 if (GET_CODE (start) == INSN
3117 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
3119 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
3120 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
3122 scan = emit_label_after (lab, scan);
3125 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
3127 rtx align_insn = NULL_RTX;
3129 scan = emit_label_after (gen_label_rtx (), scan);
3130 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3131 need_align = 0;
3133 for (i = 0; i < pool_size; i++)
3135 pool_node *p = &pool_vector[i];
3137 switch (p->mode)
3139 case HImode:
3140 break;
3141 case SImode:
3142 case SFmode:
3143 if (align_insn && !p->part_of_sequence_p)
3145 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3146 emit_label_before (lab, align_insn);
3147 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
3148 align_insn);
3149 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3151 lab = XEXP (ref, 0);
3152 emit_insn_before (gen_consttable_window_end (lab),
3153 align_insn);
3155 delete_insn (align_insn);
3156 align_insn = NULL_RTX;
3157 continue;
3159 else
3161 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3162 scan = emit_label_after (lab, scan);
3163 scan = emit_insn_after (gen_consttable_4 (p->value,
3164 const0_rtx), scan);
3165 need_align = ! need_align;
3167 break;
3168 case DFmode:
3169 if (need_align)
3171 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3172 align_insn = scan;
3173 need_align = 0;
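/* Fall through.  */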
3175 case DImode:
3176 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3177 scan = emit_label_after (lab, scan);
3178 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3179 scan);
3180 break;
3181 default:
3182 gcc_unreachable ();
3185 if (p->mode != HImode)
3187 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3189 lab = XEXP (ref, 0);
3190 scan = emit_insn_after (gen_consttable_window_end (lab),
3191 scan);
3196 pool_size = 0;
3199 for (i = 0; i < pool_size; i++)
3201 pool_node *p = &pool_vector[i];
3203 switch (p->mode)
3205 case HImode:
3206 break;
3207 case SImode:
3208 case SFmode:
3209 if (need_align)
3211 need_align = 0;
3212 scan = emit_label_after (gen_label_rtx (), scan);
3213 scan = emit_insn_after (gen_align_4 (), scan);
3215 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3216 scan = emit_label_after (lab, scan);
3217 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
3218 scan);
3219 break;
3220 case DFmode:
3221 case DImode:
3222 if (need_align)
3224 need_align = 0;
3225 scan = emit_label_after (gen_label_rtx (), scan);
3226 scan = emit_insn_after (gen_align_4 (), scan);
3228 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3229 scan = emit_label_after (lab, scan);
3230 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3231 scan);
3232 break;
3233 default:
3234 gcc_unreachable ();
3237 if (p->mode != HImode)
3239 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3241 lab = XEXP (ref, 0);
3242 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3247 scan = emit_insn_after (gen_consttable_end (), scan);
3248 scan = emit_barrier_after (scan);
3249 pool_size = 0;
3250 pool_window_label = NULL_RTX;
3251 pool_window_last = 0;
3254 /* Return nonzero if constant would be an ok source for a
3255 mov.w instead of a mov.l. */
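/* (A pc-relative mov.w load fetches a sign-extended 16-bit value, hence the
   signed 16-bit range check below.)  */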
3257 static int
3258 hi_const (rtx src)
3260 return (GET_CODE (src) == CONST_INT
3261 && INTVAL (src) >= -32768
3262 && INTVAL (src) <= 32767);
3265 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3267 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
3268 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
3269 need to fix it if the input value is CONST_OK_FOR_I08. */
3271 static int
3272 broken_move (rtx insn)
3274 if (GET_CODE (insn) == INSN)
3276 rtx pat = PATTERN (insn);
3277 if (GET_CODE (pat) == PARALLEL)
3278 pat = XVECEXP (pat, 0, 0);
3279 if (GET_CODE (pat) == SET
3280 /* We can load any 8 bit value if we don't care what the high
3281 order bits end up as. */
3282 && GET_MODE (SET_DEST (pat)) != QImode
3283 && (CONSTANT_P (SET_SRC (pat))
3284 /* Match mova_const. */
3285 || (GET_CODE (SET_SRC (pat)) == UNSPEC
3286 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3287 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
3288 && ! (TARGET_SH2E
3289 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3290 && (fp_zero_operand (SET_SRC (pat))
3291 || fp_one_operand (SET_SRC (pat)))
3292 /* ??? If this is a -m4 or -m4-single compilation, in general
3293 we don't know the current setting of fpscr, so disable fldi.
3294 There is an exception if this was a register-register move
3295 before reload - and hence it was ascertained that we have
3296 single precision setting - and in a post-reload optimization
3297 we changed this to do a constant load. In that case
3298 we don't have an r0 clobber, hence we must use fldi. */
3299 && (! TARGET_SH4 || TARGET_FMOVD
3300 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3301 == SCRATCH))
3302 && GET_CODE (SET_DEST (pat)) == REG
3303 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3304 && ! (TARGET_SH2A
3305 && GET_MODE (SET_DEST (pat)) == SImode
3306 && GET_CODE (SET_SRC (pat)) == CONST_INT
3307 && CONST_OK_FOR_I20 (INTVAL (SET_SRC (pat))))
3308 && (GET_CODE (SET_SRC (pat)) != CONST_INT
3309 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
3310 return 1;
3313 return 0;
3316 static int
3317 mova_p (rtx insn)
3319 return (GET_CODE (insn) == INSN
3320 && GET_CODE (PATTERN (insn)) == SET
3321 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3322 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3323 /* Don't match mova_const. */
3324 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
3327 /* Fix up a mova from a switch that went out of range. */
3328 static void
3329 fixup_mova (rtx mova)
3331 if (! flag_pic)
3333 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3334 INSN_CODE (mova) = -1;
3336 else
3338 rtx worker = mova;
3339 rtx lab = gen_label_rtx ();
3340 rtx wpat, wpat0, wpat1, wsrc, diff;
3344 worker = NEXT_INSN (worker);
3345 gcc_assert (worker
3346 && GET_CODE (worker) != CODE_LABEL
3347 && GET_CODE (worker) != JUMP_INSN);
3348 } while (recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3349 wpat = PATTERN (worker);
3350 wpat0 = XVECEXP (wpat, 0, 0);
3351 wpat1 = XVECEXP (wpat, 0, 1);
3352 wsrc = SET_SRC (wpat0);
3353 PATTERN (worker) = (gen_casesi_worker_2
3354 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3355 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3356 XEXP (wpat1, 0)));
3357 INSN_CODE (worker) = -1;
3358 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3359 gen_rtx_LABEL_REF (Pmode, lab));
3360 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3361 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3362 INSN_CODE (mova) = -1;
3366 /* Find the last barrier from insn FROM which is close enough to hold the
3367 constant pool. If we can't find one, then create one near the end of
3368 the range. */
3370 static rtx
3371 find_barrier (int num_mova, rtx mova, rtx from)
3373 int count_si = 0;
3374 int count_hi = 0;
3375 int found_hi = 0;
3376 int found_si = 0;
3377 int found_di = 0;
3378 int hi_align = 2;
3379 int si_align = 2;
3380 int leading_mova = num_mova;
3381 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3382 int si_limit;
3383 int hi_limit;
3385 /* For HImode: range is 510, add 4 because pc counts from address of
3386 second instruction after this one, subtract 2 for the jump instruction
3387 that we may need to emit before the table, subtract 2 for the instruction
3388 that fills the jump delay slot (in very rare cases, reorg will take an
3389 instruction from after the constant pool or will leave the delay slot
3390 empty). This gives 510.
3391 For SImode: range is 1020, add 4 because pc counts from address of
3392 second instruction after this one, subtract 2 in case pc is 2 byte
3393 aligned, subtract 2 for the jump instruction that we may need to emit
3394 before the table, subtract 2 for the instruction that fills the jump
3395 delay slot. This gives 1018. */
3397 /* The branch will always be shortened now that the reference address for
3398 forward branches is the successor address, thus we need no longer make
3399 adjustments to the [sh]i_limit for -O0. */
3401 si_limit = 1018;
3402 hi_limit = 510;
3404 while (from && count_si < si_limit && count_hi < hi_limit)
3406 int inc = get_attr_length (from);
3407 int new_align = 1;
3409 if (GET_CODE (from) == CODE_LABEL)
3411 if (optimize)
3412 new_align = 1 << label_to_alignment (from);
3413 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3414 new_align = 1 << barrier_align (from);
3415 else
3416 new_align = 1;
3417 inc = 0;
3420 if (GET_CODE (from) == BARRIER)
3423 found_barrier = from;
3425 /* If we are at the end of the function, or in front of an alignment
3426 instruction, we need not insert an extra alignment. We prefer
3427 this kind of barrier. */
3428 if (barrier_align (from) > 2)
3429 good_barrier = from;
3432 if (broken_move (from))
3434 rtx pat, src, dst;
3435 enum machine_mode mode;
3437 pat = PATTERN (from);
3438 if (GET_CODE (pat) == PARALLEL)
3439 pat = XVECEXP (pat, 0, 0);
3440 src = SET_SRC (pat);
3441 dst = SET_DEST (pat);
3442 mode = GET_MODE (dst);
3444 /* We must explicitly check the mode, because sometimes the
3445 front end will generate code to load unsigned constants into
3446 HImode targets without properly sign extending them. */
3447 if (mode == HImode
3448 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3450 found_hi += 2;
3451 /* We put the short constants before the long constants, so
3452 we must count the length of short constants in the range
3453 for the long constants. */
3454 /* ??? This isn't optimal, but is easy to do. */
3455 si_limit -= 2;
3457 else
3459 /* We dump DF/DI constants before SF/SI ones, because
3460 the limit is the same, but the alignment requirements
3461 are higher. We may waste up to 4 additional bytes
3462 for alignment, and the DF/DI constant may have
3463 another SF/SI constant placed before it. */
3464 if (TARGET_SHCOMPACT
3465 && ! found_di
3466 && (mode == DFmode || mode == DImode))
3468 found_di = 1;
3469 si_limit -= 8;
3471 while (si_align > 2 && found_si + si_align - 2 > count_si)
3472 si_align >>= 1;
3473 if (found_si > count_si)
3474 count_si = found_si;
3475 found_si += GET_MODE_SIZE (mode);
3476 if (num_mova)
3477 si_limit -= GET_MODE_SIZE (mode);
3481 if (mova_p (from))
3483 if (! num_mova++)
3485 leading_mova = 0;
3486 mova = from;
3487 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
3489 if (found_si > count_si)
3490 count_si = found_si;
3492 else if (GET_CODE (from) == JUMP_INSN
3493 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3494 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3496 if (num_mova)
3497 num_mova--;
3498 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3500 /* We have just passed the barrier in front of the
3501 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3502 the ADDR_DIFF_VEC is accessed as data, just like our pool
3503 constants, this is a good opportunity to accommodate what
3504 we have gathered so far.
3505 If we waited any longer, we could end up at a barrier in
3506 front of code, which gives worse cache usage for separated
3507 instruction / data caches. */
3508 good_barrier = found_barrier;
3509 break;
3511 else
3513 rtx body = PATTERN (from);
3514 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
3517 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3518 else if (GET_CODE (from) == JUMP_INSN
3519 && ! TARGET_SH2
3520 && ! TARGET_SMALLCODE)
3521 new_align = 4;
3523 if (found_si)
3525 count_si += inc;
3526 if (new_align > si_align)
3528 si_limit -= (count_si - 1) & (new_align - si_align);
3529 si_align = new_align;
3531 count_si = (count_si + new_align - 1) & -new_align;
3533 if (found_hi)
3535 count_hi += inc;
3536 if (new_align > hi_align)
3538 hi_limit -= (count_hi - 1) & (new_align - hi_align);
3539 hi_align = new_align;
3541 count_hi = (count_hi + new_align - 1) & -new_align;
3543 from = NEXT_INSN (from);
3546 if (num_mova)
3548 if (leading_mova)
3550 /* Try as we might, the leading mova is out of range. Change
3551 it into a load (which will become a pcload) and retry. */
3552 fixup_mova (mova);
3553 return find_barrier (0, 0, mova);
3555 else
3557 /* Insert the constant pool table before the mova instruction,
3558 to prevent the mova label reference from going out of range. */
3559 from = mova;
3560 good_barrier = found_barrier = barrier_before_mova;
3564 if (found_barrier)
3566 if (good_barrier && next_real_insn (found_barrier))
3567 found_barrier = good_barrier;
3569 else
3571 /* We didn't find a barrier in time to dump our stuff,
3572 so we'll make one. */
3573 rtx label = gen_label_rtx ();
3575 /* If we exceeded the range, then we must back up over the last
3576 instruction we looked at. Otherwise, we just need to undo the
3577 NEXT_INSN at the end of the loop. */
3578 if (count_hi > hi_limit || count_si > si_limit)
3579 from = PREV_INSN (PREV_INSN (from));
3580 else
3581 from = PREV_INSN (from);
3583 /* Walk back to be just before any jump or label.
3584 Putting it before a label reduces the number of times the branch
3585 around the constant pool table will be hit. Putting it before
3586 a jump makes it more likely that the bra delay slot will be
3587 filled. */
3588 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
3589 || GET_CODE (from) == CODE_LABEL)
3590 from = PREV_INSN (from);
3592 from = emit_jump_insn_after (gen_jump (label), from);
3593 JUMP_LABEL (from) = label;
3594 LABEL_NUSES (label) = 1;
3595 found_barrier = emit_barrier_after (from);
3596 emit_label_after (label, found_barrier);
3599 return found_barrier;
3602 /* If the instruction INSN is implemented by a special function, and we can
3603 positively find the register that is used to call the sfunc, and this
3604 register is not used anywhere else in this instruction - except as the
3605 destination of a set, return this register; else, return 0. */
3607 sfunc_uses_reg (rtx insn)
3609 int i;
3610 rtx pattern, part, reg_part, reg;
3612 if (GET_CODE (insn) != INSN)
3613 return 0;
3614 pattern = PATTERN (insn);
3615 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
3616 return 0;
3618 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3620 part = XVECEXP (pattern, 0, i);
3621 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
3622 reg_part = part;
3624 if (! reg_part)
3625 return 0;
3626 reg = XEXP (reg_part, 0);
3627 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
3629 part = XVECEXP (pattern, 0, i);
3630 if (part == reg_part || GET_CODE (part) == CLOBBER)
3631 continue;
3632 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
3633 && GET_CODE (SET_DEST (part)) == REG)
3634 ? SET_SRC (part) : part)))
3635 return 0;
3637 return reg;
3640 /* See if the only way in which INSN uses REG is by calling it, or by
3641 setting it while calling it. Set *SET to a SET rtx if the register
3642 is set by INSN. */
3644 static int
3645 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
3647 rtx pattern, reg2;
3649 *set = NULL_RTX;
3651 reg2 = sfunc_uses_reg (insn);
3652 if (reg2 && REGNO (reg2) == REGNO (reg))
3654 pattern = single_set (insn);
3655 if (pattern
3656 && GET_CODE (SET_DEST (pattern)) == REG
3657 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3658 *set = pattern;
3659 return 0;
3661 if (GET_CODE (insn) != CALL_INSN)
3663 /* We don't use rtx_equal_p because we don't care if the mode is
3664 different. */
3665 pattern = single_set (insn);
3666 if (pattern
3667 && GET_CODE (SET_DEST (pattern)) == REG
3668 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3670 rtx par, part;
3671 int i;
3673 *set = pattern;
3674 par = PATTERN (insn);
3675 if (GET_CODE (par) == PARALLEL)
3676 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
3678 part = XVECEXP (par, 0, i);
3679 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
3680 return 1;
3682 return reg_mentioned_p (reg, SET_SRC (pattern));
3685 return 1;
3688 pattern = PATTERN (insn);
3690 if (GET_CODE (pattern) == PARALLEL)
3692 int i;
3694 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3695 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
3696 return 1;
3697 pattern = XVECEXP (pattern, 0, 0);
3700 if (GET_CODE (pattern) == SET)
3702 if (reg_mentioned_p (reg, SET_DEST (pattern)))
3704 /* We don't use rtx_equal_p, because we don't care if the
3705 mode is different. */
3706 if (GET_CODE (SET_DEST (pattern)) != REG
3707 || REGNO (reg) != REGNO (SET_DEST (pattern)))
3708 return 1;
3710 *set = pattern;
3713 pattern = SET_SRC (pattern);
3716 if (GET_CODE (pattern) != CALL
3717 || GET_CODE (XEXP (pattern, 0)) != MEM
3718 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
3719 return 1;
3721 return 0;
3724 /* Given X, a pattern of an insn or a part of it, return a mask of used
3725 general registers. Bits 0..15 mean that the respective registers
3726 are used as inputs in the instruction. Bits 16..31 mean that the
3727 registers 0..15, respectively, are used as outputs, or are clobbered.
3728 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
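/* For example, assuming an SImode value occupies a single hard register,
   (set (reg:SI 1) (reg:SI 2)) yields (1 << 17) | (1 << 2): r2 is used as an
   input and r1 as an output.  */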
3730 regs_used (rtx x, int is_dest)
3732 enum rtx_code code;
3733 const char *fmt;
3734 int i, used = 0;
3736 if (! x)
3737 return used;
3738 code = GET_CODE (x);
3739 switch (code)
3741 case REG:
3742 if (REGNO (x) < 16)
3743 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3744 << (REGNO (x) + is_dest));
3745 return 0;
3746 case SUBREG:
3748 rtx y = SUBREG_REG (x);
3750 if (GET_CODE (y) != REG)
3751 break;
3752 if (REGNO (y) < 16)
3753 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3754 << (REGNO (y) +
3755 subreg_regno_offset (REGNO (y),
3756 GET_MODE (y),
3757 SUBREG_BYTE (x),
3758 GET_MODE (x)) + is_dest));
3759 return 0;
3761 case SET:
3762 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
3763 case RETURN:
3764 /* If there was a return value, it must have been indicated with USE. */
3765 return 0x00ffff00;
3766 case CLOBBER:
3767 is_dest = 1;
3768 break;
3769 case MEM:
3770 is_dest = 0;
3771 break;
3772 case CALL:
3773 used |= 0x00ff00f0;
3774 break;
3775 default:
3776 break;
3779 fmt = GET_RTX_FORMAT (code);
3781 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3783 if (fmt[i] == 'E')
3785 register int j;
3786 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3787 used |= regs_used (XVECEXP (x, i, j), is_dest);
3789 else if (fmt[i] == 'e')
3790 used |= regs_used (XEXP (x, i), is_dest);
3792 return used;
3795 /* Create an instruction that prevents redirection of a conditional branch
3796 to the destination of the JUMP with address ADDR.
3797 If the branch needs to be implemented as an indirect jump, try to find
3798 a scratch register for it.
3799 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3800 If any preceding insn that doesn't fit into a delay slot is good enough,
3801 pass 1. Pass 2 if a definite blocking insn is needed.
3802 -1 is used internally to avoid deep recursion.
3803 If a blocking instruction is made or recognized, return it. */
3805 static rtx
3806 gen_block_redirect (rtx jump, int addr, int need_block)
3808 int dead = 0;
3809 rtx prev = prev_nonnote_insn (jump);
3810 rtx dest;
3812 /* First, check if we already have an instruction that satisfies our need. */
3813 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3815 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3816 return prev;
3817 if (GET_CODE (PATTERN (prev)) == USE
3818 || GET_CODE (PATTERN (prev)) == CLOBBER
3819 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3820 prev = jump;
3821 else if ((need_block &= ~1) < 0)
3822 return prev;
3823 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
3824 need_block = 0;
3826 if (GET_CODE (PATTERN (jump)) == RETURN)
3828 if (! need_block)
3829 return prev;
3830 /* Reorg even does nasty things with return insns that cause branches
3831 to go out of range - see find_end_label and callers. */
3832 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
3834 /* We can't use JUMP_LABEL here because it might be undefined
3835 when not optimizing. */
3836 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
3837 /* If the branch is out of range, try to find a scratch register for it. */
3838 if (optimize
3839 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3840 > 4092 + 4098))
3842 rtx scan;
3843 /* Don't look for the stack pointer as a scratch register,
3844 it would cause trouble if an interrupt occurred. */
3845 unsigned try = 0x7fff, used;
3846 int jump_left = flag_expensive_optimizations + 1;
3848 /* It is likely that the most recent eligible instruction is wanted for
3849 the delay slot. Therefore, find out which registers it uses, and
3850 try to avoid using them. */
3852 for (scan = jump; (scan = PREV_INSN (scan)); )
3854 enum rtx_code code;
3856 if (INSN_DELETED_P (scan))
3857 continue;
3858 code = GET_CODE (scan);
3859 if (code == CODE_LABEL || code == JUMP_INSN)
3860 break;
3861 if (code == INSN
3862 && GET_CODE (PATTERN (scan)) != USE
3863 && GET_CODE (PATTERN (scan)) != CLOBBER
3864 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
3866 try &= ~regs_used (PATTERN (scan), 0);
3867 break;
3870 for (used = dead = 0, scan = JUMP_LABEL (jump);
3871 (scan = NEXT_INSN (scan)); )
3873 enum rtx_code code;
3875 if (INSN_DELETED_P (scan))
3876 continue;
3877 code = GET_CODE (scan);
3878 if (INSN_P (scan))
3880 used |= regs_used (PATTERN (scan), 0);
3881 if (code == CALL_INSN)
3882 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
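/* A register that the code scanned so far writes (bit set in the upper
   half of USED) without ever reading (lower half) does not carry a live
   value across the branch, so it becomes a candidate scratch register.  */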
3883 dead |= (used >> 16) & ~used;
3884 if (dead & try)
3886 dead &= try;
3887 break;
3889 if (code == JUMP_INSN)
3891 if (jump_left-- && simplejump_p (scan))
3892 scan = JUMP_LABEL (scan);
3893 else
3894 break;
3898 /* Mask out the stack pointer again, in case it was
3899 the only 'free' register we have found. */
3900 dead &= 0x7fff;
3902 /* If the immediate destination is still in range, check for possible
3903 threading with a jump beyond the delay slot insn.
3904 Don't check if we are called recursively; the jump has been or will be
3905 checked in a different invocation in that case. */
3907 else if (optimize && need_block >= 0)
3909 rtx next = next_active_insn (next_active_insn (dest));
3910 if (next && GET_CODE (next) == JUMP_INSN
3911 && GET_CODE (PATTERN (next)) == SET
3912 && recog_memoized (next) == CODE_FOR_jump_compact)
3914 dest = JUMP_LABEL (next);
3915 if (dest
3916 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3917 > 4092 + 4098))
3918 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
3922 if (dead)
3924 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
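/* DEAD & -DEAD isolates the lowest set bit of DEAD, and exact_log2 turns
   that single bit back into the number of one known-dead register.  */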
3926 /* It would be nice if we could convert the jump into an indirect
3927 jump / far branch right now, and thus expose all constituent
3928 instructions to further optimization. However, reorg uses
3929 simplejump_p to determine if there is an unconditional jump where
3930 it should try to schedule instructions from the target of the
3931 branch; simplejump_p fails for indirect jumps even if they have
3932 a JUMP_LABEL. */
3933 rtx insn = emit_insn_before (gen_indirect_jump_scratch
3934 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
3935 , jump);
3936 /* ??? We would like this to have the scope of the jump, but that
3937 scope will change when a delay slot insn of an inner scope is added.
3938 Hence, after delay slot scheduling, we'll have to expect
3939 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
3940 the jump. */
3942 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
3943 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
3944 return insn;
3946 else if (need_block)
3947 /* We can't use JUMP_LABEL here because it might be undefined
3948 when not optimizing. */
3949 return emit_insn_before (gen_block_branch_redirect
3950 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
3951 , jump);
3952 return prev;
3955 #define CONDJUMP_MIN -252
3956 #define CONDJUMP_MAX 262
3957 struct far_branch
3959 /* A label (to be placed) in front of the jump
3960 that jumps to our ultimate destination. */
3961 rtx near_label;
3962 /* Where we are going to insert it if we cannot move the jump any farther,
3963 or the jump itself if we have picked up an existing jump. */
3964 rtx insert_place;
3965 /* The ultimate destination. */
3966 rtx far_label;
3967 struct far_branch *prev;
3968 /* If the branch has already been created, its address;
3969 else the address of its first prospective user. */
3970 int address;
3973 static void gen_far_branch (struct far_branch *);
3974 enum mdep_reorg_phase_e mdep_reorg_phase;
3975 static void
3976 gen_far_branch (struct far_branch *bp)
3978 rtx insn = bp->insert_place;
3979 rtx jump;
3980 rtx label = gen_label_rtx ();
3981 int ok;
3983 emit_label_after (label, insn);
3984 if (bp->far_label)
3986 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
3987 LABEL_NUSES (bp->far_label)++;
3989 else
3990 jump = emit_jump_insn_after (gen_return (), insn);
3991 /* Emit a barrier so that reorg knows that any following instructions
3992 are not reachable via a fall-through path.
3993 But don't do this when not optimizing, since we wouldn't suppress the
3994 alignment for the barrier then, and could end up with out-of-range
3995 pc-relative loads. */
3996 if (optimize)
3997 emit_barrier_after (jump);
3998 emit_label_after (bp->near_label, insn);
3999 JUMP_LABEL (jump) = bp->far_label;
4000 ok = invert_jump (insn, label, 1);
4001 gcc_assert (ok);
4003 /* If we are branching around a jump (rather than a return), prevent
4004 reorg from using an insn from the jump target as the delay slot insn -
4005 when reorg did this, it pessimized code (we would rather hide the delay slot)
4006 and it could cause branches to go out of range. */
4007 if (bp->far_label)
4008 (emit_insn_after
4009 (gen_stuff_delay_slot
4010 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
4011 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
4012 insn));
4013 /* Prevent reorg from undoing our splits. */
4014 gen_block_redirect (jump, bp->address += 2, 2);
4017 /* Fix up ADDR_DIFF_VECs. */
4018 void
4019 fixup_addr_diff_vecs (rtx first)
4021 rtx insn;
4023 for (insn = first; insn; insn = NEXT_INSN (insn))
4025 rtx vec_lab, pat, prev, prevpat, x, braf_label;
4027 if (GET_CODE (insn) != JUMP_INSN
4028 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
4029 continue;
4030 pat = PATTERN (insn);
4031 vec_lab = XEXP (XEXP (pat, 0), 0);
4033 /* Search the matching casesi_jump_2. */
4034 for (prev = vec_lab; ; prev = PREV_INSN (prev))
4036 if (GET_CODE (prev) != JUMP_INSN)
4037 continue;
4038 prevpat = PATTERN (prev);
4039 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
4040 continue;
4041 x = XVECEXP (prevpat, 0, 1);
4042 if (GET_CODE (x) != USE)
4043 continue;
4044 x = XEXP (x, 0);
4045 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
4046 break;
4048 /* FIXME: This is a bug in the optimizer, but it seems harmless
4049 to just avoid panicking. */
4050 if (!prev)
4051 continue;
4053 /* Emit the reference label of the braf where it belongs, right after
4054 the casesi_jump_2 (i.e. braf). */
4055 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
4056 emit_label_after (braf_label, prev);
4058 /* Fix up the ADDR_DIFF_VEC to be relative
4059 to the reference address of the braf. */
4060 XEXP (XEXP (pat, 0), 0) = braf_label;
4064 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
4065 a barrier. Return the base 2 logarithm of the desired alignment. */
4066 int
4067 barrier_align (rtx barrier_or_label)
4069 rtx next = next_real_insn (barrier_or_label), pat, prev;
4070 int slot, credit, jump_to_next = 0;
4072 if (! next)
4073 return 0;
4075 pat = PATTERN (next);
4077 if (GET_CODE (pat) == ADDR_DIFF_VEC)
4078 return 2;
4080 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
4081 /* This is a barrier in front of a constant table. */
4082 return 0;
4084 prev = prev_real_insn (barrier_or_label);
4085 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
4087 pat = PATTERN (prev);
4088 /* If this is a very small table, we want to keep the alignment after
4089 the table to the minimum for proper code alignment. */
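/* Since barrier_align returns a log2, the 1 << TARGET_SHMEDIA below asks
   for 2-byte alignment when TARGET_SHMEDIA is 0 and for 4-byte alignment
   when it is 1.  */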
4090 return ((TARGET_SMALLCODE
4091 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
4092 <= (unsigned) 1 << (CACHE_LOG - 2)))
4093 ? 1 << TARGET_SHMEDIA : align_jumps_log);
4096 if (TARGET_SMALLCODE)
4097 return 0;
4099 if (! TARGET_SH2 || ! optimize)
4100 return align_jumps_log;
4102 /* When fixing up pcloads, a constant table might be inserted just before
4103 the basic block that ends with the barrier. Thus, we can't trust the
4104 instruction lengths before that. */
4105 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
4107 /* Check if there is an immediately preceding branch to the insn beyond
4108 the barrier. We must weigh the cost of discarding useful information
4109 from the current cache line when executing this branch and there is
4110 an alignment, against that of fetching unneeded insns in front of the
4111 branch target when there is no alignment. */
4113 /* There are two delay_slot cases to consider. One is the simple case
4114 where the preceding branch is to the insn beyond the barrier (simple
4115 delay slot filling), and the other is where the preceding branch has
4116 a delay slot that is a duplicate of the insn after the barrier
4117 (fill_eager_delay_slots) and the branch is to the insn after the insn
4118 after the barrier. */
4120 /* PREV is presumed to be the JUMP_INSN for the barrier under
4121 investigation. Skip to the insn before it. */
4122 prev = prev_real_insn (prev);
4124 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
4125 credit >= 0 && prev && GET_CODE (prev) == INSN;
4126 prev = prev_real_insn (prev))
4128 jump_to_next = 0;
4129 if (GET_CODE (PATTERN (prev)) == USE
4130 || GET_CODE (PATTERN (prev)) == CLOBBER)
4131 continue;
4132 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
4134 prev = XVECEXP (PATTERN (prev), 0, 1);
4135 if (INSN_UID (prev) == INSN_UID (next))
4137 /* Delay slot was filled with insn at jump target. */
4138 jump_to_next = 1;
4139 continue;
4143 if (slot &&
4144 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4145 slot = 0;
4146 credit -= get_attr_length (prev);
4148 if (prev
4149 && GET_CODE (prev) == JUMP_INSN
4150 && JUMP_LABEL (prev))
4152 rtx x;
4153 if (jump_to_next
4154 || next_real_insn (JUMP_LABEL (prev)) == next
4155 /* If relax_delay_slots() decides NEXT was redundant
4156 with some previous instruction, it will have
4157 redirected PREV's jump to the following insn. */
4158 || JUMP_LABEL (prev) == next_nonnote_insn (next)
4159 /* There is no upper bound on redundant instructions
4160 that might have been skipped, but we must not put an
4161 alignment where none had been before. */
4162 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
4163 (INSN_P (x)
4164 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
4165 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
4166 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
4168 rtx pat = PATTERN (prev);
4169 if (GET_CODE (pat) == PARALLEL)
4170 pat = XVECEXP (pat, 0, 0);
4171 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
4172 return 0;
4177 return align_jumps_log;
4180 /* If we are inside a phony loop, almost any kind of label can turn up as the
4181 first one in the loop. Aligning a braf label causes incorrect switch
4182 destination addresses; we can detect braf labels because they are
4183 followed by a BARRIER.
4184 Applying loop alignment to small constant or switch tables is a waste
4185 of space, so we suppress this too. */
4186 int
4187 sh_loop_align (rtx label)
4189 rtx next = label;
4192 next = next_nonnote_insn (next);
4193 while (next && GET_CODE (next) == CODE_LABEL);
4195 if (! next
4196 || ! INSN_P (next)
4197 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
4198 || recog_memoized (next) == CODE_FOR_consttable_2)
4199 return 0;
4201 return align_loops_log;
4204 /* Do a final pass over the function, just before delayed branch
4205 scheduling. */
4207 static void
4208 sh_reorg (void)
4210 rtx first, insn, mova = NULL_RTX;
4211 int num_mova;
4212 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
4213 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
4215 first = get_insns ();
4217 /* We must split call insns before introducing `mova's. If we're
4218 optimizing, they'll have already been split. Otherwise, make
4219 sure we don't split them too late. */
4220 if (! optimize)
4221 split_all_insns_noflow ();
4223 if (TARGET_SHMEDIA)
4224 return;
4226 /* If relaxing, generate pseudo-ops to associate function calls with
4227 the symbols they call. It does no harm to not generate these
4228 pseudo-ops. However, when we can generate them, it enables the
4229 linker to potentially relax the jsr to a bsr, and eliminate the
4230 register load and, possibly, the constant pool entry. */
4232 mdep_reorg_phase = SH_INSERT_USES_LABELS;
4233 if (TARGET_RELAX)
4235 /* Remove all REG_LABEL notes. We want to use them for our own
4236 purposes. This works because none of the remaining passes
4237 need to look at them.
4239 ??? But it may break in the future. We should use a machine
4240 dependent REG_NOTE, or some other approach entirely. */
4241 for (insn = first; insn; insn = NEXT_INSN (insn))
4243 if (INSN_P (insn))
4245 rtx note;
4247 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
4248 remove_note (insn, note);
4252 for (insn = first; insn; insn = NEXT_INSN (insn))
4254 rtx pattern, reg, link, set, scan, dies, label;
4255 int rescan = 0, foundinsn = 0;
4257 if (GET_CODE (insn) == CALL_INSN)
4259 pattern = PATTERN (insn);
4261 if (GET_CODE (pattern) == PARALLEL)
4262 pattern = XVECEXP (pattern, 0, 0);
4263 if (GET_CODE (pattern) == SET)
4264 pattern = SET_SRC (pattern);
4266 if (GET_CODE (pattern) != CALL
4267 || GET_CODE (XEXP (pattern, 0)) != MEM)
4268 continue;
4270 reg = XEXP (XEXP (pattern, 0), 0);
4272 else
4274 reg = sfunc_uses_reg (insn);
4275 if (! reg)
4276 continue;
4279 if (GET_CODE (reg) != REG)
4280 continue;
4282 /* This is a function call via REG. If the only uses of REG
4283 between the time that it is set and the time that it dies
4284 are in function calls, then we can associate all the
4285 function calls with the setting of REG. */
4287 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
4289 if (REG_NOTE_KIND (link) != 0)
4290 continue;
4291 set = single_set (XEXP (link, 0));
4292 if (set && rtx_equal_p (reg, SET_DEST (set)))
4294 link = XEXP (link, 0);
4295 break;
4299 if (! link)
4301 /* ??? Sometimes global register allocation will have
4302 deleted the insn pointed to by LOG_LINKS. Try
4303 scanning backward to find where the register is set. */
4304 for (scan = PREV_INSN (insn);
4305 scan && GET_CODE (scan) != CODE_LABEL;
4306 scan = PREV_INSN (scan))
4308 if (! INSN_P (scan))
4309 continue;
4311 if (! reg_mentioned_p (reg, scan))
4312 continue;
4314 if (noncall_uses_reg (reg, scan, &set))
4315 break;
4317 if (set)
4319 link = scan;
4320 break;
4325 if (! link)
4326 continue;
4328 /* The register is set at LINK. */
4330 /* We can only optimize the function call if the register is
4331 being set to a symbol. In theory, we could sometimes
4332 optimize calls to a constant location, but the assembler
4333 and linker do not support that at present. */
4334 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4335 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4336 continue;
4338 /* Scan forward from LINK to the place where REG dies, and
4339 make sure that the only insns which use REG are
4340 themselves function calls. */
4342 /* ??? This doesn't work for call targets that were allocated
4343 by reload, since there may not be a REG_DEAD note for the
4344 register. */
4346 dies = NULL_RTX;
4347 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4349 rtx scanset;
4351 /* Don't try to trace forward past a CODE_LABEL if we haven't
4352 seen INSN yet. Ordinarily, we will only find the setting insn
4353 in LOG_LINKS if it is in the same basic block. However,
4354 cross-jumping can insert code labels in between the load and
4355 the call, and can result in situations where a single call
4356 insn may have two targets depending on where we came from. */
4358 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4359 break;
4361 if (! INSN_P (scan))
4362 continue;
4364 /* Don't try to trace forward past a JUMP. To optimize
4365 safely, we would have to check that all the
4366 instructions at the jump destination did not use REG. */
4368 if (GET_CODE (scan) == JUMP_INSN)
4369 break;
4371 if (! reg_mentioned_p (reg, scan))
4372 continue;
4374 if (noncall_uses_reg (reg, scan, &scanset))
4375 break;
4377 if (scan == insn)
4378 foundinsn = 1;
4380 if (scan != insn
4381 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4383 /* There is a function call to this register other
4384 than the one we are checking. If we optimize
4385 this call, we need to rescan again below. */
4386 rescan = 1;
4389 /* ??? We shouldn't have to worry about SCANSET here.
4390 We should just be able to check for a REG_DEAD note
4391 on a function call. However, the REG_DEAD notes are
4392 apparently not dependable around libcalls; c-torture
4393 execute/920501-2 is a test case. If SCANSET is set,
4394 then this insn sets the register, so it must have
4395 died earlier. Unfortunately, this will only handle
4396 the cases in which the register is, in fact, set in a
4397 later insn. */
4399 /* ??? We shouldn't have to use FOUNDINSN here.
4400 However, the LOG_LINKS fields are apparently not
4401 entirely reliable around libcalls;
4402 newlib/libm/math/e_pow.c is a test case. Sometimes
4403 an insn will appear in LOG_LINKS even though it is
4404 not the most recent insn which sets the register. */
4406 if (foundinsn
4407 && (scanset
4408 || find_reg_note (scan, REG_DEAD, reg)))
4410 dies = scan;
4411 break;
4415 if (! dies)
4417 /* Either there was a branch, or some insn used REG
4418 other than as a function call address. */
4419 continue;
4422 /* Create a code label, and put it in a REG_LABEL note on
4423 the insn which sets the register, and on each call insn
4424 which uses the register. In final_prescan_insn we look
4425 for the REG_LABEL notes, and output the appropriate label
4426 or pseudo-op. */
4428 label = gen_label_rtx ();
4429 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
4430 REG_NOTES (link));
4431 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
4432 REG_NOTES (insn));
4433 if (rescan)
4435 scan = link;
4438 rtx reg2;
4440 scan = NEXT_INSN (scan);
4441 if (scan != insn
4442 && ((GET_CODE (scan) == CALL_INSN
4443 && reg_mentioned_p (reg, scan))
4444 || ((reg2 = sfunc_uses_reg (scan))
4445 && REGNO (reg2) == REGNO (reg))))
4446 REG_NOTES (scan)
4447 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
4449 while (scan != dies);
4454 if (TARGET_SH2)
4455 fixup_addr_diff_vecs (first);
4457 if (optimize)
4459 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4460 shorten_branches (first);
4462 /* Scan the function looking for move instructions which have to be
4463 changed to pc-relative loads and insert the literal tables. */
4465 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4466 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4468 if (mova_p (insn))
4470 /* ??? basic block reordering can move a switch table dispatch
4471 below the switch table. Check if that has happened.
4472 We only have the addresses available when optimizing; but then,
4473 this check shouldn't be needed when not optimizing. */
4474 rtx label_ref = XVECEXP (SET_SRC (PATTERN (insn)), 0, 0);
4475 if (optimize
4476 && (INSN_ADDRESSES (INSN_UID (insn))
4477 > INSN_ADDRESSES (INSN_UID (XEXP (label_ref, 0)))))
4479 /* Change the mova into a load.
4480 broken_move will then return true for it. */
4481 fixup_mova (insn);
4483 else if (! num_mova++)
4484 mova = insn;
4486 else if (GET_CODE (insn) == JUMP_INSN
4487 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4488 && num_mova)
4490 rtx scan;
4491 int total;
4493 num_mova--;
4495 /* Some code might have been inserted between the mova and
4496 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4497 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4498 total += get_attr_length (scan);
4500 /* The range of mova is 1020; add 4 because the pc counts from the address
4501 of the second instruction after this one, and subtract 2 in case the pc
4502 is 2-byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
4503 cancels out with alignment effects of the mova itself. */
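/* I.e. 1020 + 4 - 2 == 1022, hence the limit tested below.  */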
4504 if (total > 1022)
4506 /* Change the mova into a load, and restart scanning
4507 there. broken_move will then return true for mova. */
4508 fixup_mova (mova);
4509 insn = mova;
4512 if (broken_move (insn)
4513 || (GET_CODE (insn) == INSN
4514 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
4516 rtx scan;
4517 /* Scan ahead looking for a barrier to stick the constant table
4518 behind. */
4519 rtx barrier = find_barrier (num_mova, mova, insn);
4520 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4521 int need_aligned_label = 0;
4523 if (num_mova && ! mova_p (mova))
4525 /* find_barrier had to change the first mova into a
4526 pcload; thus, we have to start with this new pcload. */
4527 insn = mova;
4528 num_mova = 0;
4530 /* Now find all the moves between the points and modify them. */
4531 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
4533 if (GET_CODE (scan) == CODE_LABEL)
4534 last_float = 0;
4535 if (GET_CODE (scan) == INSN
4536 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
4537 need_aligned_label = 1;
4538 if (broken_move (scan))
4540 rtx *patp = &PATTERN (scan), pat = *patp;
4541 rtx src, dst;
4542 rtx lab;
4543 rtx newsrc;
4544 enum machine_mode mode;
4546 if (GET_CODE (pat) == PARALLEL)
4547 patp = &XVECEXP (pat, 0, 0), pat = *patp;
4548 src = SET_SRC (pat);
4549 dst = SET_DEST (pat);
4550 mode = GET_MODE (dst);
4552 if (mode == SImode && hi_const (src)
4553 && REGNO (dst) != FPUL_REG)
4555 int offset = 0;
4557 mode = HImode;
4558 while (GET_CODE (dst) == SUBREG)
4560 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
4561 GET_MODE (SUBREG_REG (dst)),
4562 SUBREG_BYTE (dst),
4563 GET_MODE (dst));
4564 dst = SUBREG_REG (dst);
4566 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
4568 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
4570 /* This must be an insn that clobbers r0. */
4571 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
4572 XVECLEN (PATTERN (scan), 0)
4573 - 1);
4574 rtx clobber = *clobberp;
4576 gcc_assert (GET_CODE (clobber) == CLOBBER
4577 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
4579 if (last_float
4580 && reg_set_between_p (r0_rtx, last_float_move, scan))
4581 last_float = 0;
4582 if (last_float
4583 && TARGET_SHCOMPACT
4584 && GET_MODE_SIZE (mode) != 4
4585 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
4586 last_float = 0;
4587 lab = add_constant (src, mode, last_float);
4588 if (lab)
4589 emit_insn_before (gen_mova (lab), scan);
4590 else
4592 /* There will be a REG_UNUSED note for r0 on
4593 LAST_FLOAT_MOVE; we have to change it to REG_INC,
4594 otherwise reorg:mark_target_live_regs will not
4595 consider r0 to be used, and we would end up with a
4596 delay slot insn in front of SCAN that clobbers r0. */
4597 rtx note
4598 = find_regno_note (last_float_move, REG_UNUSED, 0);
4600 /* If we are not optimizing, then there may not be
4601 a note. */
4602 if (note)
4603 PUT_MODE (note, REG_INC);
4605 *last_float_addr = r0_inc_rtx;
4607 last_float_move = scan;
4608 last_float = src;
4609 newsrc = gen_rtx_MEM (mode,
4610 (((TARGET_SH4 && ! TARGET_FMOVD)
4611 || REGNO (dst) == FPUL_REG)
4612 ? r0_inc_rtx
4613 : r0_rtx));
4614 last_float_addr = &XEXP (newsrc, 0);
4616 /* Remove the clobber of r0. */
4617 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
4618 gen_rtx_SCRATCH (Pmode));
4620 /* This is a mova needing a label. Create it. */
4621 else if (GET_CODE (src) == UNSPEC
4622 && XINT (src, 1) == UNSPEC_MOVA
4623 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
4625 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
4626 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4627 newsrc = gen_rtx_UNSPEC (SImode,
4628 gen_rtvec (1, newsrc),
4629 UNSPEC_MOVA);
4631 else
4633 lab = add_constant (src, mode, 0);
4634 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4635 newsrc = gen_const_mem (mode, newsrc);
4637 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
4638 INSN_CODE (scan) = -1;
4641 dump_table (need_aligned_label ? insn : 0, barrier);
4642 insn = barrier;
4646 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
4647 INSN_ADDRESSES_FREE ();
4648 split_branches (first);
4650 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4651 also has an effect on the register that holds the address of the sfunc.
4652 Insert an extra dummy insn in front of each sfunc that pretends to
4653 use this register. */
4654 if (flag_delayed_branch)
4656 for (insn = first; insn; insn = NEXT_INSN (insn))
4658 rtx reg = sfunc_uses_reg (insn);
4660 if (! reg)
4661 continue;
4662 emit_insn_before (gen_use_sfunc_addr (reg), insn);
4665 #if 0
4666 /* fpscr is not actually a user variable, but we pretend it is for the
4667 sake of the previous optimization passes, since we want it handled like
4668 one. However, we don't have any debugging information for it, so turn
4669 it into a non-user variable now. */
4670 if (TARGET_SH4)
4671 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4672 #endif
4673 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
4676 static int
4677 get_dest_uid (rtx label, int max_uid)
4679 rtx dest = next_real_insn (label);
4680 int dest_uid;
4681 if (! dest)
4682 /* This can happen for an undefined label. */
4683 return 0;
4684 dest_uid = INSN_UID (dest);
4685 /* If this is a newly created branch redirection blocking instruction,
4686 we cannot index the branch_uid or insn_addresses arrays with its
4687 uid. But then, we won't need to, because the actual destination is
4688 the following branch. */
4689 while (dest_uid >= max_uid)
4691 dest = NEXT_INSN (dest);
4692 dest_uid = INSN_UID (dest);
4694 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
4695 return 0;
4696 return dest_uid;
4699 /* Split condbranches that are out of range. Also add clobbers for
4700 scratch registers that are needed in far jumps.
4701 We do this before delay slot scheduling, so that it can take our
4702 newly created instructions into account. It also allows us to
4703 find branches with common targets more easily. */
4705 static void
4706 split_branches (rtx first)
4708 rtx insn;
4709 struct far_branch **uid_branch, *far_branch_list = 0;
4710 int max_uid = get_max_uid ();
4711 int ok;
4713 /* Find out which branches are out of range. */
4714 shorten_branches (first);
4716 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
4717 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
4719 for (insn = first; insn; insn = NEXT_INSN (insn))
4720 if (! INSN_P (insn))
4721 continue;
4722 else if (INSN_DELETED_P (insn))
4724 /* Shorten_branches would split this instruction again,
4725 so transform it into a note. */
4726 PUT_CODE (insn, NOTE);
4727 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4728 NOTE_SOURCE_FILE (insn) = 0;
4730 else if (GET_CODE (insn) == JUMP_INSN
4731 /* Don't mess with ADDR_DIFF_VEC */
4732 && (GET_CODE (PATTERN (insn)) == SET
4733 || GET_CODE (PATTERN (insn)) == RETURN))
4735 enum attr_type type = get_attr_type (insn);
4736 if (type == TYPE_CBRANCH)
4738 rtx next, beyond;
4740 if (get_attr_length (insn) > 4)
4742 rtx src = SET_SRC (PATTERN (insn));
4743 rtx olabel = XEXP (XEXP (src, 1), 0);
4744 int addr = INSN_ADDRESSES (INSN_UID (insn));
4745 rtx label = 0;
4746 int dest_uid = get_dest_uid (olabel, max_uid);
4747 struct far_branch *bp = uid_branch[dest_uid];
4749 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4750 the label if the LABEL_NUSES count drops to zero. There is
4751 always a jump_optimize pass that sets these values, but it
4752 proceeds to delete unreferenced code, and then if not
4753 optimizing, to un-delete the deleted instructions, thus
4754 leaving labels with use counts that are too low. */
4755 if (! optimize)
4757 JUMP_LABEL (insn) = olabel;
4758 LABEL_NUSES (olabel)++;
4760 if (! bp)
4762 bp = (struct far_branch *) alloca (sizeof *bp);
4763 uid_branch[dest_uid] = bp;
4764 bp->prev = far_branch_list;
4765 far_branch_list = bp;
4766 bp->far_label
4767 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
4768 LABEL_NUSES (bp->far_label)++;
4770 else
4772 label = bp->near_label;
4773 if (! label && bp->address - addr >= CONDJUMP_MIN)
4775 rtx block = bp->insert_place;
4777 if (GET_CODE (PATTERN (block)) == RETURN)
4778 block = PREV_INSN (block);
4779 else
4780 block = gen_block_redirect (block,
4781 bp->address, 2);
4782 label = emit_label_after (gen_label_rtx (),
4783 PREV_INSN (block));
4784 bp->near_label = label;
4786 else if (label && ! NEXT_INSN (label))
4788 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4789 bp->insert_place = insn;
4790 else
4791 gen_far_branch (bp);
4794 if (! label
4795 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4797 bp->near_label = label = gen_label_rtx ();
4798 bp->insert_place = insn;
4799 bp->address = addr;
4801 ok = redirect_jump (insn, label, 1);
4802 gcc_assert (ok);
4804 else
4806 /* get_attr_length (insn) == 2 */
4807 /* Check if we have a pattern where reorg wants to redirect
4808 the branch to a label from an unconditional branch that
4809 is too far away. */
4810 /* We can't use JUMP_LABEL here because it might be undefined
4811 when not optimizing. */
4812 /* A syntax error might cause beyond to be NULL_RTX. */
4813 beyond
4814 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4815 0));
4817 if (beyond
4818 && (GET_CODE (beyond) == JUMP_INSN
4819 || ((beyond = next_active_insn (beyond))
4820 && GET_CODE (beyond) == JUMP_INSN))
4821 && GET_CODE (PATTERN (beyond)) == SET
4822 && recog_memoized (beyond) == CODE_FOR_jump_compact
4823 && ((INSN_ADDRESSES
4824 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4825 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4826 > 252 + 258 + 2))
4827 gen_block_redirect (beyond,
4828 INSN_ADDRESSES (INSN_UID (beyond)), 1);
4831 next = next_active_insn (insn);
4833 if ((GET_CODE (next) == JUMP_INSN
4834 || ((next = next_active_insn (next))
4835 && GET_CODE (next) == JUMP_INSN))
4836 && GET_CODE (PATTERN (next)) == SET
4837 && recog_memoized (next) == CODE_FOR_jump_compact
4838 && ((INSN_ADDRESSES
4839 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
4840 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4841 > 252 + 258 + 2))
4842 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
4844 else if (type == TYPE_JUMP || type == TYPE_RETURN)
4846 int addr = INSN_ADDRESSES (INSN_UID (insn));
4847 rtx far_label = 0;
4848 int dest_uid = 0;
4849 struct far_branch *bp;
4851 if (type == TYPE_JUMP)
4853 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
4854 dest_uid = get_dest_uid (far_label, max_uid);
4855 if (! dest_uid)
4857 /* Parse errors can lead to labels outside
4858 the insn stream. */
4859 if (! NEXT_INSN (far_label))
4860 continue;
4862 if (! optimize)
4864 JUMP_LABEL (insn) = far_label;
4865 LABEL_NUSES (far_label)++;
4867 redirect_jump (insn, NULL_RTX, 1);
4868 far_label = 0;
4871 bp = uid_branch[dest_uid];
4872 if (! bp)
4874 bp = (struct far_branch *) alloca (sizeof *bp);
4875 uid_branch[dest_uid] = bp;
4876 bp->prev = far_branch_list;
4877 far_branch_list = bp;
4878 bp->near_label = 0;
4879 bp->far_label = far_label;
4880 if (far_label)
4881 LABEL_NUSES (far_label)++;
4883 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
4884 if (addr - bp->address <= CONDJUMP_MAX)
4885 emit_label_after (bp->near_label, PREV_INSN (insn));
4886 else
4888 gen_far_branch (bp);
4889 bp->near_label = 0;
4891 else
4892 bp->near_label = 0;
4893 bp->address = addr;
4894 bp->insert_place = insn;
4895 if (! far_label)
4896 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
4897 else
4898 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
4901 /* Generate all pending far branches,
4902 and free our references to the far labels. */
4903 while (far_branch_list)
4905 if (far_branch_list->near_label
4906 && ! NEXT_INSN (far_branch_list->near_label))
4907 gen_far_branch (far_branch_list);
4908 if (optimize
4909 && far_branch_list->far_label
4910 && ! --LABEL_NUSES (far_branch_list->far_label))
4911 delete_insn (far_branch_list->far_label);
4912 far_branch_list = far_branch_list->prev;
4915 /* Instruction length information is no longer valid due to the new
4916 instructions that have been generated. */
4917 init_insn_lengths ();
4920 /* Dump out instruction addresses, which is useful for debugging the
4921 constant pool table stuff.
4923 If relaxing, output the label and pseudo-ops used to link together
4924 calls and the instruction which set the registers. */
4926 /* ??? The addresses printed by this routine for insns are nonsense for
4927 insns which are inside of a sequence where none of the inner insns have
4928 variable length. This is because the second pass of shorten_branches
4929 does not bother to update them. */
4931 void
4932 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
4933 int noperands ATTRIBUTE_UNUSED)
4935 if (TARGET_DUMPISIZE)
4936 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
4938 if (TARGET_RELAX)
4940 rtx note;
4942 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
4943 if (note)
4945 rtx pattern;
4947 pattern = PATTERN (insn);
4948 switch (GET_CODE (pattern))
4950 case PARALLEL:
4951 pattern = XVECEXP (pattern, 0, 0);
4952 break;
4954 case SET:
4955 if (GET_CODE (SET_SRC (pattern)) != CALL
4956 && get_attr_type (insn) != TYPE_SFUNC)
4958 targetm.asm_out.internal_label
4959 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
4960 break;
4962 /* else FALLTHROUGH */
4963 case CALL:
4964 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
4965 CODE_LABEL_NUMBER (XEXP (note, 0)));
4966 break;
4968 default:
4969 gcc_unreachable ();
4975 /* Dump out any constants accumulated in the final pass. These will
4976 only be labels. */
4978 const char *
4979 output_jump_label_table (void)
4981 int i;
4983 if (pool_size)
4985 fprintf (asm_out_file, "\t.align 2\n");
4986 for (i = 0; i < pool_size; i++)
4988 pool_node *p = &pool_vector[i];
4990 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4991 CODE_LABEL_NUMBER (p->label));
4992 output_asm_insn (".long %O0", &p->value);
4994 pool_size = 0;
4997 return "";
5000 /* A full frame looks like:
5002 arg-5
5003 arg-4
5004 [ if current_function_anonymous_args
5005 arg-3
5006 arg-2
5007 arg-1
5008 arg-0 ]
5009 saved-fp
5010 saved-r10
5011 saved-r11
5012 saved-r12
5013 saved-pr
5014 local-n
5016 local-1
5017 local-0 <- fp points here. */
5019 /* Number of bytes pushed for anonymous args, used to pass information
5020 between expand_prologue and expand_epilogue. */
5022 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
5023 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
5024 for an epilogue and a negative value means that it's for a sibcall
5025 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
5026 all the registers that are about to be restored, and hence dead. */
5028 static void
5029 output_stack_adjust (int size, rtx reg, int epilogue_p,
5030 HARD_REG_SET *live_regs_mask)
5032 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
5033 if (size)
5035 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5037 /* This test is bogus, as output_stack_adjust is used to re-align the
5038 stack. */
5039 #if 0
5040 gcc_assert (!(size % align));
5041 #endif
5043 if (CONST_OK_FOR_ADD (size))
5044 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
5045 /* Try to do it with two partial adjustments; however, we must make
5046 sure that the stack is properly aligned at all times, in case
5047 an interrupt occurs between the two partial adjustments. */
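/* For instance, assuming a signed 8-bit add immediate and a 4-byte
   alignment, a 200-byte adjustment that is too large for a single add can
   be done as 100 + 100; the first step is rounded down to a multiple of
   the alignment, so the stack stays aligned in between.  */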
5048 else if (CONST_OK_FOR_ADD (size / 2 & -align)
5049 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
5051 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
5052 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
5054 else
5056 rtx const_reg;
5057 rtx insn;
5058 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
5059 int i;
5061 /* If TEMP is invalid, we could temporarily save a general
5062 register to MACL. However, there is currently no need
5063 to handle this case, so just die when we see it. */
5064 if (epilogue_p < 0
5065 || current_function_interrupt
5066 || ! call_really_used_regs[temp] || fixed_regs[temp])
5067 temp = -1;
5068 if (temp < 0 && ! current_function_interrupt
5069 && (TARGET_SHMEDIA || epilogue_p >= 0))
5071 HARD_REG_SET temps;
5072 COPY_HARD_REG_SET (temps, call_used_reg_set);
5073 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
5074 if (epilogue_p > 0)
5076 int nreg = 0;
5077 if (current_function_return_rtx)
5079 enum machine_mode mode;
5080 mode = GET_MODE (current_function_return_rtx);
5081 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
5082 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
5084 for (i = 0; i < nreg; i++)
5085 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
5086 if (current_function_calls_eh_return)
5088 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
5089 for (i = 0; i <= 3; i++)
5090 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
5093 if (TARGET_SHMEDIA && epilogue_p < 0)
5094 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
5095 CLEAR_HARD_REG_BIT (temps, i);
5096 if (epilogue_p <= 0)
5098 for (i = FIRST_PARM_REG;
5099 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
5100 CLEAR_HARD_REG_BIT (temps, i);
5101 if (cfun->static_chain_decl != NULL)
5102 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
5104 temp = scavenge_reg (&temps);
5106 if (temp < 0 && live_regs_mask)
5107 temp = scavenge_reg (live_regs_mask);
5108 if (temp < 0)
5110 rtx adj_reg, tmp_reg, mem;
5112 /* If we reached here, the most likely case is the (sibcall)
5113 epilogue for non-SHmedia. Put a special push/pop sequence
5114 for such a case as a last resort. This looks lengthy, but it
5115 should not be a problem because it seems to be very
5116 rare. */
5118 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
5121 /* ??? There is still the slight possibility that r4 or
5122 r5 have been reserved as fixed registers or assigned
5123 as global registers, and they change during an
5124 interrupt. There are possible ways to handle this:
5126 - If we are adjusting the frame pointer (r14), we can do
5127 with a single temp register and an ordinary push / pop
5128 on the stack.
5129 - Grab any call-used or call-saved registers (i.e. not
5130 fixed or globals) for the temps we need. We might
5131 also grab r14 if we are adjusting the stack pointer.
5132 If we can't find enough available registers, issue
5133 a diagnostic and die - the user must have reserved
5134 way too many registers.
5135 But since all this is rather unlikely to happen and
5136 would require extra testing, we just die if r4 / r5
5137 are not available. */
5138 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
5139 && !global_regs[4] && !global_regs[5]);
5141 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
5142 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
5143 emit_move_insn (gen_rtx_MEM (Pmode, reg), adj_reg);
5144 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
5145 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
5146 mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5147 emit_move_insn (mem, tmp_reg);
5148 emit_move_insn (tmp_reg, gen_rtx_MEM (Pmode, reg));
5149 mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5150 emit_move_insn (mem, tmp_reg);
5151 emit_move_insn (reg, adj_reg);
5152 mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
5153 emit_move_insn (adj_reg, mem);
5154 mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
5155 emit_move_insn (tmp_reg, mem);
5156 return;
5158 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
5160 /* If SIZE is negative, subtract the positive value.
5161 This sometimes allows a constant pool entry to be shared
5162 between prologue and epilogue code. */
5163 if (size < 0)
5165 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
5166 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
5168 else
5170 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
5171 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
5173 if (! epilogue_p)
5174 REG_NOTES (insn)
5175 = (gen_rtx_EXPR_LIST
5176 (REG_FRAME_RELATED_EXPR,
5177 gen_rtx_SET (VOIDmode, reg,
5178 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
5179 REG_NOTES (insn)));
5184 static rtx
5185 frame_insn (rtx x)
5187 x = emit_insn (x);
5188 RTX_FRAME_RELATED_P (x) = 1;
5189 return x;
5192 /* Output RTL to push register RN onto the stack. */
5194 static rtx
5195 push (int rn)
5197 rtx x;
5198 if (rn == FPUL_REG)
5199 x = gen_push_fpul ();
5200 else if (rn == FPSCR_REG)
5201 x = gen_push_fpscr ();
5202 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5203 && FP_OR_XD_REGISTER_P (rn))
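/* With double-precision moves, register pairs are saved as one DFmode
   push: the odd-numbered half of a pair is skipped here, and gen_push_4
   below pushes the whole pair through the even-numbered register.  */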
5205 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5206 return NULL_RTX;
5207 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
5209 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5210 x = gen_push_e (gen_rtx_REG (SFmode, rn));
5211 else
5212 x = gen_push (gen_rtx_REG (SImode, rn));
5214 x = frame_insn (x);
5215 REG_NOTES (x)
5216 = gen_rtx_EXPR_LIST (REG_INC,
5217 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5218 return x;
5221 /* Output RTL to pop register RN from the stack. */
5223 static void
5224 pop (int rn)
5226 rtx x;
5227 if (rn == FPUL_REG)
5228 x = gen_pop_fpul ();
5229 else if (rn == FPSCR_REG)
5230 x = gen_pop_fpscr ();
5231 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5232 && FP_OR_XD_REGISTER_P (rn))
5234 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5235 return;
5236 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
5238 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5239 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
5240 else
5241 x = gen_pop (gen_rtx_REG (SImode, rn));
5243 x = emit_insn (x);
5244 REG_NOTES (x)
5245 = gen_rtx_EXPR_LIST (REG_INC,
5246 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5249 /* Generate code to push the regs specified in the mask. */
5251 static void
5252 push_regs (HARD_REG_SET *mask, int interrupt_handler)
5254 int i;
5255 int skip_fpscr = 0;
5257 /* Push PR last; this gives better latencies after the prologue, and
5258 candidates for the return delay slot when there are no general
5259 registers pushed. */
5260 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5262 /* If this is an interrupt handler, and the SZ bit varies,
5263 and we have to push any floating point register, we need
5264 to switch to the correct precision first. */
5265 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
5266 && hard_regs_intersect_p (mask, &reg_class_contents[DF_REGS]))
5268 HARD_REG_SET unsaved;
5270 push (FPSCR_REG);
5271 COMPL_HARD_REG_SET (unsaved, *mask);
5272 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
5273 skip_fpscr = 1;
5275 if (i != PR_REG
5276 && (i != FPSCR_REG || ! skip_fpscr)
5277 && TEST_HARD_REG_BIT (*mask, i))
5278 push (i);
5280 if (TEST_HARD_REG_BIT (*mask, PR_REG))
5281 push (PR_REG);
5284 /* Calculate how much extra space is needed to save all callee-saved
5285 target registers.
5286 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5288 static int
5289 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
5291 int reg;
5292 int stack_space = 0;
5293 int interrupt_handler = sh_cfun_interrupt_handler_p ();
5295 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5296 if ((! call_really_used_regs[reg] || interrupt_handler)
5297 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5298 /* Leave space to save this target register on the stack,
5299 in case target register allocation wants to use it. */
5300 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5301 return stack_space;
5304 /* Decide whether we should reserve space for callee-save target registers,
5305 in case target register allocation wants to use them. REGS_SAVED is
5306 the space, in bytes, that is already required for register saves.
5307 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5309 static int
5310 shmedia_reserve_space_for_target_registers_p (int regs_saved,
5311 HARD_REG_SET *live_regs_mask)
5313 if (optimize_size)
5314 return 0;
5315 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
5318 /* Decide how much space to reserve for callee-save target registers
5319 in case target register allocation wants to use them.
5320 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5322 static int
5323 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5325 if (shmedia_space_reserved_for_target_registers)
5326 return shmedia_target_regs_stack_space (live_regs_mask);
5327 else
5328 return 0;
5331 /* Work out the registers which need to be saved, both as a mask and a
5332 count of bytes that need to be saved. Return the count.
5334 If doing a pragma interrupt function, then push all regs used by the
5335 function, and if we call another function (we can tell by looking at PR),
5336 make sure that all the regs it clobbers are safe too. */
5338 static int
5339 calc_live_regs (HARD_REG_SET *live_regs_mask)
5341 unsigned int reg;
5342 int count;
5343 int interrupt_handler;
5344 int pr_live, has_call;
5346 interrupt_handler = sh_cfun_interrupt_handler_p ();
5348 CLEAR_HARD_REG_SET (*live_regs_mask);
5349 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
5350 && regs_ever_live[FPSCR_REG])
5351 target_flags &= ~MASK_FPU_SINGLE;
5352 /* If we can save a lot of saves by switching to double mode, do that. */
5353 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
5354 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5355 if (regs_ever_live[reg] && regs_ever_live[reg+1]
5356 && (! call_really_used_regs[reg]
5357 || (interrupt_handler && ! pragma_trapa))
5358 && ++count > 2)
5360 target_flags &= ~MASK_FPU_SINGLE;
5361 break;
5363 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5364 knows how to use it. That means the pseudo originally allocated for
5365 the initial value can become the PR_MEDIA_REG hard register, as seen for
5366 execute/20010122-1.c:test9. */
5367 if (TARGET_SHMEDIA)
5368 /* ??? this function is called from initial_elimination_offset, hence we
5369 can't use the result of sh_media_register_for_return here. */
5370 pr_live = sh_pr_n_sets ();
5371 else
5373 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5374 pr_live = (pr_initial
5375 ? (GET_CODE (pr_initial) != REG
5376 || REGNO (pr_initial) != (PR_REG))
5377 : regs_ever_live[PR_REG]);
5378 /* For SHcompact, if not optimizing, we end up with a memory reference
5379 using the return address pointer for __builtin_return_address even
5380 though there is no actual need to put the PR register on the stack. */
5381 pr_live |= regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM];
5383 /* Force PR to be live if the prologue has to call the SHmedia
5384 argument decoder or register saver. */
5385 if (TARGET_SHCOMPACT
5386 && ((current_function_args_info.call_cookie
5387 & ~ CALL_COOKIE_RET_TRAMP (1))
5388 || current_function_has_nonlocal_label))
5389 pr_live = 1;
5390 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
5391 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
5393 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5394 ? pr_live
5395 : (interrupt_handler && ! pragma_trapa)
5396 ? (/* Need to save all the regs ever live. */
5397 (regs_ever_live[reg]
5398 || (call_really_used_regs[reg]
5399 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
5400 || reg == PIC_OFFSET_TABLE_REGNUM)
5401 && has_call)
5402 || (has_call && REGISTER_NATURAL_MODE (reg) == SImode
5403 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5404 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5405 && reg != RETURN_ADDRESS_POINTER_REGNUM
5406 && reg != T_REG && reg != GBR_REG
5407 /* Push fpscr only on targets which have FPU */
5408 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5409 : (/* Only push those regs which are used and need to be saved. */
5410 (TARGET_SHCOMPACT
5411 && flag_pic
5412 && current_function_args_info.call_cookie
5413 && reg == PIC_OFFSET_TABLE_REGNUM)
5414 || (regs_ever_live[reg] && ! call_really_used_regs[reg])
5415 || (current_function_calls_eh_return
5416 && (reg == EH_RETURN_DATA_REGNO (0)
5417 || reg == EH_RETURN_DATA_REGNO (1)
5418 || reg == EH_RETURN_DATA_REGNO (2)
5419 || reg == EH_RETURN_DATA_REGNO (3)))
5420 || ((reg == MACL_REG || reg == MACH_REG)
5421 && regs_ever_live[reg]
5422 && sh_cfun_attr_renesas_p ())
5425 SET_HARD_REG_BIT (*live_regs_mask, reg);
5426 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5428 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
5429 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
5431 if (FP_REGISTER_P (reg))
5433 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
5435 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5436 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
5439 else if (XD_REGISTER_P (reg))
5441 /* Must switch to double mode to access these registers. */
5442 target_flags &= ~MASK_FPU_SINGLE;
5447 /* If we have a target register optimization pass after prologue / epilogue
5448 threading, we need to assume all target registers will be live even if
5449 they aren't now. */
5450 if (flag_branch_target_load_optimize2
5451 && TARGET_SAVE_ALL_TARGET_REGS
5452 && shmedia_space_reserved_for_target_registers)
5453 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5454 if ((! call_really_used_regs[reg] || interrupt_handler)
5455 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5457 SET_HARD_REG_BIT (*live_regs_mask, reg);
5458 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5460 /* If this is an interrupt handler, we don't have any call-clobbered
5461 registers we can conveniently use for target register save/restore.
5462 Make sure we save at least one general purpose register when we need
5463 to save target registers. */
5464 if (interrupt_handler
5465 && hard_regs_intersect_p (live_regs_mask,
5466 &reg_class_contents[TARGET_REGS])
5467 && ! hard_regs_intersect_p (live_regs_mask,
5468 &reg_class_contents[GENERAL_REGS]))
5470 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
5471 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
5474 return count;
5477 /* Code to generate prologue and epilogue sequences */
5479 /* PUSHED is the number of bytes that are being pushed on the
5480 stack for register saves. Return the frame size, padded
5481 appropriately so that the stack stays properly aligned. */
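/* For illustration: with get_frame_size () == 20, PUSHED == 12 and a
   STACK_BOUNDARY of 32 bits, the formula below gives
   ((20 + 12 + 3) & -4) - 12 == 20, i.e. locals plus saves are rounded up
   to a 32-byte total.  */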
5482 static HOST_WIDE_INT
5483 rounded_frame_size (int pushed)
5485 HOST_WIDE_INT size = get_frame_size ();
5486 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5488 return ((size + pushed + align - 1) & -align) - pushed;
5491 /* Choose a call-clobbered target-branch register that remains
5492 unchanged along the whole function. We set it up as the return
5493 value in the prologue. */
5494 int
5495 sh_media_register_for_return (void)
5497 int regno;
5498 int tr0_used;
5500 if (! current_function_is_leaf)
5501 return -1;
5502 if (lookup_attribute ("interrupt_handler",
5503 DECL_ATTRIBUTES (current_function_decl)))
5504 return -1;
5505 if (sh_cfun_interrupt_handler_p ())
5506 return -1;
5508 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
5510 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
5511 if (call_really_used_regs[regno] && ! regs_ever_live[regno])
5512 return regno;
5514 return -1;
5517 /* The maximum registers we need to save are:
5518 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
5519 - 32 floating point registers (for each pair, we save none,
5520 one single precision value, or a double precision value).
5521 - 8 target registers
5522 - add 1 entry for a delimiter. */
5523 #define MAX_SAVED_REGS (62+32+8)
5525 typedef struct save_entry_s
5527 unsigned char reg;
5528 unsigned char mode;
5529 short offset;
5530 } save_entry;
5532 #define MAX_TEMPS 4
5534 /* There will be a delimiter entry with VOIDmode both at the start and the
5535 end of a filled in schedule. The end delimiter has the offset of the
5536 save with the smallest (i.e. most negative) offset. */
5537 typedef struct save_schedule_s
5539 save_entry entries[MAX_SAVED_REGS + 2];
5540 int temps[MAX_TEMPS+1];
5541 } save_schedule;
5543 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
5544 use reverse order. Returns the last entry written to (not counting
5545 the delimiter). OFFSET_BASE is a number to be added to all offset
5546 entries. */
5548 static save_entry *
5549 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
5550 int offset_base)
5552 int align, i;
5553 save_entry *entry = schedule->entries;
5554 int tmpx = 0;
5555 int offset;
5557 if (! current_function_interrupt)
5558 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
5559 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
5560 && ! FUNCTION_ARG_REGNO_P (i)
5561 && i != FIRST_RET_REG
5562 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
5563 && ! (current_function_calls_eh_return
5564 && (i == EH_RETURN_STACKADJ_REGNO
5565 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
5566 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
5567 schedule->temps[tmpx++] = i;
5568 entry->reg = -1;
5569 entry->mode = VOIDmode;
5570 entry->offset = offset_base;
5571 entry++;
5572 /* We loop twice: first, we save 8-byte aligned registers at the
5573 higher addresses, which are known to be aligned. Then, we
5574 proceed to saving 32-bit registers that don't need 8-byte
5575 alignment.
5576 If this is an interrupt function, all registers that need saving
5577 need to be saved in full. Moreover, we need to postpone saving
5578 target registers till we have saved some general purpose registers
5579 we can then use as scratch registers. */
5580 offset = offset_base;
5581 for (align = 1; align >= 0; align--)
5583 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
5584 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5586 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5587 int reg = i;
5589 if (current_function_interrupt)
5591 if (TARGET_REGISTER_P (i))
5592 continue;
5593 if (GENERAL_REGISTER_P (i))
5594 mode = DImode;
5596 if (mode == SFmode && (i % 2) == 1
5597 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5598 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
5600 mode = DFmode;
5601 i--;
5602 reg--;
5605 /* If we're doing the aligned pass and this is not aligned,
5606 or we're doing the unaligned pass and this is aligned,
5607 skip it. */
5608 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
5609 != align)
5610 continue;
5612 if (current_function_interrupt
5613 && GENERAL_REGISTER_P (i)
5614 && tmpx < MAX_TEMPS)
5615 schedule->temps[tmpx++] = i;
5617 offset -= GET_MODE_SIZE (mode);
5618 entry->reg = i;
5619 entry->mode = mode;
5620 entry->offset = offset;
5621 entry++;
5623 if (align && current_function_interrupt)
5624 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
5625 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5627 offset -= GET_MODE_SIZE (DImode);
5628 entry->reg = i;
5629 entry->mode = DImode;
5630 entry->offset = offset;
5631 entry++;
5634 entry->reg = -1;
5635 entry->mode = VOIDmode;
5636 entry->offset = offset;
5637 schedule->temps[tmpx] = -1;
5638 return entry - 1;
5641 void
5642 sh_expand_prologue (void)
5644 HARD_REG_SET live_regs_mask;
5645 int d, i;
5646 int d_rounding = 0;
5647 int save_flags = target_flags;
5648 int pretend_args;
5650 current_function_interrupt = sh_cfun_interrupt_handler_p ();
5652 /* We have pretend args if we had an object sent partially in registers
5653 and partially on the stack, e.g. a large structure. */
5654 pretend_args = current_function_pretend_args_size;
5655 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
5656 && (NPARM_REGS(SImode)
5657 > current_function_args_info.arg_count[(int) SH_ARG_INT]))
5658 pretend_args = 0;
5659 output_stack_adjust (-pretend_args
5660 - current_function_args_info.stack_regs * 8,
5661 stack_pointer_rtx, 0, NULL);
5663 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
5664 /* We're going to use the PIC register to load the address of the
5665 incoming-argument decoder and/or of the return trampoline from
5666 the GOT, so make sure the PIC register is preserved and
5667 initialized. */
5668 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5670 if (TARGET_SHCOMPACT
5671 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5673 int reg;
5675 /* First, make all registers with incoming arguments that will
5676 be pushed onto the stack live, so that register renaming
5677 doesn't overwrite them. */
5678 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
5679 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
5680 >= NPARM_REGS (SImode) - reg)
5681 for (; reg < NPARM_REGS (SImode); reg++)
5682 emit_insn (gen_shcompact_preserve_incoming_args
5683 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5684 else if (CALL_COOKIE_INT_REG_GET
5685 (current_function_args_info.call_cookie, reg) == 1)
5686 emit_insn (gen_shcompact_preserve_incoming_args
5687 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5689 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
5690 stack_pointer_rtx);
5691 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
5692 GEN_INT (current_function_args_info.call_cookie));
5693 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
5694 gen_rtx_REG (SImode, R0_REG));
5696 else if (TARGET_SHMEDIA)
5698 int tr = sh_media_register_for_return ();
5700 if (tr >= 0)
5702 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
5703 gen_rtx_REG (DImode, PR_MEDIA_REG));
5705 /* ??? We should suppress saving pr when we don't need it, but this
5706 is tricky because of builtin_return_address. */
5708 /* If this function only exits with sibcalls, this copy
5709 will be flagged as dead. */
5710 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5711 const0_rtx,
5712 REG_NOTES (insn));
5716 /* Emit the code for SETUP_VARARGS. */
5717 if (current_function_stdarg)
5719 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
5721 /* Push arg regs as if they'd been provided by caller in stack. */
5722 for (i = 0; i < NPARM_REGS(SImode); i++)
5724 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
5725 rtx insn;
5727 if (i >= (NPARM_REGS(SImode)
5728 - current_function_args_info.arg_count[(int) SH_ARG_INT]
5730 break;
5731 insn = push (rn);
5732 RTX_FRAME_RELATED_P (insn) = 0;
5737 /* If we're supposed to switch stacks at function entry, do so now. */
5738 if (sp_switch)
5739 emit_insn (gen_sp_switch_1 ());
5741 d = calc_live_regs (&live_regs_mask);
5742 /* ??? Maybe we could save some switching if we can move a mode switch
5743 that already happens to be at the function start into the prologue. */
5744 if (target_flags != save_flags && ! current_function_interrupt)
5745 emit_insn (gen_toggle_sz ());
5747 if (TARGET_SH5)
5749 int offset_base, offset;
5750 rtx r0 = NULL_RTX;
5751 int offset_in_r0 = -1;
5752 int sp_in_r0 = 0;
5753 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5754 int total_size, save_size;
5755 save_schedule schedule;
5756 save_entry *entry;
5757 int *tmp_pnt;
5759 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
5760 && ! current_function_interrupt)
5761 r0 = gen_rtx_REG (Pmode, R0_REG);
5763 /* D is the actual number of bytes that we need for saving registers;
5764 however, in initial_elimination_offset we have committed to using
5765 an additional TREGS_SPACE bytes - in order to keep both
5766 addresses to arguments supplied by the caller and local variables
5767 valid, we must keep this gap. Place it between the incoming
5768 arguments and the actually saved registers in a bid to optimize
5769 locality of reference. */
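/* A rough sketch (illustrative only, ignoring the exact rounding) of the
   SH5 frame this code establishes, higher addresses first:

         incoming arguments
         ------------------   <- sp on entry
         TREGS_SPACE gap      (committed to in initial_elimination_offset)
         saved registers      (d bytes, laid out by sh5_schedule_saves)
         ------------------
         local frame          (roughly rounded_frame_size (d) bytes)
         ------------------   <- sp after the prologue  */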
5770 total_size = d + tregs_space;
5771 total_size += rounded_frame_size (total_size);
5772 save_size = total_size - rounded_frame_size (d);
5773 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
5774 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5775 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
5777 /* If adjusting the stack in a single step costs nothing extra, do so.
5778 I.e. either if a single addi is enough, or we need a movi anyway,
5779 and we don't exceed the maximum offset range (the test for the
5780 latter is conservative for simplicity). */
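/* A worked example with illustrative numbers: if save_size is 40, the
   rounding above left d_rounding at 0, and rounded_frame_size (d) is 64,
   then total_size is 104, which fits the signed 10-bit addi immediate,
   so d_rounding becomes 64 and the first output_stack_adjust below drops
   the SP by all 104 bytes at once; the later frame adjustment then
   degenerates to zero.  */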
5781 if (TARGET_SHMEDIA
5782 && (CONST_OK_FOR_I10 (-total_size)
5783 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
5784 && total_size <= 2044)))
5785 d_rounding = total_size - save_size;
5787 offset_base = d + d_rounding;
5789 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
5790 0, NULL);
5792 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
5793 tmp_pnt = schedule.temps;
5794 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
5796 enum machine_mode mode = entry->mode;
5797 unsigned int reg = entry->reg;
5798 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
5799 rtx orig_reg_rtx;
5801 offset = entry->offset;
5803 reg_rtx = gen_rtx_REG (mode, reg);
5805 mem_rtx = gen_rtx_MEM (mode,
5806 gen_rtx_PLUS (Pmode,
5807 stack_pointer_rtx,
5808 GEN_INT (offset)));
5810 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
5812 gcc_assert (r0);
5813 mem_rtx = NULL_RTX;
5815 try_pre_dec:
5817 if (HAVE_PRE_DECREMENT
5818 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
5819 || mem_rtx == NULL_RTX
5820 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5822 pre_dec = gen_rtx_MEM (mode,
5823 gen_rtx_PRE_DEC (Pmode, r0));
5825 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
5826 pre_dec_ok);
5828 pre_dec = NULL_RTX;
5830 break;
5832 pre_dec_ok:
5833 mem_rtx = NULL_RTX;
5834 offset += GET_MODE_SIZE (mode);
5836 while (0);
5838 if (mem_rtx != NULL_RTX)
5839 goto addr_ok;
5841 if (offset_in_r0 == -1)
5843 emit_move_insn (r0, GEN_INT (offset));
5844 offset_in_r0 = offset;
5846 else if (offset != offset_in_r0)
5848 emit_move_insn (r0,
5849 gen_rtx_PLUS
5850 (Pmode, r0,
5851 GEN_INT (offset - offset_in_r0)));
5852 offset_in_r0 += offset - offset_in_r0;
5855 if (pre_dec != NULL_RTX)
5857 if (! sp_in_r0)
5859 emit_move_insn (r0,
5860 gen_rtx_PLUS
5861 (Pmode, r0, stack_pointer_rtx));
5862 sp_in_r0 = 1;
5865 offset -= GET_MODE_SIZE (mode);
5866 offset_in_r0 -= GET_MODE_SIZE (mode);
5868 mem_rtx = pre_dec;
5870 else if (sp_in_r0)
5871 mem_rtx = gen_rtx_MEM (mode, r0);
5872 else
5873 mem_rtx = gen_rtx_MEM (mode,
5874 gen_rtx_PLUS (Pmode,
5875 stack_pointer_rtx,
5876 r0));
5878 /* We must not use an r0-based address for target-branch
5879 registers or for special registers without pre-dec
5880 memory addresses, since we store their values in r0
5881 first. */
5882 gcc_assert (!TARGET_REGISTER_P (reg)
5883 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
5884 || mem_rtx == pre_dec));
5886 addr_ok:
5887 orig_reg_rtx = reg_rtx;
5888 if (TARGET_REGISTER_P (reg)
5889 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5890 && mem_rtx != pre_dec))
5892 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
5894 emit_move_insn (tmp_reg, reg_rtx);
5896 if (REGNO (tmp_reg) == R0_REG)
5898 offset_in_r0 = -1;
5899 sp_in_r0 = 0;
5900 gcc_assert (!refers_to_regno_p
5901 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
5904 if (*++tmp_pnt <= 0)
5905 tmp_pnt = schedule.temps;
5907 reg_rtx = tmp_reg;
5910 rtx insn;
5912 /* Mark as interesting for dwarf cfi generator. */
5913 insn = emit_move_insn (mem_rtx, reg_rtx);
5914 RTX_FRAME_RELATED_P (insn) = 1;
5915 /* If we use an intermediate register for the save, we can't
5916 describe this exactly in cfi as a copy of the to-be-saved
5917 register into the temporary register and then the temporary
5918 register on the stack, because the temporary register can
5919 have a different natural size than the to-be-saved register.
5920 Thus, we gloss over the intermediate copy and pretend we do
5921 a direct save from the to-be-saved register. */
5922 if (REGNO (reg_rtx) != reg)
5924 rtx set, note_rtx;
5926 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
5927 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
5928 REG_NOTES (insn));
5929 REG_NOTES (insn) = note_rtx;
5932 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
5934 rtx reg_rtx = gen_rtx_REG (mode, reg);
5935 rtx set, note_rtx;
5936 rtx mem_rtx = gen_rtx_MEM (mode,
5937 gen_rtx_PLUS (Pmode,
5938 stack_pointer_rtx,
5939 GEN_INT (offset)));
5941 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
5942 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
5943 REG_NOTES (insn));
5944 REG_NOTES (insn) = note_rtx;
5949 gcc_assert (entry->offset == d_rounding);
5951 else
5952 push_regs (&live_regs_mask, current_function_interrupt);
5954 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
5956 rtx insn = get_last_insn ();
5957 rtx last = emit_insn (gen_GOTaddr2picreg ());
5959 /* Mark these insns as possibly dead. Sometimes, flow2 may
5960 delete all uses of the PIC register. In this case, let it
5961 delete the initialization too. */
5964 insn = NEXT_INSN (insn);
5966 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5967 const0_rtx,
5968 REG_NOTES (insn));
5970 while (insn != last);
5973 if (SHMEDIA_REGS_STACK_ADJUST ())
5975 /* This must NOT go through the PLT, otherwise mach and macl
5976 may be clobbered. */
5977 function_symbol (gen_rtx_REG (Pmode, R0_REG),
5978 (TARGET_FPU_ANY
5979 ? "__GCC_push_shmedia_regs"
5980 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
5981 emit_insn (gen_shmedia_save_restore_regs_compact
5982 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
5985 if (target_flags != save_flags && ! current_function_interrupt)
5987 rtx insn = emit_insn (gen_toggle_sz ());
5989 /* If we're lucky, a mode switch in the function body will
5990 overwrite fpscr, turning this insn dead. Tell flow this
5991 insn is ok to delete. */
5992 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5993 const0_rtx,
5994 REG_NOTES (insn));
5997 target_flags = save_flags;
5999 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
6000 stack_pointer_rtx, 0, NULL);
6002 if (frame_pointer_needed)
6003 frame_insn (GEN_MOV (frame_pointer_rtx, stack_pointer_rtx));
6005 if (TARGET_SHCOMPACT
6006 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6008 /* This must NOT go through the PLT, otherwise mach and macl
6009 may be clobbered. */
6010 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6011 "__GCC_shcompact_incoming_args", SFUNC_GOT);
6012 emit_insn (gen_shcompact_incoming_args ());
6016 void
6017 sh_expand_epilogue (bool sibcall_p)
6019 HARD_REG_SET live_regs_mask;
6020 int d, i;
6021 int d_rounding = 0;
6023 int save_flags = target_flags;
6024 int frame_size, save_size;
6025 int fpscr_deferred = 0;
6026 int e = sibcall_p ? -1 : 1;
6028 d = calc_live_regs (&live_regs_mask);
6030 save_size = d;
6031 frame_size = rounded_frame_size (d);
6033 if (TARGET_SH5)
6035 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6036 int total_size;
6037 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
6038 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6039 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
6041 total_size = d + tregs_space;
6042 total_size += rounded_frame_size (total_size);
6043 save_size = total_size - frame_size;
6045 /* If adjusting the stack in a single step costs nothing extra, do so.
6046 I.e. either if a single addi is enough, or we need a movi anyway,
6047 and we don't exceed the maximum offset range (the test for the
6048 latter is conservative for simplicity). */
6049 if (TARGET_SHMEDIA
6050 && ! frame_pointer_needed
6051 && (CONST_OK_FOR_I10 (total_size)
6052 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
6053 && total_size <= 2044)))
6054 d_rounding = frame_size;
6056 frame_size -= d_rounding;
6059 if (frame_pointer_needed)
6061 /* We must avoid scheduling the epilogue with previous basic blocks
6062 when exception handling is enabled. See PR/18032. */
6063 if (flag_exceptions)
6064 emit_insn (gen_blockage ());
6065 output_stack_adjust (frame_size, frame_pointer_rtx, e, &live_regs_mask);
6067 /* We must avoid moving the stack pointer adjustment past code
6068 which reads from the local frame, else an interrupt could
6069 occur after the SP adjustment and clobber data in the local
6070 frame. */
6071 emit_insn (gen_blockage ());
6072 emit_insn (GEN_MOV (stack_pointer_rtx, frame_pointer_rtx));
6074 else if (frame_size)
6076 /* We must avoid moving the stack pointer adjustment past code
6077 which reads from the local frame, else an interrupt could
6078 occur after the SP adjustment and clobber data in the local
6079 frame. */
6080 emit_insn (gen_blockage ());
6081 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
6084 if (SHMEDIA_REGS_STACK_ADJUST ())
6086 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6087 (TARGET_FPU_ANY
6088 ? "__GCC_pop_shmedia_regs"
6089 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
6090 /* This must NOT go through the PLT, otherwise mach and macl
6091 may be clobbered. */
6092 emit_insn (gen_shmedia_save_restore_regs_compact
6093 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
6096 /* Pop all the registers. */
6098 if (target_flags != save_flags && ! current_function_interrupt)
6099 emit_insn (gen_toggle_sz ());
6100 if (TARGET_SH5)
6102 int offset_base, offset;
6103 int offset_in_r0 = -1;
6104 int sp_in_r0 = 0;
6105 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
6106 save_schedule schedule;
6107 save_entry *entry;
6108 int *tmp_pnt;
6110 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
6111 offset_base = -entry[1].offset + d_rounding;
6112 tmp_pnt = schedule.temps;
6113 for (; entry->mode != VOIDmode; entry--)
6115 enum machine_mode mode = entry->mode;
6116 int reg = entry->reg;
6117 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
6119 offset = offset_base + entry->offset;
6120 reg_rtx = gen_rtx_REG (mode, reg);
6122 mem_rtx = gen_rtx_MEM (mode,
6123 gen_rtx_PLUS (Pmode,
6124 stack_pointer_rtx,
6125 GEN_INT (offset)));
6127 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
6129 mem_rtx = NULL_RTX;
6131 try_post_inc:
6133 if (HAVE_POST_INCREMENT
6134 && (offset == offset_in_r0
6135 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
6136 && mem_rtx == NULL_RTX)
6137 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6139 post_inc = gen_rtx_MEM (mode,
6140 gen_rtx_POST_INC (Pmode, r0));
6142 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
6143 post_inc_ok);
6145 post_inc = NULL_RTX;
6147 break;
6149 post_inc_ok:
6150 mem_rtx = NULL_RTX;
6152 while (0);
6154 if (mem_rtx != NULL_RTX)
6155 goto addr_ok;
6157 if (offset_in_r0 == -1)
6159 emit_move_insn (r0, GEN_INT (offset));
6160 offset_in_r0 = offset;
6162 else if (offset != offset_in_r0)
6164 emit_move_insn (r0,
6165 gen_rtx_PLUS
6166 (Pmode, r0,
6167 GEN_INT (offset - offset_in_r0)));
6168 offset_in_r0 += offset - offset_in_r0;
6171 if (post_inc != NULL_RTX)
6173 if (! sp_in_r0)
6175 emit_move_insn (r0,
6176 gen_rtx_PLUS
6177 (Pmode, r0, stack_pointer_rtx));
6178 sp_in_r0 = 1;
6181 mem_rtx = post_inc;
6183 offset_in_r0 += GET_MODE_SIZE (mode);
6185 else if (sp_in_r0)
6186 mem_rtx = gen_rtx_MEM (mode, r0);
6187 else
6188 mem_rtx = gen_rtx_MEM (mode,
6189 gen_rtx_PLUS (Pmode,
6190 stack_pointer_rtx,
6191 r0));
6193 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6194 || mem_rtx == post_inc);
6196 addr_ok:
6197 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6198 && mem_rtx != post_inc)
6200 insn = emit_move_insn (r0, mem_rtx);
6201 mem_rtx = r0;
6203 else if (TARGET_REGISTER_P (reg))
6205 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
6207 /* Give the scheduler a bit of freedom by using up to
6208 MAX_TEMPS registers in a round-robin fashion. */
6209 insn = emit_move_insn (tmp_reg, mem_rtx);
6210 mem_rtx = tmp_reg;
6211 if (*++tmp_pnt < 0)
6212 tmp_pnt = schedule.temps;
6215 insn = emit_move_insn (reg_rtx, mem_rtx);
6216 if (reg == PR_MEDIA_REG && sh_media_register_for_return () >= 0)
6217 /* This is dead, unless we return with a sibcall. */
6218 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6219 const0_rtx,
6220 REG_NOTES (insn));
6223 gcc_assert (entry->offset + offset_base == d + d_rounding);
6225 else /* ! TARGET_SH5 */
6227 save_size = 0;
6228 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6229 pop (PR_REG);
6230 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
6232 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
6234 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
6235 && hard_regs_intersect_p (&live_regs_mask,
6236 &reg_class_contents[DF_REGS]))
6237 fpscr_deferred = 1;
6238 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
6239 pop (j);
6240 if (j == FIRST_FP_REG && fpscr_deferred)
6241 pop (FPSCR_REG);
6245 if (target_flags != save_flags && ! current_function_interrupt)
6246 emit_insn (gen_toggle_sz ());
6247 target_flags = save_flags;
6249 output_stack_adjust (current_function_pretend_args_size
6250 + save_size + d_rounding
6251 + current_function_args_info.stack_regs * 8,
6252 stack_pointer_rtx, e, NULL);
6254 if (current_function_calls_eh_return)
6255 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
6256 EH_RETURN_STACKADJ_RTX));
6258 /* Switch back to the normal stack if necessary. */
6259 if (sp_switch)
6260 emit_insn (gen_sp_switch_2 ());
6262 /* Tell flow the insn that pops PR isn't dead. */
6263 /* PR_REG will never be live in SHmedia mode, and we don't need to
6264 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6265 by the return pattern. */
6266 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6267 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
6270 static int sh_need_epilogue_known = 0;
6273 sh_need_epilogue (void)
6275 if (! sh_need_epilogue_known)
6277 rtx epilogue;
6279 start_sequence ();
6280 sh_expand_epilogue (0);
6281 epilogue = get_insns ();
6282 end_sequence ();
6283 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
6285 return sh_need_epilogue_known > 0;
6288 /* Emit code to change the current function's return address to RA.
6289 TEMP is available as a scratch register, if needed. */
6291 void
6292 sh_set_return_address (rtx ra, rtx tmp)
6294 HARD_REG_SET live_regs_mask;
6295 int d;
6296 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6297 int pr_offset;
6299 d = calc_live_regs (&live_regs_mask);
6301 /* If pr_reg isn't live, we can set it (or the register given in
6302 sh_media_register_for_return) directly. */
6303 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6305 rtx rr;
6307 if (TARGET_SHMEDIA)
6309 int rr_regno = sh_media_register_for_return ();
6311 if (rr_regno < 0)
6312 rr_regno = pr_reg;
6314 rr = gen_rtx_REG (DImode, rr_regno);
6316 else
6317 rr = gen_rtx_REG (SImode, pr_reg);
6319 emit_insn (GEN_MOV (rr, ra));
6320 /* Tell flow the register for return isn't dead. */
6321 emit_insn (gen_rtx_USE (VOIDmode, rr));
6322 return;
6325 if (TARGET_SH5)
6327 int offset;
6328 save_schedule schedule;
6329 save_entry *entry;
6331 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
6332 offset = entry[1].offset;
6333 for (; entry->mode != VOIDmode; entry--)
6334 if (entry->reg == pr_reg)
6335 goto found;
6337 /* We can't find the pr register. */
6338 gcc_unreachable ();
6340 found:
6341 offset = entry->offset - offset;
6342 pr_offset = (rounded_frame_size (d) + offset
6343 + SHMEDIA_REGS_STACK_ADJUST ());
6345 else
6346 pr_offset = rounded_frame_size (d);
6348 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6349 emit_insn (GEN_ADD3 (tmp, tmp, frame_pointer_rtx));
6351 tmp = gen_rtx_MEM (Pmode, tmp);
6352 emit_insn (GEN_MOV (tmp, ra));
6355 /* Clear variables at function end. */
6357 static void
6358 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6359 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6361 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
6362 sh_need_epilogue_known = 0;
6363 sp_switch = NULL_RTX;
6366 static rtx
6367 sh_builtin_saveregs (void)
6369 /* First unnamed integer register. */
6370 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
6371 /* Number of integer registers we need to save. */
6372 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6373 /* First unnamed SFmode float reg. */
6374 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6375 /* Number of SFmode float regs to save. */
6376 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6377 rtx regbuf, fpregs;
6378 int bufsize, regno;
6379 HOST_WIDE_INT alias_set;
6381 if (TARGET_SH5)
6383 if (n_intregs)
6385 int pushregs = n_intregs;
6387 while (pushregs < NPARM_REGS (SImode) - 1
6388 && (CALL_COOKIE_INT_REG_GET
6389 (current_function_args_info.call_cookie,
6390 NPARM_REGS (SImode) - pushregs)
6391 == 1))
6393 current_function_args_info.call_cookie
6394 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6395 - pushregs, 1);
6396 pushregs++;
6399 if (pushregs == NPARM_REGS (SImode))
6400 current_function_args_info.call_cookie
6401 |= (CALL_COOKIE_INT_REG (0, 1)
6402 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6403 else
6404 current_function_args_info.call_cookie
6405 |= CALL_COOKIE_STACKSEQ (pushregs);
6407 current_function_pretend_args_size += 8 * n_intregs;
6409 if (TARGET_SHCOMPACT)
6410 return const0_rtx;
6413 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
6415 error ("__builtin_saveregs not supported by this subtarget");
6416 return const0_rtx;
6419 if (TARGET_SHMEDIA)
6420 n_floatregs = 0;
6422 /* Allocate block of memory for the regs. */
6423 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6424 Or can assign_stack_local accept a 0 SIZE argument? */
6425 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
6427 if (TARGET_SHMEDIA)
6428 regbuf = gen_rtx_MEM (BLKmode,
6429 gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
6430 else if (n_floatregs & 1)
6432 rtx addr;
6434 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6435 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
6436 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6437 regbuf = change_address (regbuf, BLKmode, addr);
6439 else
6440 regbuf = assign_stack_local (BLKmode, bufsize, 0);
6441 alias_set = get_varargs_alias_set ();
6442 set_mem_alias_set (regbuf, alias_set);
6444 /* Save int args.
6445 This is optimized to only save the regs that are necessary. Explicitly
6446 named args need not be saved. */
6447 if (n_intregs > 0)
6448 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
6449 adjust_address (regbuf, BLKmode,
6450 n_floatregs * UNITS_PER_WORD),
6451 n_intregs);
6453 if (TARGET_SHMEDIA)
6454 /* Return the address of the regbuf. */
6455 return XEXP (regbuf, 0);
6457 /* Save float args.
6458 This is optimized to only save the regs that are necessary. Explicitly
6459 named args need not be saved.
6460 We explicitly build a pointer to the buffer because it halves the insn
6461 count when not optimizing (otherwise the pointer is built for each reg
6462 saved).
6463 We emit the moves in reverse order so that we can use predecrement. */
6465 fpregs = copy_to_mode_reg (Pmode,
6466 plus_constant (XEXP (regbuf, 0),
6467 n_floatregs * UNITS_PER_WORD));
6468 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
6470 rtx mem;
6471 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
6473 emit_insn (gen_addsi3 (fpregs, fpregs,
6474 GEN_INT (-2 * UNITS_PER_WORD)));
6475 mem = gen_rtx_MEM (DFmode, fpregs);
6476 set_mem_alias_set (mem, alias_set);
6477 emit_move_insn (mem,
6478 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
6480 regno = first_floatreg;
6481 if (regno & 1)
6483 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6484 mem = gen_rtx_MEM (SFmode, fpregs);
6485 set_mem_alias_set (mem, alias_set);
6486 emit_move_insn (mem,
6487 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
6488 - (TARGET_LITTLE_ENDIAN != 0)));
6491 else
6492 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
6494 rtx mem;
6496 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6497 mem = gen_rtx_MEM (SFmode, fpregs);
6498 set_mem_alias_set (mem, alias_set);
6499 emit_move_insn (mem,
6500 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
6503 /* Return the address of the regbuf. */
6504 return XEXP (regbuf, 0);
6507 /* Define the `__builtin_va_list' type for the ABI. */
6509 static tree
6510 sh_build_builtin_va_list (void)
6512 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6513 tree record;
6515 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
6516 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6517 return ptr_type_node;
6519 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6521 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
6522 ptr_type_node);
6523 f_next_o_limit = build_decl (FIELD_DECL,
6524 get_identifier ("__va_next_o_limit"),
6525 ptr_type_node);
6526 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
6527 ptr_type_node);
6528 f_next_fp_limit = build_decl (FIELD_DECL,
6529 get_identifier ("__va_next_fp_limit"),
6530 ptr_type_node);
6531 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
6532 ptr_type_node);
6534 DECL_FIELD_CONTEXT (f_next_o) = record;
6535 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
6536 DECL_FIELD_CONTEXT (f_next_fp) = record;
6537 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
6538 DECL_FIELD_CONTEXT (f_next_stack) = record;
6540 TYPE_FIELDS (record) = f_next_o;
6541 TREE_CHAIN (f_next_o) = f_next_o_limit;
6542 TREE_CHAIN (f_next_o_limit) = f_next_fp;
6543 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
6544 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
6546 layout_type (record);
6548 return record;
6551 /* Implement `va_start' for varargs and stdarg. */
6553 void
6554 sh_va_start (tree valist, rtx nextarg)
6556 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6557 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6558 tree t, u;
6559 int nfp, nint;
6561 if (TARGET_SH5)
6563 expand_builtin_saveregs ();
6564 std_expand_builtin_va_start (valist, nextarg);
6565 return;
6568 if ((! TARGET_SH2E && ! TARGET_SH4)
6569 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6571 std_expand_builtin_va_start (valist, nextarg);
6572 return;
6575 f_next_o = TYPE_FIELDS (va_list_type_node);
6576 f_next_o_limit = TREE_CHAIN (f_next_o);
6577 f_next_fp = TREE_CHAIN (f_next_o_limit);
6578 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6579 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6581 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6582 NULL_TREE);
6583 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6584 valist, f_next_o_limit, NULL_TREE);
6585 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
6586 NULL_TREE);
6587 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6588 valist, f_next_fp_limit, NULL_TREE);
6589 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6590 valist, f_next_stack, NULL_TREE);
6592 /* Call __builtin_saveregs. */
6593 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
6594 t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
6595 TREE_SIDE_EFFECTS (t) = 1;
6596 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6598 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
6599 if (nfp < 8)
6600 nfp = 8 - nfp;
6601 else
6602 nfp = 0;
6603 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6604 build_int_cst (NULL_TREE, UNITS_PER_WORD * nfp)));
6605 t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
6606 TREE_SIDE_EFFECTS (t) = 1;
6607 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6609 t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
6610 TREE_SIDE_EFFECTS (t) = 1;
6611 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6613 nint = current_function_args_info.arg_count[SH_ARG_INT];
6614 if (nint < 4)
6615 nint = 4 - nint;
6616 else
6617 nint = 0;
6618 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6619 build_int_cst (NULL_TREE, UNITS_PER_WORD * nint)));
6620 t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
6621 TREE_SIDE_EFFECTS (t) = 1;
6622 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
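/* Illustrative example: for 'int f (int a, ...)' with no named float
   arguments (nfp and nint read 0 and 1 above, UNITS_PER_WORD == 4), the
   register save buffer starts with the 8 float registers followed by the
   unnamed int registers, so the code above yields next_fp == buf,
   next_fp_limit == next_o == buf + 32 and next_o_limit == buf + 44
   (room for r5..r7).  */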
6624 u = make_tree (ptr_type_node, nextarg);
6625 t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
6626 TREE_SIDE_EFFECTS (t) = 1;
6627 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6630 /* Implement `va_arg'. */
6632 static tree
6633 sh_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p,
6634 tree *post_p ATTRIBUTE_UNUSED)
6636 HOST_WIDE_INT size, rsize;
6637 tree tmp, pptr_type_node;
6638 tree addr, lab_over = NULL, result = NULL;
6639 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
6641 if (pass_by_ref)
6642 type = build_pointer_type (type);
6644 size = int_size_in_bytes (type);
6645 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6646 pptr_type_node = build_pointer_type (ptr_type_node);
6648 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
6649 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
6651 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6652 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6653 int pass_as_float;
6654 tree lab_false;
6656 f_next_o = TYPE_FIELDS (va_list_type_node);
6657 f_next_o_limit = TREE_CHAIN (f_next_o);
6658 f_next_fp = TREE_CHAIN (f_next_o_limit);
6659 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6660 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6662 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6663 NULL_TREE);
6664 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6665 valist, f_next_o_limit, NULL_TREE);
6666 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
6667 valist, f_next_fp, NULL_TREE);
6668 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6669 valist, f_next_fp_limit, NULL_TREE);
6670 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6671 valist, f_next_stack, NULL_TREE);
6673 /* Structures with a single member with a distinct mode are passed
6674 like their member. This is relevant if the latter has a REAL_TYPE
6675 or COMPLEX_TYPE type. */
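/* E.g. (illustrative) 'struct { double d; }' is handled here exactly
   like a plain double, so it is fetched from the float register save
   area rather than the integer one.  */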
6676 if (TREE_CODE (type) == RECORD_TYPE
6677 && TYPE_FIELDS (type)
6678 && TREE_CODE (TYPE_FIELDS (type)) == FIELD_DECL
6679 && (TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == REAL_TYPE
6680 || TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == COMPLEX_TYPE)
6681 && TREE_CHAIN (TYPE_FIELDS (type)) == NULL_TREE)
6682 type = TREE_TYPE (TYPE_FIELDS (type));
6684 if (TARGET_SH4)
6686 pass_as_float = ((TREE_CODE (type) == REAL_TYPE && size <= 8)
6687 || (TREE_CODE (type) == COMPLEX_TYPE
6688 && TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
6689 && size <= 16));
6691 else
6693 pass_as_float = (TREE_CODE (type) == REAL_TYPE && size == 4);
6696 addr = create_tmp_var (pptr_type_node, NULL);
6697 lab_false = create_artificial_label ();
6698 lab_over = create_artificial_label ();
6700 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
6702 if (pass_as_float)
6704 int first_floatreg
6705 = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6706 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6708 tmp = build (GE_EXPR, boolean_type_node, next_fp, next_fp_limit);
6709 tmp = build (COND_EXPR, void_type_node, tmp,
6710 build (GOTO_EXPR, void_type_node, lab_false),
6711 NULL);
6712 gimplify_and_add (tmp, pre_p);
6714 if (TYPE_ALIGN (type) > BITS_PER_WORD
6715 || (((TREE_CODE (type) == REAL_TYPE && size == 8) || size == 16)
6716 && (n_floatregs & 1)))
6718 tmp = fold_convert (ptr_type_node, size_int (UNITS_PER_WORD));
6719 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp, tmp);
6720 tmp = build (PLUS_EXPR, ptr_type_node, next_fp, tmp);
6721 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp, tmp);
6722 gimplify_and_add (tmp, pre_p);
6725 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
6726 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6727 gimplify_and_add (tmp, pre_p);
6729 #ifdef FUNCTION_ARG_SCmode_WART
6730 if (TYPE_MODE (type) == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
6732 tree subtype = TREE_TYPE (type);
6733 tree real, imag;
6735 imag = std_gimplify_va_arg_expr (valist, subtype, pre_p, NULL);
6736 imag = get_initialized_tmp_var (imag, pre_p, NULL);
6738 real = std_gimplify_va_arg_expr (valist, subtype, pre_p, NULL);
6739 real = get_initialized_tmp_var (real, pre_p, NULL);
6741 result = build (COMPLEX_EXPR, type, real, imag);
6742 result = get_initialized_tmp_var (result, pre_p, NULL);
6744 #endif /* FUNCTION_ARG_SCmode_WART */
6746 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6747 gimplify_and_add (tmp, pre_p);
6749 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6750 gimplify_and_add (tmp, pre_p);
6752 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6753 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6754 gimplify_and_add (tmp, pre_p);
6756 else
6758 tmp = fold_convert (ptr_type_node, size_int (rsize));
6759 tmp = build (PLUS_EXPR, ptr_type_node, next_o, tmp);
6760 tmp = build (GT_EXPR, boolean_type_node, tmp, next_o_limit);
6761 tmp = build (COND_EXPR, void_type_node, tmp,
6762 build (GOTO_EXPR, void_type_node, lab_false),
6763 NULL);
6764 gimplify_and_add (tmp, pre_p);
6766 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
6767 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6768 gimplify_and_add (tmp, pre_p);
6770 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6771 gimplify_and_add (tmp, pre_p);
6773 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6774 gimplify_and_add (tmp, pre_p);
6776 if (size > 4 && ! TARGET_SH4)
6778 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
6779 gimplify_and_add (tmp, pre_p);
6782 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6783 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6784 gimplify_and_add (tmp, pre_p);
6787 if (!result)
6789 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6790 gimplify_and_add (tmp, pre_p);
6794 /* ??? In va-sh.h, there had been code to make values larger than
6795 size 8 indirect. This does not match the FUNCTION_ARG macros. */
6797 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
6798 if (result)
6800 tmp = build (MODIFY_EXPR, void_type_node, result, tmp);
6801 gimplify_and_add (tmp, pre_p);
6803 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6804 gimplify_and_add (tmp, pre_p);
6806 else
6807 result = tmp;
6809 if (pass_by_ref)
6810 result = build_fold_indirect_ref (result);
6812 return result;
6815 bool
6816 sh_promote_prototypes (tree type)
6818 if (TARGET_HITACHI)
6819 return 0;
6820 if (! type)
6821 return 1;
6822 return ! sh_attr_renesas_p (type);
6825 /* Whether an argument must be passed by reference. On SHcompact, we
6826 pretend arguments wider than 32 bits that would have been passed in
6827 registers are passed by reference, so that an SHmedia trampoline
6828 loads them into the full 64-bit registers. */
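/* An illustrative case: an 8-byte struct argument that still has an
   integer argument register left, is not forced onto the stack and would
   not be split between registers and stack comes back as 8 from
   shcompact_byref below, i.e. it is passed by reference.  */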
6830 static int
6831 shcompact_byref (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6832 tree type, bool named)
6834 unsigned HOST_WIDE_INT size;
6836 if (type)
6837 size = int_size_in_bytes (type);
6838 else
6839 size = GET_MODE_SIZE (mode);
6841 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
6842 && (!named
6843 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
6844 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
6845 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
6846 && size > 4
6847 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
6848 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
6849 return size;
6850 else
6851 return 0;
6854 static bool
6855 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6856 tree type, bool named)
6858 if (targetm.calls.must_pass_in_stack (mode, type))
6859 return true;
6861 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
6862 wants to know about pass-by-reference semantics for incoming
6863 arguments. */
6864 if (! cum)
6865 return false;
6867 if (TARGET_SHCOMPACT)
6869 cum->byref = shcompact_byref (cum, mode, type, named);
6870 return cum->byref != 0;
6873 return false;
6876 static bool
6877 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6878 tree type, bool named ATTRIBUTE_UNUSED)
6880 /* ??? How can it possibly be correct to return true only on the
6881 caller side of the equation? Is there someplace else in the
6882 sh backend that's magically producing the copies? */
6883 return (cum->outgoing
6884 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
6885 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
6888 static int
6889 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6890 tree type, bool named ATTRIBUTE_UNUSED)
6892 int words = 0;
6894 if (!TARGET_SH5
6895 && PASS_IN_REG_P (*cum, mode, type)
6896 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
6897 && (ROUND_REG (*cum, mode)
6898 + (mode != BLKmode
6899 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
6900 : ROUND_ADVANCE (int_size_in_bytes (type)))
6901 > NPARM_REGS (mode)))
6902 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
6904 else if (!TARGET_SHCOMPACT
6905 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
6906 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
6908 return words * UNITS_PER_WORD;
6912 /* Define where to put the arguments to a function.
6913 Value is zero to push the argument on the stack,
6914 or a hard register in which to store the argument.
6916 MODE is the argument's machine mode.
6917 TYPE is the data type of the argument (as a tree).
6918 This is null for libcalls where that information may
6919 not be available.
6920 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6921 the preceding args and about the function being called.
6922 NAMED is nonzero if this argument is a named parameter
6923 (otherwise it is an extra parameter matching an ellipsis).
6925 On SH the first args are normally in registers
6926 and the rest are pushed. Any arg that starts within the first
6927 NPARM_REGS words is at least partially passed in a register unless
6928 its data type forbids. */
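/* Illustrative summary, assuming the default (non-Renesas) SH4 ABI: the
   first four SImode arguments are returned as r4..r7 and the first eight
   SFmode arguments as fr4..fr11; anything beyond that, or whose type
   forbids register passing, gets 0 and is pushed.  */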
6932 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
6933 tree type, int named)
6935 if (! TARGET_SH5 && mode == VOIDmode)
6936 return GEN_INT (ca->renesas_abi ? 1 : 0);
6938 if (! TARGET_SH5
6939 && PASS_IN_REG_P (*ca, mode, type)
6940 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
6942 int regno;
6944 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
6945 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
6947 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
6948 gen_rtx_REG (SFmode,
6949 BASE_ARG_REG (mode)
6950 + (ROUND_REG (*ca, mode) ^ 1)),
6951 const0_rtx);
6952 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
6953 gen_rtx_REG (SFmode,
6954 BASE_ARG_REG (mode)
6955 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
6956 GEN_INT (4));
6957 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
6960 /* If the alignment of a DF value causes an SF register to be
6961 skipped, we will use that skipped register for the next SF
6962 value. */
6963 if ((TARGET_HITACHI || ca->renesas_abi)
6964 && ca->free_single_fp_reg
6965 && mode == SFmode)
6966 return gen_rtx_REG (mode, ca->free_single_fp_reg);
6968 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
6969 ^ (mode == SFmode && TARGET_SH4
6970 && TARGET_LITTLE_ENDIAN != 0
6971 && ! TARGET_HITACHI && ! ca->renesas_abi);
6972 return gen_rtx_REG (mode, regno);
6976 if (TARGET_SH5)
6978 if (mode == VOIDmode && TARGET_SHCOMPACT)
6979 return GEN_INT (ca->call_cookie);
6981 /* The following test assumes unnamed arguments are promoted to
6982 DFmode. */
6983 if (mode == SFmode && ca->free_single_fp_reg)
6984 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
6986 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
6987 && (named || ! ca->prototype_p)
6988 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
6990 if (! ca->prototype_p && TARGET_SHMEDIA)
6991 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
6993 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
6994 FIRST_FP_PARM_REG
6995 + ca->arg_count[(int) SH_ARG_FLOAT]);
6998 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
6999 && (! TARGET_SHCOMPACT
7000 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
7001 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
7002 type, named))))
7004 return gen_rtx_REG (mode, (FIRST_PARM_REG
7005 + ca->arg_count[(int) SH_ARG_INT]));
7008 return 0;
7011 return 0;
7014 /* Update the data in CUM to advance over an argument
7015 of mode MODE and data type TYPE.
7016 (TYPE is null for libcalls where that information may not be
7017 available.) */
7019 void
7020 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7021 tree type, int named)
7023 if (ca->force_mem)
7024 ca->force_mem = 0;
7025 else if (TARGET_SH5)
7027 tree type2 = (ca->byref && type
7028 ? TREE_TYPE (type)
7029 : type);
7030 enum machine_mode mode2 = (ca->byref && type
7031 ? TYPE_MODE (type2)
7032 : mode);
7033 int dwords = ((ca->byref
7034 ? ca->byref
7035 : mode2 == BLKmode
7036 ? int_size_in_bytes (type2)
7037 : GET_MODE_SIZE (mode2)) + 7) / 8;
7038 int numregs = MIN (dwords, NPARM_REGS (SImode)
7039 - ca->arg_count[(int) SH_ARG_INT]);
7041 if (numregs)
7043 ca->arg_count[(int) SH_ARG_INT] += numregs;
7044 if (TARGET_SHCOMPACT
7045 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
7047 ca->call_cookie
7048 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7049 - numregs, 1);
7050 /* N.B. We want this also for outgoing. */
7051 ca->stack_regs += numregs;
7053 else if (ca->byref)
7055 if (! ca->outgoing)
7056 ca->stack_regs += numregs;
7057 ca->byref_regs += numregs;
7058 ca->byref = 0;
7060 ca->call_cookie
7061 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7062 - numregs, 2);
7063 while (--numregs);
7064 ca->call_cookie
7065 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7066 - 1, 1);
7068 else if (dwords > numregs)
7070 int pushregs = numregs;
7072 if (TARGET_SHCOMPACT)
7073 ca->stack_regs += numregs;
7074 while (pushregs < NPARM_REGS (SImode) - 1
7075 && (CALL_COOKIE_INT_REG_GET
7076 (ca->call_cookie,
7077 NPARM_REGS (SImode) - pushregs)
7078 == 1))
7080 ca->call_cookie
7081 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7082 - pushregs, 1);
7083 pushregs++;
7085 if (numregs == NPARM_REGS (SImode))
7086 ca->call_cookie
7087 |= CALL_COOKIE_INT_REG (0, 1)
7088 | CALL_COOKIE_STACKSEQ (numregs - 1);
7089 else
7090 ca->call_cookie
7091 |= CALL_COOKIE_STACKSEQ (numregs);
7094 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
7095 && (named || ! ca->prototype_p))
7097 if (mode2 == SFmode && ca->free_single_fp_reg)
7098 ca->free_single_fp_reg = 0;
7099 else if (ca->arg_count[(int) SH_ARG_FLOAT]
7100 < NPARM_REGS (SFmode))
7102 int numfpregs
7103 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
7104 NPARM_REGS (SFmode)
7105 - ca->arg_count[(int) SH_ARG_FLOAT]);
7107 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
7109 if (TARGET_SHCOMPACT && ! ca->prototype_p)
7111 if (ca->outgoing && numregs > 0)
7114 ca->call_cookie
7115 |= (CALL_COOKIE_INT_REG
7116 (ca->arg_count[(int) SH_ARG_INT]
7117 - numregs + ((numfpregs - 2) / 2),
7118 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
7119 - numfpregs) / 2));
7121 while (numfpregs -= 2);
7123 else if (mode2 == SFmode && (named)
7124 && (ca->arg_count[(int) SH_ARG_FLOAT]
7125 < NPARM_REGS (SFmode)))
7126 ca->free_single_fp_reg
7127 = FIRST_FP_PARM_REG - numfpregs
7128 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
7131 return;
7134 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
7136 /* Note that we've used the skipped register. */
7137 if (mode == SFmode && ca->free_single_fp_reg)
7139 ca->free_single_fp_reg = 0;
7140 return;
7142 /* When we have a DF after an SF, there's an SF register that gets
7143 skipped in order to align the DF value. We note this skipped
7144 register, because the next SF value will use it, and not the
7145 SF that follows the DF. */
7146 if (mode == DFmode
7147 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
7149 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
7150 + BASE_ARG_REG (mode));
7154 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
7155 || PASS_IN_REG_P (*ca, mode, type))
7156 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
7157 = (ROUND_REG (*ca, mode)
7158 + (mode == BLKmode
7159 ? ROUND_ADVANCE (int_size_in_bytes (type))
7160 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
7163 /* The Renesas calling convention doesn't quite fit into this scheme since
7164 the address is passed like an invisible argument, but one that is always
7165 passed in memory. */
7166 static rtx
7167 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
7169 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7170 return 0;
7171 return gen_rtx_REG (Pmode, 2);
7174 /* Worker function for TARGET_RETURN_IN_MEMORY. */
7176 static bool
7177 sh_return_in_memory (tree type, tree fndecl)
7179 if (TARGET_SH5)
7181 if (TYPE_MODE (type) == BLKmode)
7182 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
7183 else
7184 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
7186 else
7188 return (TYPE_MODE (type) == BLKmode
7189 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7190 && TREE_CODE (type) == RECORD_TYPE));
7194 /* We actually emit the code in sh_expand_prologue. We used to use
7195 a static variable to flag that we need to emit this code, but that
7196 doesn't work when inlining, when functions are deferred and then emitted
7197 later. Fortunately, we already have two flags that are part of struct
7198 function that tell if a function uses varargs or stdarg. */
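/* Small illustrative example: for 'int f (int a, ...)' under the default
   ABI the single named argument uses one of the four SImode argument
   registers, so anon_parm_regs is 3 and *pretend_arg_size becomes 12,
   i.e. r5..r7 are spilled as if the caller had pushed them.  */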
7199 static void
7200 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
7201 enum machine_mode mode,
7202 tree type,
7203 int *pretend_arg_size,
7204 int second_time ATTRIBUTE_UNUSED)
7206 gcc_assert (current_function_stdarg);
7207 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7209 int named_parm_regs, anon_parm_regs;
7211 named_parm_regs = (ROUND_REG (*ca, mode)
7212 + (mode == BLKmode
7213 ? ROUND_ADVANCE (int_size_in_bytes (type))
7214 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
7215 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
7216 if (anon_parm_regs > 0)
7217 *pretend_arg_size = anon_parm_regs * 4;
7221 static bool
7222 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
7224 return TARGET_SH5;
7227 static bool
7228 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
7230 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
7234 /* Define the offset between two registers, one to be eliminated, and
7235 the other its replacement, at the start of a routine. */
7238 initial_elimination_offset (int from, int to)
7240 int regs_saved;
7241 int regs_saved_rounding = 0;
7242 int total_saved_regs_space;
7243 int total_auto_space;
7244 int save_flags = target_flags;
7245 int copy_flags;
7246 HARD_REG_SET live_regs_mask;
7248 shmedia_space_reserved_for_target_registers = false;
7249 regs_saved = calc_live_regs (&live_regs_mask);
7250 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
7252 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
7254 shmedia_space_reserved_for_target_registers = true;
7255 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
7258 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
7259 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7260 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
7262 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
7263 copy_flags = target_flags;
7264 target_flags = save_flags;
7266 total_saved_regs_space = regs_saved + regs_saved_rounding;
7268 if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
7269 return total_saved_regs_space + total_auto_space
7270 + current_function_args_info.byref_regs * 8;
7272 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7273 return total_saved_regs_space + total_auto_space
7274 + current_function_args_info.byref_regs * 8;
7276 /* Initial gap between fp and sp is 0. */
7277 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7278 return 0;
7280 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
7281 && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM));
7282 if (TARGET_SH5)
7284 int n = total_saved_regs_space;
7285 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7286 save_schedule schedule;
7287 save_entry *entry;
7289 n += total_auto_space;
7291 /* If it wasn't saved, there's not much we can do. */
7292 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7293 return n;
7295 target_flags = copy_flags;
7297 sh5_schedule_saves (&live_regs_mask, &schedule, n);
7298 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7299 if (entry->reg == pr_reg)
7301 target_flags = save_flags;
7302 return entry->offset;
7304 gcc_unreachable ();
7306 else
7307 return total_auto_space;
7310 /* Handle machine specific pragmas to be semi-compatible with Renesas
7311 compiler. */
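/* Illustrative usage of the pragmas handled below:

     #pragma interrupt
     void isr (void);

   marks the next function as an interrupt handler, roughly equivalent to
   __attribute__ ((interrupt_handler)) via sh_insert_attributes.  */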
7313 void
7314 sh_pr_interrupt (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7316 pragma_interrupt = 1;
7319 void
7320 sh_pr_trapa (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7322 pragma_interrupt = pragma_trapa = 1;
7325 void
7326 sh_pr_nosave_low_regs (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7328 pragma_nosave_low_regs = 1;
7331 /* Generate an 'interrupt_handler' attribute for decls. */
7333 static void
7334 sh_insert_attributes (tree node, tree *attributes)
7336 if (! pragma_interrupt
7337 || TREE_CODE (node) != FUNCTION_DECL)
7338 return;
7340 /* We are only interested in fields. */
7341 if (!DECL_P (node))
7342 return;
7344 /* Add an 'interrupt_handler' attribute. */
7345 * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
7347 return;
7350 /* Supported attributes:
7352 interrupt_handler -- specifies this function is an interrupt handler.
7354 sp_switch -- specifies an alternate stack for an interrupt handler
7355 to run on.
7357 trap_exit -- use a trapa to exit an interrupt function instead of
7358 an rte instruction.
7360 renesas -- use Renesas calling/layout conventions (functions and
7361 structures).
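/* Illustrative declarations using these attributes (note that in this
   implementation sp_switch and trap_exit are only accepted for functions
   covered by #pragma interrupt, see the handlers below; "alt_stack" and
   the trapa number 11 are made-up example values):

     void isr (void) __attribute__ ((interrupt_handler));
     void isr2 (void) __attribute__ ((interrupt_handler,
                                      sp_switch ("alt_stack"),
                                      trap_exit (11)));  */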
7365 const struct attribute_spec sh_attribute_table[] =
7367 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
7368 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7369 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
7370 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
7371 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
7372 #ifdef SYMBIAN
7373 /* Symbian support adds two new attributes:
7374 dllexport - for exporting a function/variable that will live in a dll
7375 dllimport - for importing a function/variable from a dll
7377 Microsoft allows multiple declspecs in one __declspec, separating
7378 them with spaces. We do NOT support this. Instead, use __declspec
7379 multiple times. */
7380 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7381 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7382 #endif
7383 { NULL, 0, 0, false, false, false, NULL }
7386 /* Handle an "interrupt_handler" attribute; arguments as in
7387 struct attribute_spec.handler. */
7388 static tree
7389 sh_handle_interrupt_handler_attribute (tree *node, tree name,
7390 tree args ATTRIBUTE_UNUSED,
7391 int flags ATTRIBUTE_UNUSED,
7392 bool *no_add_attrs)
7394 if (TREE_CODE (*node) != FUNCTION_DECL)
7396 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7397 IDENTIFIER_POINTER (name));
7398 *no_add_attrs = true;
7400 else if (TARGET_SHCOMPACT)
7402 error ("attribute interrupt_handler is not compatible with -m5-compact");
7403 *no_add_attrs = true;
7406 return NULL_TREE;
7409 /* Handle an "sp_switch" attribute; arguments as in
7410 struct attribute_spec.handler. */
7411 static tree
7412 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
7413 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7415 if (TREE_CODE (*node) != FUNCTION_DECL)
7417 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7418 IDENTIFIER_POINTER (name));
7419 *no_add_attrs = true;
7421 else if (!pragma_interrupt)
7423 /* The sp_switch attribute only has meaning for interrupt functions. */
7424 warning (OPT_Wattributes, "%qs attribute only applies to "
7425 "interrupt functions", IDENTIFIER_POINTER (name));
7426 *no_add_attrs = true;
7428 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
7430 /* The argument must be a constant string. */
7431 warning (OPT_Wattributes, "%qs attribute argument not a string constant",
7432 IDENTIFIER_POINTER (name));
7433 *no_add_attrs = true;
7435 else
7437 const char *s = ggc_strdup (TREE_STRING_POINTER (TREE_VALUE (args)));
7438 sp_switch = gen_rtx_SYMBOL_REF (VOIDmode, s);
7441 return NULL_TREE;
7444 /* Handle a "trap_exit" attribute; arguments as in
7445 struct attribute_spec.handler. */
7446 static tree
7447 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
7448 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7450 if (TREE_CODE (*node) != FUNCTION_DECL)
7452 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7453 IDENTIFIER_POINTER (name));
7454 *no_add_attrs = true;
7456 else if (!pragma_interrupt)
7458 /* The trap_exit attribute only has meaning for interrupt functions. */
7459 warning (OPT_Wattributes, "%qs attribute only applies to "
7460 "interrupt functions", IDENTIFIER_POINTER (name));
7461 *no_add_attrs = true;
7463 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
7465 /* The argument must be a constant integer. */
7466 warning (OPT_Wattributes, "%qs attribute argument not an "
7467 "integer constant", IDENTIFIER_POINTER (name));
7468 *no_add_attrs = true;
7470 else
7472 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
7475 return NULL_TREE;
7478 static tree
7479 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
7480 tree name ATTRIBUTE_UNUSED,
7481 tree args ATTRIBUTE_UNUSED,
7482 int flags ATTRIBUTE_UNUSED,
7483 bool *no_add_attrs ATTRIBUTE_UNUSED)
7485 return NULL_TREE;
7488 /* True if __attribute__((renesas)) or -mrenesas. */
7490 sh_attr_renesas_p (tree td)
7492 if (TARGET_HITACHI)
7493 return 1;
7494 if (td == 0)
7495 return 0;
7496 if (DECL_P (td))
7497 td = TREE_TYPE (td);
7498 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
7499 != NULL_TREE);
7502 /* True if __attribute__((renesas)) or -mrenesas, for the current
7503 function. */
7505 sh_cfun_attr_renesas_p (void)
7507 return sh_attr_renesas_p (current_function_decl);
7511 sh_cfun_interrupt_handler_p (void)
7513 return (lookup_attribute ("interrupt_handler",
7514 DECL_ATTRIBUTES (current_function_decl))
7515 != NULL_TREE);
7518 /* Like default_pch_valid_p, but only check certain target_flags. */
7519 const char *
7520 sh_pch_valid_p (const void *data_p, size_t len)
7522 #ifdef TARGET_OPTIONS
7523 /* ??? We have a copy of this in toplev.c, but it is static. */
7524 static const struct
7526 const char *const prefix;
7527 const char **const variable;
7528 const char *const description;
7529 const char *const value;
7531 target_options[] = TARGET_OPTIONS;
7532 #endif
7534 const char *data = (const char *)data_p;
7535 const char *flag_that_differs = NULL;
7536 size_t i;
7537 int old_flags;
7539 /* -fpic and -fpie also usually make a PCH invalid. */
7540 if (data[0] != flag_pic)
7541 return _("created and used with different settings of -fpic");
7542 if (data[1] != flag_pie)
7543 return _("created and used with different settings of -fpie");
7544 data += 2;
7546 /* Check target_flags. */
7547 memcpy (&old_flags, data, sizeof (target_flags));
7548 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
7549 | MASK_SH_E | MASK_HARD_SH4
7550 | MASK_FPU_SINGLE | MASK_SH4))
7551 return _("created and used with different architectures / ABIs");
7552 if ((old_flags ^ target_flags) & MASK_HITACHI)
7553 return _("created and used with different ABIs");
7554 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
7555 return _("created and used with different endianness");
7557 data += sizeof (target_flags);
7558 len -= sizeof (target_flags);
7560 /* Check string options. */
7561 #ifdef TARGET_OPTIONS
7562 for (i = 0; i < ARRAY_SIZE (target_options); i++)
7564 const char *str = *target_options[i].variable;
7565 size_t l;
7566 if (! str)
7567 str = "";
7568 l = strlen (str) + 1;
7569 if (len < l || memcmp (data, str, l) != 0)
7571 flag_that_differs = target_options[i].prefix;
7572 goto make_message;
7574 data += l;
7575 len -= l;
7577 #endif
7579 return NULL;
7581 make_message:
7583 char *r;
7584 asprintf (&r, _("created and used with differing settings of '-m%s'"),
7585 flag_that_differs);
7586 if (r == NULL)
7587 return _("out of memory");
7588 return r;
7592 /* Predicates used by the templates. */
7594 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
7595 Used only in general_movsrc_operand. */
7598 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7600 switch (REGNO (op))
7602 case PR_REG:
7603 case MACL_REG:
7604 case MACH_REG:
7605 return 1;
7607 return 0;
7610 /* Returns 1 if OP can be source of a simple move operation.
7611 Same as general_operand, but a LABEL_REF is valid, PRE_DEC is
7612 invalid, as are subregs of system registers. */
7615 general_movsrc_operand (rtx op, enum machine_mode mode)
7617 if (GET_CODE (op) == MEM)
7619 rtx inside = XEXP (op, 0);
7620 if (GET_CODE (inside) == CONST)
7621 inside = XEXP (inside, 0);
7623 if (GET_CODE (inside) == LABEL_REF)
7624 return 1;
7626 if (GET_CODE (inside) == PLUS
7627 && GET_CODE (XEXP (inside, 0)) == LABEL_REF
7628 && GET_CODE (XEXP (inside, 1)) == CONST_INT)
7629 return 1;
7631 /* Only post inc allowed. */
7632 if (GET_CODE (inside) == PRE_DEC)
7633 return 0;
7636 if ((mode == QImode || mode == HImode)
7637 && (GET_CODE (op) == SUBREG
7638 && GET_CODE (XEXP (op, 0)) == REG
7639 && system_reg_operand (XEXP (op, 0), mode)))
7640 return 0;
7642 if (TARGET_SHMEDIA
7643 && (GET_CODE (op) == PARALLEL || GET_CODE (op) == CONST_VECTOR)
7644 && sh_rep_vec (op, mode))
7645 return 1;
7646 if (TARGET_SHMEDIA && 1
7647 && GET_CODE (op) == SUBREG && GET_MODE (op) == mode
7648 && SUBREG_REG (op) == const0_rtx && subreg_lowpart_p (op))
7649 /* FIXME */ abort (); /* return 1; */
7650 return general_operand (op, mode);
7653 /* Returns 1 if OP can be a destination of a move.
7654 Same as general_operand, but no preinc allowed. */
7657 general_movdst_operand (rtx op, enum machine_mode mode)
7659 /* Only pre dec allowed. */
7660 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC)
7661 return 0;
7662 if (mode == DImode && TARGET_SHMEDIA && GET_CODE (op) == SUBREG
7663 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8
7664 && ! (high_life_started || reload_completed))
7665 return 0;
7667 return general_operand (op, mode);
7670 /* Returns 1 if OP is a normal arithmetic register. */
7673 arith_reg_operand (rtx op, enum machine_mode mode)
7675 if (register_operand (op, mode))
7677 int regno;
7679 if (GET_CODE (op) == REG)
7680 regno = REGNO (op);
7681 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7682 regno = REGNO (SUBREG_REG (op));
7683 else
7684 return 1;
7686 return (regno != T_REG && regno != PR_REG
7687 && ! TARGET_REGISTER_P (regno)
7688 && (regno != FPUL_REG || TARGET_SH4)
7689 && regno != MACH_REG && regno != MACL_REG);
7691 /* Allow a no-op sign extension - compare LOAD_EXTEND_OP.
7692 We allow SImode here, as not using an FP register is just a matter of
7693 proper register allocation. */
7694 if (TARGET_SHMEDIA
7695 && GET_MODE (op) == DImode && GET_CODE (op) == SIGN_EXTEND
7696 && GET_MODE (XEXP (op, 0)) == SImode
7697 && GET_CODE (XEXP (op, 0)) != SUBREG)
7698 return register_operand (XEXP (op, 0), VOIDmode);
7699 #if 0 /* Can't do this because of PROMOTE_MODE for unsigned vars. */
7700 if (GET_MODE (op) == SImode && GET_CODE (op) == SIGN_EXTEND
7701 && GET_MODE (XEXP (op, 0)) == HImode
7702 && GET_CODE (XEXP (op, 0)) == REG
7703 && REGNO (XEXP (op, 0)) <= LAST_GENERAL_REG)
7704 return register_operand (XEXP (op, 0), VOIDmode);
7705 #endif
7706 if (GET_MODE_CLASS (GET_MODE (op)) == MODE_VECTOR_INT
7707 && GET_CODE (op) == SUBREG
7708 && GET_MODE (SUBREG_REG (op)) == DImode
7709 && GET_CODE (SUBREG_REG (op)) == SIGN_EXTEND
7710 && GET_MODE (XEXP (SUBREG_REG (op), 0)) == SImode
7711 && GET_CODE (XEXP (SUBREG_REG (op), 0)) != SUBREG)
7712 return register_operand (XEXP (SUBREG_REG (op), 0), VOIDmode);
7713 return 0;
7716 /* Like above, but for DImode destinations: forbid paradoxical DImode subregs,
7717 because this would lead to missing sign extensions when truncating from
7718 DImode to SImode. */
7720 arith_reg_dest (rtx op, enum machine_mode mode)
7722 if (mode == DImode && GET_CODE (op) == SUBREG
7723 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8
7724 && TARGET_SHMEDIA)
7725 return 0;
7726 return arith_reg_operand (op, mode);
7729 /* Like arith_reg_operand, but for register source operands of narrow
7730 logical SHMEDIA operations: forbid subregs of DImode / TImode regs. */
7732 logical_reg_operand (rtx op, enum machine_mode mode)
7734 if (TARGET_SHMEDIA
7735 && GET_CODE (op) == SUBREG
7736 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) > 4
7737 && mode != DImode)
7738 return 0;
7739 return arith_reg_operand (op, mode);
7743 int_gpr_dest (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7745 enum machine_mode op_mode = GET_MODE (op);
7747 if (GET_MODE_CLASS (op_mode) != MODE_INT
7748 || GET_MODE_SIZE (op_mode) >= UNITS_PER_WORD)
7749 return 0;
7750 if (! reload_completed)
7751 return 0;
7752 return true_regnum (op) <= LAST_GENERAL_REG;
7756 fp_arith_reg_operand (rtx op, enum machine_mode mode)
7758 if (register_operand (op, mode))
7760 int regno;
7762 if (GET_CODE (op) == REG)
7763 regno = REGNO (op);
7764 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7765 regno = REGNO (SUBREG_REG (op));
7766 else
7767 return 1;
7769 return (regno >= FIRST_PSEUDO_REGISTER
7770 || FP_REGISTER_P (regno));
7772 return 0;
7776 fp_arith_reg_dest (rtx op, enum machine_mode mode)
7778 if (mode == DImode && GET_CODE (op) == SUBREG
7779 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8)
7780 return 0;
7781 return fp_arith_reg_operand (op, mode);
7784 /* Returns 1 if OP is a valid source operand for an arithmetic insn. */
7787 arith_operand (rtx op, enum machine_mode mode)
7789 if (arith_reg_operand (op, mode))
7790 return 1;
7792 if (TARGET_SHMEDIA)
7794 /* FIXME: We should be checking whether the CONST_INT fits in a
7795 CONST_OK_FOR_I16 here, but this causes reload_cse to crash when
7796 attempting to transform a sequence of two 64-bit sets of the
7797 same register from literal constants into a set and an add,
7798 when the difference is too wide for an add. */
7799 if (GET_CODE (op) == CONST_INT
7800 || EXTRA_CONSTRAINT_C16 (op))
7801 return 1;
7802 else if (GET_CODE (op) == TRUNCATE
7803 && ! system_reg_operand (XEXP (op, 0), VOIDmode)
7804 && (mode == VOIDmode || mode == GET_MODE (op))
7805 && (GET_MODE_SIZE (GET_MODE (op))
7806 < GET_MODE_SIZE (GET_MODE (XEXP (op, 0))))
7807 && (! FP_REGISTER_P (REGNO (XEXP (op, 0)))
7808 || GET_MODE_SIZE (GET_MODE (op)) == 4))
7809 return register_operand (XEXP (op, 0), VOIDmode);
7810 else
7811 return 0;
7813 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I08 (INTVAL (op)))
7814 return 1;
7816 return 0;
7819 /* Returns 1 if OP is a valid source operand for a compare insn. */
7822 arith_reg_or_0_operand (rtx op, enum machine_mode mode)
7824 if (arith_reg_operand (op, mode))
7825 return 1;
7827 if (EXTRA_CONSTRAINT_Z (op))
7828 return 1;
7830 return 0;
7833 /* Return 1 if OP is a valid source operand for xor. */
7836 xor_operand (rtx op, enum machine_mode mode)
7838 if (GET_CODE (op) == CONST_INT)
7839 return (TARGET_SHMEDIA
7840 ? (CONST_OK_FOR_I06 (INTVAL (op))
7841 || (no_new_pseudos && INTVAL (op) == 0xff))
7842 : CONST_OK_FOR_K08 (INTVAL (op)));
7843 if (TARGET_SHMEDIA
7844 && mode != DImode && GET_CODE (op) == SUBREG
7845 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) > 4)
7846 return 0;
7847 return arith_reg_operand (op, mode);
7850 /* Return 1 if OP is a valid source operand for shmedia cmpgt / cmpgtu. */
7852 cmp_operand (rtx op, enum machine_mode mode)
7854 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_N (INTVAL (op)))
7855 return 1;
7856 if (TARGET_SHMEDIA
7857 && mode != DImode && GET_CODE (op) == SUBREG
7858 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) > 4)
7859 return 0;
7860 return arith_reg_operand (op, mode);
7863 /* Returns 1 if OP is a valid source operand for a logical operation. */
7866 logical_operand (rtx op, enum machine_mode mode)
7868 if (TARGET_SHMEDIA
7869 && mode != DImode && GET_CODE (op) == SUBREG
7870 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) > 4)
7871 return 0;
7873 if (arith_reg_operand (op, mode))
7874 return 1;
7876 if (TARGET_SHMEDIA)
7878 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I10 (INTVAL (op)))
7879 return 1;
7880 else
7881 return 0;
7883 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_K08 (INTVAL (op)))
7884 return 1;
7886 return 0;
7890 and_operand (rtx op, enum machine_mode mode)
7892 if (logical_operand (op, mode))
7893 return 1;
7895 /* Check mshflo.l / mshflhi.l opportunities. */
7896 if (TARGET_SHMEDIA
7897 && mode == DImode
7898 && GET_CODE (op) == CONST_INT
7899 && CONST_OK_FOR_J16 (INTVAL (op)))
7900 return 1;
7902 return 0;
7905 /* Nonzero if OP is a floating point value with value 0.0. */
7908 fp_zero_operand (rtx op)
7910 REAL_VALUE_TYPE r;
7912 if (GET_MODE (op) != SFmode)
7913 return 0;
7915 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7916 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
7919 /* Nonzero if OP is a floating point value with value 1.0. */
7922 fp_one_operand (rtx op)
7924 REAL_VALUE_TYPE r;
7926 if (GET_MODE (op) != SFmode)
7927 return 0;
7929 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7930 return REAL_VALUES_EQUAL (r, dconst1);
7933 /* For -m4 and -m4-single-only, mode switching is used. If we are
7934 compiling without -mfmovd, movsf_ie isn't taken into account for
7935 mode switching. We could check in machine_dependent_reorg for
7936 cases where we know we are in single precision mode, but there is
7937 no interface to find that out during reload, so we must avoid
7938 choosing an fldi alternative during reload and thus failing to
7939 allocate a scratch register for the constant loading. */
7941 fldi_ok (void)
7943 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
7947 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7949 enum rtx_code code = GET_CODE (op);
7950 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
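/* Return 1 if OP is the PSImode FPSCR register, or, before reload, a
PSImode pseudo that may yet be allocated to it. */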
7954 fpscr_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7956 return (GET_CODE (op) == REG
7957 && (REGNO (op) == FPSCR_REG
7958 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
7959 && !(reload_in_progress || reload_completed)))
7960 && GET_MODE (op) == PSImode);
7964 fpul_operand (rtx op, enum machine_mode mode)
7966 if (TARGET_SHMEDIA)
7967 return fp_arith_reg_operand (op, mode);
7969 return (GET_CODE (op) == REG
7970 && (REGNO (op) == FPUL_REG || REGNO (op) >= FIRST_PSEUDO_REGISTER)
7971 && GET_MODE (op) == mode);
7975 symbol_ref_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7977 return (GET_CODE (op) == SYMBOL_REF);
7980 /* Return the TLS type for TLS symbols, 0 otherwise. */
7982 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7984 if (GET_CODE (op) != SYMBOL_REF)
7985 return 0;
7986 return SYMBOL_REF_TLS_MODEL (op);
7990 commutative_float_operator (rtx op, enum machine_mode mode)
7992 if (GET_MODE (op) != mode)
7993 return 0;
7994 switch (GET_CODE (op))
7996 case PLUS:
7997 case MULT:
7998 return 1;
7999 default:
8000 break;
8002 return 0;
8006 noncommutative_float_operator (rtx op, enum machine_mode mode)
8008 if (GET_MODE (op) != mode)
8009 return 0;
8010 switch (GET_CODE (op))
8012 case MINUS:
8013 case DIV:
8014 return 1;
8015 default:
8016 break;
8018 return 0;
8022 unary_float_operator (rtx op, enum machine_mode mode)
8024 if (GET_MODE (op) != mode)
8025 return 0;
8026 switch (GET_CODE (op))
8028 case ABS:
8029 case NEG:
8030 case SQRT:
8031 return 1;
8032 default:
8033 break;
8035 return 0;
8039 binary_float_operator (rtx op, enum machine_mode mode)
8041 if (GET_MODE (op) != mode)
8042 return 0;
8043 switch (GET_CODE (op))
8045 case PLUS:
8046 case MINUS:
8047 case MULT:
8048 case DIV:
8049 return 1;
8050 default:
8051 break;
8053 return 0;
8057 binary_logical_operator (rtx op, enum machine_mode mode)
8059 if (GET_MODE (op) != mode)
8060 return 0;
8061 switch (GET_CODE (op))
8063 case IOR:
8064 case AND:
8065 case XOR:
8066 return 1;
8067 default:
8068 break;
8070 return 0;
8074 equality_comparison_operator (rtx op, enum machine_mode mode)
8076 return ((mode == VOIDmode || GET_MODE (op) == mode)
8077 && (GET_CODE (op) == EQ || GET_CODE (op) == NE));
8081 greater_comparison_operator (rtx op, enum machine_mode mode)
8083 if (mode != VOIDmode && GET_MODE (op) != mode)
8084 return 0;
8085 switch (GET_CODE (op))
8087 case GT:
8088 case GE:
8089 case GTU:
8090 case GEU:
8091 return 1;
8092 default:
8093 return 0;
8098 less_comparison_operator (rtx op, enum machine_mode mode)
8100 if (mode != VOIDmode && GET_MODE (op) != mode)
8101 return 0;
8102 switch (GET_CODE (op))
8104 case LT:
8105 case LE:
8106 case LTU:
8107 case LEU:
8108 return 1;
8109 default:
8110 return 0;
8115 shift_operator (rtx op, enum machine_mode mode)
8117 if (mode != VOIDmode && GET_MODE (op) != mode)
8118 return 0;
8119 switch (GET_CODE (op))
8121 case ASHIFT:
8122 case ASHIFTRT:
8123 case LSHIFTRT:
8124 return 1;
8125 default:
8126 return 0;
8131 logical_operator (rtx op, enum machine_mode mode)
8133 if (mode != VOIDmode && GET_MODE (op) != mode)
8134 return 0;
8135 switch (GET_CODE (op))
8137 case AND:
8138 case IOR:
8139 case XOR:
8140 return 1;
8141 default:
8142 return 0;
8146 /* Accept pseudos and branch target registers. */
8148 target_reg_operand (rtx op, enum machine_mode mode)
8150 if (mode == VOIDmode
8151 ? GET_MODE (op) != Pmode && GET_MODE (op) != PDImode
8152 : mode != GET_MODE (op))
8153 return 0;
8155 if (GET_CODE (op) == SUBREG)
8156 op = XEXP (op, 0);
8158 if (GET_CODE (op) != REG)
8159 return 0;
8161 /* We must protect ourselves from matching pseudos that are virtual
8162 registers, because they will eventually be replaced with hardware
8163 registers that aren't branch-target registers. */
8164 if (REGNO (op) > LAST_VIRTUAL_REGISTER
8165 || TARGET_REGISTER_P (REGNO (op)))
8166 return 1;
8168 return 0;
8171 /* Same as target_reg_operand, except that label_refs and symbol_refs
8172 are accepted before reload. */
8174 target_operand (rtx op, enum machine_mode mode)
8176 if (mode != VOIDmode && mode != Pmode)
8177 return 0;
8179 if ((GET_MODE (op) == Pmode || GET_MODE (op) == VOIDmode)
8180 && EXTRA_CONSTRAINT_Csy (op))
8181 return ! reload_completed;
8183 return target_reg_operand (op, mode);
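/* Return 1 if OP is a constant byte-aligned bit offset of one to seven
bytes, as used by the SHmedia mextr patterns. */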
8187 mextr_bit_offset (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8189 HOST_WIDE_INT i;
8191 if (GET_CODE (op) != CONST_INT)
8192 return 0;
8193 i = INTVAL (op);
8194 return i >= 1 * 8 && i <= 7 * 8 && (i & 7) == 0;
8198 extend_reg_operand (rtx op, enum machine_mode mode)
8200 return (GET_CODE (op) == TRUNCATE
8201 ? arith_operand
8202 : arith_reg_operand) (op, mode);
8206 trunc_hi_operand (rtx op, enum machine_mode mode)
8208 enum machine_mode op_mode = GET_MODE (op);
8210 if (op_mode != SImode && op_mode != DImode
8211 && op_mode != V4HImode && op_mode != V2SImode)
8212 return 0;
8213 return extend_reg_operand (op, mode);
8217 extend_reg_or_0_operand (rtx op, enum machine_mode mode)
8219 return (GET_CODE (op) == TRUNCATE
8220 ? arith_operand
8221 : arith_reg_or_0_operand) (op, mode);
8225 minuend_operand (rtx op, enum machine_mode mode)
8227 return op == constm1_rtx || extend_reg_or_0_operand (op, mode);
8231 general_extend_operand (rtx op, enum machine_mode mode)
8233 return (GET_CODE (op) == TRUNCATE
8234 ? arith_operand
8235 : nonimmediate_operand) (op, mode);
8239 ua_address_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
8241 if (GET_CODE (op) == PLUS
8242 && (GET_CODE (XEXP (op, 1)) != CONST_INT
8243 || ! CONST_OK_FOR_I06 (INTVAL (XEXP (op, 1)))))
8244 return 0;
8245 return address_operand (op, QImode);
8249 cache_address_operand (rtx op, enum machine_mode mode)
8251 if (GET_CODE (op) == PLUS)
8253 if (GET_CODE (XEXP (op, 0)) != REG)
8254 return 0;
8255 if (GET_CODE (XEXP (op, 1)) != CONST_INT
8256 || (INTVAL (XEXP (op, 1)) & 31))
8257 return 0;
8259 else if (GET_CODE (op) != REG)
8260 return 0;
8261 return address_operand (op, mode);
8265 inqhi_operand (rtx op, enum machine_mode mode)
8267 if (GET_CODE (op) != TRUNCATE || mode != GET_MODE (op))
8268 return 0;
8269 op = XEXP (op, 0);
8270 /* Can't use true_regnum here because copy_cost wants to know about
8271 SECONDARY_INPUT_RELOAD_CLASS. */
8272 return GET_CODE (op) == REG && FP_REGISTER_P (REGNO (op));
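/* Return 1 if V is a CONST_VECTOR or PARALLEL whose elements all repeat;
for byte-sized elements, a repeating pair of values is also accepted. */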
8276 sh_rep_vec (rtx v, enum machine_mode mode)
8278 int i;
8279 rtx x, y;
8281 if ((GET_CODE (v) != CONST_VECTOR && GET_CODE (v) != PARALLEL)
8282 || (GET_MODE (v) != mode && mode != VOIDmode))
8283 return 0;
8284 i = XVECLEN (v, 0) - 2;
8285 x = XVECEXP (v, 0, i + 1);
8286 if (GET_MODE_UNIT_SIZE (mode) == 1)
8288 y = XVECEXP (v, 0, i);
8289 for (i -= 2; i >= 0; i -= 2)
8290 if (! rtx_equal_p (XVECEXP (v, 0, i + 1), x)
8291 || ! rtx_equal_p (XVECEXP (v, 0, i), y))
8292 return 0;
8294 else
8295 for (; i >= 0; i--)
8296 if (XVECEXP (v, 0, i) != x)
8297 return 0;
8298 return 1;
8301 /* Determine if V is a constant vector matching MODE with only one element
8302 that is not a sign extension. Two byte-sized elements count as one. */
8304 sh_1el_vec (rtx v, enum machine_mode mode)
8306 int unit_size;
8307 int i, last, least, sign_ix;
8308 rtx sign;
8310 if (GET_CODE (v) != CONST_VECTOR
8311 || (GET_MODE (v) != mode && mode != VOIDmode))
8312 return 0;
8313 /* Determine the indices of the last and of the least significant elements. */
8314 last = XVECLEN (v, 0) - 1;
8315 least = TARGET_LITTLE_ENDIAN ? 0 : last;
8316 if (GET_CODE (XVECEXP (v, 0, least)) != CONST_INT)
8317 return 0;
8318 sign_ix = least;
8319 if (GET_MODE_UNIT_SIZE (mode) == 1)
8320 sign_ix = TARGET_LITTLE_ENDIAN ? 1 : last - 1;
8321 if (GET_CODE (XVECEXP (v, 0, sign_ix)) != CONST_INT)
8322 return 0;
8323 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (v));
8324 sign = (INTVAL (XVECEXP (v, 0, sign_ix)) >> (unit_size * BITS_PER_UNIT - 1)
8325 ? constm1_rtx : const0_rtx);
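/* Every element other than the least significant one (and, for byte-sized
elements, the one at SIGN_IX) must equal the sign extension computed
above. */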
8326 i = XVECLEN (v, 0) - 1;
8328 if (i != least && i != sign_ix && XVECEXP (v, 0, i) != sign)
8329 return 0;
8330 while (--i);
8331 return 1;
8335 sh_const_vec (rtx v, enum machine_mode mode)
8337 int i;
8339 if (GET_CODE (v) != CONST_VECTOR
8340 || (GET_MODE (v) != mode && mode != VOIDmode))
8341 return 0;
8342 i = XVECLEN (v, 0) - 1;
8343 for (; i >= 0; i--)
8344 if (GET_CODE (XVECEXP (v, 0, i)) != CONST_INT)
8345 return 0;
8346 return 1;
8349 /* Return the destination address of a branch. */
8351 static int
8352 branch_dest (rtx branch)
8354 rtx dest = SET_SRC (PATTERN (branch));
8355 int dest_uid;
8357 if (GET_CODE (dest) == IF_THEN_ELSE)
8358 dest = XEXP (dest, 1);
8359 dest = XEXP (dest, 0);
8360 dest_uid = INSN_UID (dest);
8361 return INSN_ADDRESSES (dest_uid);
8364 /* Return nonzero if REG is not used after INSN.
8365 We assume REG is a reload reg, and therefore does
8366 not live past labels. It may live past calls or jumps though. */
8368 reg_unused_after (rtx reg, rtx insn)
8370 enum rtx_code code;
8371 rtx set;
8373 /* If the reg is set by this instruction, then it is safe for our
8374 case. Disregard the case where this is a store to memory, since
8375 we are checking a register used in the store address. */
8376 set = single_set (insn);
8377 if (set && GET_CODE (SET_DEST (set)) != MEM
8378 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8379 return 1;
8381 while ((insn = NEXT_INSN (insn)))
8383 rtx set;
8384 if (!INSN_P (insn))
8385 continue;
8387 code = GET_CODE (insn);
8389 #if 0
8390 /* If this is a label that existed before reload, then the register
8391 is dead here. However, if this is a label added by reorg, then
8392 the register may still be live here. We can't tell the difference,
8393 so we just ignore labels completely. */
8394 if (code == CODE_LABEL)
8395 return 1;
8396 /* else */
8397 #endif
8399 if (code == JUMP_INSN)
8400 return 0;
8402 /* If this is a sequence, we must handle them all at once.
8403 We could have for instance a call that sets the target register,
8404 and an insn in a delay slot that uses the register. In this case,
8405 we must return 0. */
8406 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
8408 int i;
8409 int retval = 0;
8411 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
8413 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
8414 rtx set = single_set (this_insn);
8416 if (GET_CODE (this_insn) == CALL_INSN)
8417 code = CALL_INSN;
8418 else if (GET_CODE (this_insn) == JUMP_INSN)
8420 if (INSN_ANNULLED_BRANCH_P (this_insn))
8421 return 0;
8422 code = JUMP_INSN;
8425 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8426 return 0;
8427 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8429 if (GET_CODE (SET_DEST (set)) != MEM)
8430 retval = 1;
8431 else
8432 return 0;
8434 if (set == 0
8435 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
8436 return 0;
8438 if (retval == 1)
8439 return 1;
8440 else if (code == JUMP_INSN)
8441 return 0;
8444 set = single_set (insn);
8445 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8446 return 0;
8447 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8448 return GET_CODE (SET_DEST (set)) != MEM;
8449 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
8450 return 0;
8452 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
8453 return 1;
8455 return 1;
8458 #include "ggc.h"
8460 static GTY(()) rtx fpscr_rtx;
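/* Return (and cache) an rtx for the FPSCR register, marking it as a user
register where appropriate. */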
8462 get_fpscr_rtx (void)
8464 if (! fpscr_rtx)
8466 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
8467 REG_USERVAR_P (fpscr_rtx) = 1;
8468 mark_user_reg (fpscr_rtx);
8470 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
8471 mark_user_reg (fpscr_rtx);
8472 return fpscr_rtx;
8475 void
8476 emit_sf_insn (rtx pat)
8478 emit_insn (pat);
8481 void
8482 emit_df_insn (rtx pat)
8484 emit_insn (pat);
8487 void
8488 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8490 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8493 void
8494 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8496 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
8497 get_fpscr_rtx ()));
8500 void
8501 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8503 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8506 void
8507 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8509 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
8510 get_fpscr_rtx ()));
8513 /* ??? gcc does flow analysis strictly after common subexpression
8514 elimination. As a result, common subexpression elimination fails
8515 when there are some intervening statements setting the same register.
8516 If we did nothing about this, this would hurt the precision switching
8517 for SH4 badly. There is some cse after reload, but it is unable to
8518 undo the extra register pressure from the unused instructions, and
8519 it cannot remove auto-increment loads.
8521 A C code example that shows this flow/cse weakness for (at least) SH
8522 and sparc (as of gcc ss-970706) is this:
8524 double
8525 f(double a)
8527 double d;
8528 d = 0.1;
8529 a += d;
8530 d = 1.1;
8531 d = 0.1;
8532 a *= d;
8533 return a;
8536 So we add another pass before common subexpression elimination, to
8537 remove assignments that are dead due to a following assignment in the
8538 same basic block. */
8540 static void
8541 mark_use (rtx x, rtx *reg_set_block)
8543 enum rtx_code code;
8545 if (! x)
8546 return;
8547 code = GET_CODE (x);
8548 switch (code)
8550 case REG:
8552 int regno = REGNO (x);
8553 int nregs = (regno < FIRST_PSEUDO_REGISTER
8554 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
8555 : 1);
8558 reg_set_block[regno + nregs - 1] = 0;
8560 while (--nregs);
8561 break;
8563 case SET:
8565 rtx dest = SET_DEST (x);
8567 if (GET_CODE (dest) == SUBREG)
8568 dest = SUBREG_REG (dest);
8569 if (GET_CODE (dest) != REG)
8570 mark_use (dest, reg_set_block);
8571 mark_use (SET_SRC (x), reg_set_block);
8572 break;
8574 case CLOBBER:
8575 break;
8576 default:
8578 const char *fmt = GET_RTX_FORMAT (code);
8579 int i, j;
8580 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8582 if (fmt[i] == 'e')
8583 mark_use (XEXP (x, i), reg_set_block);
8584 else if (fmt[i] == 'E')
8585 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8586 mark_use (XVECEXP (x, i, j), reg_set_block);
8588 break;
8593 static rtx get_free_reg (HARD_REG_SET);
8595 /* This function returns a register to use to load the address from which
8596 the fpscr is loaded. Currently it always returns r1 or r7, but when we are
8597 able to use pseudo registers after combine, or have a better mechanism
8598 for choosing a register, it should be done here. */
8599 /* REGS_LIVE is the liveness information for the point for which we
8600 need this allocation. In some bare-bones exit blocks, r1 is live at the
8601 start. We can even have all of r0..r3 being live:
8602 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8603 The INSN before which new insns are placed will clobber the register
8604 we return. If a basic block consists only of setting the return value
8605 register to a pseudo and using that register, the return value is not
8606 live before or after this block, yet we'll insert our insns right in
8607 the middle. */
8609 static rtx
8610 get_free_reg (HARD_REG_SET regs_live)
8612 if (! TEST_HARD_REG_BIT (regs_live, 1))
8613 return gen_rtx_REG (Pmode, 1);
8615 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8616 there shouldn't be anything but a jump before the function end. */
8617 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
8618 return gen_rtx_REG (Pmode, 7);
8621 /* This function will set the fpscr from memory.
8622 MODE is the mode we are setting it to. */
8623 void
8624 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8626 enum attr_fp_mode fp_mode = mode;
8627 rtx addr_reg = get_free_reg (regs_live);
8629 if (fp_mode == (enum attr_fp_mode) ACTUAL_NORMAL_MODE (FP_MODE))
8630 emit_insn (gen_fpu_switch1 (addr_reg));
8631 else
8632 emit_insn (gen_fpu_switch0 (addr_reg));
8635 /* Is the given character a logical line separator for the assembler? */
8636 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8637 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
8638 #endif
8641 sh_insn_length_adjustment (rtx insn)
8643 /* Instructions with unfilled delay slots take up an extra two bytes for
8644 the nop in the delay slot. */
8645 if (((GET_CODE (insn) == INSN
8646 && GET_CODE (PATTERN (insn)) != USE
8647 && GET_CODE (PATTERN (insn)) != CLOBBER)
8648 || GET_CODE (insn) == CALL_INSN
8649 || (GET_CODE (insn) == JUMP_INSN
8650 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8651 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8652 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8653 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8654 return 2;
8656 /* SH2e has a bug that prevents the use of annulled branches, so if
8657 the delay slot is not filled, we'll have to put a NOP in it. */
8658 if (sh_cpu == CPU_SH2E
8659 && GET_CODE (insn) == JUMP_INSN
8660 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8661 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8662 && get_attr_type (insn) == TYPE_CBRANCH
8663 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8664 return 2;
8666 /* sh-dsp parallel processing insns take four bytes instead of two. */
8668 if (GET_CODE (insn) == INSN)
8670 int sum = 0;
8671 rtx body = PATTERN (insn);
8672 const char *template;
8673 char c;
8674 int maybe_label = 1;
8676 if (GET_CODE (body) == ASM_INPUT)
8677 template = XSTR (body, 0);
8678 else if (asm_noperands (body) >= 0)
8679 template
8680 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
8681 else
8682 return 0;
8685 int ppi_adjust = 0;
8688 c = *template++;
8689 while (c == ' ' || c == '\t');
8690 /* all sh-dsp parallel-processing insns start with p.
8691 The only non-ppi sh insn starting with p is pref.
8692 The only ppi starting with pr is prnd. */
8693 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
8694 ppi_adjust = 2;
8695 /* The repeat pseudo-insn expands to three insns, a total of
8696 six bytes in size. */
8697 else if ((c == 'r' || c == 'R')
8698 && ! strncasecmp ("epeat", template, 5))
8699 ppi_adjust = 4;
8700 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
8702 /* If this is a label, it is obviously not a ppi insn. */
8703 if (c == ':' && maybe_label)
8705 ppi_adjust = 0;
8706 break;
8708 else if (c == '\'' || c == '"')
8709 maybe_label = 0;
8710 c = *template++;
8712 sum += ppi_adjust;
8713 maybe_label = c != ':';
8715 while (c);
8716 return sum;
8718 return 0;
8721 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8722 isn't protected by a PIC unspec. */
8724 nonpic_symbol_mentioned_p (rtx x)
8726 register const char *fmt;
8727 register int i;
8729 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8730 || GET_CODE (x) == PC)
8731 return 1;
8733 /* We don't want to look into the possible MEM location of a
8734 CONST_DOUBLE, since we're not going to use it, in general. */
8735 if (GET_CODE (x) == CONST_DOUBLE)
8736 return 0;
8738 if (GET_CODE (x) == UNSPEC
8739 && (XINT (x, 1) == UNSPEC_PIC
8740 || XINT (x, 1) == UNSPEC_GOT
8741 || XINT (x, 1) == UNSPEC_GOTOFF
8742 || XINT (x, 1) == UNSPEC_GOTPLT
8743 || XINT (x, 1) == UNSPEC_GOTTPOFF
8744 || XINT (x, 1) == UNSPEC_DTPOFF
8745 || XINT (x, 1) == UNSPEC_PLT))
8746 return 0;
8748 fmt = GET_RTX_FORMAT (GET_CODE (x));
8749 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8751 if (fmt[i] == 'E')
8753 register int j;
8755 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8756 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8757 return 1;
8759 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8760 return 1;
8763 return 0;
8766 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8767 @GOTOFF in `reg'. */
8769 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8770 rtx reg)
8772 if (tls_symbolic_operand (orig, Pmode))
8773 return orig;
8775 if (GET_CODE (orig) == LABEL_REF
8776 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8778 if (reg == 0)
8779 reg = gen_reg_rtx (Pmode);
8781 emit_insn (gen_symGOTOFF2reg (reg, orig));
8782 return reg;
8784 else if (GET_CODE (orig) == SYMBOL_REF)
8786 if (reg == 0)
8787 reg = gen_reg_rtx (Pmode);
8789 emit_insn (gen_symGOT2reg (reg, orig));
8790 return reg;
8792 return orig;
8795 /* Mark the use of a constant in the literal table. If the constant
8796 has multiple labels, make it unique. */
8797 static rtx
8798 mark_constant_pool_use (rtx x)
8800 rtx insn, lab, pattern;
8802 if (x == NULL)
8803 return x;
8805 switch (GET_CODE (x))
8807 case LABEL_REF:
8808 x = XEXP (x, 0);
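/* Fall through. */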
8809 case CODE_LABEL:
8810 break;
8811 default:
8812 return x;
8815 /* Get the first label in the list of labels for the same constant
8816 and delete the other labels in the list. */
8817 lab = x;
8818 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8820 if (GET_CODE (insn) != CODE_LABEL
8821 || LABEL_REFS (insn) != NEXT_INSN (insn))
8822 break;
8823 lab = insn;
8826 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8827 INSN_DELETED_P (insn) = 1;
8829 /* Mark constants in a window. */
8830 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8832 if (GET_CODE (insn) != INSN)
8833 continue;
8835 pattern = PATTERN (insn);
8836 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8837 continue;
8839 switch (XINT (pattern, 1))
8841 case UNSPECV_CONST2:
8842 case UNSPECV_CONST4:
8843 case UNSPECV_CONST8:
8844 XVECEXP (pattern, 0, 1) = const1_rtx;
8845 break;
8846 case UNSPECV_WINDOW_END:
8847 if (XVECEXP (pattern, 0, 0) == x)
8848 return lab;
8849 break;
8850 case UNSPECV_CONST_END:
8851 return lab;
8852 default:
8853 break;
8857 return lab;
8861 ua_offset (rtx c, enum machine_mode mode ATTRIBUTE_UNUSED)
8863 return GET_CODE (c) == CONST_INT && CONST_OK_FOR_I06 (INTVAL (c));
8866 /* Return true if it's possible to redirect BRANCH1 to the destination
8867 of an unconditional jump BRANCH2. We only want to do this if the
8868 resulting branch will have a short displacement. */
8870 sh_can_redirect_branch (rtx branch1, rtx branch2)
8872 if (flag_expensive_optimizations && simplejump_p (branch2))
8874 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8875 rtx insn;
8876 int distance;
8878 for (distance = 0, insn = NEXT_INSN (branch1);
8879 insn && distance < 256;
8880 insn = PREV_INSN (insn))
8882 if (insn == dest)
8883 return 1;
8884 else
8885 distance += get_attr_length (insn);
8887 for (distance = 0, insn = NEXT_INSN (branch1);
8888 insn && distance < 256;
8889 insn = NEXT_INSN (insn))
8891 if (insn == dest)
8892 return 1;
8893 else
8894 distance += get_attr_length (insn);
8897 return 0;
8900 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8902 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8903 unsigned int new_reg)
8905 /* Interrupt functions can only use registers that have already been
8906 saved by the prologue, even if they would normally be
8907 call-clobbered. */
8909 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
8910 return 0;
8912 return 1;
8915 /* Function to update the integer COST
8916 based on the relationship between INSN that is dependent on
8917 DEP_INSN through the dependence LINK. The default is to make no
8918 adjustment to COST. This can be used for example to specify to
8919 the scheduler that an output- or anti-dependence does not incur
8920 the same cost as a data-dependence. The return value should be
8921 the new value for COST. */
8922 static int
8923 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8925 rtx reg, use_pat;
8927 if (TARGET_SHMEDIA)
8929 /* On SHmedia, if the dependence is an anti-dependence or
8930 output-dependence, there is no cost. */
8931 if (REG_NOTE_KIND (link) != 0)
8933 /* However, dependencies between target register loads and
8934 uses of the register in a subsequent block that are separated
8935 by a conditional branch are not modelled - we have to make do with
8936 the anti-dependency between the target register load and the
8937 conditional branch that ends the current block. */
8938 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8939 && GET_CODE (PATTERN (dep_insn)) == SET
8940 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
8941 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
8942 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
8944 int orig_cost = cost;
8945 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
8946 rtx target = ((! note
8947 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
8948 ? insn : JUMP_LABEL (insn));
8949 /* On the likely path, the branch costs 1, on the unlikely path,
8950 it costs 3. */
8951 cost--;
8953 target = next_active_insn (target);
8954 while (target && ! flow_dependent_p (target, dep_insn)
8955 && --cost > 0);
8956 /* If two branches are executed in immediate succession, with the
8957 first branch properly predicted, this causes a stall at the
8958 second branch, hence we won't need the target for the
8959 second branch for two cycles after the launch of the first
8960 branch. */
8961 if (cost > orig_cost - 2)
8962 cost = orig_cost - 2;
8964 else
8965 cost = 0;
8968 else if (get_attr_is_mac_media (insn)
8969 && get_attr_is_mac_media (dep_insn))
8970 cost = 1;
8972 else if (! reload_completed
8973 && GET_CODE (PATTERN (insn)) == SET
8974 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
8975 && GET_CODE (PATTERN (dep_insn)) == SET
8976 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
8977 && cost < 4)
8978 cost = 4;
8979 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
8980 that is needed at the target. */
8981 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
8982 && ! flow_dependent_p (insn, dep_insn))
8983 cost--;
8985 else if (REG_NOTE_KIND (link) == 0)
8987 enum attr_type dep_type, type;
8989 if (recog_memoized (insn) < 0
8990 || recog_memoized (dep_insn) < 0)
8991 return cost;
8993 dep_type = get_attr_type (dep_insn);
8994 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
8995 cost--;
8996 if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
8997 && (type = get_attr_type (insn)) != TYPE_CALL
8998 && type != TYPE_SFUNC)
8999 cost--;
9001 /* The only input for a call that is timing-critical is the
9002 function's address. */
9003 if (GET_CODE(insn) == CALL_INSN)
9005 rtx call = PATTERN (insn);
9007 if (GET_CODE (call) == PARALLEL)
9008 call = XVECEXP (call, 0, 0);
9009 if (GET_CODE (call) == SET)
9010 call = SET_SRC (call);
9011 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
9012 /* sibcalli_thunk uses a symbol_ref in an unspec. */
9013 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
9014 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
9015 cost = 0;
9017 /* Likewise, the most timing-critical input for an sfunc call
9018 is the function address. However, sfuncs typically start
9019 using their arguments pretty quickly.
9020 Assume a four cycle delay before they are needed. */
9021 /* All sfunc calls are parallels with at least four components.
9022 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
9023 else if (GET_CODE (PATTERN (insn)) == PARALLEL
9024 && XVECLEN (PATTERN (insn), 0) >= 4
9025 && (reg = sfunc_uses_reg (insn)))
9027 if (! reg_set_p (reg, dep_insn))
9028 cost -= 4;
9030 /* When the preceding instruction loads the shift amount of
9031 the following SHAD/SHLD, the latency of the load is increased
9032 by 1 cycle. */
9033 else if (TARGET_SH4
9034 && get_attr_type (insn) == TYPE_DYN_SHIFT
9035 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
9036 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
9037 XEXP (SET_SRC (single_set (insn)),
9038 1)))
9039 cost++;
9040 /* When an LS group instruction with a latency of less than
9041 3 cycles is followed by a double-precision floating-point
9042 instruction, FIPR, or FTRV, the latency of the first
9043 instruction is increased to 3 cycles. */
9044 else if (cost < 3
9045 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
9046 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
9047 cost = 3;
9048 /* The lsw register of a double-precision computation is ready one
9049 cycle earlier. */
9050 else if (reload_completed
9051 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
9052 && (use_pat = single_set (insn))
9053 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
9054 SET_SRC (use_pat)))
9055 cost -= 1;
9057 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
9058 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
9059 cost -= 1;
9061 /* An anti-dependence penalty of two applies if the first insn is a double
9062 precision fadd / fsub / fmul. */
9063 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
9064 && recog_memoized (dep_insn) >= 0
9065 && get_attr_type (dep_insn) == TYPE_DFP_ARITH
9066 /* A lot of alleged anti-flow dependences are fake,
9067 so check this one is real. */
9068 && flow_dependent_p (dep_insn, insn))
9069 cost = 2;
9072 return cost;
9075 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
9076 if DEP_INSN is anti-flow dependent on INSN. */
9077 static int
9078 flow_dependent_p (rtx insn, rtx dep_insn)
9080 rtx tmp = PATTERN (insn);
9082 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
9083 return tmp == NULL_RTX;
9086 /* A helper function for flow_dependent_p called through note_stores. */
9087 static void
9088 flow_dependent_p_1 (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
9090 rtx * pinsn = (rtx *) data;
9092 if (*pinsn && reg_referenced_p (x, *pinsn))
9093 *pinsn = NULL_RTX;
9096 /* For use by ALLOCATE_INITIAL_VALUE. Note that sh.md contains some
9097 'special function' patterns (type sfunc) that clobber pr, but that
9098 do not look like function calls to leaf_function_p. Hence we must
9099 do this extra check. */
9101 sh_pr_n_sets (void)
9103 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
9106 /* This function returns "2" to indicate dual issue for the SH4
9107 processor. To be used by the DFA pipeline description. */
9108 static int
9109 sh_issue_rate (void)
9111 if (TARGET_SUPERSCALAR)
9112 return 2;
9113 else
9114 return 1;
9117 /* Functions for ready queue reordering for sched1. */
9119 /* Get weight for mode for a set x. */
9120 static short
9121 find_set_regmode_weight (rtx x, enum machine_mode mode)
9123 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
9124 return 1;
9125 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
9127 if (GET_CODE (SET_DEST (x)) == REG)
9129 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
9130 return 1;
9131 else
9132 return 0;
9134 return 1;
9136 return 0;
9139 /* Get regmode weight for insn. */
9140 static short
9141 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
9143 short reg_weight = 0;
9144 rtx x;
9146 /* Increment weight for each register born here. */
9147 x = PATTERN (insn);
9148 reg_weight += find_set_regmode_weight (x, mode);
9149 if (GET_CODE (x) == PARALLEL)
9151 int j;
9152 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
9154 x = XVECEXP (PATTERN (insn), 0, j);
9155 reg_weight += find_set_regmode_weight (x, mode);
9158 /* Decrement weight for each register that dies here. */
9159 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
9161 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
9163 rtx note = XEXP (x, 0);
9164 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
9165 reg_weight--;
9168 return reg_weight;
9171 /* Calculate regmode weights for all insns of a basic block. */
9172 static void
9173 find_regmode_weight (int b, enum machine_mode mode)
9175 rtx insn, next_tail, head, tail;
9177 get_block_head_tail (b, &head, &tail);
9178 next_tail = NEXT_INSN (tail);
9180 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
9182 /* Handle register life information. */
9183 if (!INSN_P (insn))
9184 continue;
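/* A DFmode (DImode) value occupies two SFmode (SImode) registers, hence
the factor of two below. */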
9186 if (mode == SFmode)
9187 INSN_REGMODE_WEIGHT (insn, mode) =
9188 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
9189 else if (mode == SImode)
9190 INSN_REGMODE_WEIGHT (insn, mode) =
9191 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
9195 /* Comparison function for ready queue sorting. */
9196 static int
9197 rank_for_reorder (const void *x, const void *y)
9199 rtx tmp = *(const rtx *) y;
9200 rtx tmp2 = *(const rtx *) x;
9202 /* The insn in a schedule group should be issued first. */
9203 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
9204 return SCHED_GROUP_P (tmp2) ? 1 : -1;
9206 /* If insns are equally good, sort by INSN_LUID (original insn order); this
9207 minimizes instruction movement, thus minimizing sched's effect on
9208 register pressure. */
9209 return INSN_LUID (tmp) - INSN_LUID (tmp2);
9212 /* Resort the array A in which only the element at index N - 1 may be out of order. */
9213 static void
9214 swap_reorder (rtx *a, int n)
9216 rtx insn = a[n - 1];
9217 int i = n - 2;
9219 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
9221 a[i + 1] = a[i];
9222 i -= 1;
9224 a[i + 1] = insn;
9227 #define SCHED_REORDER(READY, N_READY) \
9228 do \
9230 if ((N_READY) == 2) \
9231 swap_reorder (READY, N_READY); \
9232 else if ((N_READY) > 2) \
9233 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
9235 while (0)
9237 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
9238 macro. */
9239 static void
9240 ready_reorder (rtx *ready, int nready)
9242 SCHED_REORDER (ready, nready);
9246 /* Calculate regmode weights for all insns of all basic blocks. */
9246 static void
9247 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
9248 int verbose ATTRIBUTE_UNUSED,
9249 int old_max_uid)
9251 basic_block b;
9253 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
9254 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
9256 FOR_EACH_BB_REVERSE (b)
9258 find_regmode_weight (b->index, SImode);
9259 find_regmode_weight (b->index, SFmode);
9262 CURR_REGMODE_PRESSURE (SImode) = 0;
9263 CURR_REGMODE_PRESSURE (SFmode) = 0;
9267 /* Cleanup. */
9268 static void
9269 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
9270 int verbose ATTRIBUTE_UNUSED)
9272 if (regmode_weight[0])
9274 free (regmode_weight[0]);
9275 regmode_weight[0] = NULL;
9277 if (regmode_weight[1])
9279 free (regmode_weight[1]);
9280 regmode_weight[1] = NULL;
9284 /* Cache the can_issue_more so that we can return it from reorder2. Also,
9285 keep count of register pressures on SImode and SFmode. */
9286 static int
9287 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
9288 int sched_verbose ATTRIBUTE_UNUSED,
9289 rtx insn,
9290 int can_issue_more)
9292 if (GET_CODE (PATTERN (insn)) != USE
9293 && GET_CODE (PATTERN (insn)) != CLOBBER)
9294 cached_can_issue_more = can_issue_more - 1;
9295 else
9296 cached_can_issue_more = can_issue_more;
9298 if (reload_completed)
9299 return cached_can_issue_more;
9301 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
9302 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
9304 return cached_can_issue_more;
9307 static void
9308 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
9309 int verbose ATTRIBUTE_UNUSED,
9310 int veclen ATTRIBUTE_UNUSED)
9312 CURR_REGMODE_PRESSURE (SImode) = 0;
9313 CURR_REGMODE_PRESSURE (SFmode) = 0;
9316 /* Some magic numbers. */
9317 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9318 functions that already have high pressure on r0. */
9319 #define R0_MAX_LIFE_REGIONS 2
9320 #define R0_MAX_LIVE_LENGTH 12
9321 /* Register Pressure thresholds for SImode and SFmode registers. */
9322 #define SIMODE_MAX_WEIGHT 5
9323 #define SFMODE_MAX_WEIGHT 10
9325 /* Return true if the pressure is high for MODE. */
9326 static short
9327 high_pressure (enum machine_mode mode)
9329 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9330 functions that already have high pressure on r0. */
9331 if ((REG_N_SETS (0) - REG_N_DEATHS (0)) >= R0_MAX_LIFE_REGIONS
9332 && REG_LIVE_LENGTH (0) >= R0_MAX_LIVE_LENGTH)
9333 return 1;
9335 if (mode == SFmode)
9336 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
9337 else
9338 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
9341 /* Reorder ready queue if register pressure is high. */
9342 static int
9343 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
9344 int sched_verbose ATTRIBUTE_UNUSED,
9345 rtx *ready,
9346 int *n_readyp,
9347 int clock_var ATTRIBUTE_UNUSED)
9349 if (reload_completed)
9350 return sh_issue_rate ();
9352 if (high_pressure (SFmode) || high_pressure (SImode))
9354 ready_reorder (ready, *n_readyp);
9357 return sh_issue_rate ();
9360 /* Skip cycles if the current register pressure is high. */
9361 static int
9362 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
9363 int sched_verbose ATTRIBUTE_UNUSED,
9364 rtx *ready ATTRIBUTE_UNUSED,
9365 int *n_readyp ATTRIBUTE_UNUSED,
9366 int clock_var ATTRIBUTE_UNUSED)
9368 if (reload_completed)
9369 return cached_can_issue_more;
9371 if (high_pressure(SFmode) || high_pressure (SImode))
9372 skip_cycles = 1;
9374 return cached_can_issue_more;
9377 /* Skip cycles without sorting the ready queue. This will move insns from
9378 Q->R. If this is the last cycle we are skipping, allow sorting of the ready
9379 queue by sh_reorder. */
9381 /* Generally, skipping this many cycles is sufficient for all insns to move
9382 from Q -> R. */
9383 #define MAX_SKIPS 8
9385 static int
9386 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
9387 int sched_verbose ATTRIBUTE_UNUSED,
9388 rtx insn ATTRIBUTE_UNUSED,
9389 int last_clock_var,
9390 int clock_var,
9391 int *sort_p)
9393 if (reload_completed)
9394 return 0;
9396 if (skip_cycles)
9398 if ((clock_var - last_clock_var) < MAX_SKIPS)
9400 *sort_p = 0;
9401 return 1;
9403 /* If this is the last cycle we are skipping, allow reordering of R. */
9404 if ((clock_var - last_clock_var) == MAX_SKIPS)
9406 *sort_p = 1;
9407 return 1;
9411 skip_cycles = 0;
9413 return 0;
9416 /* SHmedia requires registers for branches, so we can't generate new
9417 branches past reload. */
9418 static bool
9419 sh_cannot_modify_jumps_p (void)
9421 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
9424 static int
9425 sh_target_reg_class (void)
9427 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
9430 static bool
9431 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
9433 HARD_REG_SET dummy;
9434 rtx insn;
9436 if (! shmedia_space_reserved_for_target_registers)
9437 return 0;
9438 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
9439 return 0;
9440 if (calc_live_regs (&dummy) >= 6 * 8)
9441 return 1;
9442 /* This is a borderline case. See if we got a nested loop, or a loop
9443 with a call, or with more than 4 labels inside. */
9444 for (insn = get_insns(); insn; insn = NEXT_INSN (insn))
9446 if (GET_CODE (insn) == NOTE
9447 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
9449 int labels = 0;
9453 insn = NEXT_INSN (insn);
9454 if ((GET_CODE (insn) == NOTE
9455 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
9456 || GET_CODE (insn) == CALL_INSN
9457 || (GET_CODE (insn) == CODE_LABEL && ++labels > 4))
9458 return 1;
9460 while (GET_CODE (insn) != NOTE
9461 || NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_END);
9464 return 0;
9467 static bool
9468 sh_ms_bitfield_layout_p (tree record_type ATTRIBUTE_UNUSED)
9470 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
9474 On the SH1..SH4, the trampoline looks like
9475 2 0002 D202 mov.l l2,r2
9476 1 0000 D301 mov.l l1,r3
9477 3 0004 422B jmp @r2
9478 4 0006 0009 nop
9479 5 0008 00000000 l1: .long area
9480 6 000c 00000000 l2: .long function
9482 SH5 (compact) uses r1 instead of r3 for the static chain. */
9485 /* Emit RTL insns to initialize the variable parts of a trampoline.
9486 FNADDR is an RTX for the address of the function's pure code.
9487 CXT is an RTX for the static chain value for the function. */
9489 void
9490 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
9492 if (TARGET_SHMEDIA64)
9494 rtx tramp_templ;
9495 int fixed_len;
9497 rtx movi1 = GEN_INT (0xcc000010);
9498 rtx shori1 = GEN_INT (0xc8000010);
9499 rtx src, dst;
9501 /* The following trampoline works within a +- 128 KB range for cxt:
9502 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
9503 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
9504 gettr tr1,r1; blink tr0,r63 */
9505 /* Address rounding makes it hard to compute the exact bounds of the
9506 offset for this trampoline, but we have a rather generous offset
9507 range, so frame_offset should do fine as an upper bound. */
9508 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
9510 /* ??? could optimize this trampoline initialization
9511 by writing DImode words with two insns each. */
9512 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
9513 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
9514 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
9515 insn = gen_rtx_AND (DImode, insn, mask);
9516 /* Or in ptb/u .,tr1 pattern */
9517 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
9518 insn = force_operand (insn, NULL_RTX);
9519 insn = gen_lowpart (SImode, insn);
9520 emit_move_insn (gen_rtx_MEM (SImode, tramp), insn);
9521 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
9522 insn = gen_rtx_AND (DImode, insn, mask);
9523 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
9524 insn = gen_lowpart (SImode, insn);
9525 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)), insn);
9526 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
9527 insn = gen_rtx_AND (DImode, insn, mask);
9528 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9529 insn = gen_lowpart (SImode, insn);
9530 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)), insn);
9531 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
9532 insn = gen_rtx_AND (DImode, insn, mask);
9533 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9534 insn = gen_lowpart (SImode, insn);
9535 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
9536 insn);
9537 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
9538 insn = gen_rtx_AND (DImode, insn, mask);
9539 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9540 insn = gen_lowpart (SImode, insn);
9541 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 16)),
9542 insn);
9543 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 20)),
9544 GEN_INT (0x6bf10600));
9545 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 24)),
9546 GEN_INT (0x4415fc10));
9547 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 28)),
9548 GEN_INT (0x4401fff0));
9549 emit_insn (gen_ic_invalidate_line (tramp));
9550 return;
9552 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
9553 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
9555 tramp_templ = gen_datalabel_ref (tramp_templ);
9556 dst = gen_rtx_MEM (BLKmode, tramp);
9557 src = gen_rtx_MEM (BLKmode, tramp_templ);
9558 set_mem_align (dst, 256);
9559 set_mem_align (src, 64);
9560 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
9562 emit_move_insn (gen_rtx_MEM (Pmode, plus_constant (tramp, fixed_len)),
9563 fnaddr);
9564 emit_move_insn (gen_rtx_MEM (Pmode,
9565 plus_constant (tramp,
9566 fixed_len
9567 + GET_MODE_SIZE (Pmode))),
9568 cxt);
9569 emit_insn (gen_ic_invalidate_line (tramp));
9570 return;
9572 else if (TARGET_SHMEDIA)
9574 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
9575 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
9576 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
9577 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
9578 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
9579 rotated 10 right, and the higher 16 bits of every 32 selected. */
9580 rtx movishori
9581 = force_reg (V2HImode, (simplify_gen_subreg
9582 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
9583 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
9584 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
9586 tramp = force_reg (Pmode, tramp);
9587 fnaddr = force_reg (SImode, fnaddr);
9588 cxt = force_reg (SImode, cxt);
9589 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
9590 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
9591 movishori));
9592 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
9593 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9594 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
9595 emit_move_insn (gen_rtx_MEM (DImode, tramp), quad0);
9596 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
9597 gen_rtx_SUBREG (V2HImode, cxt, 0),
9598 movishori));
9599 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
9600 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9601 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
9602 if (TARGET_LITTLE_ENDIAN)
9604 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
9605 emit_insn (gen_mextr4 (quad2, cxtload, blink));
9607 else
9609 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
9610 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
9612 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 8)), quad1);
9613 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), quad2);
9614 emit_insn (gen_ic_invalidate_line (tramp));
9615 return;
9617 else if (TARGET_SHCOMPACT)
9619 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
9620 return;
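/* SH1..SH4: store the four opcode halfwords from the layout comment above
(mov.l l2,r2 / mov.l l1,r3 / jmp @r2 / nop), then the static chain at l1
and the function address at l2. */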
9622 emit_move_insn (gen_rtx_MEM (SImode, tramp),
9623 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
9624 SImode));
9625 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
9626 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
9627 SImode));
9628 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
9629 cxt);
9630 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
9631 fnaddr);
9632 if (TARGET_HARVARD)
9634 if (TARGET_USERMODE)
9635 emit_library_call (function_symbol (NULL, "__ic_invalidate",
9636 FUNCTION_ORDINARY),
9637 0, VOIDmode, 1, tramp, SImode);
9638 else
9639 emit_insn (gen_ic_invalidate_line (tramp));
9643 /* FIXME: This is overly conservative. A SHcompact function that
9644 receives arguments ``by reference'' will have them stored in its
9645 own stack frame, so it must not pass pointers or references to
9646 these arguments to other functions by means of sibling calls. */
9647 /* If PIC, we cannot make sibling calls to global functions
9648 because the PLT requires r12 to be live. */
9649 static bool
9650 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9652 return (1
9653 && (! TARGET_SHCOMPACT
9654 || current_function_args_info.stack_regs == 0)
9655 && ! sh_cfun_interrupt_handler_p ()
9656 && (! flag_pic
9657 || (decl && ! TREE_PUBLIC (decl))
9658 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
9661 /* Machine specific built-in functions. */
9663 struct builtin_description
9665 const enum insn_code icode;
9666 const char *const name;
9667 int signature;
9670 /* describe number and signedness of arguments; arg[0] == result
9671 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument) */
9672 /* 9: 64 bit pointer, 10: 32 bit pointer */
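/* For example, the { 4, 4, 1 } entry used for SH_BLTIN_SH_HI below
describes a builtin whose result and first argument have no particular
signedness and whose second argument (a shift count or control value) is
unsigned. */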
9673 static const char signature_args[][4] =
9675 #define SH_BLTIN_V2SI2 0
9676 { 4, 4 },
9677 #define SH_BLTIN_V4HI2 1
9678 { 4, 4 },
9679 #define SH_BLTIN_V2SI3 2
9680 { 4, 4, 4 },
9681 #define SH_BLTIN_V4HI3 3
9682 { 4, 4, 4 },
9683 #define SH_BLTIN_V8QI3 4
9684 { 4, 4, 4 },
9685 #define SH_BLTIN_MAC_HISI 5
9686 { 1, 4, 4, 1 },
9687 #define SH_BLTIN_SH_HI 6
9688 { 4, 4, 1 },
9689 #define SH_BLTIN_SH_SI 7
9690 { 4, 4, 1 },
9691 #define SH_BLTIN_V4HI2V2SI 8
9692 { 4, 4, 4 },
9693 #define SH_BLTIN_V4HI2V8QI 9
9694 { 4, 4, 4 },
9695 #define SH_BLTIN_SISF 10
9696 { 4, 2 },
9697 #define SH_BLTIN_LDUA_L 11
9698 { 2, 10 },
9699 #define SH_BLTIN_LDUA_Q 12
9700 { 1, 10 },
9701 #define SH_BLTIN_STUA_L 13
9702 { 0, 10, 2 },
9703 #define SH_BLTIN_STUA_Q 14
9704 { 0, 10, 1 },
9705 #define SH_BLTIN_LDUA_L64 15
9706 { 2, 9 },
9707 #define SH_BLTIN_LDUA_Q64 16
9708 { 1, 9 },
9709 #define SH_BLTIN_STUA_L64 17
9710 { 0, 9, 2 },
9711 #define SH_BLTIN_STUA_Q64 18
9712 { 0, 9, 1 },
9713 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
9714 #define SH_BLTIN_2 19
9715 #define SH_BLTIN_SU 19
9716 { 1, 2 },
9717 #define SH_BLTIN_3 20
9718 #define SH_BLTIN_SUS 20
9719 { 2, 2, 1 },
9720 #define SH_BLTIN_PSSV 21
9721 { 0, 8, 2, 2 },
9722 #define SH_BLTIN_XXUU 22
9723 #define SH_BLTIN_UUUU 22
9724 { 1, 1, 1, 1 },
9725 #define SH_BLTIN_PV 23
9726 { 0, 8 },
9728 /* mcmv: operands considered unsigned. */
9729 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9730 /* mperm: control value considered unsigned int. */
9731 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9732 /* mshards_q: returns signed short. */
9733 /* nsb: takes long long arg, returns unsigned char. */
9734 static const struct builtin_description bdesc[] =
9736 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
9737 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
9738 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
9739 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
9740 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
9741 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
9742 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
9743 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9744 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
9745 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
9746 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
9747 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
9748 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
9749 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
9750 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
9751 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
9752 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
9753 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
9754 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3 },
9755 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3 },
9756 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3 },
9757 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3 },
9758 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3 },
9759 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3 },
9760 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3 },
9761 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
9762 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
9763 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
9764 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
9765 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
9766 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
9767 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
9768 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
9769 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
9770 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
9771 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
9772 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
9773 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
9774 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
9775 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
9776 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
9777 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
9778 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
9779 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
9780 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
9781 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
9782 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
9783 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
9784 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
9785 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
9786 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
9787 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
9788 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
9789 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
9790 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
9791 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
9792 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
9793 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
9794 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
9795 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
9796 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
9797 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3 },
9798 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2 },
9799 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2 },
9800 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
9801 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9802 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9803 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9804 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9805 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9806 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9807 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9808 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9809 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64 },
9810 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64 },
9811 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64 },
9812 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64 },
9813 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64 },
9814 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64 },
9815 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64 },
9816 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64 },
9817 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
9818 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
9819 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
9822 static void
9823 sh_media_init_builtins (void)
9825 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
9826 const struct builtin_description *d;
9828 memset (shared, 0, sizeof shared);
9829 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
9831 tree type, arg_type = 0;
9832 int signature = d->signature;
9833 int i;
9835 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
9836 type = shared[signature];
9837 else
9839 int has_result = signature_args[signature][0] != 0;
9841 if ((signature_args[signature][1] & 8)
9842 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
9843 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
9844 continue;
9845 if (! TARGET_FPU_ANY
9846 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
9847 continue;
9848 type = void_list_node;
9849 for (i = 3; ; i--)
9851 int arg = signature_args[signature][i];
9852 int opno = i - 1 + has_result;
9854 if (arg & 8)
9855 arg_type = ptr_type_node;
9856 else if (arg)
9857 arg_type = (*lang_hooks.types.type_for_mode)
9858 (insn_data[d->icode].operand[opno].mode,
9859 (arg & 1));
9860 else if (i)
9861 continue;
9862 else
9863 arg_type = void_type_node;
9864 if (i == 0)
9865 break;
9866 type = tree_cons (NULL_TREE, arg_type, type);
9868 type = build_function_type (arg_type, type);
9869 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
9870 shared[signature] = type;
9872 lang_hooks.builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
9873 NULL, NULL_TREE);
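/* Illustrative use of one of the builtins registered above; this is
   hypothetical user code, assuming GCC's generic vector extension for
   the V2SImode operand type:

       typedef int v2si __attribute__ ((vector_size (8)));

       v2si
       vector_abs (v2si x)
       {
         return __builtin_absv2si2 (x);
       }
*/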
9877 /* Implements target hook vector_mode_supported_p. */
9878 bool
9879 sh_vector_mode_supported_p (enum machine_mode mode)
9881 if (TARGET_FPU_ANY
9882 && ((mode == V2SFmode)
9883 || (mode == V4SFmode)
9884 || (mode == V16SFmode)))
9885 return true;
9887 else if (TARGET_SHMEDIA
9888 && ((mode == V8QImode)
9889 || (mode == V2HImode)
9890 || (mode == V4HImode)
9891 || (mode == V2SImode)))
9892 return true;
9894 return false;
9897 /* Implements target hook dwarf_calling_convention. Return an enum
9898 of dwarf_calling_convention. */
9900 sh_dwarf_calling_convention (tree func)
9902 if (sh_attr_renesas_p (func))
9903 return DW_CC_GNU_renesas_sh;
9905 return DW_CC_normal;
9908 static void
9909 sh_init_builtins (void)
9911 if (TARGET_SHMEDIA)
9912 sh_media_init_builtins ();
9915 /* Expand an expression EXP that calls a built-in function,
9916 with result going to TARGET if that's convenient
9917 (and in mode MODE if that's convenient).
9918 SUBTARGET may be used as the target for computing one of EXP's operands.
9919 IGNORE is nonzero if the value is to be ignored. */
9921 static rtx
9922 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9923 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
9925 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
9926 tree arglist = TREE_OPERAND (exp, 1);
9927 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9928 const struct builtin_description *d = &bdesc[fcode];
9929 enum insn_code icode = d->icode;
9930 int signature = d->signature;
9931 enum machine_mode tmode = VOIDmode;
9932 int nop = 0, i;
9933 rtx op[4];
9934 rtx pat = 0;
9936 if (signature_args[signature][0])
9938 if (ignore)
9939 return 0;
9941 tmode = insn_data[icode].operand[0].mode;
9942 if (! target
9943 || GET_MODE (target) != tmode
9944 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9945 target = gen_reg_rtx (tmode);
9946 op[nop++] = target;
9948 else
9949 target = 0;
9951 for (i = 1; i <= 3; i++, nop++)
9953 tree arg;
9954 enum machine_mode opmode, argmode;
9955 tree optype;
9957 if (! signature_args[signature][i])
9958 break;
9959 arg = TREE_VALUE (arglist);
9960 if (arg == error_mark_node)
9961 return const0_rtx;
9962 arglist = TREE_CHAIN (arglist);
9963 if (signature_args[signature][i] & 8)
9965 opmode = ptr_mode;
9966 optype = ptr_type_node;
9968 else
9970 opmode = insn_data[icode].operand[nop].mode;
9971 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
9973 argmode = TYPE_MODE (TREE_TYPE (arg));
9974 if (argmode != opmode)
9975 arg = build1 (NOP_EXPR, optype, arg);
9976 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
9977 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
9978 op[nop] = copy_to_mode_reg (opmode, op[nop]);
9981 switch (nop)
9983 case 1:
9984 pat = (*insn_data[d->icode].genfun) (op[0]);
9985 break;
9986 case 2:
9987 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
9988 break;
9989 case 3:
9990 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
9991 break;
9992 case 4:
9993 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
9994 break;
9995 default:
9996 gcc_unreachable ();
9998 if (! pat)
9999 return 0;
10000 emit_insn (pat);
10001 return target;
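/* Expand a V2SF unary operation CODE with operand OP1 and result OP0.
   The vector operation is open-coded as two scalar SFmode operations,
   one per element, selected by the sel0 / sel1 constants.  */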
10004 void
10005 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
10007 rtx sel0 = const0_rtx;
10008 rtx sel1 = const1_rtx;
10009 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
10010 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
10012 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
10013 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
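/* Likewise for a V2SF binary operation CODE on OP1 and OP2 with result
   OP0: the underlying SFmode operation is emitted once per element.  */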
10016 void
10017 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
10019 rtx sel0 = const0_rtx;
10020 rtx sel1 = const1_rtx;
10021 rtx (*fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
10022 = gen_binary_sf_op;
10023 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
10025 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
10026 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
10029 /* Return the class of registers for which a mode change from FROM to TO
10030 is invalid. */
10031 bool
10032 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
10033 enum reg_class class)
10035 /* We want to enable the use of SUBREGs as a means to
10036 VEC_SELECT a single element of a vector. */
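/* For example, an SFmode subreg of a V2SFmode register reads a single
   float element directly, without an explicit vec_select.  */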
10037 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
10038 return (reg_classes_intersect_p (GENERAL_REGS, class));
10040 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
10042 if (TARGET_LITTLE_ENDIAN)
10044 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
10045 return reg_classes_intersect_p (DF_REGS, class);
10047 else
10049 if (GET_MODE_SIZE (from) < 8)
10050 return reg_classes_intersect_p (DF_HI_REGS, class);
10053 return 0;
10057 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
10058 that label is used. */
10060 void
10061 sh_mark_label (rtx address, int nuses)
10063 if (GOTOFF_P (address))
10065 /* Extract the label or symbol. */
10066 address = XEXP (address, 0);
10067 if (GET_CODE (address) == PLUS)
10068 address = XEXP (address, 0);
10069 address = XVECEXP (address, 0, 0);
10071 if (GET_CODE (address) == LABEL_REF
10072 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
10073 LABEL_NUSES (XEXP (address, 0)) += nuses;
10076 /* Compute extra cost of moving data between one register class
10077 and another. */
10079 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
10080 uses this information. Hence, the general register <-> floating point
10081 register information here is not used for SFmode. */
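/* For example, with the costs below, moving a DFmode value between a
   general register and a floating point register on a non-SHmedia
   target without FMOVD costs 12 * ((8 + 7) / 8) = 12, compared with
   the default cost of 2 * ((8 + 3) / 4) = 4 for a move within the
   general registers.  */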
10084 sh_register_move_cost (enum machine_mode mode,
10085 enum reg_class srcclass, enum reg_class dstclass)
10087 if (dstclass == T_REGS || dstclass == PR_REGS)
10088 return 10;
10090 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
10091 return 4;
10093 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
10094 && REGCLASS_HAS_FP_REG (srcclass)
10095 && REGCLASS_HAS_FP_REG (dstclass))
10096 return 4;
10098 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
10099 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
10100 return 9;
10102 if ((REGCLASS_HAS_FP_REG (dstclass)
10103 && REGCLASS_HAS_GENERAL_REG (srcclass))
10104 || (REGCLASS_HAS_GENERAL_REG (dstclass)
10105 && REGCLASS_HAS_FP_REG (srcclass)))
10106 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
10107 * ((GET_MODE_SIZE (mode) + 7) / 8U));
10109 if ((dstclass == FPUL_REGS
10110 && REGCLASS_HAS_GENERAL_REG (srcclass))
10111 || (srcclass == FPUL_REGS
10112 && REGCLASS_HAS_GENERAL_REG (dstclass)))
10113 return 5;
10115 if ((dstclass == FPUL_REGS
10116 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
10117 || (srcclass == FPUL_REGS
10118 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
10119 return 7;
10121 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10122 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
10123 return 20;
10125 /* ??? ptabs faults on (value & 0x3) == 0x3 */
10126 if (TARGET_SHMEDIA
10127 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
10129 if (*sh_gettrcost_str)
10130 return atoi (sh_gettrcost_str);
10131 else if (!TARGET_PT_FIXED)
10132 return 100;
10135 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10136 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
10137 return 4;
10139 if (TARGET_SHMEDIA
10140 || (TARGET_FMOVD
10141 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
10142 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
10143 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
10145 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
10148 /* Like register_operand, but take into account that SHMEDIA can use
10149 the constant zero like a general register. */
10151 sh_register_operand (rtx op, enum machine_mode mode)
10153 if (op == CONST0_RTX (mode) && TARGET_SHMEDIA)
10154 return 1;
10155 return register_operand (op, mode);
10159 cmpsi_operand (rtx op, enum machine_mode mode)
10161 if (GET_CODE (op) == REG && REGNO (op) == T_REG
10162 && GET_MODE (op) == SImode
10163 && TARGET_SH1)
10164 return 1;
10165 return arith_operand (op, mode);
10169 shift_count_reg_operand (rtx op, enum machine_mode mode)
10171 if ((GET_CODE (op) == ZERO_EXTEND || GET_CODE (op) == SIGN_EXTEND
10172 || (GET_CODE (op) == SUBREG && SUBREG_BYTE (op) == 0))
10173 && (mode == VOIDmode || mode == GET_MODE (op))
10174 && GET_MODE_BITSIZE (GET_MODE (XEXP (op, 0))) >= 6
10175 && GET_MODE_CLASS (GET_MODE (XEXP (op, 0))) == MODE_INT)
10177 mode = VOIDmode;
10179 op = XEXP (op, 0);
10180 while ((GET_CODE (op) == ZERO_EXTEND || GET_CODE (op) == SIGN_EXTEND
10181 || GET_CODE (op) == TRUNCATE)
10182 && GET_MODE_BITSIZE (GET_MODE (XEXP (op, 0))) >= 6
10183 && GET_MODE_CLASS (GET_MODE (XEXP (op, 0))) == MODE_INT);
10186 return arith_reg_operand (op, mode);
10190 shift_count_operand (rtx op, enum machine_mode mode)
10192 return (CONSTANT_P (op)
10193 ? (GET_CODE (op) == CONST_INT
10194 ? (unsigned) INTVAL (op) < GET_MODE_BITSIZE (mode)
10195 : nonmemory_operand (op, mode))
10196 : shift_count_reg_operand (op, mode));
10199 static rtx emit_load_ptr (rtx, rtx);
10201 static rtx
10202 emit_load_ptr (rtx reg, rtx addr)
10204 rtx mem = gen_rtx_MEM (ptr_mode, addr);
10206 if (Pmode != ptr_mode)
10207 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
10208 return emit_move_insn (reg, mem);
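/* Emit RTL for a thunk transferring control to FUNCTION: add DELTA to
   the incoming "this" pointer, and, if VCALL_OFFSET is nonzero, also
   add the value loaded from *(*this + VCALL_OFFSET); then tail-call
   FUNCTION.  */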
10211 static void
10212 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
10213 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10214 tree function)
10216 CUMULATIVE_ARGS cum;
10217 int structure_value_byref = 0;
10218 rtx this, this_value, sibcall, insns, funexp;
10219 tree funtype = TREE_TYPE (function);
10220 int simple_add = CONST_OK_FOR_ADD (delta);
10221 int did_load = 0;
10222 rtx scratch0, scratch1, scratch2;
10223 unsigned i;
10225 reload_completed = 1;
10226 epilogue_completed = 1;
10227 no_new_pseudos = 1;
10228 current_function_uses_only_leaf_regs = 1;
10229 reset_block_changes ();
10231 emit_note (NOTE_INSN_PROLOGUE_END);
10233 /* Find the "this" pointer. We have such a wide range of ABIs for the
10234 SH that it's best to do this completely machine independently.
10235 "this" is passed as first argument, unless a structure return pointer
10236 comes first, in which case "this" comes second. */
10237 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
10238 #ifndef PCC_STATIC_STRUCT_RETURN
10239 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
10240 structure_value_byref = 1;
10241 #endif /* not PCC_STATIC_STRUCT_RETURN */
10242 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
10244 tree ptype = build_pointer_type (TREE_TYPE (funtype));
10246 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
10248 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
10250 /* For SHcompact, we only have r0 for a scratch register: r1 is the
10251 static chain pointer (even if you can't have nested virtual functions
10252 right now, someone might implement them sometime), and the rest of the
10253 registers are used for argument passing, are callee-saved, or reserved. */
10254 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
10255 -ffixed-reg has been used. */
10256 if (! call_used_regs[0] || fixed_regs[0])
10257 error ("r0 needs to be available as a call-clobbered register");
10258 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
10259 if (! TARGET_SH5)
10261 if (call_used_regs[1] && ! fixed_regs[1])
10262 scratch1 = gen_rtx_REG (ptr_mode, 1);
10263 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
10264 pointing to where struct values are to be returned. */
10265 if (call_used_regs[3] && ! fixed_regs[3])
10266 scratch2 = gen_rtx_REG (Pmode, 3);
10268 else if (TARGET_SHMEDIA)
10270 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
10271 if (i != REGNO (scratch0) &&
10272 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
10274 scratch1 = gen_rtx_REG (ptr_mode, i);
10275 break;
10277 if (scratch1 == scratch0)
10278 error ("Need a second call-clobbered general purpose register");
10279 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
10280 if (call_used_regs[i] && ! fixed_regs[i])
10282 scratch2 = gen_rtx_REG (Pmode, i);
10283 break;
10285 if (scratch2 == scratch0)
10286 error ("Need a call-clobbered target register");
10289 this_value = plus_constant (this, delta);
10290 if (vcall_offset
10291 && (simple_add || scratch0 != scratch1)
10292 && strict_memory_address_p (ptr_mode, this_value))
10294 emit_load_ptr (scratch0, this_value);
10295 did_load = 1;
10298 if (!delta)
10299 ; /* Do nothing. */
10300 else if (simple_add)
10301 emit_move_insn (this, this_value);
10302 else
10304 emit_move_insn (scratch1, GEN_INT (delta));
10305 emit_insn (gen_add2_insn (this, scratch1));
10308 if (vcall_offset)
10310 rtx offset_addr;
10312 if (!did_load)
10313 emit_load_ptr (scratch0, this);
10315 offset_addr = plus_constant (scratch0, vcall_offset);
10316 if (strict_memory_address_p (ptr_mode, offset_addr))
10317 ; /* Do nothing. */
10318 else if (! TARGET_SH5 && scratch0 != scratch1)
10320 /* scratch0 != scratch1, and we have indexed loads. Get a better
10321 schedule by loading the offset into r1 and using an indexed
10322 load - then the load of r1 can issue before the load from
10323 (this + delta) finishes. */
10324 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10325 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
10327 else if (CONST_OK_FOR_ADD (vcall_offset))
10329 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
10330 offset_addr = scratch0;
10332 else if (scratch0 != scratch1)
10334 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10335 emit_insn (gen_add2_insn (scratch0, scratch1));
10336 offset_addr = scratch0;
10338 else
10339 gcc_unreachable (); /* FIXME */
10340 emit_load_ptr (scratch0, offset_addr);
10342 if (Pmode != ptr_mode)
10343 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
10344 emit_insn (gen_add2_insn (this, scratch0));
10347 /* Generate a tail call to the target function. */
10348 if (! TREE_USED (function))
10350 assemble_external (function);
10351 TREE_USED (function) = 1;
10353 funexp = XEXP (DECL_RTL (function), 0);
10354 /* If the function is overridden, so is the thunk, hence we don't
10355 need GOT addressing even if this is a public symbol. */
10356 #if 0
10357 if (TARGET_SH1 && ! flag_weak)
10358 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
10359 else
10360 #endif
10361 if (TARGET_SH2 && flag_pic)
10363 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
10364 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
10366 else
10368 if (TARGET_SHMEDIA && flag_pic)
10370 funexp = gen_sym2PIC (funexp);
10371 PUT_MODE (funexp, Pmode);
10373 emit_move_insn (scratch2, funexp);
10374 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
10375 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
10377 sibcall = emit_call_insn (sibcall);
10378 SIBLING_CALL_P (sibcall) = 1;
10379 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
10380 emit_barrier ();
10382 /* Run just enough of rest_of_compilation to do scheduling and get
10383 the insns emitted. Note that use_thunk calls
10384 assemble_start_function and assemble_end_function. */
10386 insn_locators_initialize ();
10387 insns = get_insns ();
10389 if (optimize > 0 && flag_schedule_insns_after_reload)
10391 /* Initialize the bitmap obstacks. */
10392 bitmap_obstack_initialize (NULL);
10393 bitmap_obstack_initialize (&reg_obstack);
10394 if (! cfun->cfg)
10395 init_flow ();
10396 rtl_register_cfg_hooks ();
10397 find_basic_blocks (insns);
10398 life_analysis (dump_file, PROP_FINAL);
10400 split_all_insns (1);
10402 schedule_insns (dump_file);
10405 sh_reorg ();
10407 if (optimize > 0 && flag_delayed_branch)
10409 if (! cfun->cfg)
10411 init_flow ();
10412 find_basic_blocks (insns);
10414 dbr_schedule (insns, dump_file);
10416 shorten_branches (insns);
10417 final_start_function (insns, file, 1);
10418 final (insns, file, 1);
10419 final_end_function ();
10421 if (optimize > 0 && flag_schedule_insns_after_reload)
10423 /* Release all memory allocated by flow. */
10424 free_basic_block_vars ();
10426 /* Release the bitmap obstacks. */
10427 bitmap_obstack_release (&reg_obstack);
10428 bitmap_obstack_release (NULL);
10431 reload_completed = 0;
10432 epilogue_completed = 0;
10433 no_new_pseudos = 0;
10437 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
10439 rtx sym;
10441 /* If this is not an ordinary function, the name usually comes from a
10442 string literal or an sprintf buffer. Make sure we use the same
10443 string consistently, so that cse will be able to unify address loads. */
10444 if (kind != FUNCTION_ORDINARY)
10445 name = IDENTIFIER_POINTER (get_identifier (name));
10446 sym = gen_rtx_SYMBOL_REF (Pmode, name);
10447 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
10448 if (flag_pic)
10449 switch (kind)
10451 case FUNCTION_ORDINARY:
10452 break;
10453 case SFUNC_GOT:
10455 rtx reg = target ? target : gen_reg_rtx (Pmode);
10457 emit_insn (gen_symGOT2reg (reg, sym));
10458 sym = reg;
10459 break;
10461 case SFUNC_STATIC:
10463 /* ??? To allow cse to work, we use GOTOFF relocations.
10464 We could add combiner patterns to transform this into
10465 straight pc-relative calls with sym2PIC / bsrf when
10466 label load and function call are still 1:1 and in the
10467 same basic block during combine. */
10468 rtx reg = target ? target : gen_reg_rtx (Pmode);
10470 emit_insn (gen_symGOTOFF2reg (reg, sym));
10471 sym = reg;
10472 break;
10475 if (target && sym != target)
10477 emit_move_insn (target, sym);
10478 return target;
10480 return sym;
10483 /* Find the number of a general purpose register in S. */
10484 static int
10485 scavenge_reg (HARD_REG_SET *s)
10487 int r;
10488 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
10489 if (TEST_HARD_REG_BIT (*s, r))
10490 return r;
10491 return -1;
10495 sh_get_pr_initial_val (void)
10497 rtx val;
10499 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
10500 PR register on SHcompact, because it might be clobbered by the prologue.
10501 We check first if that is known to be the case. */
10502 if (TARGET_SHCOMPACT
10503 && ((current_function_args_info.call_cookie
10504 & ~ CALL_COOKIE_RET_TRAMP (1))
10505 || current_function_has_nonlocal_label))
10506 return gen_rtx_MEM (SImode, return_address_pointer_rtx);
10508 /* If we haven't finished rtl generation, there might be a nonlocal label
10509 that we haven't seen yet.
10510 ??? get_hard_reg_initial_val fails if it is called while no_new_pseudos
10511 is set, unless it has been called before for the same register. And even
10512 then, we end up in trouble if we didn't use the register in the same
10513 basic block before. So call get_hard_reg_initial_val now and wrap it
10514 in an unspec if we might need to replace it. */
10515 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
10516 combine can put the pseudo returned by get_hard_reg_initial_val into
10517 instructions that need a general purpose register, which will fail to
10518 be recognized when the pseudo becomes allocated to PR. */
10520 val = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10521 if (TARGET_SH1)
10522 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
10523 return val;
10527 sh_expand_t_scc (enum rtx_code code, rtx target)
10529 rtx result = target;
10530 HOST_WIDE_INT val;
10532 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
10533 || GET_CODE (sh_compare_op1) != CONST_INT)
10534 return 0;
10535 if (GET_CODE (result) != REG)
10536 result = gen_reg_rtx (SImode);
10537 val = INTVAL (sh_compare_op1);
10538 if ((code == EQ && val == 1) || (code == NE && val == 0))
10539 emit_insn (gen_movt (result));
10540 else if ((code == EQ && val == 0) || (code == NE && val == 1))
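/* Compute 1 - T: after the clobber, subc with identical source and
   destination leaves -T in RESULT (Rn = Rn - Rm - T with Rm == Rn),
   and adding 1 then yields the logical negation of the T bit.  */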
10542 emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
10543 emit_insn (gen_subc (result, result, result));
10544 emit_insn (gen_addsi3 (result, result, const1_rtx));
10546 else if (code == EQ || code == NE)
10547 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
10548 else
10549 return 0;
10550 if (result != target)
10551 emit_move_insn (target, result);
10552 return 1;
10555 /* INSN is an sfunc; return the rtx that describes the address used. */
10556 static rtx
10557 extract_sfunc_addr (rtx insn)
10559 rtx pattern, part = NULL_RTX;
10560 int len, i;
10562 pattern = PATTERN (insn);
10563 len = XVECLEN (pattern, 0);
10564 for (i = 0; i < len; i++)
10566 part = XVECEXP (pattern, 0, i);
10567 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
10568 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
10569 return XEXP (part, 0);
10571 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
10572 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
10575 /* Verify that the register in use_sfunc_addr still agrees with the address
10576 used in the sfunc. This prevents fill_slots_from_thread from changing
10577 use_sfunc_addr.
10578 INSN is the use_sfunc_addr instruction, and REG is the register it
10579 guards. */
10581 check_use_sfunc_addr (rtx insn, rtx reg)
10583 /* Search for the sfunc. It should really come right after INSN. */
10584 while ((insn = NEXT_INSN (insn)))
10586 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
10587 break;
10588 if (! INSN_P (insn))
10589 continue;
10591 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
10592 insn = XVECEXP (PATTERN (insn), 0, 0);
10593 if (GET_CODE (PATTERN (insn)) != PARALLEL
10594 || get_attr_type (insn) != TYPE_SFUNC)
10595 continue;
10596 return rtx_equal_p (extract_sfunc_addr (insn), reg);
10598 gcc_unreachable ();
10601 /* Returns 1 if OP is a MEM that can be the source of a simple move operation. */
10604 unaligned_load_operand (rtx op, enum machine_mode mode)
10606 rtx inside;
10608 if (GET_CODE (op) != MEM || GET_MODE (op) != mode)
10609 return 0;
10611 inside = XEXP (op, 0);
10613 if (GET_CODE (inside) == POST_INC)
10614 inside = XEXP (inside, 0);
10616 if (GET_CODE (inside) == REG)
10617 return 1;
10619 return 0;
10622 /* This function returns a constant rtx that represents 2**15 / pi in
10623 SFmode.  It's used to scale SFmode angles, in radians, to a
10624 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
10625 maps to 0x10000.  */
10627 static GTY(()) rtx sh_fsca_sf2int_rtx;
10630 sh_fsca_sf2int (void)
10632 if (! sh_fsca_sf2int_rtx)
10634 REAL_VALUE_TYPE rv;
10636 real_from_string (&rv, "10430.378350470453");
10637 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
10640 return sh_fsca_sf2int_rtx;
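/* The arithmetic behind these scale factors: the fixed-point angle
   format maps 2*pi to 0x10000, so radians are converted by multiplying
   with 0x10000 / (2*pi) = 2**15 / pi = 10430.378..., and converted
   back with the reciprocal pi / 2**15 = 9.5874e-05 used by
   sh_fsca_int2sf below.  */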
10643 /* This function returns a constant rtx that represents 2**15 / pi in
10644 DFmode.  It's used to scale DFmode angles, in radians, to a
10645 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
10646 maps to 0x10000.  */
10648 static GTY(()) rtx sh_fsca_df2int_rtx;
10651 sh_fsca_df2int (void)
10653 if (! sh_fsca_df2int_rtx)
10655 REAL_VALUE_TYPE rv;
10657 real_from_string (&rv, "10430.378350470453");
10658 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
10661 return sh_fsca_df2int_rtx;
10664 /* This function returns a constant rtx that represents pi / 2**15 in
10665 SFmode.  It's used to scale a fixed-point signed 16.16-bit fraction
10666 of a full circle back to an SFmode value, i.e., 0x10000 maps to
10667 2*pi.  */
10669 static GTY(()) rtx sh_fsca_int2sf_rtx;
10672 sh_fsca_int2sf (void)
10674 if (! sh_fsca_int2sf_rtx)
10676 REAL_VALUE_TYPE rv;
10678 real_from_string (&rv, "9.587379924285257e-5");
10679 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
10682 return sh_fsca_int2sf_rtx;
10685 /* Initialize the CUMULATIVE_ARGS structure. */
10687 void
10688 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
10689 tree fntype,
10690 rtx libname ATTRIBUTE_UNUSED,
10691 tree fndecl,
10692 signed int n_named_args,
10693 enum machine_mode mode)
10695 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
10696 pcum->free_single_fp_reg = 0;
10697 pcum->stack_regs = 0;
10698 pcum->byref_regs = 0;
10699 pcum->byref = 0;
10700 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
10702 /* XXX - Should we check TARGET_HITACHI here ??? */
10703 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
10705 if (fntype)
10707 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
10708 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
10709 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
10710 pcum->arg_count [(int) SH_ARG_INT]
10711 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
10713 pcum->call_cookie
10714 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10715 && pcum->arg_count [(int) SH_ARG_INT] == 0
10716 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
10717 ? int_size_in_bytes (TREE_TYPE (fntype))
10718 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
10719 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
10720 == FIRST_RET_REG));
10722 else
10724 pcum->arg_count [(int) SH_ARG_INT] = 0;
10725 pcum->prototype_p = FALSE;
10726 if (mode != VOIDmode)
10728 pcum->call_cookie =
10729 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10730 && GET_MODE_SIZE (mode) > 4
10731 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
10733 /* If the default ABI is the Renesas ABI then all library
10734 calls must assume that the library will be using the
10735 Renesas ABI. So if the function would return its result
10736 in memory then we must force the address of this memory
10737 block onto the stack. Ideally we would like to call
10738 targetm.calls.return_in_memory() here but we do not have
10739 the TYPE or the FNDECL available so we synthesize the
10740 contents of that function as best we can. */
10741 pcum->force_mem =
10742 (TARGET_DEFAULT & MASK_HITACHI)
10743 && (mode == BLKmode
10744 || (GET_MODE_SIZE (mode) > 4
10745 && !(mode == DFmode
10746 && TARGET_FPU_DOUBLE)));
10748 else
10750 pcum->call_cookie = 0;
10751 pcum->force_mem = FALSE;
10756 /* Determine if two hard register sets intersect.
10757 Return 1 if they do. */
10759 static int
10760 hard_regs_intersect_p (HARD_REG_SET *a, HARD_REG_SET *b)
10762 HARD_REG_SET c;
10763 COPY_HARD_REG_SET (c, *a);
10764 AND_HARD_REG_SET (c, *b);
10765 GO_IF_HARD_REG_SUBSET (c, reg_class_contents[(int) NO_REGS], lose);
10766 return 1;
10767 lose:
10768 return 0;
10771 #ifdef TARGET_ADJUST_UNROLL_MAX
10772 static int
10773 sh_adjust_unroll_max (struct loop * loop, int insn_count,
10774 int max_unrolled_insns, int strength_reduce_p,
10775 int unroll_type)
10777 /* This doesn't work in 4.0 because the old unroller & loop.h is gone. */
10778 if (TARGET_ADJUST_UNROLL && TARGET_SHMEDIA)
10780 /* Throttle back loop unrolling so that the costs of using more
10781 targets than the eight target registers we have don't outweigh
10782 the benefits of unrolling. */
10783 rtx insn;
10784 int n_labels = 0, n_calls = 0, n_exit_dest = 0, n_inner_loops = -1;
10785 int n_barriers = 0;
10786 rtx dest;
10787 int i;
10788 rtx exit_dest[8];
10789 int threshold;
10790 int unroll_benefit = 0, mem_latency = 0;
10791 int base_cost, best_cost, cost;
10792 int factor, best_factor;
10793 int n_dest;
10794 unsigned max_iterations = 32767;
10795 int n_iterations;
10796 int need_precond = 0, precond = 0;
10797 basic_block * bbs = get_loop_body (loop);
10798 struct niter_desc *desc;
10800 /* Assume that all labels inside the loop are used from inside the
10801 loop. If the loop has multiple entry points, it is unlikely to
10802 be unrolled anyway.
10803 Also assume that all calls are to different functions. That is
10804 somewhat pessimistic, but if you have lots of calls, unrolling the
10805 loop is not likely to gain you much in the first place. */
10806 i = loop->num_nodes - 1;
10807 for (insn = BB_HEAD (bbs[i]); ; )
10809 if (GET_CODE (insn) == CODE_LABEL)
10810 n_labels++;
10811 else if (GET_CODE (insn) == CALL_INSN)
10812 n_calls++;
10813 else if (GET_CODE (insn) == NOTE
10814 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
10815 n_inner_loops++;
10816 else if (GET_CODE (insn) == BARRIER)
10817 n_barriers++;
10818 if (insn != BB_END (bbs[i]))
10819 insn = NEXT_INSN (insn);
10820 else if (--i >= 0)
10821 insn = BB_HEAD (bbs[i]);
10822 else
10823 break;
10825 free (bbs);
10826 /* One label for the loop top is normal, and it won't be duplicated by
10827 unrolling. */
10828 if (n_labels <= 1)
10829 return max_unrolled_insns;
10830 if (n_inner_loops > 0)
10831 return 0;
10832 for (dest = loop->exit_labels; dest && n_exit_dest < 8;
10833 dest = LABEL_NEXTREF (dest))
10835 for (i = n_exit_dest - 1;
10836 i >= 0 && XEXP (dest, 0) != XEXP (exit_dest[i], 0); i--);
10837 if (i < 0)
10838 exit_dest[n_exit_dest++] = dest;
10840 /* If the loop top and call and exit destinations are enough to fill up
10841 the target registers, we're unlikely to do any more damage by
10842 unrolling. */
10843 if (n_calls + n_exit_dest >= 7)
10844 return max_unrolled_insns;
10846 /* ??? In the new loop unroller, there is no longer any strength
10847 reduction information available. Thus, when it comes to unrolling,
10848 we know the cost of everything, but we know the value of nothing. */
10849 #if 0
10850 if (strength_reduce_p
10851 && (unroll_type == LPT_UNROLL_RUNTIME
10852 || unroll_type == LPT_UNROLL_CONSTANT
10853 || unroll_type == LPT_PEEL_COMPLETELY))
10855 struct loop_ivs *ivs = LOOP_IVS (loop);
10856 struct iv_class *bl;
10858 /* We'll save one compare-and-branch in each loop body copy
10859 but the last one. */
10860 unroll_benefit = 1;
10861 /* Assess the benefit of removing biv & giv updates. */
10862 for (bl = ivs->list; bl; bl = bl->next)
10864 rtx increment = biv_total_increment (bl);
10865 struct induction *v;
10867 if (increment && GET_CODE (increment) == CONST_INT)
10869 unroll_benefit++;
10870 for (v = bl->giv; v; v = v->next_iv)
10872 if (! v->ignore && v->same == 0
10873 && GET_CODE (v->mult_val) == CONST_INT)
10874 unroll_benefit++;
10875 /* If this giv uses an array, try to determine
10876 a maximum iteration count from the size of the
10877 array. This need not be correct all the time,
10878 but should not be too far off the mark too often. */
10879 while (v->giv_type == DEST_ADDR)
10881 rtx mem = PATTERN (v->insn);
10882 tree mem_expr, type, size_tree;
10884 if (GET_CODE (SET_SRC (mem)) == MEM)
10885 mem = SET_SRC (mem);
10886 else if (GET_CODE (SET_DEST (mem)) == MEM)
10887 mem = SET_DEST (mem);
10888 else
10889 break;
10890 mem_expr = MEM_EXPR (mem);
10891 if (! mem_expr)
10892 break;
10893 type = TREE_TYPE (mem_expr);
10894 if (TREE_CODE (type) != ARRAY_TYPE
10895 || ! TYPE_SIZE (type) || ! TYPE_SIZE_UNIT (type))
10896 break;
10897 size_tree = fold (build (TRUNC_DIV_EXPR,
10898 bitsizetype,
10899 TYPE_SIZE (type),
10900 TYPE_SIZE_UNIT (type)));
10901 if (TREE_CODE (size_tree) == INTEGER_CST
10902 && ! TREE_INT_CST_HIGH (size_tree)
10903 && TREE_INT_CST_LOW (size_tree) < max_iterations)
10904 max_iterations = TREE_INT_CST_LOW (size_tree);
10905 break;
10911 #else /* 0 */
10912 /* Assume there is at least some benefit. */
10913 unroll_benefit = 1;
10914 #endif /* 0 */
10916 desc = get_simple_loop_desc (loop);
10917 n_iterations = desc->const_iter ? desc->niter : 0;
10918 max_iterations
10919 = max_iterations < desc->niter_max ? max_iterations : desc->niter_max;
10921 if (! strength_reduce_p || ! n_iterations)
10922 need_precond = 1;
10923 if (! n_iterations)
10925 n_iterations
10926 = max_iterations < 3 ? max_iterations : max_iterations * 3 / 4;
10927 if (! n_iterations)
10928 return 0;
10930 #if 0 /* ??? See above - missing induction variable information. */
10931 while (unroll_benefit > 1) /* no loop */
10933 /* We include the benefit of biv/ giv updates. Check if some or
10934 all of these updates are likely to fit into a scheduling
10935 bubble of a load.
10936 We check for the following case:
10937 - All the insns leading to the first JUMP_INSN are in a strict
10938 dependency chain.
10939 - there is at least one memory reference in them.
10941 When we find such a pattern, we assume that we can hide as many
10942 updates as the total load latency, if we have an
10943 unroll factor of at least two. We might or might not also do
10944 this without unrolling, so rather than considering this as an
10945 extra unroll benefit, discount it in the unroll benefits of unroll
10946 factors higher than two. */
10948 rtx set, last_set;
10950 insn = next_active_insn (loop->start);
10951 last_set = single_set (insn);
10952 if (! last_set)
10953 break;
10954 if (GET_CODE (SET_SRC (last_set)) == MEM)
10955 mem_latency += 2;
10956 for (insn = NEXT_INSN (insn); insn != end; insn = NEXT_INSN (insn))
10958 if (! INSN_P (insn))
10959 continue;
10960 if (GET_CODE (insn) == JUMP_INSN)
10961 break;
10962 if (! reg_referenced_p (SET_DEST (last_set), PATTERN (insn)))
10964 /* Check if this is a to-be-reduced giv insn. */
10965 struct loop_ivs *ivs = LOOP_IVS (loop);
10966 struct iv_class *bl;
10967 struct induction *v;
10968 for (bl = ivs->list; bl; bl = bl->next)
10970 if (bl->biv->insn == insn)
10971 goto is_biv;
10972 for (v = bl->giv; v; v = v->next_iv)
10973 if (v->insn == insn)
10974 goto is_giv;
10976 mem_latency--;
10977 is_biv:
10978 is_giv:
10979 continue;
10981 set = single_set (insn);
10982 if (! set)
10983 continue;
10984 if (GET_CODE (SET_SRC (set)) == MEM)
10985 mem_latency += 2;
10986 last_set = set;
10988 if (mem_latency < 0)
10989 mem_latency = 0;
10990 else if (mem_latency > unroll_benefit - 1)
10991 mem_latency = unroll_benefit - 1;
10992 break;
10994 #endif /* 0 */
10995 if (n_labels + (unroll_benefit + n_labels * 8) / n_iterations
10996 <= unroll_benefit)
10997 return max_unrolled_insns;
10999 n_dest = n_labels + n_calls + n_exit_dest;
11000 base_cost = n_dest <= 8 ? 0 : n_dest - 7;
11001 best_cost = 0;
11002 best_factor = 1;
11003 if (n_barriers * 2 > n_labels - 1)
11004 n_barriers = (n_labels - 1) / 2;
11005 for (factor = 2; factor <= 8; factor++)
11007 /* Bump up preconditioning cost for each power of two. */
11008 if (! (factor & (factor-1)))
11009 precond += 4;
11010 /* When preconditioning, only powers of two will be considered. */
11011 else if (need_precond)
11012 continue;
11013 n_dest = ((unroll_type != LPT_PEEL_COMPLETELY)
11014 + (n_labels - 1) * factor + n_calls + n_exit_dest
11015 - (n_barriers * factor >> 1)
11016 + need_precond);
11017 cost
11018 = ((n_dest <= 8 ? 0 : n_dest - 7)
11019 - base_cost * factor
11020 - ((factor > 2 ? unroll_benefit - mem_latency : unroll_benefit)
11021 * (factor - (unroll_type != LPT_PEEL_COMPLETELY)))
11022 + ((unroll_benefit + 1 + (n_labels - 1) * factor)
11023 / n_iterations));
11024 if (need_precond)
11025 cost += (precond + unroll_benefit * factor / 2) / n_iterations;
11026 if (cost < best_cost)
11028 best_cost = cost;
11029 best_factor = factor;
11032 threshold = best_factor * insn_count;
11033 if (max_unrolled_insns > threshold)
11034 max_unrolled_insns = threshold;
11036 return max_unrolled_insns;
11038 #endif /* TARGET_ADJUST_UNROLL_MAX */
11040 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
11041 not enter into CONST_DOUBLE for the replace.
11043 Note that copying is not done so X must not be shared unless all copies
11044 are to be modified.
11046 This is like replace_rtx, except that we operate on N_REPLACEMENTS
11047 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
11048 replacements[n*2+1] - and that we take mode changes into account.
11050 If a replacement is ambiguous, return NULL_RTX.
11052 If MODIFY is zero, don't modify any rtl in place,
11053 just return zero or nonzero for failure / success. */
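/* For example, if replacements[0] is (reg:DI 4) and replacements[1] is
   (reg:DI 6), an occurrence of (reg:SI 4) in X is rewritten to
   (reg:SI 6): the mode of the use is kept while the hard register is
   remapped by the same offset as the replacement pair.  */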
11056 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
11058 int i, j;
11059 const char *fmt;
11061 /* The following prevents loops from occurring when we change a MEM in a
11062 CONST_DOUBLE into the same CONST_DOUBLE. */
11063 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
11064 return x;
11066 for (i = n_replacements - 1; i >= 0 ; i--)
11067 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
11068 return replacements[i*2+1];
11070 /* Allow this function to make replacements in EXPR_LISTs. */
11071 if (x == 0)
11072 return 0;
11074 if (GET_CODE (x) == SUBREG)
11076 rtx new = replace_n_hard_rtx (SUBREG_REG (x), replacements,
11077 n_replacements, modify);
11079 if (GET_CODE (new) == CONST_INT)
11081 x = simplify_subreg (GET_MODE (x), new,
11082 GET_MODE (SUBREG_REG (x)),
11083 SUBREG_BYTE (x));
11084 if (! x)
11085 abort ();
11087 else if (modify)
11088 SUBREG_REG (x) = new;
11090 return x;
11092 else if (GET_CODE (x) == REG)
11094 unsigned regno = REGNO (x);
11095 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
11096 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
11097 rtx result = NULL_RTX;
11099 for (i = n_replacements - 1; i >= 0; i--)
11101 rtx from = replacements[i*2];
11102 rtx to = replacements[i*2+1];
11103 unsigned from_regno, from_nregs, to_regno, new_regno;
11105 if (GET_CODE (from) != REG)
11106 continue;
11107 from_regno = REGNO (from);
11108 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
11109 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
11110 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
11112 if (regno < from_regno
11113 || regno + nregs > from_regno + nregs
11114 || GET_CODE (to) != REG
11115 || result)
11116 return NULL_RTX;
11117 to_regno = REGNO (to);
11118 if (to_regno < FIRST_PSEUDO_REGISTER)
11120 new_regno = regno + to_regno - from_regno;
11121 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
11122 != nregs)
11123 return NULL_RTX;
11124 result = gen_rtx_REG (GET_MODE (x), new_regno);
11126 else if (GET_MODE (x) <= GET_MODE (to))
11127 result = gen_lowpart_common (GET_MODE (x), to);
11128 else
11129 result = gen_lowpart_SUBREG (GET_MODE (x), to);
11132 return result ? result : x;
11134 else if (GET_CODE (x) == ZERO_EXTEND)
11136 rtx new = replace_n_hard_rtx (XEXP (x, 0), replacements,
11137 n_replacements, modify);
11139 if (GET_CODE (new) == CONST_INT)
11141 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
11142 new, GET_MODE (XEXP (x, 0)));
11143 if (! x)
11144 abort ();
11146 else if (modify)
11147 XEXP (x, 0) = new;
11149 return x;
11152 fmt = GET_RTX_FORMAT (GET_CODE (x));
11153 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
11155 rtx new;
11157 if (fmt[i] == 'e')
11159 new = replace_n_hard_rtx (XEXP (x, i), replacements,
11160 n_replacements, modify);
11161 if (!new)
11162 return NULL_RTX;
11163 if (modify)
11164 XEXP (x, i) = new;
11166 else if (fmt[i] == 'E')
11167 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
11169 new = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
11170 n_replacements, modify);
11171 if (!new)
11172 return NULL_RTX;
11173 if (modify)
11174 XVECEXP (x, i, j) = new;
11178 return x;
11182 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
11184 enum rtx_code code = TRUNCATE;
11186 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
11188 rtx inner = XEXP (x, 0);
11189 enum machine_mode inner_mode = GET_MODE (inner);
11191 if (inner_mode == mode)
11192 return inner;
11193 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
11194 x = inner;
11195 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
11196 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
11198 code = GET_CODE (x);
11199 x = inner;
11202 return gen_rtx_fmt_e (code, mode, x);
11205 /* Called via for_each_rtx after reload, to clean up truncates of
11206 registers that span multiple actual hard registers. */
11208 shmedia_cleanup_truncate (rtx *p, void *n_changes)
11210 rtx x = *p, reg;
11212 if (GET_CODE (x) != TRUNCATE)
11213 return 0;
11214 reg = XEXP (x, 0);
11215 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && GET_CODE (reg) == REG)
11217 enum machine_mode reg_mode = GET_MODE (reg);
11218 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
11219 subreg_lowpart_offset (DImode, reg_mode));
11220 *(int*) n_changes += 1;
11221 return -1;
11223 return 0;
11226 /* Load and store depend on the highpart of the address. However,
11227 set_attr_alternative does not give well-defined results before reload,
11228 so we must look at the rtl ourselves to see if any of the feeding
11229 registers is used in a memref. */
11231 /* Called by sh_contains_memref_p via for_each_rtx. */
11232 static int
11233 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
11235 return (GET_CODE (*loc) == MEM);
11238 /* Return non-zero iff INSN contains a MEM. */
11240 sh_contains_memref_p (rtx insn)
11242 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
11245 /* FNADDR is the MEM expression from a call expander. Return an address
11246 to use in an SHmedia insn pattern. */
11248 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
11250 int is_sym;
11252 fnaddr = XEXP (fnaddr, 0);
11253 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
11254 if (flag_pic && is_sym)
11256 if (! SYMBOL_REF_LOCAL_P (fnaddr))
11258 rtx reg = gen_reg_rtx (Pmode);
11260 /* We must not use GOTPLT for sibcalls, because PIC_REG
11261 must be restored before the PLT code gets to run. */
11262 if (is_sibcall)
11263 emit_insn (gen_symGOT2reg (reg, fnaddr));
11264 else
11265 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
11266 fnaddr = reg;
11268 else
11270 fnaddr = gen_sym2PIC (fnaddr);
11271 PUT_MODE (fnaddr, Pmode);
11274 /* If ptabs might trap, make this visible to the rest of the compiler.
11275 We generally assume that symbols pertain to valid locations, but
11276 it is possible to generate invalid symbols with asm or linker tricks.
11277 In a list of functions where each returns its successor, an invalid
11278 symbol might denote an empty list. */
11279 if (!TARGET_PT_FIXED
11280 && (!is_sym || TARGET_INVALID_SYMBOLS)
11281 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
11283 rtx tr = gen_reg_rtx (PDImode);
11285 emit_insn (gen_ptabs (tr, fnaddr));
11286 fnaddr = tr;
11288 else if (! target_reg_operand (fnaddr, Pmode))
11289 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
11290 return fnaddr;
11293 const char *sh_multcost_str = "";
11294 const char *sh_gettrcost_str = "";
11295 const char *sh_div_str = "";
11296 const char *sh_divsi3_libfunc = "";
11297 const char *cut2_workaround_str = "";
11298 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
11300 /* This defines the storage for the variable part of a -mboard= option.
11301 It is only required when using the sh-superh-elf target.  */
11302 #ifdef _SUPERH_H
11303 const char * boardtype = "7750p2";
11304 const char * osruntime = "bare";
11305 #endif
11307 #include "gt-sh.h"