1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "insn-config.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "flags.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "function.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "output.h"
38 #include "insn-attr.h"
39 #include "toplev.h"
40 #include "recog.h"
41 #include "c-pragma.h"
42 #include "integrate.h"
43 #include "dwarf2.h"
44 #include "tm_p.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "real.h"
48 #include "langhooks.h"
49 #include "basic-block.h"
50 #include "ra.h"
51 #include "cfglayout.h"
52 #include "intl.h"
53 #include "sched-int.h"
54 #include "ggc.h"
55 #include "tree-gimple.h"
58 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
60 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
61 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
63 /* These are some macros to abstract register modes. */
64 #define CONST_OK_FOR_ADD(size) \
65 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
66 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
67 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
68 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
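/* Illustrative note (not part of the original source): these wrappers let
   target-independent-looking code emit word-sized arithmetic without caring
   about the word width of the configuration.  A sketch of a stack
   adjustment, with the -16 chosen only for illustration:

       emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
                            GEN_INT (-16)));

   expands via gen_addsi3 on 32-bit targets and via gen_adddi3 on
   SHmedia64.  */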
70 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
71 int current_function_interrupt;
73 /* ??? The pragma interrupt support will not work for SH3. */
74 /* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
75 output code for the next function appropriate for an interrupt handler. */
76 int pragma_interrupt;
78 /* This is set by the trap_exit attribute for functions. It specifies
79 a trap number to be used in a trapa instruction at function exit
80 (instead of an rte instruction). */
81 int trap_exit;
83 /* This is used by the sp_switch attribute for functions. It specifies
84 a variable holding the address of the stack the interrupt function
85 should switch to/from at entry/exit. */
86 rtx sp_switch;
88 /* This is set by #pragma trapa, and is similar to the above, except that
89 the compiler doesn't emit code to preserve all registers. */
90 static int pragma_trapa;
92 /* This is set by #pragma nosave_low_regs. This is useful on the SH3,
93 which has a separate set of low regs for User and Supervisor modes.
94 This should only be used for the lowest level of interrupts. Higher levels
95 of interrupts must save the registers in case they themselves are
96 interrupted. */
97 int pragma_nosave_low_regs;
99 /* This is used for communication between TARGET_SETUP_INCOMING_VARARGS and
100 sh_expand_prologue. */
101 int current_function_anonymous_args;
103 /* Global variables for machine-dependent things. */
105 /* Which cpu are we scheduling for. */
106 enum processor_type sh_cpu;
108 /* Definitions used in ready queue reordering for first scheduling pass. */
110 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
111 static short *regmode_weight[2];
113 /* Total SFmode and SImode weights of scheduled insns. */
114 static int curr_regmode_pressure[2];
116 /* If true, skip cycles for Q -> R movement. */
117 static int skip_cycles = 0;
119 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
120 and returned from sh_reorder2. */
121 static short cached_can_issue_more;
123 /* Saved operands from the last compare to use when we generate an scc
124 or bcc insn. */
126 rtx sh_compare_op0;
127 rtx sh_compare_op1;
129 /* Provides the class number of the smallest class containing
130 reg number. */
132 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
134 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
137 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
138 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
139 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
140 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
141 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
142 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
143 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
144 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
145 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
146 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
147 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
148 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
149 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
150 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
153 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
154 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
155 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
156 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
157 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
158 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
159 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
160 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
161 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
162 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
163 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
164 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
165 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
166 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
167 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
168 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
169 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
170 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
171 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
172 GENERAL_REGS,
175 char sh_register_names[FIRST_PSEUDO_REGISTER] \
176 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
178 char sh_additional_register_names[ADDREGNAMES_SIZE] \
179 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
180 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
182 /* Provide reg_class from a letter such as appears in the machine
183 description. *: target independently reserved letter.
184 reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */
186 enum reg_class reg_class_from_letter[] =
188 /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
189 /* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
190 /* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
191 /* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
192 /* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
193 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
194 /* y */ FPUL_REGS, /* z */ R0_REGS
197 int assembler_dialect;
199 static bool shmedia_space_reserved_for_target_registers;
201 static void split_branches (rtx);
202 static int branch_dest (rtx);
203 static void force_into (rtx, rtx);
204 static void print_slot (rtx);
205 static rtx add_constant (rtx, enum machine_mode, rtx);
206 static void dump_table (rtx, rtx);
207 static int hi_const (rtx);
208 static int broken_move (rtx);
209 static int mova_p (rtx);
210 static rtx find_barrier (int, rtx, rtx);
211 static int noncall_uses_reg (rtx, rtx, rtx *);
212 static rtx gen_block_redirect (rtx, int, int);
213 static void sh_reorg (void);
214 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
215 static rtx frame_insn (rtx);
216 static rtx push (int);
217 static void pop (int);
218 static void push_regs (HARD_REG_SET *, int);
219 static int calc_live_regs (HARD_REG_SET *);
220 static void mark_use (rtx, rtx *);
221 static HOST_WIDE_INT rounded_frame_size (int);
222 static rtx mark_constant_pool_use (rtx);
223 const struct attribute_spec sh_attribute_table[];
224 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
225 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
226 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
227 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
228 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
229 static void sh_insert_attributes (tree, tree *);
230 static int sh_adjust_cost (rtx, rtx, rtx, int);
231 static int sh_issue_rate (void);
232 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
233 static short find_set_regmode_weight (rtx, enum machine_mode);
234 static short find_insn_regmode_weight (rtx, enum machine_mode);
235 static void find_regmode_weight (int, enum machine_mode);
236 static void sh_md_init_global (FILE *, int, int);
237 static void sh_md_finish_global (FILE *, int);
238 static int rank_for_reorder (const void *, const void *);
239 static void swap_reorder (rtx *, int);
240 static void ready_reorder (rtx *, int);
241 static short high_pressure (enum machine_mode);
242 static int sh_reorder (FILE *, int, rtx *, int *, int);
243 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
244 static void sh_md_init (FILE *, int, int);
245 static int sh_variable_issue (FILE *, int, rtx, int);
247 static bool sh_function_ok_for_sibcall (tree, tree);
249 static bool sh_cannot_modify_jumps_p (void);
250 static int sh_target_reg_class (void);
251 static bool sh_optimize_target_register_callee_saved (bool);
252 static bool sh_ms_bitfield_layout_p (tree);
254 static void sh_init_builtins (void);
255 static void sh_media_init_builtins (void);
256 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
257 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
258 static void sh_file_start (void);
259 static int flow_dependent_p (rtx, rtx);
260 static void flow_dependent_p_1 (rtx, rtx, void *);
261 static int shiftcosts (rtx);
262 static int andcosts (rtx);
263 static int addsubcosts (rtx);
264 static int multcosts (rtx);
265 static bool unspec_caller_rtx_p (rtx);
266 static bool sh_cannot_copy_insn_p (rtx);
267 static bool sh_rtx_costs (rtx, int, int, int *);
268 static int sh_address_cost (rtx);
269 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
270 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
271 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
272 static int scavenge_reg (HARD_REG_SET *s);
273 struct save_schedule_s;
274 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
275 struct save_schedule_s *, int);
277 static rtx sh_struct_value_rtx (tree, int);
278 static bool sh_return_in_memory (tree, tree);
279 static rtx sh_builtin_saveregs (void);
280 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
281 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
282 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
283 static tree sh_build_builtin_va_list (void);
284 static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
285 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
286 tree, bool);
287 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
288 tree, bool);
289 static int sh_dwarf_calling_convention (tree);
292 /* Initialize the GCC target structure. */
293 #undef TARGET_ATTRIBUTE_TABLE
294 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
296 /* The next two are used for debug info when compiling with -gdwarf. */
297 #undef TARGET_ASM_UNALIGNED_HI_OP
298 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
299 #undef TARGET_ASM_UNALIGNED_SI_OP
300 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
302 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
303 #undef TARGET_ASM_UNALIGNED_DI_OP
304 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
305 #undef TARGET_ASM_ALIGNED_DI_OP
306 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
308 #undef TARGET_ASM_FUNCTION_EPILOGUE
309 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
311 #undef TARGET_ASM_OUTPUT_MI_THUNK
312 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
314 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
315 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
317 #undef TARGET_ASM_FILE_START
318 #define TARGET_ASM_FILE_START sh_file_start
319 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
320 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
322 #undef TARGET_INSERT_ATTRIBUTES
323 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
325 #undef TARGET_SCHED_ADJUST_COST
326 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
328 #undef TARGET_SCHED_ISSUE_RATE
329 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
331 /* The next 5 hooks have been implemented for re-enabling sched1. With the
332 help of these macros we limit the movement of insns in sched1 to
333 reduce register pressure. The overall idea is to keep count of the SImode
334 and SFmode regs required by already scheduled insns. When these counts
335 cross some threshold values, we give priority to insns that free registers.
336 The insn that frees registers is most likely to be the insn with the lowest
337 LUID (original insn order), but such an insn might be sitting in the stalled
338 queue (Q) instead of the ready queue (R). To solve this, we skip cycles,
339 up to a maximum of 8, so that such insns may move from Q -> R.
341 The descriptions of the hooks are as follows:
343 TARGET_SCHED_INIT_GLOBAL: A new target hook added to the generic
344 scheduler; it is called inside the sched_init function just after the
345 find_insn_reg_weights function call. It is used to calculate the SImode
346 and SFmode weights of the insns of basic blocks, much like what
347 find_insn_reg_weights does.
348 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
350 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
351 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
352 (Q)->(R).
354 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
355 high, reorder the ready queue so that the insn with the lowest LUID will be
356 issued next.
358 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
359 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
361 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
362 can be returned from TARGET_SCHED_REORDER2.
364 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
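/* Illustrative sketch (not part of the original source) of the decision the
   reorder hooks described above are meant to make, using the
   CURR_REGMODE_PRESSURE macro defined further below.  The names
   threshold_si, threshold_sf, ready and n_ready are hypothetical; the real
   limits live in high_pressure () and the real queue comes from the
   scheduler:

       if (CURR_REGMODE_PRESSURE (SImode) > threshold_si
           || CURR_REGMODE_PRESSURE (SFmode) > threshold_sf)
         ready_reorder (ready, n_ready);

   i.e. only under high pressure is the default ready-list order overridden
   in favour of lowest-LUID-first.  */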
366 #undef TARGET_SCHED_DFA_NEW_CYCLE
367 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
369 #undef TARGET_SCHED_INIT_GLOBAL
370 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
372 #undef TARGET_SCHED_FINISH_GLOBAL
373 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
375 #undef TARGET_SCHED_VARIABLE_ISSUE
376 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
378 #undef TARGET_SCHED_REORDER
379 #define TARGET_SCHED_REORDER sh_reorder
381 #undef TARGET_SCHED_REORDER2
382 #define TARGET_SCHED_REORDER2 sh_reorder2
384 #undef TARGET_SCHED_INIT
385 #define TARGET_SCHED_INIT sh_md_init
387 #undef TARGET_CANNOT_MODIFY_JUMPS_P
388 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
389 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
390 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
391 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
392 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
393 sh_optimize_target_register_callee_saved
395 #undef TARGET_MS_BITFIELD_LAYOUT_P
396 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
398 #undef TARGET_INIT_BUILTINS
399 #define TARGET_INIT_BUILTINS sh_init_builtins
400 #undef TARGET_EXPAND_BUILTIN
401 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
403 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
404 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
406 #undef TARGET_CANNOT_COPY_INSN_P
407 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
408 #undef TARGET_RTX_COSTS
409 #define TARGET_RTX_COSTS sh_rtx_costs
410 #undef TARGET_ADDRESS_COST
411 #define TARGET_ADDRESS_COST sh_address_cost
413 #undef TARGET_MACHINE_DEPENDENT_REORG
414 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
416 #ifdef HAVE_AS_TLS
417 #undef TARGET_HAVE_TLS
418 #define TARGET_HAVE_TLS true
419 #endif
421 #undef TARGET_PROMOTE_PROTOTYPES
422 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
423 #undef TARGET_PROMOTE_FUNCTION_ARGS
424 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
425 #undef TARGET_PROMOTE_FUNCTION_RETURN
426 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
428 #undef TARGET_STRUCT_VALUE_RTX
429 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
430 #undef TARGET_RETURN_IN_MEMORY
431 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
433 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
434 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
435 #undef TARGET_SETUP_INCOMING_VARARGS
436 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
437 #undef TARGET_STRICT_ARGUMENT_NAMING
438 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
439 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
440 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
441 #undef TARGET_MUST_PASS_IN_STACK
442 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
443 #undef TARGET_PASS_BY_REFERENCE
444 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
445 #undef TARGET_CALLEE_COPIES
446 #define TARGET_CALLEE_COPIES sh_callee_copies
448 #undef TARGET_BUILD_BUILTIN_VA_LIST
449 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
450 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
451 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
453 #undef TARGET_VECTOR_MODE_SUPPORTED_P
454 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
456 #undef TARGET_PCH_VALID_P
457 #define TARGET_PCH_VALID_P sh_pch_valid_p
459 #undef TARGET_DWARF_CALLING_CONVENTION
460 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
462 /* Return regmode weight for insn. */
463 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
465 /* Return current register pressure for regmode. */
466 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
468 #ifdef SYMBIAN
470 #undef TARGET_ENCODE_SECTION_INFO
471 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
472 #undef TARGET_STRIP_NAME_ENCODING
473 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
474 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
475 #define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class
477 #endif /* SYMBIAN */
479 struct gcc_target targetm = TARGET_INITIALIZER;
481 /* Print the operand address in x to the stream. */
483 void
484 print_operand_address (FILE *stream, rtx x)
486 switch (GET_CODE (x))
488 case REG:
489 case SUBREG:
490 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
491 break;
493 case PLUS:
495 rtx base = XEXP (x, 0);
496 rtx index = XEXP (x, 1);
498 switch (GET_CODE (index))
500 case CONST_INT:
501 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
502 reg_names[true_regnum (base)]);
503 break;
505 case REG:
506 case SUBREG:
508 int base_num = true_regnum (base);
509 int index_num = true_regnum (index);
511 fprintf (stream, "@(r0,%s)",
512 reg_names[MAX (base_num, index_num)]);
513 break;
516 default:
517 debug_rtx (x);
518 abort ();
521 break;
523 case PRE_DEC:
524 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
525 break;
527 case POST_INC:
528 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
529 break;
531 default:
532 x = mark_constant_pool_use (x);
533 output_addr_const (stream, x);
534 break;
538 /* Print operand x (an rtx) in assembler syntax to file stream
539 according to modifier code.
541 '.' print a .s if insn needs delay slot
542 ',' print LOCAL_LABEL_PREFIX
543 '@' print trap, rte or rts depending upon pragma interruptness
544 '#' output a nop if there is nothing to put in the delay slot
545 ''' print likelihood suffix (/u for unlikely).
546 'O' print a constant without the #
547 'R' print the LSW of a dp value - changes if in little endian
548 'S' print the MSW of a dp value - changes if in little endian
549 'T' print the next word of a dp value - same as 'R' in big endian mode.
550 'M' print an `x' if `m' will print `base,index'.
551 'N' print 'r63' if the operand is (const_int 0).
552 'd' print a V2SF reg as dN instead of fpN.
553 'm' print a pair `base,offset' or `base,index', for LD and ST.
554 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
555 'o' output an operator. */
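/* Worked example (not part of the original source), derived from the MSW/LSW
   macros above: for a DFmode value held in the register pair r2/r3 on a
   little-endian target, an illustrative insn template such as

       "mov.l %S1,@-r15\n\tmov.l %R1,@-r15"

   prints r3 for %S1 (most significant word) and r2 for %R1 (least
   significant word); on a big-endian target the two are swapped.  */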
557 void
558 print_operand (FILE *stream, rtx x, int code)
560 switch (code)
562 case '.':
563 if (final_sequence
564 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
565 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
566 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
567 break;
568 case ',':
569 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
570 break;
571 case '@':
572 if (trap_exit)
573 fprintf (stream, "trapa #%d", trap_exit);
574 else if (sh_cfun_interrupt_handler_p ())
575 fprintf (stream, "rte");
576 else
577 fprintf (stream, "rts");
578 break;
579 case '#':
580 /* Output a nop if there's nothing in the delay slot. */
581 if (dbr_sequence_length () == 0)
582 fprintf (stream, "\n\tnop");
583 break;
584 case '\'':
586 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
588 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
589 fputs ("/u", stream);
590 break;
592 case 'O':
593 x = mark_constant_pool_use (x);
594 output_addr_const (stream, x);
595 break;
596 case 'R':
597 fputs (reg_names[REGNO (x) + LSW], (stream));
598 break;
599 case 'S':
600 fputs (reg_names[REGNO (x) + MSW], (stream));
601 break;
602 case 'T':
603 /* Next word of a double. */
604 switch (GET_CODE (x))
606 case REG:
607 fputs (reg_names[REGNO (x) + 1], (stream));
608 break;
609 case MEM:
610 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
611 && GET_CODE (XEXP (x, 0)) != POST_INC)
612 x = adjust_address (x, SImode, 4);
613 print_operand_address (stream, XEXP (x, 0));
614 break;
615 default:
616 break;
618 break;
619 case 'o':
620 switch (GET_CODE (x))
622 case PLUS: fputs ("add", stream); break;
623 case MINUS: fputs ("sub", stream); break;
624 case MULT: fputs ("mul", stream); break;
625 case DIV: fputs ("div", stream); break;
626 case EQ: fputs ("eq", stream); break;
627 case NE: fputs ("ne", stream); break;
628 case GT: case LT: fputs ("gt", stream); break;
629 case GE: case LE: fputs ("ge", stream); break;
630 case GTU: case LTU: fputs ("gtu", stream); break;
631 case GEU: case LEU: fputs ("geu", stream); break;
632 default:
633 break;
635 break;
636 case 'M':
637 if (GET_CODE (x) == MEM
638 && GET_CODE (XEXP (x, 0)) == PLUS
639 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
640 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
641 fputc ('x', stream);
642 break;
644 case 'm':
645 if (GET_CODE (x) != MEM)
646 abort ();
647 x = XEXP (x, 0);
648 switch (GET_CODE (x))
650 case REG:
651 case SUBREG:
652 print_operand (stream, x, 0);
653 fputs (", 0", stream);
654 break;
656 case PLUS:
657 print_operand (stream, XEXP (x, 0), 0);
658 fputs (", ", stream);
659 print_operand (stream, XEXP (x, 1), 0);
660 break;
662 default:
663 abort ();
665 break;
667 case 'd':
668 if (GET_CODE (x) != REG || GET_MODE (x) != V2SFmode)
669 abort ();
671 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
672 break;
674 case 'N':
675 if (x == CONST0_RTX (GET_MODE (x)))
677 fprintf ((stream), "r63");
678 break;
680 goto default_output;
681 case 'u':
682 if (GET_CODE (x) == CONST_INT)
684 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
685 break;
687 /* Fall through. */
689 default_output:
690 default:
691 switch (GET_CODE (x))
693 /* FIXME: We need this on SHmedia32 because reload generates
694 some sign-extended HI or QI loads into DImode registers
695 but, because Pmode is SImode, the address ends up with a
696 subreg:SI of the DImode register. Maybe reload should be
697 fixed so as to apply alter_subreg to such loads? */
698 case SUBREG:
699 if (SUBREG_BYTE (x) != 0
700 || GET_CODE (SUBREG_REG (x)) != REG)
701 abort ();
703 x = SUBREG_REG (x);
704 /* Fall through. */
706 case REG:
707 if (FP_REGISTER_P (REGNO (x))
708 && GET_MODE (x) == V16SFmode)
709 fprintf ((stream), "mtrx%s", reg_names[REGNO (x)] + 2);
710 else if (FP_REGISTER_P (REGNO (x))
711 && GET_MODE (x) == V4SFmode)
712 fprintf ((stream), "fv%s", reg_names[REGNO (x)] + 2);
713 else if (GET_CODE (x) == REG
714 && GET_MODE (x) == V2SFmode)
715 fprintf ((stream), "fp%s", reg_names[REGNO (x)] + 2);
716 else if (FP_REGISTER_P (REGNO (x))
717 && GET_MODE_SIZE (GET_MODE (x)) > 4)
718 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
719 else
720 fputs (reg_names[REGNO (x)], (stream));
721 break;
723 case MEM:
724 output_address (XEXP (x, 0));
725 break;
727 case CONST:
728 if (TARGET_SHMEDIA
729 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
730 && GET_MODE (XEXP (x, 0)) == DImode
731 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
732 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
734 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
736 fputc ('(', stream);
737 if (GET_CODE (val) == ASHIFTRT)
739 fputc ('(', stream);
740 if (GET_CODE (XEXP (val, 0)) == CONST)
741 fputc ('(', stream);
742 output_addr_const (stream, XEXP (val, 0));
743 if (GET_CODE (XEXP (val, 0)) == CONST)
744 fputc (')', stream);
745 fputs (" >> ", stream);
746 output_addr_const (stream, XEXP (val, 1));
747 fputc (')', stream);
749 else
751 if (GET_CODE (val) == CONST)
752 fputc ('(', stream);
753 output_addr_const (stream, val);
754 if (GET_CODE (val) == CONST)
755 fputc (')', stream);
757 fputs (" & 65535)", stream);
758 break;
761 /* Fall through. */
762 default:
763 if (TARGET_SH1)
764 fputc ('#', stream);
765 output_addr_const (stream, x);
766 break;
768 break;
772 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
773 static void
774 force_into (rtx value, rtx target)
776 value = force_operand (value, target);
777 if (! rtx_equal_p (value, target))
778 emit_insn (gen_move_insn (target, value));
781 /* Emit code to perform a block move. Choose the best method.
783 OPERANDS[0] is the destination.
784 OPERANDS[1] is the source.
785 OPERANDS[2] is the size.
786 OPERANDS[3] is the alignment safe to use. */
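/* Worked example (not part of the original source): on SH4, a constant copy
   such as memcpy (dst, src, 12) with 4-byte alignment arrives here with
   operands[2] == const_int 12 and operands[3] == const_int 4, and is handled
   by the bytes == 12 branch below, which calls the __movmemSI12_i4 library
   routine with the destination address in r4 and the source address in r5.  */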
788 int
789 expand_block_move (rtx *operands)
791 int align = INTVAL (operands[3]);
792 int constp = (GET_CODE (operands[2]) == CONST_INT);
793 int bytes = (constp ? INTVAL (operands[2]) : 0);
795 if (! constp)
796 return 0;
798 /* If we could use mov.l to move words and dest is word-aligned, we
799 can use movua.l for loads and still generate a relatively short
800 and efficient sequence. */
801 if (TARGET_SH4A_ARCH && align < 4
802 && MEM_ALIGN (operands[0]) >= 32
803 && can_move_by_pieces (bytes, 32))
805 rtx dest = copy_rtx (operands[0]);
806 rtx src = copy_rtx (operands[1]);
807 /* We could use different pseudos for each copied word, but
808 since movua can only load into r0, it's kind of
809 pointless. */
810 rtx temp = gen_reg_rtx (SImode);
811 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
812 int copied = 0;
814 while (copied + 4 <= bytes)
816 rtx to = adjust_address (dest, SImode, copied);
817 rtx from = adjust_automodify_address (src, SImode, src_addr, copied);
819 emit_insn (gen_movua (temp, from));
820 emit_move_insn (src_addr, plus_constant (src_addr, 4));
821 emit_move_insn (to, temp);
822 copied += 4;
825 if (copied < bytes)
826 move_by_pieces (adjust_address (dest, BLKmode, copied),
827 adjust_automodify_address (src, BLKmode,
828 src_addr, copied),
829 bytes - copied, align, 0);
831 return 1;
834 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
835 alignment, or if it isn't a multiple of 4 bytes, then fail. */
836 if (align < 4 || (bytes % 4 != 0))
837 return 0;
839 if (TARGET_HARD_SH4)
841 if (bytes < 12)
842 return 0;
843 else if (bytes == 12)
845 tree entry_name;
846 rtx sym;
847 rtx func_addr_rtx;
848 rtx r4 = gen_rtx_REG (SImode, 4);
849 rtx r5 = gen_rtx_REG (SImode, 5);
851 entry_name = get_identifier ("__movmemSI12_i4");
853 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
854 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
855 force_into (XEXP (operands[0], 0), r4);
856 force_into (XEXP (operands[1], 0), r5);
857 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
858 return 1;
860 else if (! TARGET_SMALLCODE)
862 tree entry_name;
863 rtx sym;
864 rtx func_addr_rtx;
865 int dwords;
866 rtx r4 = gen_rtx_REG (SImode, 4);
867 rtx r5 = gen_rtx_REG (SImode, 5);
868 rtx r6 = gen_rtx_REG (SImode, 6);
870 entry_name = get_identifier (bytes & 4
871 ? "__movmem_i4_odd"
872 : "__movmem_i4_even");
873 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
874 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
875 force_into (XEXP (operands[0], 0), r4);
876 force_into (XEXP (operands[1], 0), r5);
878 dwords = bytes >> 3;
879 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
880 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
881 return 1;
883 else
884 return 0;
886 if (bytes < 64)
888 char entry[30];
889 tree entry_name;
890 rtx sym;
891 rtx func_addr_rtx;
892 rtx r4 = gen_rtx_REG (SImode, 4);
893 rtx r5 = gen_rtx_REG (SImode, 5);
895 sprintf (entry, "__movmemSI%d", bytes);
896 entry_name = get_identifier (entry);
897 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
898 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
899 force_into (XEXP (operands[0], 0), r4);
900 force_into (XEXP (operands[1], 0), r5);
901 emit_insn (gen_block_move_real (func_addr_rtx));
902 return 1;
905 /* This is the same number of bytes as a memcpy call, but to a different,
906 less common function name, so this will occasionally use more space. */
907 if (! TARGET_SMALLCODE)
909 tree entry_name;
910 rtx sym;
911 rtx func_addr_rtx;
912 int final_switch, while_loop;
913 rtx r4 = gen_rtx_REG (SImode, 4);
914 rtx r5 = gen_rtx_REG (SImode, 5);
915 rtx r6 = gen_rtx_REG (SImode, 6);
917 entry_name = get_identifier ("__movmem");
918 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
919 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
920 force_into (XEXP (operands[0], 0), r4);
921 force_into (XEXP (operands[1], 0), r5);
923 /* r6 controls the size of the move. 16 is decremented from it
924 for each 64 bytes moved. Then the negative bit left over is used
925 as an index into a list of move instructions. e.g., a 72 byte move
926 would be set up with size(r6) = 14, for one iteration through the
927 big while loop, and a switch of -2 for the last part. */
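/* Worked check of the arithmetic (not part of the original source) for the
   72-byte example above: bytes / 4 == 18, so final_switch == 16 - (18 % 16)
   == 14 and while_loop == (18 / 16 - 1) * 16 == 0, giving r6 == 14.  After
   one 64-byte iteration the count is 14 - 16 == -2, the negative index that
   selects the moves covering the remaining 8 bytes.  */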
929 final_switch = 16 - ((bytes / 4) % 16);
930 while_loop = ((bytes / 4) / 16 - 1) * 16;
931 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
932 emit_insn (gen_block_lump_real (func_addr_rtx));
933 return 1;
936 return 0;
939 /* Prepare operands for a move define_expand; specifically, one of the
940 operands must be in a register. */
942 int
943 prepare_move_operands (rtx operands[], enum machine_mode mode)
945 if ((mode == SImode || mode == DImode)
946 && flag_pic
947 && ! ((mode == Pmode || mode == ptr_mode)
948 && tls_symbolic_operand (operands[1], Pmode) != 0))
950 rtx temp;
951 if (SYMBOLIC_CONST_P (operands[1]))
953 if (GET_CODE (operands[0]) == MEM)
954 operands[1] = force_reg (Pmode, operands[1]);
955 else if (TARGET_SHMEDIA
956 && GET_CODE (operands[1]) == LABEL_REF
957 && target_reg_operand (operands[0], mode))
958 /* It's ok. */;
959 else
961 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
962 operands[1] = legitimize_pic_address (operands[1], mode, temp);
965 else if (GET_CODE (operands[1]) == CONST
966 && GET_CODE (XEXP (operands[1], 0)) == PLUS
967 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
969 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
970 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
971 mode, temp);
972 operands[1] = expand_binop (mode, add_optab, temp,
973 XEXP (XEXP (operands[1], 0), 1),
974 no_new_pseudos ? temp
975 : gen_reg_rtx (Pmode),
976 0, OPTAB_LIB_WIDEN);
980 if (! reload_in_progress && ! reload_completed)
982 /* Copy the source to a register if both operands aren't registers. */
983 if (! register_operand (operands[0], mode)
984 && ! sh_register_operand (operands[1], mode))
985 operands[1] = copy_to_mode_reg (mode, operands[1]);
987 if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
989 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
990 except that we can't use that function because it is static. */
991 rtx new = change_address (operands[0], mode, 0);
992 MEM_COPY_ATTRIBUTES (new, operands[0]);
993 operands[0] = new;
996 /* This case can happen while generating code to move the result
997 of a library call to the target. Reject `st r0,@(rX,rY)' because
998 reload will fail to find a spill register for rX, since r0 is already
999 being used for the source. */
1000 else if (refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1001 && GET_CODE (operands[0]) == MEM
1002 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1003 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
1004 operands[1] = copy_to_mode_reg (mode, operands[1]);
1007 if (mode == Pmode || mode == ptr_mode)
1009 rtx op0, op1;
1010 enum tls_model tls_kind;
1012 op0 = operands[0];
1013 op1 = operands[1];
1014 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
1016 rtx tga_op1, tga_ret, tmp, tmp2;
1018 switch (tls_kind)
1020 case TLS_MODEL_GLOBAL_DYNAMIC:
1021 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1022 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1023 op1 = tga_ret;
1024 break;
1026 case TLS_MODEL_LOCAL_DYNAMIC:
1027 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1028 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1030 tmp = gen_reg_rtx (Pmode);
1031 emit_move_insn (tmp, tga_ret);
1033 if (register_operand (op0, Pmode))
1034 tmp2 = op0;
1035 else
1036 tmp2 = gen_reg_rtx (Pmode);
1038 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1039 op1 = tmp2;
1040 break;
1042 case TLS_MODEL_INITIAL_EXEC:
1043 if (! flag_pic)
1045 /* Don't schedule insns for getting the GOT address when
1046 the first scheduling pass is enabled, to avoid spill
1047 failures for R0. */
1048 if (flag_schedule_insns)
1049 emit_insn (gen_blockage ());
1050 emit_insn (gen_GOTaddr2picreg ());
1051 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode,
1052 PIC_REG)));
1053 if (flag_schedule_insns)
1054 emit_insn (gen_blockage ());
1056 tga_op1 = no_new_pseudos ? op0 : gen_reg_rtx (Pmode);
1057 tmp = gen_sym2GOTTPOFF (op1);
1058 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1059 op1 = tga_op1;
1060 break;
1062 case TLS_MODEL_LOCAL_EXEC:
1063 tmp2 = gen_reg_rtx (Pmode);
1064 emit_insn (gen_load_gbr (tmp2));
1065 tmp = gen_reg_rtx (Pmode);
1066 emit_insn (gen_symTPOFF2reg (tmp, op1));
1068 if (register_operand (op0, Pmode))
1069 op1 = op0;
1070 else
1071 op1 = gen_reg_rtx (Pmode);
1073 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1074 break;
1076 default:
1077 abort ();
1079 operands[1] = op1;
1083 return 0;
1086 /* Prepare the operands for an scc instruction; make sure that the
1087 compare has been done. */
1088 rtx
1089 prepare_scc_operands (enum rtx_code code)
1091 rtx t_reg = gen_rtx_REG (SImode, T_REG);
1092 enum rtx_code oldcode = code;
1093 enum machine_mode mode;
1095 /* First need a compare insn. */
1096 switch (code)
1098 case NE:
1099 /* It isn't possible to handle this case. */
1100 abort ();
1101 case LT:
1102 code = GT;
1103 break;
1104 case LE:
1105 code = GE;
1106 break;
1107 case LTU:
1108 code = GTU;
1109 break;
1110 case LEU:
1111 code = GEU;
1112 break;
1113 default:
1114 break;
1116 if (code != oldcode)
1118 rtx tmp = sh_compare_op0;
1119 sh_compare_op0 = sh_compare_op1;
1120 sh_compare_op1 = tmp;
1123 mode = GET_MODE (sh_compare_op0);
1124 if (mode == VOIDmode)
1125 mode = GET_MODE (sh_compare_op1);
1127 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1128 if ((code != EQ && code != NE
1129 && (sh_compare_op1 != const0_rtx
1130 || code == GTU || code == GEU || code == LTU || code == LEU))
1131 || (mode == DImode && sh_compare_op1 != const0_rtx)
1132 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1133 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1135 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1136 (mode == SFmode ? emit_sf_insn : emit_df_insn)
1137 (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
1138 gen_rtx_SET (VOIDmode, t_reg,
1139 gen_rtx_fmt_ee (code, SImode,
1140 sh_compare_op0, sh_compare_op1)),
1141 gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
1142 else
1143 emit_insn (gen_rtx_SET (VOIDmode, t_reg,
1144 gen_rtx_fmt_ee (code, SImode,
1145 sh_compare_op0, sh_compare_op1)));
1147 return t_reg;
1150 /* Called from the md file, set up the operands of a compare instruction. */
1152 void
1153 from_compare (rtx *operands, int code)
1155 enum machine_mode mode = GET_MODE (sh_compare_op0);
1156 rtx insn;
1157 if (mode == VOIDmode)
1158 mode = GET_MODE (sh_compare_op1);
1159 if (code != EQ
1160 || mode == DImode
1161 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1163 /* Force args into regs, since we can't use constants here. */
1164 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1165 if (sh_compare_op1 != const0_rtx
1166 || code == GTU || code == GEU
1167 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1168 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1170 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
1172 from_compare (operands, GT);
1173 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
1175 else
1176 insn = gen_rtx_SET (VOIDmode,
1177 gen_rtx_REG (SImode, T_REG),
1178 gen_rtx_fmt_ee (code, SImode,
1179 sh_compare_op0, sh_compare_op1));
1180 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1182 insn = gen_rtx_PARALLEL (VOIDmode,
1183 gen_rtvec (2, insn,
1184 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
1185 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
1187 else
1188 emit_insn (insn);
1191 /* Functions to output assembly code. */
1193 /* Return a sequence of instructions to perform a DI or DF move.
1195 Since the SH cannot move a DI or DF in one instruction, we have
1196 to take care when we see overlapping source and dest registers. */
1198 const char *
1199 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
1200 enum machine_mode mode)
1202 rtx dst = operands[0];
1203 rtx src = operands[1];
1205 if (GET_CODE (dst) == MEM
1206 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
1207 return "mov.l %T1,%0\n\tmov.l %1,%0";
1209 if (register_operand (dst, mode)
1210 && register_operand (src, mode))
1212 if (REGNO (src) == MACH_REG)
1213 return "sts mach,%S0\n\tsts macl,%R0";
1215 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1216 when mov.d r1,r0 do r1->r0 then r2->r1. */
1218 if (REGNO (src) + 1 == REGNO (dst))
1219 return "mov %T1,%T0\n\tmov %1,%0";
1220 else
1221 return "mov %1,%0\n\tmov %T1,%T0";
1223 else if (GET_CODE (src) == CONST_INT)
1225 if (INTVAL (src) < 0)
1226 output_asm_insn ("mov #-1,%S0", operands);
1227 else
1228 output_asm_insn ("mov #0,%S0", operands);
1230 return "mov %1,%R0";
1232 else if (GET_CODE (src) == MEM)
1234 int ptrreg = -1;
1235 int dreg = REGNO (dst);
1236 rtx inside = XEXP (src, 0);
1238 if (GET_CODE (inside) == REG)
1239 ptrreg = REGNO (inside);
1240 else if (GET_CODE (inside) == SUBREG)
1241 ptrreg = subreg_regno (inside);
1242 else if (GET_CODE (inside) == PLUS)
1244 ptrreg = REGNO (XEXP (inside, 0));
1245 /* ??? A r0+REG address shouldn't be possible here, because it isn't
1246 an offsettable address. Unfortunately, offsettable addresses use
1247 QImode to check the offset, and a QImode offsettable address
1248 requires r0 for the other operand, which is not currently
1249 supported, so we can't use the 'o' constraint.
1250 Thus we must check for and handle r0+REG addresses here.
1251 We punt for now, since this is likely very rare. */
1252 if (GET_CODE (XEXP (inside, 1)) == REG)
1253 abort ();
1255 else if (GET_CODE (inside) == LABEL_REF)
1256 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1257 else if (GET_CODE (inside) == POST_INC)
1258 return "mov.l %1,%0\n\tmov.l %1,%T0";
1259 else
1260 abort ();
1262 /* Work out the safe way to copy. Copy into the second half first. */
1263 if (dreg == ptrreg)
1264 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1267 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1270 /* Print an instruction which would have gone into a delay slot after
1271 another instruction, but couldn't because the other instruction expanded
1272 into a sequence where putting the slot insn at the end wouldn't work. */
1274 static void
1275 print_slot (rtx insn)
1277 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1, NULL);
1279 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
1282 const char *
1283 output_far_jump (rtx insn, rtx op)
1285 struct { rtx lab, reg, op; } this;
1286 rtx braf_base_lab = NULL_RTX;
1287 const char *jump;
1288 int far;
1289 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1290 rtx prev;
1292 this.lab = gen_label_rtx ();
1294 if (TARGET_SH2
1295 && offset >= -32764
1296 && offset - get_attr_length (insn) <= 32766)
1298 far = 0;
1299 jump = "mov.w %O0,%1; braf %1";
1301 else
1303 far = 1;
1304 if (flag_pic)
1306 if (TARGET_SH2)
1307 jump = "mov.l %O0,%1; braf %1";
1308 else
1309 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1311 else
1312 jump = "mov.l %O0,%1; jmp @%1";
1314 /* If we have a scratch register available, use it. */
1315 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1316 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1318 this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1319 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1320 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1321 output_asm_insn (jump, &this.lab);
1322 if (dbr_sequence_length ())
1323 print_slot (final_sequence);
1324 else
1325 output_asm_insn ("nop", 0);
1327 else
1329 /* Output the delay slot insn first if any. */
1330 if (dbr_sequence_length ())
1331 print_slot (final_sequence);
1333 this.reg = gen_rtx_REG (SImode, 13);
1334 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1335 Fortunately, MACL is fixed and call-clobbered, and we never
1336 need its value across jumps, so save r13 in it instead of in
1337 the stack. */
1338 if (TARGET_SH5)
1339 output_asm_insn ("lds r13, macl", 0);
1340 else
1341 output_asm_insn ("mov.l r13,@-r15", 0);
1342 output_asm_insn (jump, &this.lab);
1343 if (TARGET_SH5)
1344 output_asm_insn ("sts macl, r13", 0);
1345 else
1346 output_asm_insn ("mov.l @r15+,r13", 0);
1348 if (far && flag_pic && TARGET_SH2)
1350 braf_base_lab = gen_label_rtx ();
1351 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1352 CODE_LABEL_NUMBER (braf_base_lab));
1354 if (far)
1355 output_asm_insn (".align 2", 0);
1356 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1357 this.op = op;
1358 if (far && flag_pic)
1360 if (TARGET_SH2)
1361 this.lab = braf_base_lab;
1362 output_asm_insn (".long %O2-%O0", &this.lab);
1364 else
1365 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
1366 return "";
1369 /* Local label counter, used for constants in the pool and inside
1370 pattern branches. */
1372 static int lf = 100;
1374 /* Output code for ordinary branches. */
1376 const char *
1377 output_branch (int logic, rtx insn, rtx *operands)
1379 switch (get_attr_length (insn))
1381 case 6:
1382 /* This can happen if filling the delay slot has caused a forward
1383 branch to exceed its range (we could reverse it, but only
1384 when we know we won't overextend other branches; this should
1385 best be handled by relaxation).
1386 It can also happen when other condbranches hoist delay slot insns
1387 from their destinations, thus leading to a code size increase.
1388 But the branch will still be in the range -4092..+4098 bytes. */
1390 if (! TARGET_RELAX)
1392 int label = lf++;
1393 /* The call to print_slot will clobber the operands. */
1394 rtx op0 = operands[0];
1396 /* If the instruction in the delay slot is annulled (true), then
1397 there is no delay slot where we can put it now. The only safe
1398 place for it is after the label. final will do that by default. */
1400 if (final_sequence
1401 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1402 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1404 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
1405 ASSEMBLER_DIALECT ? "/" : ".", label);
1406 print_slot (final_sequence);
1408 else
1409 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
1411 output_asm_insn ("bra\t%l0", &op0);
1412 fprintf (asm_out_file, "\tnop\n");
1413 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1415 return "";
1417 /* When relaxing, handle this like a short branch. The linker
1418 will fix it up if it still doesn't fit after relaxation. */
1419 case 2:
1420 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
1422 /* These are for SH2e, in which we have to account for the
1423 extra nop because of the hardware bug in annulled branches. */
1424 case 8:
1425 if (! TARGET_RELAX)
1427 int label = lf++;
1429 if (final_sequence
1430 && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
1431 abort ();
1432 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
1433 logic ? "f" : "t",
1434 ASSEMBLER_DIALECT ? "/" : ".", label);
1435 fprintf (asm_out_file, "\tnop\n");
1436 output_asm_insn ("bra\t%l0", operands);
1437 fprintf (asm_out_file, "\tnop\n");
1438 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1440 return "";
1442 /* When relaxing, fall through. */
1443 case 4:
1445 char buffer[10];
1447 sprintf (buffer, "b%s%ss\t%%l0",
1448 logic ? "t" : "f",
1449 ASSEMBLER_DIALECT ? "/" : ".");
1450 output_asm_insn (buffer, &operands[0]);
1451 return "nop";
1454 default:
1455 /* There should be no longer branches now - that would
1456 indicate that something has destroyed the branches set
1457 up in machine_dependent_reorg. */
1458 abort ();
1462 const char *
1463 output_branchy_insn (enum rtx_code code, const char *template,
1464 rtx insn, rtx *operands)
1466 rtx next_insn = NEXT_INSN (insn);
1468 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
1470 rtx src = SET_SRC (PATTERN (next_insn));
1471 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
1473 /* Following branch not taken */
1474 operands[9] = gen_label_rtx ();
1475 emit_label_after (operands[9], next_insn);
1476 INSN_ADDRESSES_NEW (operands[9],
1477 INSN_ADDRESSES (INSN_UID (next_insn))
1478 + get_attr_length (next_insn));
1479 return template;
1481 else
1483 int offset = (branch_dest (next_insn)
1484 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
1485 if (offset >= -252 && offset <= 258)
1487 if (GET_CODE (src) == IF_THEN_ELSE)
1488 /* branch_true */
1489 src = XEXP (src, 1);
1490 operands[9] = src;
1491 return template;
1495 operands[9] = gen_label_rtx ();
1496 emit_label_after (operands[9], insn);
1497 INSN_ADDRESSES_NEW (operands[9],
1498 INSN_ADDRESSES (INSN_UID (insn))
1499 + get_attr_length (insn));
1500 return template;
1503 const char *
1504 output_ieee_ccmpeq (rtx insn, rtx *operands)
1506 return output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);
1509 /* Output the start of the assembler file. */
1511 static void
1512 sh_file_start (void)
1514 default_file_start ();
1516 #ifdef SYMBIAN
1517 /* Declare the .directive section before it is used. */
1518 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
1519 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
1520 #endif
1522 if (TARGET_ELF)
1523 /* We need to show the text section with the proper
1524 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
1525 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
1526 will complain. We can teach GAS specifically about the
1527 default attributes for our choice of text section, but
1528 then we would have to change GAS again if/when we change
1529 the text section name. */
1530 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
1531 else
1532 /* Switch to the data section so that the coffsem symbol
1533 isn't in the text section. */
1534 data_section ();
1536 if (TARGET_LITTLE_ENDIAN)
1537 fputs ("\t.little\n", asm_out_file);
1539 if (!TARGET_ELF)
1541 if (TARGET_SHCOMPACT)
1542 fputs ("\t.mode\tSHcompact\n", asm_out_file);
1543 else if (TARGET_SHMEDIA)
1544 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1545 TARGET_SHMEDIA64 ? 64 : 32);
1549 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
1551 static bool
1552 unspec_caller_rtx_p (rtx pat)
1554 switch (GET_CODE (pat))
1556 case CONST:
1557 return unspec_caller_rtx_p (XEXP (pat, 0));
1558 case PLUS:
1559 case MINUS:
1560 if (unspec_caller_rtx_p (XEXP (pat, 0)))
1561 return true;
1562 return unspec_caller_rtx_p (XEXP (pat, 1));
1563 case UNSPEC:
1564 if (XINT (pat, 1) == UNSPEC_CALLER)
1565 return true;
1566 default:
1567 break;
1570 return false;
1573 /* Indicate that INSN cannot be duplicated. This is true for insns
1574 that generate a unique label. */
1576 static bool
1577 sh_cannot_copy_insn_p (rtx insn)
1579 rtx pat;
1581 if (!reload_completed || !flag_pic)
1582 return false;
1584 if (GET_CODE (insn) != INSN)
1585 return false;
1586 if (asm_noperands (insn) >= 0)
1587 return false;
1589 pat = PATTERN (insn);
1590 if (GET_CODE (pat) != SET)
1591 return false;
1592 pat = SET_SRC (pat);
1594 if (unspec_caller_rtx_p (pat))
1595 return true;
1597 return false;
1600 /* Actual number of instructions used to make a shift by N. */
1601 static const char ashiftrt_insns[] =
1602 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
1604 /* Left shift and logical right shift are the same. */
1605 static const char shift_insns[] =
1606 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1608 /* Individual shift amounts needed to get the above length sequences.
1609 One bit right shifts clobber the T bit, so when possible, put one bit
1610 shifts in the middle of the sequence, so the ends are eligible for
1611 branch delay slots. */
1612 static const short shift_amounts[32][5] = {
1613 {0}, {1}, {2}, {2, 1},
1614 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
1615 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1616 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
1617 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1618 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1619 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1620 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
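/* Worked example (not part of the original source): a shift by 7 uses
   shift_amounts[7] == {2, 2, 1, 2}, four partial shifts totalling 7, which
   matches shift_insns[7] == 4; as noted above, the single-bit step is kept
   in the middle of the sequence so the shifts at either end stay eligible
   for branch delay slots.  */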
1622 /* Likewise, but for shift amounts < 16, up to three highmost bits
1623 might be clobbered. This is typically used when combined with some
1624 kind of sign or zero extension. */
1626 static const char ext_shift_insns[] =
1627 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1629 static const short ext_shift_amounts[32][4] = {
1630 {0}, {1}, {2}, {2, 1},
1631 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
1632 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1633 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
1634 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1635 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1636 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1637 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1639 /* Assuming we have a value that has been sign-extended by at least one bit,
1640 can we use the ext_shift_amounts, with the last shift turned into an arithmetic
1641 shift, to shift it by N without data loss, and more quickly than by other means? */
1642 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
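/* Note (not part of the original source): ((n) | 8) == 15 holds exactly for
   n == 7 and n == 15, the two counts accepted here; for both, the
   ext_shift_amounts row ends in a one-bit right shift ({8, -1} and
   {16, -1}) that can be made arithmetic as described above.  */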
1644 /* This is used in length attributes in sh.md to help compute the length
1645 of arbitrary constant shift instructions. */
1647 int
1648 shift_insns_rtx (rtx insn)
1650 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1651 int shift_count = INTVAL (XEXP (set_src, 1));
1652 enum rtx_code shift_code = GET_CODE (set_src);
1654 switch (shift_code)
1656 case ASHIFTRT:
1657 return ashiftrt_insns[shift_count];
1658 case LSHIFTRT:
1659 case ASHIFT:
1660 return shift_insns[shift_count];
1661 default:
1662 abort ();
1666 /* Return the cost of a shift. */
1668 static inline int
1669 shiftcosts (rtx x)
1671 int value;
1673 if (TARGET_SHMEDIA)
1674 return 1;
1676 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
1678 if (GET_MODE (x) == DImode
1679 && GET_CODE (XEXP (x, 1)) == CONST_INT
1680 && INTVAL (XEXP (x, 1)) == 1)
1681 return 2;
1683 /* Everything else is invalid, because there is no pattern for it. */
1684 return 10000;
1686 /* If we shift by a non-constant amount, this will be expensive. */
1687 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1688 return SH_DYNAMIC_SHIFT_COST;
1690 value = INTVAL (XEXP (x, 1));
1692 /* Otherwise, return the true cost in instructions. */
1693 if (GET_CODE (x) == ASHIFTRT)
1695 int cost = ashiftrt_insns[value];
1696 /* If SH3, then we put the constant in a reg and use shad. */
1697 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1698 cost = 1 + SH_DYNAMIC_SHIFT_COST;
1699 return cost;
1701 else
1702 return shift_insns[value];
1705 /* Return the cost of an AND operation. */
1707 static inline int
1708 andcosts (rtx x)
1710 int i;
1712 /* ANDing with a register is a single-cycle `and' instruction. */
1713 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1714 return 1;
1716 i = INTVAL (XEXP (x, 1));
1718 if (TARGET_SHMEDIA)
1720 if ((GET_CODE (XEXP (x, 1)) == CONST_INT
1721 && CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1722 || EXTRA_CONSTRAINT_C16 (XEXP (x, 1)))
1723 return 1;
1724 else
1725 return 2;
1728 /* These constants are single cycle extu.[bw] instructions. */
1729 if (i == 0xff || i == 0xffff)
1730 return 1;
1731 /* Constants that can be used in an and immediate instruction in a single
1732 cycle, but this requires r0, so make it a little more expensive. */
1733 if (CONST_OK_FOR_K08 (i))
1734 return 2;
1735 /* Constants that can be loaded with a mov immediate and an and.
1736 This case is probably unnecessary. */
1737 if (CONST_OK_FOR_I08 (i))
1738 return 2;
1739 /* Any other constant requires a 2 cycle pc-relative load plus an and.
1740 This case is probably unnecessary. */
1741 return 3;
1744 /* Return the cost of an addition or a subtraction. */
1746 static inline int
1747 addsubcosts (rtx x)
1749 /* Adding a register is a single cycle insn. */
1750 if (GET_CODE (XEXP (x, 1)) == REG
1751 || GET_CODE (XEXP (x, 1)) == SUBREG)
1752 return 1;
1754 /* Likewise for small constants. */
1755 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1756 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
1757 return 1;
1759 if (TARGET_SHMEDIA)
1760 switch (GET_CODE (XEXP (x, 1)))
1762 case CONST:
1763 case LABEL_REF:
1764 case SYMBOL_REF:
1765 return TARGET_SHMEDIA64 ? 5 : 3;
1767 case CONST_INT:
1768 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1769 return 2;
1770 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
1771 return 3;
1772 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
1773 return 4;
1775 /* Fall through. */
1776 default:
1777 return 5;
1780 /* Any other constant requires a 2 cycle pc-relative load plus an
1781 addition. */
1782 return 3;
1785 /* Return the cost of a multiply. */
1786 static inline int
1787 multcosts (rtx x ATTRIBUTE_UNUSED)
1789 if (TARGET_SHMEDIA)
1790 return 3;
1792 if (TARGET_SH2)
1794 /* We have a mul insn, so we can never take more than the mul and the
1795 read of the mac reg, but count more because of the latency and extra
1796 reg usage. */
1797 if (TARGET_SMALLCODE)
1798 return 2;
1799 return 3;
1802 /* If we're aiming at small code, then just count the number of
1803 insns in a multiply call sequence. */
1804 if (TARGET_SMALLCODE)
1805 return 5;
1807 /* Otherwise count all the insns in the routine we'd be calling too. */
1808 return 20;
1811 /* Compute a (partial) cost for rtx X. Return true if the complete
1812 cost has been computed, and false if subexpressions should be
1813 scanned. In either case, *TOTAL contains the cost result. */
1815 static bool
1816 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
1818 switch (code)
1820 case CONST_INT:
1821 if (TARGET_SHMEDIA)
1823 if (INTVAL (x) == 0)
1824 *total = 0;
1825 else if (outer_code == AND && and_operand ((x), DImode))
1826 *total = 0;
1827 else if ((outer_code == IOR || outer_code == XOR
1828 || outer_code == PLUS)
1829 && CONST_OK_FOR_I10 (INTVAL (x)))
1830 *total = 0;
1831 else if (CONST_OK_FOR_I16 (INTVAL (x)))
1832 *total = COSTS_N_INSNS (outer_code != SET);
1833 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
1834 *total = COSTS_N_INSNS (2);
1835 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
1836 *total = COSTS_N_INSNS (3);
1837 else
1838 *total = COSTS_N_INSNS (4);
1839 return true;
1841 if (CONST_OK_FOR_I08 (INTVAL (x)))
1842 *total = 0;
1843 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
1844 && CONST_OK_FOR_K08 (INTVAL (x)))
1845 *total = 1;
1846 else
1847 *total = 8;
1848 return true;
1850 case CONST:
1851 case LABEL_REF:
1852 case SYMBOL_REF:
1853 if (TARGET_SHMEDIA64)
1854 *total = COSTS_N_INSNS (4);
1855 else if (TARGET_SHMEDIA32)
1856 *total = COSTS_N_INSNS (2);
1857 else
1858 *total = 5;
1859 return true;
1861 case CONST_DOUBLE:
1862 if (TARGET_SHMEDIA)
1863 *total = COSTS_N_INSNS (4);
1864 else
1865 *total = 10;
1866 return true;
1868 case PLUS:
1869 *total = COSTS_N_INSNS (addsubcosts (x));
1870 return true;
1872 case AND:
1873 *total = COSTS_N_INSNS (andcosts (x));
1874 return true;
1876 case MULT:
1877 *total = COSTS_N_INSNS (multcosts (x));
1878 return true;
1880 case ASHIFT:
1881 case ASHIFTRT:
1882 case LSHIFTRT:
1883 *total = COSTS_N_INSNS (shiftcosts (x));
1884 return true;
1886 case DIV:
1887 case UDIV:
1888 case MOD:
1889 case UMOD:
1890 *total = COSTS_N_INSNS (20);
1891 return true;
1893 case FLOAT:
1894 case FIX:
1895 *total = 100;
1896 return true;
1898 default:
1899 return false;
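/* For illustration: with the hook above, a CONST_INT such as 5 satisfies
   CONST_OK_FOR_I08 and is treated as free, since it can ride along as a
   mov / arithmetic immediate, whereas something like 0x12345 gets the
   fairly high cost of 8, which nudges the optimizers towards keeping
   such constants in registers instead of rematerializing them. */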
1903 /* Compute the cost of an address. For the SH, all valid addresses are
1904 essentially the same cost; use a slightly higher cost for reg + reg addressing,
1905 since it increases pressure on r0. */
1907 static int
1908 sh_address_cost (rtx X)
1910 return (GET_CODE (X) == PLUS
1911 && ! CONSTANT_P (XEXP (X, 1))
1912 && ! TARGET_SHMEDIA ? 1 : 0);
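/* E.g. @Rn and @(disp,Rn) addresses come back as 0, while an indexed
   @(R0,Rn) address is charged 1, since the index of that addressing
   mode can only live in r0. */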
1915 /* Code to expand a shift. */
1917 void
1918 gen_ashift (int type, int n, rtx reg)
1920 /* Negative values here come from the shift_amounts array. */
1921 if (n < 0)
1923 if (type == ASHIFT)
1924 type = LSHIFTRT;
1925 else
1926 type = ASHIFT;
1927 n = -n;
1930 switch (type)
1932 case ASHIFTRT:
1933 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
1934 break;
1935 case LSHIFTRT:
1936 if (n == 1)
1937 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
1938 else
1939 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
1940 break;
1941 case ASHIFT:
1942 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
1943 break;
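/* The point of the negative counts handled above: some entries in the
   shift_amounts table use a short shift in the opposite direction,
   e.g. a left shift by 14 can be done as shll8 / shlr2 / shll8 (the
   bits that the shlr2 throws away would be shifted out by the final
   shll8 anyway), which is one insn shorter than building 14 out of
   8 + 2 + 2 + 2. */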
1947 /* Same for HImode. */
1949 void
1950 gen_ashift_hi (int type, int n, rtx reg)
1952 /* Negative values here come from the shift_amounts array. */
1953 if (n < 0)
1955 if (type == ASHIFT)
1956 type = LSHIFTRT;
1957 else
1958 type = ASHIFT;
1959 n = -n;
1962 switch (type)
1964 case ASHIFTRT:
1965 case LSHIFTRT:
1966 /* We don't have HImode right shift operations because using the
1967 ordinary 32 bit shift instructions for that doesn't generate proper
1968 zero/sign extension.
1969 gen_ashift_hi is only called in contexts where we know that the
1970 sign extension works out correctly. */
1972 int offset = 0;
1973 if (GET_CODE (reg) == SUBREG)
1975 offset = SUBREG_BYTE (reg);
1976 reg = SUBREG_REG (reg);
1978 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
1979 break;
1981 case ASHIFT:
1982 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
1983 break;
1987 /* Output RTL to split a constant shift into its component SH constant
1988 shift instructions. */
1990 void
1991 gen_shifty_op (int code, rtx *operands)
1993 int value = INTVAL (operands[2]);
1994 int max, i;
1996 /* Truncate the shift count in case it is out of bounds. */
1997 value = value & 0x1f;
1999 if (value == 31)
2001 if (code == LSHIFTRT)
2003 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
2004 emit_insn (gen_movt (operands[0]));
2005 return;
2007 else if (code == ASHIFT)
2009 /* There is a two instruction sequence for 31 bit left shifts,
2010 but it requires r0. */
2011 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
2013 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
2014 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
2015 return;
2019 else if (value == 0)
2021 /* This can happen when not optimizing. We must output something here
2022 to prevent the compiler from aborting in final.c after the try_split
2023 call. */
2024 emit_insn (gen_nop ());
2025 return;
2028 max = shift_insns[value];
2029 for (i = 0; i < max; i++)
2030 gen_ashift (code, shift_amounts[value][i], operands[0]);
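/* For illustration: since the SH only has 1, 2, 8 and 16 bit constant
   shifts, a left shift by 20 comes out of the loop above as something
   like shll16 / shll2 / shll2, while a logical right shift by 31 was
   already special-cased to rotl / movt before we ever get here. */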
2033 /* Same as above, but optimized for values where the topmost bits don't
2034 matter. */
2036 void
2037 gen_shifty_hi_op (int code, rtx *operands)
2039 int value = INTVAL (operands[2]);
2040 int max, i;
2041 void (*gen_fun) (int, int, rtx);
2043 /* This operation is used by and_shl for SImode values with a few
2044 high bits known to be cleared. */
2045 value &= 31;
2046 if (value == 0)
2048 emit_insn (gen_nop ());
2049 return;
2052 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2053 if (code == ASHIFT)
2055 max = ext_shift_insns[value];
2056 for (i = 0; i < max; i++)
2057 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2059 else
2060 /* When shifting right, emit the shifts in reverse order, so that
2061 solitary negative values come first. */
2062 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2063 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2066 /* Output RTL for an arithmetic right shift. */
2068 /* ??? Rewrite to use super-optimizer sequences. */
2071 expand_ashiftrt (rtx *operands)
2073 rtx sym;
2074 rtx wrk;
2075 char func[18];
2076 tree func_name;
2077 int value;
2079 if (TARGET_SH3)
2081 if (GET_CODE (operands[2]) != CONST_INT)
2083 rtx count = copy_to_mode_reg (SImode, operands[2]);
2084 emit_insn (gen_negsi2 (count, count));
2085 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2086 return 1;
2088 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2089 > 1 + SH_DYNAMIC_SHIFT_COST)
2091 rtx count
2092 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2093 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2094 return 1;
2097 if (GET_CODE (operands[2]) != CONST_INT)
2098 return 0;
2100 value = INTVAL (operands[2]) & 31;
2102 if (value == 31)
2104 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2105 return 1;
2107 else if (value >= 16 && value <= 19)
2109 wrk = gen_reg_rtx (SImode);
2110 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2111 value -= 16;
2112 while (value--)
2113 gen_ashift (ASHIFTRT, 1, wrk);
2114 emit_move_insn (operands[0], wrk);
2115 return 1;
2117 /* Expand a short sequence inline; for longer ones, call a magic routine. */
2118 else if (value <= 5)
2120 wrk = gen_reg_rtx (SImode);
2121 emit_move_insn (wrk, operands[1]);
2122 while (value--)
2123 gen_ashift (ASHIFTRT, 1, wrk);
2124 emit_move_insn (operands[0], wrk);
2125 return 1;
2128 wrk = gen_reg_rtx (Pmode);
2130 /* Load the value into an arg reg and call a helper. */
2131 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2132 sprintf (func, "__ashiftrt_r4_%d", value);
2133 func_name = get_identifier (func);
2134 sym = function_symbol (IDENTIFIER_POINTER (func_name));
2135 emit_move_insn (wrk, sym);
2136 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2137 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
2138 return 1;
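/* For illustration: on SH-1/SH-2 an arithmetic right shift by, say, 8
   is too long to expand inline and there is no dynamic shift, so the
   code above puts the value in r4, loads the address of the
   __ashiftrt_r4_8 helper into a scratch register, calls it through
   that register, and picks the result back up from r4. */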
2142 sh_dynamicalize_shift_p (rtx count)
2144 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
2147 /* Try to find a good way to implement the combiner pattern
2148 [(set (match_operand:SI 0 "register_operand" "r")
2149 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2150 (match_operand:SI 2 "const_int_operand" "n"))
2151 (match_operand:SI 3 "const_int_operand" "n"))) .
2152 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2153 return 0 for simple right / left or left/right shift combination.
2154 return 1 for a combination of shifts with zero_extend.
2155 return 2 for a combination of shifts with an AND that needs r0.
2156 return 3 for a combination of shifts with an AND that needs an extra
2157 scratch register, when the three highmost bits of the AND mask are clear.
2158 return 4 for a combination of shifts with an AND that needs an extra
2159 scratch register, when any of the three highmost bits of the AND mask
2160 is set.
2161 If ATTRP is set, store an initial right shift width in ATTRP[0],
2162 and the instruction length in ATTRP[1]. These values are not valid
2163 when returning 0.
2164 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2165 shift_amounts for the last shift value that is to be used before the
2166 sign extend. */
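/* For example, with LEFT == 2 and MASK == 0x3fc the operation is
   (x & 0xff) << 2, which is best done as a zero-extend (extu.b)
   followed by a two-bit left shift, so the function returns 1. */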
2168 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2170 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2171 int left = INTVAL (left_rtx), right;
2172 int best = 0;
2173 int cost, best_cost = 10000;
2174 int best_right = 0, best_len = 0;
2175 int i;
2176 int can_ext;
2178 if (left < 0 || left > 31)
2179 return 0;
2180 if (GET_CODE (mask_rtx) == CONST_INT)
2181 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2182 else
2183 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2184 /* Can this be expressed as a right shift / left shift pair? */
2185 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2186 right = exact_log2 (lsb);
2187 mask2 = ~(mask + lsb - 1);
2188 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2189 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
2190 if (! mask2)
2191 best_cost = shift_insns[right] + shift_insns[right + left];
2192 /* mask has no trailing zeroes <==> ! right */
2193 else if (! right && mask2 == ~(lsb2 - 1))
2195 int late_right = exact_log2 (lsb2);
2196 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2198 /* Try to use zero extend. */
2199 if (mask2 == ~(lsb2 - 1))
2201 int width, first;
2203 for (width = 8; width <= 16; width += 8)
2205 /* Can we zero-extend right away? */
2206 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2208 cost
2209 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2210 if (cost < best_cost)
2212 best = 1;
2213 best_cost = cost;
2214 best_right = right;
2215 best_len = cost;
2216 if (attrp)
2217 attrp[2] = -1;
2219 continue;
2221 /* ??? Could try to put zero extend into initial right shift,
2222 or even shift a bit left before the right shift. */
2223 /* Determine value of first part of left shift, to get to the
2224 zero extend cut-off point. */
2225 first = width - exact_log2 (lsb2) + right;
2226 if (first >= 0 && right + left - first >= 0)
2228 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2229 + ext_shift_insns[right + left - first];
2230 if (cost < best_cost)
2232 best = 1;
2233 best_cost = cost;
2234 best_right = right;
2235 best_len = cost;
2236 if (attrp)
2237 attrp[2] = first;
2242 /* Try to use r0 AND pattern */
2243 for (i = 0; i <= 2; i++)
2245 if (i > right)
2246 break;
2247 if (! CONST_OK_FOR_K08 (mask >> i))
2248 continue;
2249 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2250 if (cost < best_cost)
2252 best = 2;
2253 best_cost = cost;
2254 best_right = i;
2255 best_len = cost - 1;
2258 /* Try to use a scratch register to hold the AND operand. */
2259 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2260 for (i = 0; i <= 2; i++)
2262 if (i > right)
2263 break;
2264 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2265 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2266 if (cost < best_cost)
2268 best = 4 - can_ext;
2269 best_cost = cost;
2270 best_right = i;
2271 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2275 if (attrp)
2277 attrp[0] = best_right;
2278 attrp[1] = best_len;
2280 return best;
2283 /* This is used in length attributes of the unnamed instructions
2284 corresponding to shl_and_kind return values of 1 and 2. */
2286 shl_and_length (rtx insn)
2288 rtx set_src, left_rtx, mask_rtx;
2289 int attributes[3];
2291 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2292 left_rtx = XEXP (XEXP (set_src, 0), 1);
2293 mask_rtx = XEXP (set_src, 1);
2294 shl_and_kind (left_rtx, mask_rtx, attributes);
2295 return attributes[1];
2298 /* This is used in the length attribute of the and_shl_scratch instruction. */
2301 shl_and_scr_length (rtx insn)
2303 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2304 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2305 rtx op = XEXP (set_src, 0);
2306 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2307 op = XEXP (XEXP (op, 0), 0);
2308 return len + shift_insns[INTVAL (XEXP (op, 1))];
2311 /* Generate rtl for instructions for which shl_and_kind advised a particular
2312 method of generating them, i.e. returned nonzero. */
2315 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2317 int attributes[3];
2318 unsigned HOST_WIDE_INT mask;
2319 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2320 int right, total_shift;
2321 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2323 right = attributes[0];
2324 total_shift = INTVAL (left_rtx) + right;
2325 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2326 switch (kind)
2328 default:
2329 return -1;
2330 case 1:
2332 int first = attributes[2];
2333 rtx operands[3];
2335 if (first < 0)
2337 emit_insn ((mask << right) <= 0xff
2338 ? gen_zero_extendqisi2 (dest,
2339 gen_lowpart (QImode, source))
2340 : gen_zero_extendhisi2 (dest,
2341 gen_lowpart (HImode, source)));
2342 source = dest;
2344 if (source != dest)
2345 emit_insn (gen_movsi (dest, source));
2346 operands[0] = dest;
2347 if (right)
2349 operands[2] = GEN_INT (right);
2350 gen_shifty_hi_op (LSHIFTRT, operands);
2352 if (first > 0)
2354 operands[2] = GEN_INT (first);
2355 gen_shifty_hi_op (ASHIFT, operands);
2356 total_shift -= first;
2357 mask <<= first;
2359 if (first >= 0)
2360 emit_insn (mask <= 0xff
2361 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
2362 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2363 if (total_shift > 0)
2365 operands[2] = GEN_INT (total_shift);
2366 gen_shifty_hi_op (ASHIFT, operands);
2368 break;
2370 case 4:
2371 shift_gen_fun = gen_shifty_op;
2372 case 3:
2373 /* If the topmost bit that matters is set, set the topmost bits
2374 that don't matter. This way, we might be able to get a shorter
2375 signed constant. */
2376 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
2377 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
2378 case 2:
2379 /* Don't expand fine-grained when combining, because that will
2380 make the pattern fail. */
2381 if (currently_expanding_to_rtl
2382 || reload_in_progress || reload_completed)
2384 rtx operands[3];
2386 /* Cases 3 and 4 should be handled by this split
2387 only while combining */
2388 if (kind > 2)
2389 abort ();
2390 if (right)
2392 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2393 source = dest;
2395 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
2396 if (total_shift)
2398 operands[0] = dest;
2399 operands[1] = dest;
2400 operands[2] = GEN_INT (total_shift);
2401 shift_gen_fun (ASHIFT, operands);
2403 break;
2405 else
2407 int neg = 0;
2408 if (kind != 4 && total_shift < 16)
2410 neg = -ext_shift_amounts[total_shift][1];
2411 if (neg > 0)
2412 neg -= ext_shift_amounts[total_shift][2];
2413 else
2414 neg = 0;
2416 emit_insn (gen_and_shl_scratch (dest, source,
2417 GEN_INT (right),
2418 GEN_INT (mask),
2419 GEN_INT (total_shift + neg),
2420 GEN_INT (neg)));
2421 emit_insn (gen_movsi (dest, dest));
2422 break;
2425 return 0;
2428 /* Try to find a good way to implement the combiner pattern
2429 [(set (match_operand:SI 0 "register_operand" "=r")
2430 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2431 (match_operand:SI 2 "const_int_operand" "n")
2432 (match_operand:SI 3 "const_int_operand" "n")
2433 (const_int 0)))
2434 (clobber (reg:SI T_REG))]
2435 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2436 return 0 for simple left / right shift combination.
2437 return 1 for left shift / 8 bit sign extend / left shift.
2438 return 2 for left shift / 16 bit sign extend / left shift.
2439 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2440 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2441 return 5 for left shift / 16 bit sign extend / right shift
2442 return 6 for < 8 bit sign extend / left shift.
2443 return 7 for < 8 bit sign extend / left shift / single right shift.
2444 If COSTP is nonzero, assign the calculated cost to *COSTP. */
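/* For example, LEFT == 1 and SIZE == 8 means "sign-extend the low
   seven bits of the operand and shift the result left by one"; that
   can be done as shll followed by exts.b, so the function returns 1
   with a cost of 2. */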
2447 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
2449 int left, size, insize, ext;
2450 int cost = 0, best_cost;
2451 int kind;
2453 left = INTVAL (left_rtx);
2454 size = INTVAL (size_rtx);
2455 insize = size - left;
2456 if (insize <= 0)
2457 abort ();
2458 /* Default to left / right shift. */
2459 kind = 0;
2460 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
2461 if (size <= 16)
2463 /* 16 bit shift / sign extend / 16 bit shift */
2464 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2465 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2466 below, by alternative 3 or something even better. */
2467 if (cost < best_cost)
2469 kind = 5;
2470 best_cost = cost;
2473 /* Try a plain sign extend between two shifts. */
2474 for (ext = 16; ext >= insize; ext -= 8)
2476 if (ext <= size)
2478 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2479 if (cost < best_cost)
2481 kind = ext / (unsigned) 8;
2482 best_cost = cost;
2485 /* Check if we can do a sloppy shift with a final signed shift
2486 restoring the sign. */
2487 if (EXT_SHIFT_SIGNED (size - ext))
2488 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2489 /* If not, maybe it's still cheaper to do the second shift sloppy,
2490 and do a final sign extend? */
2491 else if (size <= 16)
2492 cost = ext_shift_insns[ext - insize] + 1
2493 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2494 else
2495 continue;
2496 if (cost < best_cost)
2498 kind = ext / (unsigned) 8 + 2;
2499 best_cost = cost;
2502 /* Check if we can sign extend in r0 */
2503 if (insize < 8)
2505 cost = 3 + shift_insns[left];
2506 if (cost < best_cost)
2508 kind = 6;
2509 best_cost = cost;
2511 /* Try the same with a final signed shift. */
2512 if (left < 31)
2514 cost = 3 + ext_shift_insns[left + 1] + 1;
2515 if (cost < best_cost)
2517 kind = 7;
2518 best_cost = cost;
2522 if (TARGET_SH3)
2524 /* Try to use a dynamic shift. */
2525 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2526 if (cost < best_cost)
2528 kind = 0;
2529 best_cost = cost;
2532 if (costp)
2533 *costp = cost;
2534 return kind;
2537 /* Function to be used in the length attribute of the instructions
2538 implementing this pattern. */
2541 shl_sext_length (rtx insn)
2543 rtx set_src, left_rtx, size_rtx;
2544 int cost;
2546 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2547 left_rtx = XEXP (XEXP (set_src, 0), 1);
2548 size_rtx = XEXP (set_src, 1);
2549 shl_sext_kind (left_rtx, size_rtx, &cost);
2550 return cost;
2553 /* Generate rtl for this pattern */
2556 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
2558 int kind;
2559 int left, size, insize, cost;
2560 rtx operands[3];
2562 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2563 left = INTVAL (left_rtx);
2564 size = INTVAL (size_rtx);
2565 insize = size - left;
2566 switch (kind)
2568 case 1:
2569 case 2:
2570 case 3:
2571 case 4:
2573 int ext = kind & 1 ? 8 : 16;
2574 int shift2 = size - ext;
2576 /* Don't expand fine-grained when combining, because that will
2577 make the pattern fail. */
2578 if (! currently_expanding_to_rtl
2579 && ! reload_in_progress && ! reload_completed)
2581 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2582 emit_insn (gen_movsi (dest, source));
2583 break;
2585 if (dest != source)
2586 emit_insn (gen_movsi (dest, source));
2587 operands[0] = dest;
2588 if (ext - insize)
2590 operands[2] = GEN_INT (ext - insize);
2591 gen_shifty_hi_op (ASHIFT, operands);
2593 emit_insn (kind & 1
2594 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2595 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2596 if (kind <= 2)
2598 if (shift2)
2600 operands[2] = GEN_INT (shift2);
2601 gen_shifty_op (ASHIFT, operands);
2604 else
2606 if (shift2 > 0)
2608 if (EXT_SHIFT_SIGNED (shift2))
2610 operands[2] = GEN_INT (shift2 + 1);
2611 gen_shifty_op (ASHIFT, operands);
2612 operands[2] = const1_rtx;
2613 gen_shifty_op (ASHIFTRT, operands);
2614 break;
2616 operands[2] = GEN_INT (shift2);
2617 gen_shifty_hi_op (ASHIFT, operands);
2619 else if (shift2)
2621 operands[2] = GEN_INT (-shift2);
2622 gen_shifty_hi_op (LSHIFTRT, operands);
2624 emit_insn (size <= 8
2625 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2626 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2628 break;
2630 case 5:
2632 int i = 16 - size;
2633 if (! currently_expanding_to_rtl
2634 && ! reload_in_progress && ! reload_completed)
2635 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2636 else
2638 operands[0] = dest;
2639 operands[2] = GEN_INT (16 - insize);
2640 gen_shifty_hi_op (ASHIFT, operands);
2641 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2643 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2644 while (--i >= 0)
2645 gen_ashift (ASHIFTRT, 1, dest);
2646 break;
2648 case 6:
2649 case 7:
2650 /* Don't expand fine-grained when combining, because that will
2651 make the pattern fail. */
2652 if (! currently_expanding_to_rtl
2653 && ! reload_in_progress && ! reload_completed)
2655 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2656 emit_insn (gen_movsi (dest, source));
2657 break;
2659 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2660 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2661 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2662 operands[0] = dest;
2663 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2664 gen_shifty_op (ASHIFT, operands);
2665 if (kind == 7)
2666 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
2667 break;
2668 default:
2669 return -1;
2671 return 0;
2674 /* Prefix a symbol_ref name with "datalabel". */
2677 gen_datalabel_ref (rtx sym)
2679 if (GET_CODE (sym) == LABEL_REF)
2680 return gen_rtx_CONST (GET_MODE (sym),
2681 gen_rtx_UNSPEC (GET_MODE (sym),
2682 gen_rtvec (1, sym),
2683 UNSPEC_DATALABEL));
2685 if (GET_CODE (sym) != SYMBOL_REF)
2686 abort ();
2688 return sym;
2692 /* The SH cannot load a large constant into a register; constants have to
2693 come from a pc relative load. The reference of a pc relative load
2694 instruction must be less than 1k in front of the instruction. This
2695 means that we often have to dump a constant inside a function, and
2696 generate code to branch around it.
2698 It is important to minimize this, since the branches will slow things
2699 down and make things bigger.
2701 Worst case code looks like:
2703 mov.l L1,rn
2704 bra L2
2706 align
2707 L1: .long value
2711 mov.l L3,rn
2712 bra L4
2714 align
2715 L3: .long value
2719 We fix this by performing a scan before scheduling, which notices which
2720 instructions need to have their operands fetched from the constant table
2721 and builds the table.
2723 The algorithm is:
2725 scan, find an instruction which needs a pcrel move. Look forward, find the
2726 last barrier which is within MAX_COUNT bytes of the requirement.
2727 If there isn't one, make one. Process all the instructions between
2728 the insn we found and the barrier.
2730 In the above example, we can tell that L3 is within 1k of L1, so
2731 the first move can be shrunk from the 3 insn+constant sequence into
2732 just 1 insn, and the constant moved to L3 to make:
2734 mov.l L1,rn
2736 mov.l L3,rn
2737 bra L4
2739 align
2740 L3:.long value
2741 L4:.long value
2743 Then the second move becomes the target for the shortening process. */
2745 typedef struct
2747 rtx value; /* Value in table. */
2748 rtx label; /* Label of value. */
2749 rtx wend; /* End of window. */
2750 enum machine_mode mode; /* Mode of value. */
2752 /* True if this constant is accessed as part of a post-increment
2753 sequence. Note that HImode constants are never accessed in this way. */
2754 bool part_of_sequence_p;
2755 } pool_node;
2757 /* The maximum number of constants that can fit into one pool, since
2758 the pc relative range is 0...1020 bytes and constants are at least 4
2759 bytes long. */
2761 #define MAX_POOL_SIZE (1020/4)
2762 static pool_node pool_vector[MAX_POOL_SIZE];
2763 static int pool_size;
2764 static rtx pool_window_label;
2765 static int pool_window_last;
2767 /* ??? If we need a constant in HImode which is the truncated value of a
2768 constant we need in SImode, we could combine the two entries thus saving
2769 two bytes. Is this common enough to be worth the effort of implementing
2770 it? */
2772 /* ??? This stuff should be done at the same time that we shorten branches.
2773 As it is now, we must assume that all branches are the maximum size, and
2774 this causes us to almost always output constant pools sooner than
2775 necessary. */
2777 /* Add a constant to the pool and return its label. */
2779 static rtx
2780 add_constant (rtx x, enum machine_mode mode, rtx last_value)
2782 int i;
2783 rtx lab, new, ref, newref;
2785 /* First see if we've already got it. */
2786 for (i = 0; i < pool_size; i++)
2788 if (x->code == pool_vector[i].value->code
2789 && mode == pool_vector[i].mode)
2791 if (x->code == CODE_LABEL)
2793 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
2794 continue;
2796 if (rtx_equal_p (x, pool_vector[i].value))
2798 lab = new = 0;
2799 if (! last_value
2800 || ! i
2801 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
2803 new = gen_label_rtx ();
2804 LABEL_REFS (new) = pool_vector[i].label;
2805 pool_vector[i].label = lab = new;
2807 if (lab && pool_window_label)
2809 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2810 ref = pool_vector[pool_window_last].wend;
2811 LABEL_NEXTREF (newref) = ref;
2812 pool_vector[pool_window_last].wend = newref;
2814 if (new)
2815 pool_window_label = new;
2816 pool_window_last = i;
2817 return lab;
2822 /* Need a new one. */
2823 pool_vector[pool_size].value = x;
2824 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
2826 lab = 0;
2827 pool_vector[pool_size - 1].part_of_sequence_p = true;
2829 else
2830 lab = gen_label_rtx ();
2831 pool_vector[pool_size].mode = mode;
2832 pool_vector[pool_size].label = lab;
2833 pool_vector[pool_size].wend = NULL_RTX;
2834 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
2835 if (lab && pool_window_label)
2837 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2838 ref = pool_vector[pool_window_last].wend;
2839 LABEL_NEXTREF (newref) = ref;
2840 pool_vector[pool_window_last].wend = newref;
2842 if (lab)
2843 pool_window_label = lab;
2844 pool_window_last = pool_size;
2845 pool_size++;
2846 return lab;
2849 /* Output the literal table. START, if nonzero, is the first instruction
2850 this table is needed for, and also indicates that there is at least one
2851 casesi_worker_2 instruction; we have to emit the operand3 labels from
2852 these insns at a 4-byte aligned position. BARRIER is the barrier
2853 after which we are to place the table. */
2855 static void
2856 dump_table (rtx start, rtx barrier)
2858 rtx scan = barrier;
2859 int i;
2860 int need_align = 1;
2861 rtx lab, ref;
2862 int have_df = 0;
2864 /* Do two passes, first time dump out the HI sized constants. */
2866 for (i = 0; i < pool_size; i++)
2868 pool_node *p = &pool_vector[i];
2870 if (p->mode == HImode)
2872 if (need_align)
2874 scan = emit_insn_after (gen_align_2 (), scan);
2875 need_align = 0;
2877 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2878 scan = emit_label_after (lab, scan);
2879 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
2880 scan);
2881 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2883 lab = XEXP (ref, 0);
2884 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2887 else if (p->mode == DFmode)
2888 have_df = 1;
2891 need_align = 1;
2893 if (start)
2895 scan = emit_insn_after (gen_align_4 (), scan);
2896 need_align = 0;
2897 for (; start != barrier; start = NEXT_INSN (start))
2898 if (GET_CODE (start) == INSN
2899 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
2901 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
2902 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
2904 scan = emit_label_after (lab, scan);
2907 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
2909 rtx align_insn = NULL_RTX;
2911 scan = emit_label_after (gen_label_rtx (), scan);
2912 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2913 need_align = 0;
2915 for (i = 0; i < pool_size; i++)
2917 pool_node *p = &pool_vector[i];
2919 switch (p->mode)
2921 case HImode:
2922 break;
2923 case SImode:
2924 case SFmode:
2925 if (align_insn && !p->part_of_sequence_p)
2927 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2928 emit_label_before (lab, align_insn);
2929 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
2930 align_insn);
2931 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2933 lab = XEXP (ref, 0);
2934 emit_insn_before (gen_consttable_window_end (lab),
2935 align_insn);
2937 delete_insn (align_insn);
2938 align_insn = NULL_RTX;
2939 continue;
2941 else
2943 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2944 scan = emit_label_after (lab, scan);
2945 scan = emit_insn_after (gen_consttable_4 (p->value,
2946 const0_rtx), scan);
2947 need_align = ! need_align;
2949 break;
2950 case DFmode:
2951 if (need_align)
2953 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2954 align_insn = scan;
2955 need_align = 0;
2957 case DImode:
2958 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2959 scan = emit_label_after (lab, scan);
2960 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2961 scan);
2962 break;
2963 default:
2964 abort ();
2965 break;
2968 if (p->mode != HImode)
2970 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2972 lab = XEXP (ref, 0);
2973 scan = emit_insn_after (gen_consttable_window_end (lab),
2974 scan);
2979 pool_size = 0;
2982 for (i = 0; i < pool_size; i++)
2984 pool_node *p = &pool_vector[i];
2986 switch (p->mode)
2988 case HImode:
2989 break;
2990 case SImode:
2991 case SFmode:
2992 if (need_align)
2994 need_align = 0;
2995 scan = emit_label_after (gen_label_rtx (), scan);
2996 scan = emit_insn_after (gen_align_4 (), scan);
2998 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2999 scan = emit_label_after (lab, scan);
3000 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
3001 scan);
3002 break;
3003 case DFmode:
3004 case DImode:
3005 if (need_align)
3007 need_align = 0;
3008 scan = emit_label_after (gen_label_rtx (), scan);
3009 scan = emit_insn_after (gen_align_4 (), scan);
3011 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3012 scan = emit_label_after (lab, scan);
3013 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3014 scan);
3015 break;
3016 default:
3017 abort ();
3018 break;
3021 if (p->mode != HImode)
3023 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3025 lab = XEXP (ref, 0);
3026 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3031 scan = emit_insn_after (gen_consttable_end (), scan);
3032 scan = emit_barrier_after (scan);
3033 pool_size = 0;
3034 pool_window_label = NULL_RTX;
3035 pool_window_last = 0;
3038 /* Return nonzero if constant would be an ok source for a
3039 mov.w instead of a mov.l. */
3041 static int
3042 hi_const (rtx src)
3044 return (GET_CODE (src) == CONST_INT
3045 && INTVAL (src) >= -32768
3046 && INTVAL (src) <= 32767);
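/* So, e.g., 1000 can be placed in the pool as a 2-byte entry and
   fetched with mov.w, whereas 70000 needs a 4-byte entry and a
   mov.l. */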
3049 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3051 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
3052 CONST_DOUBLE input value is CONST_OK_FOR_I08. For an SFmode move, we don't
3053 need to fix it if the input value is CONST_OK_FOR_I08. */
3055 static int
3056 broken_move (rtx insn)
3058 if (GET_CODE (insn) == INSN)
3060 rtx pat = PATTERN (insn);
3061 if (GET_CODE (pat) == PARALLEL)
3062 pat = XVECEXP (pat, 0, 0);
3063 if (GET_CODE (pat) == SET
3064 /* We can load any 8 bit value if we don't care what the high
3065 order bits end up as. */
3066 && GET_MODE (SET_DEST (pat)) != QImode
3067 && (CONSTANT_P (SET_SRC (pat))
3068 /* Match mova_const. */
3069 || (GET_CODE (SET_SRC (pat)) == UNSPEC
3070 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3071 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
3072 && ! (TARGET_SH2E
3073 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3074 && (fp_zero_operand (SET_SRC (pat))
3075 || fp_one_operand (SET_SRC (pat)))
3076 /* ??? If this is a -m4 or -m4-single compilation, in general
3077 we don't know the current setting of fpscr, so disable fldi.
3078 There is an exception if this was a register-register move
3079 before reload - and hence it was ascertained that we have
3080 single precision setting - and in a post-reload optimization
3081 we changed this to do a constant load. In that case
3082 we don't have an r0 clobber, hence we must use fldi. */
3083 && (! TARGET_SH4 || TARGET_FMOVD
3084 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3085 == SCRATCH))
3086 && GET_CODE (SET_DEST (pat)) == REG
3087 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3088 && ! (TARGET_SH2A
3089 && GET_MODE (SET_DEST (pat)) == SImode
3090 && GET_CODE (SET_SRC (pat)) == CONST_INT
3091 && CONST_OK_FOR_I20 (INTVAL (SET_SRC (pat))))
3092 && (GET_CODE (SET_SRC (pat)) != CONST_INT
3093 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
3094 return 1;
3097 return 0;
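/* For illustration: (set (reg:SI r1) (const_int 1000)) is "broken" in
   this sense, since 1000 does not fit the signed 8-bit mov immediate,
   and will later be rewritten as a pc-relative load from the constant
   pool; (set (reg:SI r1) (const_int 100)) is left alone. */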
3100 static int
3101 mova_p (rtx insn)
3103 return (GET_CODE (insn) == INSN
3104 && GET_CODE (PATTERN (insn)) == SET
3105 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3106 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3107 /* Don't match mova_const. */
3108 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
3111 /* Fix up a mova from a switch that went out of range. */
3112 static void
3113 fixup_mova (rtx mova)
3115 if (! flag_pic)
3117 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3118 INSN_CODE (mova) = -1;
3120 else
3122 rtx worker = mova;
3123 rtx lab = gen_label_rtx ();
3124 rtx wpat, wpat0, wpat1, wsrc, diff;
3128 worker = NEXT_INSN (worker);
3129 if (! worker
3130 || GET_CODE (worker) == CODE_LABEL
3131 || GET_CODE (worker) == JUMP_INSN)
3132 abort ();
3133 } while (recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3134 wpat = PATTERN (worker);
3135 wpat0 = XVECEXP (wpat, 0, 0);
3136 wpat1 = XVECEXP (wpat, 0, 1);
3137 wsrc = SET_SRC (wpat0);
3138 PATTERN (worker) = (gen_casesi_worker_2
3139 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3140 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3141 XEXP (wpat1, 0)));
3142 INSN_CODE (worker) = -1;
3143 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3144 gen_rtx_LABEL_REF (Pmode, lab));
3145 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3146 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3147 INSN_CODE (mova) = -1;
3151 /* Find the last barrier from insn FROM which is close enough to hold the
3152 constant pool. If we can't find one, then create one near the end of
3153 the range. */
3155 static rtx
3156 find_barrier (int num_mova, rtx mova, rtx from)
3158 int count_si = 0;
3159 int count_hi = 0;
3160 int found_hi = 0;
3161 int found_si = 0;
3162 int found_di = 0;
3163 int hi_align = 2;
3164 int si_align = 2;
3165 int leading_mova = num_mova;
3166 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3167 int si_limit;
3168 int hi_limit;
3170 /* For HImode: range is 510, add 4 because pc counts from address of
3171 second instruction after this one, subtract 2 for the jump instruction
3172 that we may need to emit before the table, subtract 2 for the instruction
3173 that fills the jump delay slot (in very rare cases, reorg will take an
3174 instruction from after the constant pool or will leave the delay slot
3175 empty). This gives 510.
3176 For SImode: range is 1020, add 4 because pc counts from address of
3177 second instruction after this one, subtract 2 in case pc is 2 byte
3178 aligned, subtract 2 for the jump instruction that we may need to emit
3179 before the table, subtract 2 for the instruction that fills the jump
3180 delay slot. This gives 1018. */
3182 /* The branch will always be shortened now that the reference address for
3183 forward branches is the successor address, thus we need no longer make
3184 adjustments to the [sh]i_limit for -O0. */
3186 si_limit = 1018;
3187 hi_limit = 510;
3189 while (from && count_si < si_limit && count_hi < hi_limit)
3191 int inc = get_attr_length (from);
3192 int new_align = 1;
3194 if (GET_CODE (from) == CODE_LABEL)
3196 if (optimize)
3197 new_align = 1 << label_to_alignment (from);
3198 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3199 new_align = 1 << barrier_align (from);
3200 else
3201 new_align = 1;
3202 inc = 0;
3205 if (GET_CODE (from) == BARRIER)
3208 found_barrier = from;
3210 /* If we are at the end of the function, or in front of an alignment
3211 instruction, we need not insert an extra alignment. We prefer
3212 this kind of barrier. */
3213 if (barrier_align (from) > 2)
3214 good_barrier = from;
3217 if (broken_move (from))
3219 rtx pat, src, dst;
3220 enum machine_mode mode;
3222 pat = PATTERN (from);
3223 if (GET_CODE (pat) == PARALLEL)
3224 pat = XVECEXP (pat, 0, 0);
3225 src = SET_SRC (pat);
3226 dst = SET_DEST (pat);
3227 mode = GET_MODE (dst);
3229 /* We must explicitly check the mode, because sometimes the
3230 front end will generate code to load unsigned constants into
3231 HImode targets without properly sign extending them. */
3232 if (mode == HImode
3233 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3235 found_hi += 2;
3236 /* We put the short constants before the long constants, so
3237 we must count the length of short constants in the range
3238 for the long constants. */
3239 /* ??? This isn't optimal, but is easy to do. */
3240 si_limit -= 2;
3242 else
3244 /* We dump DF/DI constants before SF/SI ones, because
3245 the limit is the same, but the alignment requirements
3246 are higher. We may waste up to 4 additional bytes
3247 for alignment, and the DF/DI constant may have
3248 another SF/SI constant placed before it. */
3249 if (TARGET_SHCOMPACT
3250 && ! found_di
3251 && (mode == DFmode || mode == DImode))
3253 found_di = 1;
3254 si_limit -= 8;
3256 while (si_align > 2 && found_si + si_align - 2 > count_si)
3257 si_align >>= 1;
3258 if (found_si > count_si)
3259 count_si = found_si;
3260 found_si += GET_MODE_SIZE (mode);
3261 if (num_mova)
3262 si_limit -= GET_MODE_SIZE (mode);
3265 /* See the code in machine_dependent_reorg, which has a similar if
3266 statement that generates a new mova insn in many cases. */
3267 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
3268 inc += 2;
3271 if (mova_p (from))
3273 if (! num_mova++)
3275 leading_mova = 0;
3276 mova = from;
3277 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
3279 if (found_si > count_si)
3280 count_si = found_si;
3282 else if (GET_CODE (from) == JUMP_INSN
3283 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3284 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3286 if (num_mova)
3287 num_mova--;
3288 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3290 /* We have just passed the barrier in front of the
3291 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3292 the ADDR_DIFF_VEC is accessed as data, just like our pool
3293 constants, this is a good opportunity to accommodate what
3294 we have gathered so far.
3295 If we waited any longer, we could end up at a barrier in
3296 front of code, which gives worse cache usage for separated
3297 instruction / data caches. */
3298 good_barrier = found_barrier;
3299 break;
3301 else
3303 rtx body = PATTERN (from);
3304 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
3307 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3308 else if (GET_CODE (from) == JUMP_INSN
3309 && ! TARGET_SH2
3310 && ! TARGET_SMALLCODE)
3311 new_align = 4;
3313 if (found_si)
3315 count_si += inc;
3316 if (new_align > si_align)
3318 si_limit -= (count_si - 1) & (new_align - si_align);
3319 si_align = new_align;
3321 count_si = (count_si + new_align - 1) & -new_align;
3323 if (found_hi)
3325 count_hi += inc;
3326 if (new_align > hi_align)
3328 hi_limit -= (count_hi - 1) & (new_align - hi_align);
3329 hi_align = new_align;
3331 count_hi = (count_hi + new_align - 1) & -new_align;
3333 from = NEXT_INSN (from);
3336 if (num_mova)
3338 if (leading_mova)
3340 /* Try as we might, the leading mova is out of range. Change
3341 it into a load (which will become a pcload) and retry. */
3342 fixup_mova (mova);
3343 return find_barrier (0, 0, mova);
3345 else
3347 /* Insert the constant pool table before the mova instruction,
3348 to prevent the mova label reference from going out of range. */
3349 from = mova;
3350 good_barrier = found_barrier = barrier_before_mova;
3354 if (found_barrier)
3356 if (good_barrier && next_real_insn (found_barrier))
3357 found_barrier = good_barrier;
3359 else
3361 /* We didn't find a barrier in time to dump our stuff,
3362 so we'll make one. */
3363 rtx label = gen_label_rtx ();
3365 /* If we exceeded the range, then we must back up over the last
3366 instruction we looked at. Otherwise, we just need to undo the
3367 NEXT_INSN at the end of the loop. */
3368 if (count_hi > hi_limit || count_si > si_limit)
3369 from = PREV_INSN (PREV_INSN (from));
3370 else
3371 from = PREV_INSN (from);
3373 /* Walk back to be just before any jump or label.
3374 Putting it before a label reduces the number of times the branch
3375 around the constant pool table will be hit. Putting it before
3376 a jump makes it more likely that the bra delay slot will be
3377 filled. */
3378 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
3379 || GET_CODE (from) == CODE_LABEL)
3380 from = PREV_INSN (from);
3382 from = emit_jump_insn_after (gen_jump (label), from);
3383 JUMP_LABEL (from) = label;
3384 LABEL_NUSES (label) = 1;
3385 found_barrier = emit_barrier_after (from);
3386 emit_label_after (label, found_barrier);
3389 return found_barrier;
3392 /* If the instruction INSN is implemented by a special function, and we can
3393 positively find the register that is used to call the sfunc, and this
3394 register is not used anywhere else in this instruction - except as the
3395 destination of a set, return this register; else, return 0. */
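/* The __ashiftrt_r4_<n> shifts emitted further up are typical of the
   sfuncs handled here: the call goes through a register holding the
   helper's address, that register shows up as a USE in the insn's
   PARALLEL, and it is what gets returned below. */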
3397 sfunc_uses_reg (rtx insn)
3399 int i;
3400 rtx pattern, part, reg_part, reg;
3402 if (GET_CODE (insn) != INSN)
3403 return 0;
3404 pattern = PATTERN (insn);
3405 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
3406 return 0;
3408 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3410 part = XVECEXP (pattern, 0, i);
3411 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
3412 reg_part = part;
3414 if (! reg_part)
3415 return 0;
3416 reg = XEXP (reg_part, 0);
3417 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
3419 part = XVECEXP (pattern, 0, i);
3420 if (part == reg_part || GET_CODE (part) == CLOBBER)
3421 continue;
3422 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
3423 && GET_CODE (SET_DEST (part)) == REG)
3424 ? SET_SRC (part) : part)))
3425 return 0;
3427 return reg;
3430 /* See if the only way in which INSN uses REG is by calling it, or by
3431 setting it while calling it. Set *SET to a SET rtx if the register
3432 is set by INSN. */
3434 static int
3435 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
3437 rtx pattern, reg2;
3439 *set = NULL_RTX;
3441 reg2 = sfunc_uses_reg (insn);
3442 if (reg2 && REGNO (reg2) == REGNO (reg))
3444 pattern = single_set (insn);
3445 if (pattern
3446 && GET_CODE (SET_DEST (pattern)) == REG
3447 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3448 *set = pattern;
3449 return 0;
3451 if (GET_CODE (insn) != CALL_INSN)
3453 /* We don't use rtx_equal_p because we don't care if the mode is
3454 different. */
3455 pattern = single_set (insn);
3456 if (pattern
3457 && GET_CODE (SET_DEST (pattern)) == REG
3458 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3460 rtx par, part;
3461 int i;
3463 *set = pattern;
3464 par = PATTERN (insn);
3465 if (GET_CODE (par) == PARALLEL)
3466 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
3468 part = XVECEXP (par, 0, i);
3469 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
3470 return 1;
3472 return reg_mentioned_p (reg, SET_SRC (pattern));
3475 return 1;
3478 pattern = PATTERN (insn);
3480 if (GET_CODE (pattern) == PARALLEL)
3482 int i;
3484 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3485 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
3486 return 1;
3487 pattern = XVECEXP (pattern, 0, 0);
3490 if (GET_CODE (pattern) == SET)
3492 if (reg_mentioned_p (reg, SET_DEST (pattern)))
3494 /* We don't use rtx_equal_p, because we don't care if the
3495 mode is different. */
3496 if (GET_CODE (SET_DEST (pattern)) != REG
3497 || REGNO (reg) != REGNO (SET_DEST (pattern)))
3498 return 1;
3500 *set = pattern;
3503 pattern = SET_SRC (pattern);
3506 if (GET_CODE (pattern) != CALL
3507 || GET_CODE (XEXP (pattern, 0)) != MEM
3508 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
3509 return 1;
3511 return 0;
3514 /* Given a X, a pattern of an insn or a part of it, return a mask of used
3515 general registers. Bits 0..15 mean that the respective registers
3516 are used as inputs in the instruction. Bits 16..31 mean that the
3517 registers 0..15, respectively, are used as outputs, or are clobbered.
3518 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
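/* For example, for (set (reg:SI r1) (plus:SI (reg:SI r2) (reg:SI r3)))
   this returns 0x2000c: bits 2 and 3 for the r2 / r3 inputs and bit 17
   (16 + 1) for the r1 output. */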
3520 regs_used (rtx x, int is_dest)
3522 enum rtx_code code;
3523 const char *fmt;
3524 int i, used = 0;
3526 if (! x)
3527 return used;
3528 code = GET_CODE (x);
3529 switch (code)
3531 case REG:
3532 if (REGNO (x) < 16)
3533 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3534 << (REGNO (x) + is_dest));
3535 return 0;
3536 case SUBREG:
3538 rtx y = SUBREG_REG (x);
3540 if (GET_CODE (y) != REG)
3541 break;
3542 if (REGNO (y) < 16)
3543 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3544 << (REGNO (y) +
3545 subreg_regno_offset (REGNO (y),
3546 GET_MODE (y),
3547 SUBREG_BYTE (x),
3548 GET_MODE (x)) + is_dest));
3549 return 0;
3551 case SET:
3552 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
3553 case RETURN:
3554 /* If there was a return value, it must have been indicated with USE. */
3555 return 0x00ffff00;
3556 case CLOBBER:
3557 is_dest = 1;
3558 break;
3559 case MEM:
3560 is_dest = 0;
3561 break;
3562 case CALL:
3563 used |= 0x00ff00f0;
3564 break;
3565 default:
3566 break;
3569 fmt = GET_RTX_FORMAT (code);
3571 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3573 if (fmt[i] == 'E')
3575 register int j;
3576 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3577 used |= regs_used (XVECEXP (x, i, j), is_dest);
3579 else if (fmt[i] == 'e')
3580 used |= regs_used (XEXP (x, i), is_dest);
3582 return used;
3585 /* Create an instruction that prevents redirection of a conditional branch
3586 to the destination of the JUMP with address ADDR.
3587 If the branch needs to be implemented as an indirect jump, try to find
3588 a scratch register for it.
3589 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3590 If any preceding insn that doesn't fit into a delay slot is good enough,
3591 pass 1. Pass 2 if a definite blocking insn is needed.
3592 -1 is used internally to avoid deep recursion.
3593 If a blocking instruction is made or recognized, return it. */
3595 static rtx
3596 gen_block_redirect (rtx jump, int addr, int need_block)
3598 int dead = 0;
3599 rtx prev = prev_nonnote_insn (jump);
3600 rtx dest;
3602 /* First, check if we already have an instruction that satisfies our need. */
3603 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3605 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3606 return prev;
3607 if (GET_CODE (PATTERN (prev)) == USE
3608 || GET_CODE (PATTERN (prev)) == CLOBBER
3609 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3610 prev = jump;
3611 else if ((need_block &= ~1) < 0)
3612 return prev;
3613 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
3614 need_block = 0;
3616 if (GET_CODE (PATTERN (jump)) == RETURN)
3618 if (! need_block)
3619 return prev;
3620 /* Reorg even does nasty things with return insns that cause branches
3621 to go out of range - see find_end_label and callers. */
3622 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
3624 /* We can't use JUMP_LABEL here because it might be undefined
3625 when not optimizing. */
3626 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
3627 /* If the branch is out of range, try to find a scratch register for it. */
3628 if (optimize
3629 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3630 > 4092 + 4098))
3632 rtx scan;
3633 /* Don't look for the stack pointer as a scratch register;
3634 it would cause trouble if an interrupt occurred. */
3635 unsigned try = 0x7fff, used;
3636 int jump_left = flag_expensive_optimizations + 1;
3638 /* It is likely that the most recent eligible instruction is wanted for
3639 the delay slot. Therefore, find out which registers it uses, and
3640 try to avoid using them. */
3642 for (scan = jump; (scan = PREV_INSN (scan)); )
3644 enum rtx_code code;
3646 if (INSN_DELETED_P (scan))
3647 continue;
3648 code = GET_CODE (scan);
3649 if (code == CODE_LABEL || code == JUMP_INSN)
3650 break;
3651 if (code == INSN
3652 && GET_CODE (PATTERN (scan)) != USE
3653 && GET_CODE (PATTERN (scan)) != CLOBBER
3654 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
3656 try &= ~regs_used (PATTERN (scan), 0);
3657 break;
3660 for (used = dead = 0, scan = JUMP_LABEL (jump);
3661 (scan = NEXT_INSN (scan)); )
3663 enum rtx_code code;
3665 if (INSN_DELETED_P (scan))
3666 continue;
3667 code = GET_CODE (scan);
3668 if (INSN_P (scan))
3670 used |= regs_used (PATTERN (scan), 0);
3671 if (code == CALL_INSN)
3672 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
3673 dead |= (used >> 16) & ~used;
3674 if (dead & try)
3676 dead &= try;
3677 break;
3679 if (code == JUMP_INSN)
3681 if (jump_left-- && simplejump_p (scan))
3682 scan = JUMP_LABEL (scan);
3683 else
3684 break;
3688 /* Mask out the stack pointer again, in case it was
3689 the only 'free' register we have found. */
3690 dead &= 0x7fff;
3692 /* If the immediate destination is still in range, check for possible
3693 threading with a jump beyond the delay slot insn.
3694 Don't check if we are called recursively; the jump has been or will be
3695 checked in a different invocation in that case.
3697 else if (optimize && need_block >= 0)
3699 rtx next = next_active_insn (next_active_insn (dest));
3700 if (next && GET_CODE (next) == JUMP_INSN
3701 && GET_CODE (PATTERN (next)) == SET
3702 && recog_memoized (next) == CODE_FOR_jump_compact)
3704 dest = JUMP_LABEL (next);
3705 if (dest
3706 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3707 > 4092 + 4098))
3708 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
3712 if (dead)
3714 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
3716 /* It would be nice if we could convert the jump into an indirect
3717 jump / far branch right now, thus exposing all constituent
3718 instructions to further optimization. However, reorg uses
3719 simplejump_p to determine if there is an unconditional jump where
3720 it should try to schedule instructions from the target of the
3721 branch; simplejump_p fails for indirect jumps even if they have
3722 a JUMP_LABEL. */
3723 rtx insn = emit_insn_before (gen_indirect_jump_scratch
3724 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
3725 , jump);
3726 /* ??? We would like this to have the scope of the jump, but that
3727 scope will change when a delay slot insn of an inner scope is added.
3728 Hence, after delay slot scheduling, we'll have to expect
3729 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
3730 the jump. */
3732 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
3733 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
3734 return insn;
3736 else if (need_block)
3737 /* We can't use JUMP_LABEL here because it might be undefined
3738 when not optimizing. */
3739 return emit_insn_before (gen_block_branch_redirect
3740 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
3741 , jump);
3742 return prev;
3745 #define CONDJUMP_MIN -252
3746 #define CONDJUMP_MAX 262
3747 struct far_branch
3749 /* A label (to be placed) in front of the jump
3750 that jumps to our ultimate destination. */
3751 rtx near_label;
3752 /* Where we are going to insert it if we cannot move the jump any farther,
3753 or the jump itself if we have picked up an existing jump. */
3754 rtx insert_place;
3755 /* The ultimate destination. */
3756 rtx far_label;
3757 struct far_branch *prev;
3758 /* If the branch has already been created, its address;
3759 else the address of its first prospective user. */
3760 int address;
3763 static void gen_far_branch (struct far_branch *);
3764 enum mdep_reorg_phase_e mdep_reorg_phase;
3765 static void
3766 gen_far_branch (struct far_branch *bp)
3768 rtx insn = bp->insert_place;
3769 rtx jump;
3770 rtx label = gen_label_rtx ();
3772 emit_label_after (label, insn);
3773 if (bp->far_label)
3775 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
3776 LABEL_NUSES (bp->far_label)++;
3778 else
3779 jump = emit_jump_insn_after (gen_return (), insn);
3780 /* Emit a barrier so that reorg knows that any following instructions
3781 are not reachable via a fall-through path.
3782 But don't do this when not optimizing, since we wouldn't suppress the
3783 alignment for the barrier then, and could end up with out-of-range
3784 pc-relative loads. */
3785 if (optimize)
3786 emit_barrier_after (jump);
3787 emit_label_after (bp->near_label, insn);
3788 JUMP_LABEL (jump) = bp->far_label;
3789 if (! invert_jump (insn, label, 1))
3790 abort ();
3791 /* If we are branching around a jump (rather than a return), prevent
3792 reorg from using an insn from the jump target as the delay slot insn -
3793 when reorg did this, it pessimized code (we'd rather hide the delay slot)
3794 and it could cause branches to go out of range. */
3795 if (bp->far_label)
3796 (emit_insn_after
3797 (gen_stuff_delay_slot
3798 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
3799 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
3800 insn));
3801 /* Prevent reorg from undoing our splits. */
3802 gen_block_redirect (jump, bp->address += 2, 2);
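/* Roughly, what the transformation above does to a conditional branch
   whose target has drifted out of the short span of bt/bf:

	bt	far_away

   is inverted and becomes something like

	bf	.Lskip
   .Lnear:
	bra	far_away
   .Lskip:

   where .Lnear can be reused as a stepping stone by other out-of-range
   branches to the same destination, and only the bra, with its much
   larger reach, still has to span the whole distance. */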
3805 /* Fix up ADDR_DIFF_VECs. */
3806 void
3807 fixup_addr_diff_vecs (rtx first)
3809 rtx insn;
3811 for (insn = first; insn; insn = NEXT_INSN (insn))
3813 rtx vec_lab, pat, prev, prevpat, x, braf_label;
3815 if (GET_CODE (insn) != JUMP_INSN
3816 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
3817 continue;
3818 pat = PATTERN (insn);
3819 vec_lab = XEXP (XEXP (pat, 0), 0);
3821 /* Search for the matching casesi_jump_2. */
3822 for (prev = vec_lab; ; prev = PREV_INSN (prev))
3824 if (GET_CODE (prev) != JUMP_INSN)
3825 continue;
3826 prevpat = PATTERN (prev);
3827 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
3828 continue;
3829 x = XVECEXP (prevpat, 0, 1);
3830 if (GET_CODE (x) != USE)
3831 continue;
3832 x = XEXP (x, 0);
3833 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
3834 break;
3836 /* FIXME: This is a bug in the optimizer, but it seems harmless
3837 to just avoid panicking. */
3838 if (!prev)
3839 continue;
3841 /* Emit the reference label of the braf where it belongs, right after
3842 the casesi_jump_2 (i.e. braf). */
3843 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
3844 emit_label_after (braf_label, prev);
3846 /* Fix up the ADDR_DIFF_VEC to be relative
3847 to the reference address of the braf. */
3848 XEXP (XEXP (pat, 0), 0) = braf_label;
3852 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
3853 a barrier. Return the base 2 logarithm of the desired alignment. */
3855 barrier_align (rtx barrier_or_label)
3857 rtx next = next_real_insn (barrier_or_label), pat, prev;
3858 int slot, credit, jump_to_next = 0;
3860 if (! next)
3861 return 0;
3863 pat = PATTERN (next);
3865 if (GET_CODE (pat) == ADDR_DIFF_VEC)
3866 return 2;
3868 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
3869 /* This is a barrier in front of a constant table. */
3870 return 0;
3872 prev = prev_real_insn (barrier_or_label);
3873 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
3875 pat = PATTERN (prev);
3876 /* If this is a very small table, we want to keep the alignment after
3877 the table to the minimum for proper code alignment. */
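/* A rough illustration, assuming CACHE_LOG were 5 (a 32-byte cache line):
   the size test below would then keep the reduced alignment only for tables
   of at most 1 << 3 == 8 bytes, e.g. four HImode entries.  The actual
   threshold depends on the target's CACHE_LOG. */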
3878 return ((TARGET_SMALLCODE
3879 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
3880 <= (unsigned) 1 << (CACHE_LOG - 2)))
3881 ? 1 << TARGET_SHMEDIA : align_jumps_log);
3884 if (TARGET_SMALLCODE)
3885 return 0;
3887 if (! TARGET_SH2 || ! optimize)
3888 return align_jumps_log;
3890 /* When fixing up pcloads, a constant table might be inserted just before
3891 the basic block that ends with the barrier. Thus, we can't trust the
3892 instruction lengths before that. */
3893 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
3895 /* Check if there is an immediately preceding branch to the insn beyond
3896 the barrier. We must weigh the cost of discarding useful information
3897 from the current cache line when executing this branch and there is
3898 an alignment, against that of fetching unneeded insns in front of the
3899 branch target when there is no alignment. */
3901 /* There are two delay_slot cases to consider. One is the simple case
3902 where the preceding branch is to the insn beyond the barrier (simple
3903 delay slot filling), and the other is where the preceding branch has
3904 a delay slot that is a duplicate of the insn after the barrier
3905 (fill_eager_delay_slots) and the branch is to the insn after the insn
3906 after the barrier. */
3908 /* PREV is presumed to be the JUMP_INSN for the barrier under
3909 investigation. Skip to the insn before it. */
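/* A worked example of the budget below, assuming CACHE_LOG == 5: the loop
   starts with credit = (1 << 3) + 2 == 10 bytes and subtracts the length of
   every insn it walks over, so only a branch within a few instructions of
   the barrier can end up suppressing the alignment. */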
3910 prev = prev_real_insn (prev);
3912 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
3913 credit >= 0 && prev && GET_CODE (prev) == INSN;
3914 prev = prev_real_insn (prev))
3916 jump_to_next = 0;
3917 if (GET_CODE (PATTERN (prev)) == USE
3918 || GET_CODE (PATTERN (prev)) == CLOBBER)
3919 continue;
3920 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
3922 prev = XVECEXP (PATTERN (prev), 0, 1);
3923 if (INSN_UID (prev) == INSN_UID (next))
3925 /* Delay slot was filled with insn at jump target. */
3926 jump_to_next = 1;
3927 continue;
3931 if (slot &&
3932 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3933 slot = 0;
3934 credit -= get_attr_length (prev);
3936 if (prev
3937 && GET_CODE (prev) == JUMP_INSN
3938 && JUMP_LABEL (prev))
3940 rtx x;
3941 if (jump_to_next
3942 || next_real_insn (JUMP_LABEL (prev)) == next
3943 /* If relax_delay_slots() decides NEXT was redundant
3944 with some previous instruction, it will have
3945 redirected PREV's jump to the following insn. */
3946 || JUMP_LABEL (prev) == next_nonnote_insn (next)
3947 /* There is no upper bound on redundant instructions
3948 that might have been skipped, but we must not put an
3949 alignment where none had been before. */
3950 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
3951 (INSN_P (x)
3952 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
3953 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
3954 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
3956 rtx pat = PATTERN (prev);
3957 if (GET_CODE (pat) == PARALLEL)
3958 pat = XVECEXP (pat, 0, 0);
3959 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
3960 return 0;
3965 return align_jumps_log;
3968 /* If we are inside a phony loop, almost any kind of label can turn up as the
3969 first one in the loop. Aligning a braf label causes incorrect switch
3970 destination addresses; we can detect braf labels because they are
3971 followed by a BARRIER.
3972 Applying loop alignment to small constant or switch tables is a waste
3973 of space, so we suppress this too. */
3975 sh_loop_align (rtx label)
3977 rtx next = label;
3980 next = next_nonnote_insn (next);
3981 while (next && GET_CODE (next) == CODE_LABEL);
3983 if (! next
3984 || ! INSN_P (next)
3985 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
3986 || recog_memoized (next) == CODE_FOR_consttable_2)
3987 return 0;
3989 return align_loops_log;
3992 /* Do a final pass over the function, just before delayed branch
3993 scheduling. */
3995 static void
3996 sh_reorg (void)
3998 rtx first, insn, mova = NULL_RTX;
3999 int num_mova;
4000 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
4001 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
4003 first = get_insns ();
4005 /* We must split call insns before introducing `mova's. If we're
4006 optimizing, they'll have already been split. Otherwise, make
4007 sure we don't split them too late. */
4008 if (! optimize)
4009 split_all_insns_noflow ();
4011 if (TARGET_SHMEDIA)
4012 return;
4014 /* If relaxing, generate pseudo-ops to associate function calls with
4015 the symbols they call. It does no harm to not generate these
4016 pseudo-ops. However, when we can generate them, it enables the
4017 linker to potentially relax the jsr to a bsr, and eliminate the
4018 register load and, possibly, the constant pool entry. */
4020 mdep_reorg_phase = SH_INSERT_USES_LABELS;
4021 if (TARGET_RELAX)
4023 /* Remove all REG_LABEL notes. We want to use them for our own
4024 purposes. This works because none of the remaining passes
4025 need to look at them.
4027 ??? But it may break in the future. We should use a machine
4028 dependent REG_NOTE, or some other approach entirely. */
4029 for (insn = first; insn; insn = NEXT_INSN (insn))
4031 if (INSN_P (insn))
4033 rtx note;
4035 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
4036 remove_note (insn, note);
4040 for (insn = first; insn; insn = NEXT_INSN (insn))
4042 rtx pattern, reg, link, set, scan, dies, label;
4043 int rescan = 0, foundinsn = 0;
4045 if (GET_CODE (insn) == CALL_INSN)
4047 pattern = PATTERN (insn);
4049 if (GET_CODE (pattern) == PARALLEL)
4050 pattern = XVECEXP (pattern, 0, 0);
4051 if (GET_CODE (pattern) == SET)
4052 pattern = SET_SRC (pattern);
4054 if (GET_CODE (pattern) != CALL
4055 || GET_CODE (XEXP (pattern, 0)) != MEM)
4056 continue;
4058 reg = XEXP (XEXP (pattern, 0), 0);
4060 else
4062 reg = sfunc_uses_reg (insn);
4063 if (! reg)
4064 continue;
4067 if (GET_CODE (reg) != REG)
4068 continue;
4070 /* This is a function call via REG. If the only uses of REG
4071 between the time that it is set and the time that it dies
4072 are in function calls, then we can associate all the
4073 function calls with the setting of REG. */
4075 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
4077 if (REG_NOTE_KIND (link) != 0)
4078 continue;
4079 set = single_set (XEXP (link, 0));
4080 if (set && rtx_equal_p (reg, SET_DEST (set)))
4082 link = XEXP (link, 0);
4083 break;
4087 if (! link)
4089 /* ??? Sometimes global register allocation will have
4090 deleted the insn pointed to by LOG_LINKS. Try
4091 scanning backward to find where the register is set. */
4092 for (scan = PREV_INSN (insn);
4093 scan && GET_CODE (scan) != CODE_LABEL;
4094 scan = PREV_INSN (scan))
4096 if (! INSN_P (scan))
4097 continue;
4099 if (! reg_mentioned_p (reg, scan))
4100 continue;
4102 if (noncall_uses_reg (reg, scan, &set))
4103 break;
4105 if (set)
4107 link = scan;
4108 break;
4113 if (! link)
4114 continue;
4116 /* The register is set at LINK. */
4118 /* We can only optimize the function call if the register is
4119 being set to a symbol. In theory, we could sometimes
4120 optimize calls to a constant location, but the assembler
4121 and linker do not support that at present. */
4122 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4123 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4124 continue;
4126 /* Scan forward from LINK to the place where REG dies, and
4127 make sure that the only insns which use REG are
4128 themselves function calls. */
4130 /* ??? This doesn't work for call targets that were allocated
4131 by reload, since there may not be a REG_DEAD note for the
4132 register. */
4134 dies = NULL_RTX;
4135 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4137 rtx scanset;
4139 /* Don't try to trace forward past a CODE_LABEL if we haven't
4140 seen INSN yet. Ordinarily, we will only find the setting insn
4141 in LOG_LINKS if it is in the same basic block. However,
4142 cross-jumping can insert code labels in between the load and
4143 the call, and can result in situations where a single call
4144 insn may have two targets depending on where we came from. */
4146 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4147 break;
4149 if (! INSN_P (scan))
4150 continue;
4152 /* Don't try to trace forward past a JUMP. To optimize
4153 safely, we would have to check that all the
4154 instructions at the jump destination did not use REG. */
4156 if (GET_CODE (scan) == JUMP_INSN)
4157 break;
4159 if (! reg_mentioned_p (reg, scan))
4160 continue;
4162 if (noncall_uses_reg (reg, scan, &scanset))
4163 break;
4165 if (scan == insn)
4166 foundinsn = 1;
4168 if (scan != insn
4169 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4171 /* There is a function call to this register other
4172 than the one we are checking. If we optimize
4173 this call, we need to rescan again below. */
4174 rescan = 1;
4177 /* ??? We shouldn't have to worry about SCANSET here.
4178 We should just be able to check for a REG_DEAD note
4179 on a function call. However, the REG_DEAD notes are
4180 apparently not dependable around libcalls; c-torture
4181 execute/920501-2 is a test case. If SCANSET is set,
4182 then this insn sets the register, so it must have
4183 died earlier. Unfortunately, this will only handle
4184 the cases in which the register is, in fact, set in a
4185 later insn. */
4187 /* ??? We shouldn't have to use FOUNDINSN here.
4188 However, the LOG_LINKS fields are apparently not
4189 entirely reliable around libcalls;
4190 newlib/libm/math/e_pow.c is a test case. Sometimes
4191 an insn will appear in LOG_LINKS even though it is
4192 not the most recent insn which sets the register. */
4194 if (foundinsn
4195 && (scanset
4196 || find_reg_note (scan, REG_DEAD, reg)))
4198 dies = scan;
4199 break;
4203 if (! dies)
4205 /* Either there was a branch, or some insn used REG
4206 other than as a function call address. */
4207 continue;
4210 /* Create a code label, and put it in a REG_LABEL note on
4211 the insn which sets the register, and on each call insn
4212 which uses the register. In final_prescan_insn we look
4213 for the REG_LABEL notes, and output the appropriate label
4214 or pseudo-op. */
4216 label = gen_label_rtx ();
4217 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
4218 REG_NOTES (link));
4219 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
4220 REG_NOTES (insn));
4221 if (rescan)
4223 scan = link;
4226 rtx reg2;
4228 scan = NEXT_INSN (scan);
4229 if (scan != insn
4230 && ((GET_CODE (scan) == CALL_INSN
4231 && reg_mentioned_p (reg, scan))
4232 || ((reg2 = sfunc_uses_reg (scan))
4233 && REGNO (reg2) == REGNO (reg))))
4234 REG_NOTES (scan)
4235 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
4237 while (scan != dies);
4242 if (TARGET_SH2)
4243 fixup_addr_diff_vecs (first);
4245 if (optimize)
4247 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4248 shorten_branches (first);
4250 /* Scan the function looking for move instructions which have to be
4251 changed to pc-relative loads and insert the literal tables. */
4253 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4254 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4256 if (mova_p (insn))
4258 /* ??? basic block reordering can move a switch table dispatch
4259 below the switch table. Check if that has happened.
4260 We only have the addresses available when optimizing; but then,
4261 this check shouldn't be needed when not optimizing. */
4262 rtx label_ref = XVECEXP (SET_SRC (PATTERN (insn)), 0, 0);
4263 if (optimize
4264 && (INSN_ADDRESSES (INSN_UID (insn))
4265 > INSN_ADDRESSES (INSN_UID (XEXP (label_ref, 0)))))
4267 /* Change the mova into a load.
4268 broken_move will then return true for it. */
4269 fixup_mova (insn);
4271 else if (! num_mova++)
4272 mova = insn;
4274 else if (GET_CODE (insn) == JUMP_INSN
4275 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4276 && num_mova)
4278 rtx scan;
4279 int total;
4281 num_mova--;
4283 /* Some code might have been inserted between the mova and
4284 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4285 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4286 total += get_attr_length (scan);
4288 /* The range of the mova is 1020 bytes; add 4 because the pc counts from
4289 the address of the second instruction after this one, and subtract 2
4290 in case the pc is 2-byte aligned. Possible alignment needed for the
4291 ADDR_DIFF_VEC cancels out with alignment effects of the mova itself. */
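/* That is, the limit checked below works out to 1020 + 4 - 2 == 1022 bytes. */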
4292 if (total > 1022)
4294 /* Change the mova into a load, and restart scanning
4295 there. broken_move will then return true for mova. */
4296 fixup_mova (mova);
4297 insn = mova;
4300 if (broken_move (insn)
4301 || (GET_CODE (insn) == INSN
4302 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
4304 rtx scan;
4305 /* Scan ahead looking for a barrier to stick the constant table
4306 behind. */
4307 rtx barrier = find_barrier (num_mova, mova, insn);
4308 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4309 int need_aligned_label = 0;
4311 if (num_mova && ! mova_p (mova))
4313 /* find_barrier had to change the first mova into a
4314 pcload; thus, we have to start with this new pcload. */
4315 insn = mova;
4316 num_mova = 0;
4318 /* Now find all the moves between the points and modify them. */
4319 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
4321 if (GET_CODE (scan) == CODE_LABEL)
4322 last_float = 0;
4323 if (GET_CODE (scan) == INSN
4324 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
4325 need_aligned_label = 1;
4326 if (broken_move (scan))
4328 rtx *patp = &PATTERN (scan), pat = *patp;
4329 rtx src, dst;
4330 rtx lab;
4331 rtx newsrc;
4332 enum machine_mode mode;
4334 if (GET_CODE (pat) == PARALLEL)
4335 patp = &XVECEXP (pat, 0, 0), pat = *patp;
4336 src = SET_SRC (pat);
4337 dst = SET_DEST (pat);
4338 mode = GET_MODE (dst);
4340 if (mode == SImode && hi_const (src)
4341 && REGNO (dst) != FPUL_REG)
4343 int offset = 0;
4345 mode = HImode;
4346 while (GET_CODE (dst) == SUBREG)
4348 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
4349 GET_MODE (SUBREG_REG (dst)),
4350 SUBREG_BYTE (dst),
4351 GET_MODE (dst));
4352 dst = SUBREG_REG (dst);
4354 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
4356 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
4358 /* This must be an insn that clobbers r0. */
4359 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
4360 XVECLEN (PATTERN (scan), 0)
4361 - 1);
4362 rtx clobber = *clobberp;
4364 if (GET_CODE (clobber) != CLOBBER
4365 || ! rtx_equal_p (XEXP (clobber, 0), r0_rtx))
4366 abort ();
4368 if (last_float
4369 && reg_set_between_p (r0_rtx, last_float_move, scan))
4370 last_float = 0;
4371 if (last_float
4372 && TARGET_SHCOMPACT
4373 && GET_MODE_SIZE (mode) != 4
4374 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
4375 last_float = 0;
4376 lab = add_constant (src, mode, last_float);
4377 if (lab)
4378 emit_insn_before (gen_mova (lab), scan);
4379 else
4381 /* There will be a REG_UNUSED note for r0 on
4382 LAST_FLOAT_MOVE; we have to change it to REG_INC,
4383 since otherwise reorg:mark_target_live_regs will not
4384 consider r0 to be used, and we could end up with a delay
4385 slot insn in front of SCAN that clobbers r0. */
4386 rtx note
4387 = find_regno_note (last_float_move, REG_UNUSED, 0);
4389 /* If we are not optimizing, then there may not be
4390 a note. */
4391 if (note)
4392 PUT_MODE (note, REG_INC);
4394 *last_float_addr = r0_inc_rtx;
4396 last_float_move = scan;
4397 last_float = src;
4398 newsrc = gen_rtx_MEM (mode,
4399 (((TARGET_SH4 && ! TARGET_FMOVD)
4400 || REGNO (dst) == FPUL_REG)
4401 ? r0_inc_rtx
4402 : r0_rtx));
4403 last_float_addr = &XEXP (newsrc, 0);
4405 /* Remove the clobber of r0. */
4406 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
4407 gen_rtx_SCRATCH (Pmode));
4409 /* This is a mova needing a label. Create it. */
4410 else if (GET_CODE (src) == UNSPEC
4411 && XINT (src, 1) == UNSPEC_MOVA
4412 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
4414 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
4415 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4416 newsrc = gen_rtx_UNSPEC (SImode,
4417 gen_rtvec (1, newsrc),
4418 UNSPEC_MOVA);
4420 else
4422 lab = add_constant (src, mode, 0);
4423 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4424 newsrc = gen_const_mem (mode, newsrc);
4426 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
4427 INSN_CODE (scan) = -1;
4430 dump_table (need_aligned_label ? insn : 0, barrier);
4431 insn = barrier;
4435 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
4436 INSN_ADDRESSES_FREE ();
4437 split_branches (first);
4439 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4440 also has an effect on the register that holds the address of the sfunc.
4441 Insert an extra dummy insn in front of each sfunc that pretends to
4442 use this register. */
4443 if (flag_delayed_branch)
4445 for (insn = first; insn; insn = NEXT_INSN (insn))
4447 rtx reg = sfunc_uses_reg (insn);
4449 if (! reg)
4450 continue;
4451 emit_insn_before (gen_use_sfunc_addr (reg), insn);
4454 #if 0
4455 /* fpscr is not actually a user variable, but we pretend it is for the
4456 sake of the previous optimization passes, since we want it handled like
4457 one. However, we don't have any debugging information for it, so turn
4458 it into a non-user variable now. */
4459 if (TARGET_SH4)
4460 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4461 #endif
4462 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
4466 get_dest_uid (rtx label, int max_uid)
4468 rtx dest = next_real_insn (label);
4469 int dest_uid;
4470 if (! dest)
4471 /* This can happen for an undefined label. */
4472 return 0;
4473 dest_uid = INSN_UID (dest);
4474 /* If this is a newly created branch redirection blocking instruction,
4475 we cannot index the branch_uid or insn_addresses arrays with its
4476 uid. But then, we won't need to, because the actual destination is
4477 the following branch. */
4478 while (dest_uid >= max_uid)
4480 dest = NEXT_INSN (dest);
4481 dest_uid = INSN_UID (dest);
4483 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
4484 return 0;
4485 return dest_uid;
4488 /* Split condbranches that are out of range. Also add clobbers for
4489 scratch registers that are needed in far jumps.
4490 We do this before delay slot scheduling, so that it can take our
4491 newly created instructions into account. It also allows us to
4492 find branches with common targets more easily. */
4494 static void
4495 split_branches (rtx first)
4497 rtx insn;
4498 struct far_branch **uid_branch, *far_branch_list = 0;
4499 int max_uid = get_max_uid ();
4501 /* Find out which branches are out of range. */
4502 shorten_branches (first);
4504 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
4505 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
4507 for (insn = first; insn; insn = NEXT_INSN (insn))
4508 if (! INSN_P (insn))
4509 continue;
4510 else if (INSN_DELETED_P (insn))
4512 /* Shorten_branches would split this instruction again,
4513 so transform it into a note. */
4514 PUT_CODE (insn, NOTE);
4515 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4516 NOTE_SOURCE_FILE (insn) = 0;
4518 else if (GET_CODE (insn) == JUMP_INSN
4519 /* Don't mess with ADDR_DIFF_VEC */
4520 && (GET_CODE (PATTERN (insn)) == SET
4521 || GET_CODE (PATTERN (insn)) == RETURN))
4523 enum attr_type type = get_attr_type (insn);
4524 if (type == TYPE_CBRANCH)
4526 rtx next, beyond;
4528 if (get_attr_length (insn) > 4)
4530 rtx src = SET_SRC (PATTERN (insn));
4531 rtx olabel = XEXP (XEXP (src, 1), 0);
4532 int addr = INSN_ADDRESSES (INSN_UID (insn));
4533 rtx label = 0;
4534 int dest_uid = get_dest_uid (olabel, max_uid);
4535 struct far_branch *bp = uid_branch[dest_uid];
4537 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4538 the label if the LABEL_NUSES count drops to zero. There is
4539 always a jump_optimize pass that sets these values, but it
4540 proceeds to delete unreferenced code, and then if not
4541 optimizing, to un-delete the deleted instructions, thus
4542 leaving labels with use counts that are too low. */
4543 if (! optimize)
4545 JUMP_LABEL (insn) = olabel;
4546 LABEL_NUSES (olabel)++;
4548 if (! bp)
4550 bp = (struct far_branch *) alloca (sizeof *bp);
4551 uid_branch[dest_uid] = bp;
4552 bp->prev = far_branch_list;
4553 far_branch_list = bp;
4554 bp->far_label
4555 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
4556 LABEL_NUSES (bp->far_label)++;
4558 else
4560 label = bp->near_label;
4561 if (! label && bp->address - addr >= CONDJUMP_MIN)
4563 rtx block = bp->insert_place;
4565 if (GET_CODE (PATTERN (block)) == RETURN)
4566 block = PREV_INSN (block);
4567 else
4568 block = gen_block_redirect (block,
4569 bp->address, 2);
4570 label = emit_label_after (gen_label_rtx (),
4571 PREV_INSN (block));
4572 bp->near_label = label;
4574 else if (label && ! NEXT_INSN (label))
4576 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4577 bp->insert_place = insn;
4578 else
4579 gen_far_branch (bp);
4582 if (! label
4583 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4585 bp->near_label = label = gen_label_rtx ();
4586 bp->insert_place = insn;
4587 bp->address = addr;
4589 if (! redirect_jump (insn, label, 1))
4590 abort ();
4592 else
4594 /* get_attr_length (insn) == 2 */
4595 /* Check if we have a pattern where reorg wants to redirect
4596 the branch to a label from an unconditional branch that
4597 is too far away. */
4598 /* We can't use JUMP_LABEL here because it might be undefined
4599 when not optimizing. */
4600 /* A syntax error might cause beyond to be NULL_RTX. */
4601 beyond
4602 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4603 0));
4605 if (beyond
4606 && (GET_CODE (beyond) == JUMP_INSN
4607 || ((beyond = next_active_insn (beyond))
4608 && GET_CODE (beyond) == JUMP_INSN))
4609 && GET_CODE (PATTERN (beyond)) == SET
4610 && recog_memoized (beyond) == CODE_FOR_jump_compact
4611 && ((INSN_ADDRESSES
4612 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4613 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4614 > 252 + 258 + 2))
4615 gen_block_redirect (beyond,
4616 INSN_ADDRESSES (INSN_UID (beyond)), 1);
4619 next = next_active_insn (insn);
4621 if ((GET_CODE (next) == JUMP_INSN
4622 || ((next = next_active_insn (next))
4623 && GET_CODE (next) == JUMP_INSN))
4624 && GET_CODE (PATTERN (next)) == SET
4625 && recog_memoized (next) == CODE_FOR_jump_compact
4626 && ((INSN_ADDRESSES
4627 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
4628 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4629 > 252 + 258 + 2))
4630 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
4632 else if (type == TYPE_JUMP || type == TYPE_RETURN)
4634 int addr = INSN_ADDRESSES (INSN_UID (insn));
4635 rtx far_label = 0;
4636 int dest_uid = 0;
4637 struct far_branch *bp;
4639 if (type == TYPE_JUMP)
4641 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
4642 dest_uid = get_dest_uid (far_label, max_uid);
4643 if (! dest_uid)
4645 /* Parse errors can lead to labels outside
4646 the insn stream. */
4647 if (! NEXT_INSN (far_label))
4648 continue;
4650 if (! optimize)
4652 JUMP_LABEL (insn) = far_label;
4653 LABEL_NUSES (far_label)++;
4655 redirect_jump (insn, NULL_RTX, 1);
4656 far_label = 0;
4659 bp = uid_branch[dest_uid];
4660 if (! bp)
4662 bp = (struct far_branch *) alloca (sizeof *bp);
4663 uid_branch[dest_uid] = bp;
4664 bp->prev = far_branch_list;
4665 far_branch_list = bp;
4666 bp->near_label = 0;
4667 bp->far_label = far_label;
4668 if (far_label)
4669 LABEL_NUSES (far_label)++;
4671 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
4672 if (addr - bp->address <= CONDJUMP_MAX)
4673 emit_label_after (bp->near_label, PREV_INSN (insn));
4674 else
4676 gen_far_branch (bp);
4677 bp->near_label = 0;
4679 else
4680 bp->near_label = 0;
4681 bp->address = addr;
4682 bp->insert_place = insn;
4683 if (! far_label)
4684 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
4685 else
4686 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
4689 /* Generate all pending far branches,
4690 and free our references to the far labels. */
4691 while (far_branch_list)
4693 if (far_branch_list->near_label
4694 && ! NEXT_INSN (far_branch_list->near_label))
4695 gen_far_branch (far_branch_list);
4696 if (optimize
4697 && far_branch_list->far_label
4698 && ! --LABEL_NUSES (far_branch_list->far_label))
4699 delete_insn (far_branch_list->far_label);
4700 far_branch_list = far_branch_list->prev;
4703 /* Instruction length information is no longer valid due to the new
4704 instructions that have been generated. */
4705 init_insn_lengths ();
4708 /* Dump out instruction addresses, which is useful for debugging the
4709 constant pool table stuff.
4711 If relaxing, output the label and pseudo-ops used to link together
4712 calls and the instruction which set the registers. */
4714 /* ??? The addresses printed by this routine for insns are nonsense for
4715 insns which are inside of a sequence where none of the inner insns have
4716 variable length. This is because the second pass of shorten_branches
4717 does not bother to update them. */
4719 void
4720 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
4721 int noperands ATTRIBUTE_UNUSED)
4723 if (TARGET_DUMPISIZE)
4724 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
4726 if (TARGET_RELAX)
4728 rtx note;
4730 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
4731 if (note)
4733 rtx pattern;
4735 pattern = PATTERN (insn);
4736 if (GET_CODE (pattern) == PARALLEL)
4737 pattern = XVECEXP (pattern, 0, 0);
4738 if (GET_CODE (pattern) == CALL
4739 || (GET_CODE (pattern) == SET
4740 && (GET_CODE (SET_SRC (pattern)) == CALL
4741 || get_attr_type (insn) == TYPE_SFUNC)))
4742 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
4743 CODE_LABEL_NUMBER (XEXP (note, 0)));
4744 else if (GET_CODE (pattern) == SET)
4745 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4746 CODE_LABEL_NUMBER (XEXP (note, 0)));
4747 else
4748 abort ();
4753 /* Dump out any constants accumulated in the final pass. These will
4754 only be labels. */
4756 const char *
4757 output_jump_label_table (void)
4759 int i;
4761 if (pool_size)
4763 fprintf (asm_out_file, "\t.align 2\n");
4764 for (i = 0; i < pool_size; i++)
4766 pool_node *p = &pool_vector[i];
4768 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4769 CODE_LABEL_NUMBER (p->label));
4770 output_asm_insn (".long %O0", &p->value);
4772 pool_size = 0;
4775 return "";
4778 /* A full frame looks like:
4780 arg-5
4781 arg-4
4782 [ if current_function_anonymous_args
4783 arg-3
4784 arg-2
4785 arg-1
4786 arg-0 ]
4787 saved-fp
4788 saved-r10
4789 saved-r11
4790 saved-r12
4791 saved-pr
4792 local-n
4794 local-1
4795 local-0 <- fp points here. */
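/* (In this picture addresses decrease downwards: the incoming arguments sit
   at the highest addresses, and local-0, where the frame pointer points, at
   the lowest.) */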
4797 /* Number of bytes pushed for anonymous args, used to pass information
4798 between expand_prologue and expand_epilogue. */
4800 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
4801 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
4802 for an epilogue and a negative value means that it's for a sibcall
4803 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
4804 all the registers that are about to be restored, and hence dead. */
4806 static void
4807 output_stack_adjust (int size, rtx reg, int epilogue_p,
4808 HARD_REG_SET *live_regs_mask)
4810 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
4811 if (size)
4813 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
4815 /* This test is bogus, as output_stack_adjust is used to re-align the
4816 stack. */
4817 #if 0
4818 if (size % align)
4819 abort ();
4820 #endif
4822 if (CONST_OK_FOR_ADD (size))
4823 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
4824 /* Try to do it with two partial adjustments; however, we must make
4825 sure that the stack is properly aligned at all times, in case
4826 an interrupt occurs between the two partial adjustments. */
4827 else if (CONST_OK_FOR_ADD (size / 2 & -align)
4828 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
4830 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
4831 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
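/* A hypothetical example, assuming CONST_OK_FOR_ADD accepts signed 8-bit
   constants (as on the non-SHmedia variants): size == 192 with align == 8 is
   split into 192 / 2 & -8 == 96 followed by 192 - 96 == 96; each step fits
   the add-immediate range and leaves the stack 8-byte aligned. */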
4833 else
4835 rtx const_reg;
4836 rtx insn;
4837 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
4838 int i;
4840 /* If TEMP is invalid, we could temporarily save a general
4841 register to MACL. However, there is currently no need
4842 to handle this case, so just abort when we see it. */
4843 if (epilogue_p < 0
4844 || current_function_interrupt
4845 || ! call_really_used_regs[temp] || fixed_regs[temp])
4846 temp = -1;
4847 if (temp < 0 && ! current_function_interrupt
4848 && (TARGET_SHMEDIA || epilogue_p >= 0))
4850 HARD_REG_SET temps;
4851 COPY_HARD_REG_SET (temps, call_used_reg_set);
4852 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
4853 if (epilogue_p > 0)
4855 int nreg = 0;
4856 if (current_function_return_rtx)
4858 enum machine_mode mode;
4859 mode = GET_MODE (current_function_return_rtx);
4860 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
4861 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
4863 for (i = 0; i < nreg; i++)
4864 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
4865 if (current_function_calls_eh_return)
4867 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
4868 for (i = 0; i <= 3; i++)
4869 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
4872 if (TARGET_SHMEDIA && epilogue_p < 0)
4873 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
4874 CLEAR_HARD_REG_BIT (temps, i);
4875 if (epilogue_p <= 0)
4877 for (i = FIRST_PARM_REG;
4878 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
4879 CLEAR_HARD_REG_BIT (temps, i);
4880 if (cfun->static_chain_decl != NULL)
4881 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
4883 temp = scavenge_reg (&temps);
4885 if (temp < 0 && live_regs_mask)
4886 temp = scavenge_reg (live_regs_mask);
4887 if (temp < 0)
4889 /* If we reached here, the most likely case is the (sibcall)
4890 epilogue for non-SHmedia. Put a special push/pop sequence
4891 for such a case as a last resort. This looks lengthy, but
4892 it would not be a problem because it seems to be very rare. */
4893 if (! TARGET_SHMEDIA && epilogue_p)
4895 rtx adj_reg, tmp_reg, mem;
4897 /* ??? There is still the slight possibility that r4 or r5
4898 have been reserved as fixed registers or assigned as
4899 global registers, and they change during an interrupt.
4900 There are possible ways to handle this:
4901 - If we are adjusting the frame pointer (r14), we can do
4902 with a single temp register and an ordinary push / pop
4903 on the stack.
4904 - Grab any call-used or call-saved registers (i.e. not
4905 fixed or globals) for the temps we need. We might
4906 also grab r14 if we are adjusting the stack pointer.
4907 If we can't find enough available registers, issue
4908 a diagnostic and abort - the user must have reserved
4909 way too many registers.
4910 But since all this is rather unlikely to happen and
4911 would require extra testing, we just abort if r4 / r5
4912 are not available. */
4913 if (fixed_regs[4] || fixed_regs[5]
4914 || global_regs[4] || global_regs[5])
4915 abort ();
4917 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
4918 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
4919 emit_move_insn (gen_rtx_MEM (Pmode, reg), adj_reg);
4920 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
4921 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
4922 mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
4923 emit_move_insn (mem, tmp_reg);
4924 emit_move_insn (tmp_reg, gen_rtx_MEM (Pmode, reg));
4925 mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
4926 emit_move_insn (mem, tmp_reg);
4927 emit_move_insn (reg, adj_reg);
4928 mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
4929 emit_move_insn (adj_reg, mem);
4930 mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
4931 emit_move_insn (tmp_reg, mem);
4932 return;
4934 else
4935 abort ();
4937 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
4939 /* If SIZE is negative, subtract the positive value.
4940 This sometimes allows a constant pool entry to be shared
4941 between prologue and epilogue code. */
4942 if (size < 0)
4944 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
4945 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
4947 else
4949 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
4950 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
4952 if (! epilogue_p)
4953 REG_NOTES (insn)
4954 = (gen_rtx_EXPR_LIST
4955 (REG_FRAME_RELATED_EXPR,
4956 gen_rtx_SET (VOIDmode, reg,
4957 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
4958 REG_NOTES (insn)));
4963 static rtx
4964 frame_insn (rtx x)
4966 x = emit_insn (x);
4967 RTX_FRAME_RELATED_P (x) = 1;
4968 return x;
4971 /* Output RTL to push register RN onto the stack. */
4973 static rtx
4974 push (int rn)
4976 rtx x;
4977 if (rn == FPUL_REG)
4978 x = gen_push_fpul ();
4979 else if (rn == FPSCR_REG)
4980 x = gen_push_fpscr ();
4981 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4982 && FP_OR_XD_REGISTER_P (rn))
4984 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4985 return NULL_RTX;
4986 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
4988 else if (TARGET_SH2E && FP_REGISTER_P (rn))
4989 x = gen_push_e (gen_rtx_REG (SFmode, rn));
4990 else
4991 x = gen_push (gen_rtx_REG (SImode, rn));
4993 x = frame_insn (x);
4994 REG_NOTES (x)
4995 = gen_rtx_EXPR_LIST (REG_INC,
4996 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4997 return x;
5000 /* Output RTL to pop register RN from the stack. */
5002 static void
5003 pop (int rn)
5005 rtx x;
5006 if (rn == FPUL_REG)
5007 x = gen_pop_fpul ();
5008 else if (rn == FPSCR_REG)
5009 x = gen_pop_fpscr ();
5010 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5011 && FP_OR_XD_REGISTER_P (rn))
5013 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5014 return;
5015 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
5017 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5018 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
5019 else
5020 x = gen_pop (gen_rtx_REG (SImode, rn));
5022 x = emit_insn (x);
5023 REG_NOTES (x)
5024 = gen_rtx_EXPR_LIST (REG_INC,
5025 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5028 /* Generate code to push the regs specified in the mask. */
5030 static void
5031 push_regs (HARD_REG_SET *mask, int interrupt_handler)
5033 int i;
5034 int skip_fpscr = 0;
5036 /* Push PR last; this gives better latencies after the prologue, and
5037 provides candidates for the return delay slot when there are no general
5038 registers pushed. */
5039 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5041 /* If this is an interrupt handler, and the SZ bit varies,
5042 and we have to push any floating point register, we need
5043 to switch to the correct precision first. */
5044 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
5045 && hard_regs_intersect_p (mask, &reg_class_contents[DF_REGS]))
5047 HARD_REG_SET unsaved;
5049 push (FPSCR_REG);
5050 COMPL_HARD_REG_SET (unsaved, *mask);
5051 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
5052 skip_fpscr = 1;
5054 if (i != PR_REG
5055 && (i != FPSCR_REG || ! skip_fpscr)
5056 && TEST_HARD_REG_BIT (*mask, i))
5057 push (i);
5059 if (TEST_HARD_REG_BIT (*mask, PR_REG))
5060 push (PR_REG);
5063 /* Calculate how much extra space is needed to save all callee-saved
5064 target registers.
5065 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5067 static int
5068 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
5070 int reg;
5071 int stack_space = 0;
5072 int interrupt_handler = sh_cfun_interrupt_handler_p ();
5074 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5075 if ((! call_really_used_regs[reg] || interrupt_handler)
5076 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5077 /* Leave space to save this target register on the stack,
5078 in case target register allocation wants to use it. */
5079 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5080 return stack_space;
5083 /* Decide whether we should reserve space for callee-save target registers,
5084 in case target register allocation wants to use them. REGS_SAVED is
5085 the space, in bytes, that is already required for register saves.
5086 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5088 static int
5089 shmedia_reserve_space_for_target_registers_p (int regs_saved,
5090 HARD_REG_SET *live_regs_mask)
5092 if (optimize_size)
5093 return 0;
5094 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
5097 /* Decide how much space to reserve for callee-save target registers
5098 in case target register allocation wants to use them.
5099 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5101 static int
5102 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5104 if (shmedia_space_reserved_for_target_registers)
5105 return shmedia_target_regs_stack_space (live_regs_mask);
5106 else
5107 return 0;
5110 /* Work out the registers which need to be saved, both as a mask and a
5111 count of bytes saved. Return the count.
5113 If doing a pragma interrupt function, then push all regs used by the
5114 function, and if we call another function (we can tell by looking at PR),
5115 make sure that all the regs it clobbers are safe too. */
5117 static int
5118 calc_live_regs (HARD_REG_SET *live_regs_mask)
5120 unsigned int reg;
5121 int count;
5122 int interrupt_handler;
5123 int pr_live, has_call;
5125 interrupt_handler = sh_cfun_interrupt_handler_p ();
5127 CLEAR_HARD_REG_SET (*live_regs_mask);
5128 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
5129 && regs_ever_live[FPSCR_REG])
5130 target_flags &= ~FPU_SINGLE_BIT;
5131 /* If switching to double mode would avoid a lot of saves, do that. */
5132 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
5133 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5134 if (regs_ever_live[reg] && regs_ever_live[reg+1]
5135 && (! call_really_used_regs[reg]
5136 || (interrupt_handler && ! pragma_trapa))
5137 && ++count > 2)
5139 target_flags &= ~FPU_SINGLE_BIT;
5140 break;
5142 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5143 knows how to use it. That means the pseudo originally allocated for
5144 the initial value can become the PR_MEDIA_REG hard register, as seen for
5145 execute/20010122-1.c:test9. */
5146 if (TARGET_SHMEDIA)
5147 /* ??? this function is called from initial_elimination_offset, hence we
5148 can't use the result of sh_media_register_for_return here. */
5149 pr_live = sh_pr_n_sets ();
5150 else
5152 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5153 pr_live = (pr_initial
5154 ? (GET_CODE (pr_initial) != REG
5155 || REGNO (pr_initial) != (PR_REG))
5156 : regs_ever_live[PR_REG]);
5157 /* For SHcompact, if not optimizing, we end up with a memory reference
5158 using the return address pointer for __builtin_return_address even
5159 though there is no actual need to put the PR register on the stack. */
5160 pr_live |= regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM];
5162 /* Force PR to be live if the prologue has to call the SHmedia
5163 argument decoder or register saver. */
5164 if (TARGET_SHCOMPACT
5165 && ((current_function_args_info.call_cookie
5166 & ~ CALL_COOKIE_RET_TRAMP (1))
5167 || current_function_has_nonlocal_label))
5168 pr_live = 1;
5169 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
5170 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
5172 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5173 ? pr_live
5174 : (interrupt_handler && ! pragma_trapa)
5175 ? (/* Need to save all the regs ever live. */
5176 (regs_ever_live[reg]
5177 || (call_really_used_regs[reg]
5178 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
5179 || reg == PIC_OFFSET_TABLE_REGNUM)
5180 && has_call)
5181 || (has_call && REGISTER_NATURAL_MODE (reg) == SImode
5182 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5183 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5184 && reg != RETURN_ADDRESS_POINTER_REGNUM
5185 && reg != T_REG && reg != GBR_REG
5186 /* Push fpscr only on targets which have an FPU. */
5187 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5188 : (/* Only push those regs which are used and need to be saved. */
5189 (TARGET_SHCOMPACT
5190 && flag_pic
5191 && current_function_args_info.call_cookie
5192 && reg == PIC_OFFSET_TABLE_REGNUM)
5193 || (regs_ever_live[reg] && ! call_really_used_regs[reg])
5194 || (current_function_calls_eh_return
5195 && (reg == EH_RETURN_DATA_REGNO (0)
5196 || reg == EH_RETURN_DATA_REGNO (1)
5197 || reg == EH_RETURN_DATA_REGNO (2)
5198 || reg == EH_RETURN_DATA_REGNO (3)))
5199 || ((reg == MACL_REG || reg == MACH_REG)
5200 && regs_ever_live[reg]
5201 && sh_cfun_attr_renesas_p ())
5204 SET_HARD_REG_BIT (*live_regs_mask, reg);
5205 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5207 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
5208 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
5210 if (FP_REGISTER_P (reg))
5212 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
5214 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5215 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
5218 else if (XD_REGISTER_P (reg))
5220 /* Must switch to double mode to access these registers. */
5221 target_flags &= ~FPU_SINGLE_BIT;
5226 /* If we have a target register optimization pass after prologue / epilogue
5227 threading, we need to assume all target registers will be live even if
5228 they aren't now. */
5229 if (flag_branch_target_load_optimize2
5230 && TARGET_SAVE_ALL_TARGET_REGS
5231 && shmedia_space_reserved_for_target_registers)
5232 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5233 if ((! call_really_used_regs[reg] || interrupt_handler)
5234 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5236 SET_HARD_REG_BIT (*live_regs_mask, reg);
5237 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5239 /* If this is an interrupt handler, we don't have any call-clobbered
5240 registers we can conveniently use for target register save/restore.
5241 Make sure we save at least one general purpose register when we need
5242 to save target registers. */
5243 if (interrupt_handler
5244 && hard_regs_intersect_p (live_regs_mask,
5245 &reg_class_contents[TARGET_REGS])
5246 && ! hard_regs_intersect_p (live_regs_mask,
5247 &reg_class_contents[GENERAL_REGS]))
5249 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
5250 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
5253 return count;
5256 /* Code to generate prologue and epilogue sequences */
5258 /* PUSHED is the number of bytes that are being pushed on the
5259 stack for register saves. Return the frame size, padded
5260 appropriately so that the stack stays properly aligned. */
5261 static HOST_WIDE_INT
5262 rounded_frame_size (int pushed)
5264 HOST_WIDE_INT size = get_frame_size ();
5265 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5267 return ((size + pushed + align - 1) & -align) - pushed;
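/* For example, with illustrative numbers: get_frame_size () == 20,
   PUSHED == 12 and an 8-byte STACK_BOUNDARY yield
   ((20 + 12 + 7) & -8) - 12 == 20, so pushed registers plus frame come to
   32 bytes in total, a multiple of the boundary. */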
5270 /* Choose a call-clobbered target-branch register that remains
5271 unchanged throughout the whole function. We set it up as the return
5272 value in the prologue. */
5274 sh_media_register_for_return (void)
5276 int regno;
5277 int tr0_used;
5279 if (! current_function_is_leaf)
5280 return -1;
5281 if (lookup_attribute ("interrupt_handler",
5282 DECL_ATTRIBUTES (current_function_decl)))
5283 return -1;
5285 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
5287 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
5288 if (call_really_used_regs[regno] && ! regs_ever_live[regno])
5289 return regno;
5291 return -1;
5294 /* The maximum number of registers we need to save is:
5295 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
5296 - 32 floating point registers (for each pair, we save none,
5297 one single precision value, or a double precision value).
5298 - 8 target registers
5299 - add 1 entry for a delimiter. */
5300 #define MAX_SAVED_REGS (62+32+8)
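/* That is 62 + 32 + 8 == 102 entries; the schedule array below is sized
   MAX_SAVED_REGS + 2 so that both the leading and the trailing VOIDmode
   delimiter entries fit as well. */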
5302 typedef struct save_entry_s
5304 unsigned char reg;
5305 unsigned char mode;
5306 short offset;
5307 } save_entry;
5309 #define MAX_TEMPS 4
5311 /* There will be a delimiter entry with VOIDmode both at the start and the
5312 end of a filled in schedule. The end delimiter has the offset of the
5313 save with the smallest (i.e. most negative) offset. */
5314 typedef struct save_schedule_s
5316 save_entry entries[MAX_SAVED_REGS + 2];
5317 int temps[MAX_TEMPS+1];
5318 } save_schedule;
5320 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
5321 use reverse order. Returns the last entry written to (not counting
5322 the delimiter). OFFSET_BASE is a number to be added to all offset
5323 entries. */
5325 static save_entry *
5326 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
5327 int offset_base)
5329 int align, i;
5330 save_entry *entry = schedule->entries;
5331 int tmpx = 0;
5332 int offset;
5334 if (! current_function_interrupt)
5335 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
5336 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
5337 && ! FUNCTION_ARG_REGNO_P (i)
5338 && i != FIRST_RET_REG
5339 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
5340 && ! (current_function_calls_eh_return
5341 && (i == EH_RETURN_STACKADJ_REGNO
5342 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
5343 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
5344 schedule->temps[tmpx++] = i;
5345 entry->reg = -1;
5346 entry->mode = VOIDmode;
5347 entry->offset = offset_base;
5348 entry++;
5349 /* We loop twice: first, we save 8-byte-aligned registers at the
5350 higher addresses, which are known to be aligned. Then we
5351 proceed to saving 32-bit registers that don't need 8-byte
5352 alignment.
5353 If this is an interrupt function, all registers that need saving
5354 need to be saved in full. Moreover, we need to postpone saving
5355 target registers till we have saved some general purpose registers
5356 we can then use as scratch registers. */
5357 offset = offset_base;
5358 for (align = 1; align >= 0; align--)
5360 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
5361 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5363 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5364 int reg = i;
5366 if (current_function_interrupt)
5368 if (TARGET_REGISTER_P (i))
5369 continue;
5370 if (GENERAL_REGISTER_P (i))
5371 mode = DImode;
5373 if (mode == SFmode && (i % 2) == 1
5374 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5375 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
5377 mode = DFmode;
5378 i--;
5379 reg--;
5382 /* If we're doing the aligned pass and this is not aligned,
5383 or we're doing the unaligned pass and this is aligned,
5384 skip it. */
5385 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
5386 != align)
5387 continue;
5389 if (current_function_interrupt
5390 && GENERAL_REGISTER_P (i)
5391 && tmpx < MAX_TEMPS)
5392 schedule->temps[tmpx++] = i;
5394 offset -= GET_MODE_SIZE (mode);
5395 entry->reg = i;
5396 entry->mode = mode;
5397 entry->offset = offset;
5398 entry++;
5400 if (align && current_function_interrupt)
5401 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
5402 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5404 offset -= GET_MODE_SIZE (DImode);
5405 entry->reg = i;
5406 entry->mode = DImode;
5407 entry->offset = offset;
5408 entry++;
5411 entry->reg = -1;
5412 entry->mode = VOIDmode;
5413 entry->offset = offset;
5414 schedule->temps[tmpx] = -1;
5415 return entry - 1;
5418 void
5419 sh_expand_prologue (void)
5421 HARD_REG_SET live_regs_mask;
5422 int d, i;
5423 int d_rounding = 0;
5424 int save_flags = target_flags;
5425 int pretend_args;
5427 current_function_interrupt = sh_cfun_interrupt_handler_p ();
5429 /* We have pretend args if we had an object sent partially in registers
5430 and partially on the stack, e.g. a large structure. */
5431 pretend_args = current_function_pretend_args_size;
5432 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
5433 && (NPARM_REGS(SImode)
5434 > current_function_args_info.arg_count[(int) SH_ARG_INT]))
5435 pretend_args = 0;
5436 output_stack_adjust (-pretend_args
5437 - current_function_args_info.stack_regs * 8,
5438 stack_pointer_rtx, 0, NULL);
5440 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
5441 /* We're going to use the PIC register to load the address of the
5442 incoming-argument decoder and/or of the return trampoline from
5443 the GOT, so make sure the PIC register is preserved and
5444 initialized. */
5445 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5447 if (TARGET_SHCOMPACT
5448 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5450 int reg;
5452 /* First, make all registers with incoming arguments that will
5453 be pushed onto the stack live, so that register renaming
5454 doesn't overwrite them. */
5455 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
5456 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
5457 >= NPARM_REGS (SImode) - reg)
5458 for (; reg < NPARM_REGS (SImode); reg++)
5459 emit_insn (gen_shcompact_preserve_incoming_args
5460 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5461 else if (CALL_COOKIE_INT_REG_GET
5462 (current_function_args_info.call_cookie, reg) == 1)
5463 emit_insn (gen_shcompact_preserve_incoming_args
5464 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5466 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
5467 stack_pointer_rtx);
5468 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
5469 GEN_INT (current_function_args_info.call_cookie));
5470 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
5471 gen_rtx_REG (SImode, R0_REG));
5473 else if (TARGET_SHMEDIA)
5475 int tr = sh_media_register_for_return ();
5477 if (tr >= 0)
5479 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
5480 gen_rtx_REG (DImode, PR_MEDIA_REG));
5482 /* ??? We should suppress saving pr when we don't need it, but this
5483 is tricky because of builtin_return_address. */
5485 /* If this function only exits with sibcalls, this copy
5486 will be flagged as dead. */
5487 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5488 const0_rtx,
5489 REG_NOTES (insn));
5493 /* Emit the code for SETUP_VARARGS. */
5494 if (current_function_stdarg)
5496 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
5498 /* Push arg regs as if they'd been provided by the caller on the stack. */
5499 for (i = 0; i < NPARM_REGS(SImode); i++)
5501 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
5502 rtx insn;
5504 if (i >= (NPARM_REGS(SImode)
5505 - current_function_args_info.arg_count[(int) SH_ARG_INT]
5507 break;
5508 insn = push (rn);
5509 RTX_FRAME_RELATED_P (insn) = 0;
5514 /* If we're supposed to switch stacks at function entry, do so now. */
5515 if (sp_switch)
5516 emit_insn (gen_sp_switch_1 ());
5518 d = calc_live_regs (&live_regs_mask);
5519 /* ??? Maybe we could save some switching if we can move a mode switch
5520 that already happens to be at the function start into the prologue. */
5521 if (target_flags != save_flags && ! current_function_interrupt)
5522 emit_insn (gen_toggle_sz ());
5524 if (TARGET_SH5)
5526 int offset_base, offset;
5527 rtx r0 = NULL_RTX;
5528 int offset_in_r0 = -1;
5529 int sp_in_r0 = 0;
5530 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5531 int total_size, save_size;
5532 save_schedule schedule;
5533 save_entry *entry;
5534 int *tmp_pnt;
5536 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
5537 && ! current_function_interrupt)
5538 r0 = gen_rtx_REG (Pmode, R0_REG);
5540 /* D is the actual number of bytes that we need for saving registers;
5541 however, in initial_elimination_offset we have committed to using
5542 an additional TREGS_SPACE bytes. In order to keep both
5543 addresses to arguments supplied by the caller and local variables
5544 valid, we must keep this gap. Place it between the incoming
5545 arguments and the actually saved registers in a bid to optimize
5546 locality of reference. */
5547 total_size = d + tregs_space;
5548 total_size += rounded_frame_size (total_size);
5549 save_size = total_size - rounded_frame_size (d);
5550 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
5551 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5552 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
5554 /* If adjusting the stack in a single step costs nothing extra, do so.
5555 I.e. either if a single addi is enough, or we need a movi anyway,
5556 and we don't exceed the maximum offset range (the test for the
5557 latter is conservative for simplicity). */
5558 if (TARGET_SHMEDIA
5559 && (CONST_OK_FOR_I10 (-total_size)
5560 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
5561 && total_size <= 2044)))
5562 d_rounding = total_size - save_size;
5564 offset_base = d + d_rounding;
5566 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
5567 0, NULL);
5569 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
5570 tmp_pnt = schedule.temps;
5571 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
5573 enum machine_mode mode = entry->mode;
5574 unsigned int reg = entry->reg;
5575 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
5576 rtx orig_reg_rtx;
5578 offset = entry->offset;
5580 reg_rtx = gen_rtx_REG (mode, reg);
5582 mem_rtx = gen_rtx_MEM (mode,
5583 gen_rtx_PLUS (Pmode,
5584 stack_pointer_rtx,
5585 GEN_INT (offset)));
5587 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
5589 if (! r0)
5590 abort ();
5591 mem_rtx = NULL_RTX;
5593 try_pre_dec:
5595 if (HAVE_PRE_DECREMENT
5596 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
5597 || mem_rtx == NULL_RTX
5598 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5600 pre_dec = gen_rtx_MEM (mode,
5601 gen_rtx_PRE_DEC (Pmode, r0));
5603 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
5604 pre_dec_ok);
5606 pre_dec = NULL_RTX;
5608 break;
5610 pre_dec_ok:
5611 mem_rtx = NULL_RTX;
5612 offset += GET_MODE_SIZE (mode);
5614 while (0);
5616 if (mem_rtx != NULL_RTX)
5617 goto addr_ok;
5619 if (offset_in_r0 == -1)
5621 emit_move_insn (r0, GEN_INT (offset));
5622 offset_in_r0 = offset;
5624 else if (offset != offset_in_r0)
5626 emit_move_insn (r0,
5627 gen_rtx_PLUS
5628 (Pmode, r0,
5629 GEN_INT (offset - offset_in_r0)));
5630 offset_in_r0 += offset - offset_in_r0;
5633 if (pre_dec != NULL_RTX)
5635 if (! sp_in_r0)
5637 emit_move_insn (r0,
5638 gen_rtx_PLUS
5639 (Pmode, r0, stack_pointer_rtx));
5640 sp_in_r0 = 1;
5643 offset -= GET_MODE_SIZE (mode);
5644 offset_in_r0 -= GET_MODE_SIZE (mode);
5646 mem_rtx = pre_dec;
5648 else if (sp_in_r0)
5649 mem_rtx = gen_rtx_MEM (mode, r0);
5650 else
5651 mem_rtx = gen_rtx_MEM (mode,
5652 gen_rtx_PLUS (Pmode,
5653 stack_pointer_rtx,
5654 r0));
5656 /* We must not use an r0-based address for target-branch
5657 registers or for special registers without pre-dec
5658 memory addresses, since we store their values in r0
5659 first. */
5660 if (TARGET_REGISTER_P (reg)
5661 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5662 && mem_rtx != pre_dec))
5663 abort ();
5665 addr_ok:
5666 orig_reg_rtx = reg_rtx;
5667 if (TARGET_REGISTER_P (reg)
5668 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5669 && mem_rtx != pre_dec))
5671 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
5673 emit_move_insn (tmp_reg, reg_rtx);
5675 if (REGNO (tmp_reg) == R0_REG)
5677 offset_in_r0 = -1;
5678 sp_in_r0 = 0;
5679 if (refers_to_regno_p (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0))
5680 abort ();
5683 if (*++tmp_pnt <= 0)
5684 tmp_pnt = schedule.temps;
5686 reg_rtx = tmp_reg;
5689 rtx insn;
5691 /* Mark as interesting for dwarf cfi generator */
5692 insn = emit_move_insn (mem_rtx, reg_rtx);
5693 RTX_FRAME_RELATED_P (insn) = 1;
5694 /* If we use an intermediate register for the save, we can't
5695 describe this exactly in cfi as a copy of the to-be-saved
5696 register into the temporary register and then the temporary
5697 register on the stack, because the temporary register can
5698 have a different natural size than the to-be-saved register.
5699 Thus, we gloss over the intermediate copy and pretend we do
5700 a direct save from the to-be-saved register. */
5701 if (REGNO (reg_rtx) != reg)
5703 rtx set, note_rtx;
5705 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
5706 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
5707 REG_NOTES (insn));
5708 REG_NOTES (insn) = note_rtx;
5711 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
5713 rtx reg_rtx = gen_rtx_REG (mode, reg);
5714 rtx set, note_rtx;
5715 rtx mem_rtx = gen_rtx_MEM (mode,
5716 gen_rtx_PLUS (Pmode,
5717 stack_pointer_rtx,
5718 GEN_INT (offset)));
5720 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
5721 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
5722 REG_NOTES (insn));
5723 REG_NOTES (insn) = note_rtx;
5728 if (entry->offset != d_rounding)
5729 abort ();
5731 else
5732 push_regs (&live_regs_mask, current_function_interrupt);
5734 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
5736 rtx insn = get_last_insn ();
5737 rtx last = emit_insn (gen_GOTaddr2picreg ());
5739 /* Mark these insns as possibly dead. Sometimes, flow2 may
5740 delete all uses of the PIC register. In this case, let it
5741 delete the initialization too. */
5744 insn = NEXT_INSN (insn);
5746 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5747 const0_rtx,
5748 REG_NOTES (insn));
5750 while (insn != last);
5753 if (SHMEDIA_REGS_STACK_ADJUST ())
5755 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5756 function_symbol (TARGET_FPU_ANY
5757 ? "__GCC_push_shmedia_regs"
5758 : "__GCC_push_shmedia_regs_nofpu"));
5759 /* This must NOT go through the PLT, otherwise mach and macl
5760 may be clobbered. */
5761 emit_insn (gen_shmedia_save_restore_regs_compact
5762 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
5765 if (target_flags != save_flags && ! current_function_interrupt)
5767 rtx insn = emit_insn (gen_toggle_sz ());
5769 /* If we're lucky, a mode switch in the function body will
5770 overwrite fpscr, turning this insn dead. Tell flow this
5771 insn is ok to delete. */
5772 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5773 const0_rtx,
5774 REG_NOTES (insn));
5777 target_flags = save_flags;
5779 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
5780 stack_pointer_rtx, 0, NULL);
5782 if (frame_pointer_needed)
5783 frame_insn (GEN_MOV (frame_pointer_rtx, stack_pointer_rtx));
5785 if (TARGET_SHCOMPACT
5786 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5788 /* This must NOT go through the PLT, otherwise mach and macl
5789 may be clobbered. */
5790 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5791 function_symbol ("__GCC_shcompact_incoming_args"));
5792 emit_insn (gen_shcompact_incoming_args ());
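/* Expand code for the function epilogue.  SIBCALL_P is true when the
   epilogue is expanded for a sibling call, i.e. the function does not
   return through the usual return path.  */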
5796 void
5797 sh_expand_epilogue (bool sibcall_p)
5799 HARD_REG_SET live_regs_mask;
5800 int d, i;
5801 int d_rounding = 0;
5803 int save_flags = target_flags;
5804 int frame_size, save_size;
5805 int fpscr_deferred = 0;
5806 int e = sibcall_p ? -1 : 1;
5808 d = calc_live_regs (&live_regs_mask);
5810 save_size = d;
5811 frame_size = rounded_frame_size (d);
5813 if (TARGET_SH5)
5815 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5816 int total_size;
5817 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
5818 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5819 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
5821 total_size = d + tregs_space;
5822 total_size += rounded_frame_size (total_size);
5823 save_size = total_size - frame_size;
5825 /* If adjusting the stack in a single step costs nothing extra, do so.
5826 I.e. either if a single addi is enough, or we need a movi anyway,
5827 and we don't exceed the maximum offset range (the test for the
5828 latter is conservative for simplicity). */
5829 if (TARGET_SHMEDIA
5830 && ! frame_pointer_needed
5831 && (CONST_OK_FOR_I10 (total_size)
5832 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
5833 && total_size <= 2044)))
5834 d_rounding = frame_size;
5836 frame_size -= d_rounding;
5839 if (frame_pointer_needed)
5841 /* We must avoid scheduling the epilogue with previous basic blocks
5842 when exception handling is enabled. See PR/18032. */
5843 if (flag_exceptions)
5844 emit_insn (gen_blockage ());
5845 output_stack_adjust (frame_size, frame_pointer_rtx, e, &live_regs_mask);
5847 /* We must avoid moving the stack pointer adjustment past code
5848 which reads from the local frame, else an interrupt could
5849 occur after the SP adjustment and clobber data in the local
5850 frame. */
5851 emit_insn (gen_blockage ());
5852 emit_insn (GEN_MOV (stack_pointer_rtx, frame_pointer_rtx));
5854 else if (frame_size)
5856 /* We must avoid moving the stack pointer adjustment past code
5857 which reads from the local frame, else an interrupt could
5858 occur after the SP adjustment and clobber data in the local
5859 frame. */
5860 emit_insn (gen_blockage ());
5861 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
5864 if (SHMEDIA_REGS_STACK_ADJUST ())
5866 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5867 function_symbol (TARGET_FPU_ANY
5868 ? "__GCC_pop_shmedia_regs"
5869 : "__GCC_pop_shmedia_regs_nofpu"));
5870 /* This must NOT go through the PLT, otherwise mach and macl
5871 may be clobbered. */
5872 emit_insn (gen_shmedia_save_restore_regs_compact
5873 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
5876 /* Pop all the registers. */
5878 if (target_flags != save_flags && ! current_function_interrupt)
5879 emit_insn (gen_toggle_sz ());
5880 if (TARGET_SH5)
5882 int offset_base, offset;
5883 int offset_in_r0 = -1;
5884 int sp_in_r0 = 0;
5885 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
5886 save_schedule schedule;
5887 save_entry *entry;
5888 int *tmp_pnt;
5890 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
5891 offset_base = -entry[1].offset + d_rounding;
5892 tmp_pnt = schedule.temps;
5893 for (; entry->mode != VOIDmode; entry--)
5895 enum machine_mode mode = entry->mode;
5896 int reg = entry->reg;
5897 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
5899 offset = offset_base + entry->offset;
5900 reg_rtx = gen_rtx_REG (mode, reg);
5902 mem_rtx = gen_rtx_MEM (mode,
5903 gen_rtx_PLUS (Pmode,
5904 stack_pointer_rtx,
5905 GEN_INT (offset)));
5907 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
5909 mem_rtx = NULL_RTX;
5911 try_post_inc:
5913 if (HAVE_POST_INCREMENT
5914 && (offset == offset_in_r0
5915 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
5916 && mem_rtx == NULL_RTX)
5917 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5919 post_inc = gen_rtx_MEM (mode,
5920 gen_rtx_POST_INC (Pmode, r0));
5922 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
5923 post_inc_ok);
5925 post_inc = NULL_RTX;
5927 break;
5929 post_inc_ok:
5930 mem_rtx = NULL_RTX;
5932 while (0);
5934 if (mem_rtx != NULL_RTX)
5935 goto addr_ok;
5937 if (offset_in_r0 == -1)
5939 emit_move_insn (r0, GEN_INT (offset));
5940 offset_in_r0 = offset;
5942 else if (offset != offset_in_r0)
5944 emit_move_insn (r0,
5945 gen_rtx_PLUS
5946 (Pmode, r0,
5947 GEN_INT (offset - offset_in_r0)));
5948 offset_in_r0 += offset - offset_in_r0;
5951 if (post_inc != NULL_RTX)
5953 if (! sp_in_r0)
5955 emit_move_insn (r0,
5956 gen_rtx_PLUS
5957 (Pmode, r0, stack_pointer_rtx));
5958 sp_in_r0 = 1;
5961 mem_rtx = post_inc;
5963 offset_in_r0 += GET_MODE_SIZE (mode);
5965 else if (sp_in_r0)
5966 mem_rtx = gen_rtx_MEM (mode, r0);
5967 else
5968 mem_rtx = gen_rtx_MEM (mode,
5969 gen_rtx_PLUS (Pmode,
5970 stack_pointer_rtx,
5971 r0));
5973 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5974 && mem_rtx != post_inc)
5975 abort ();
5977 addr_ok:
5978 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5979 && mem_rtx != post_inc)
5981 insn = emit_move_insn (r0, mem_rtx);
5982 mem_rtx = r0;
5984 else if (TARGET_REGISTER_P (reg))
5986 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
5988 /* Give the scheduler a bit of freedom by using up to
5989 MAX_TEMPS registers in a round-robin fashion. */
5990 insn = emit_move_insn (tmp_reg, mem_rtx);
5991 mem_rtx = tmp_reg;
5992 if (*++tmp_pnt < 0)
5993 tmp_pnt = schedule.temps;
5996 insn = emit_move_insn (reg_rtx, mem_rtx);
5997 if (reg == PR_MEDIA_REG && sh_media_register_for_return () >= 0)
5998 /* This is dead, unless we return with a sibcall. */
5999 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6000 const0_rtx,
6001 REG_NOTES (insn));
6004 if (entry->offset + offset_base != d + d_rounding)
6005 abort ();
6007 else /* ! TARGET_SH5 */
6009 save_size = 0;
6010 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6011 pop (PR_REG);
6012 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
6014 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
6016 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
6017 && hard_regs_intersect_p (&live_regs_mask,
6018 &reg_class_contents[DF_REGS]))
6019 fpscr_deferred = 1;
6020 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
6021 pop (j);
6022 if (j == FIRST_FP_REG && fpscr_deferred)
6023 pop (FPSCR_REG);
6027 if (target_flags != save_flags && ! current_function_interrupt)
6028 emit_insn (gen_toggle_sz ());
6029 target_flags = save_flags;
6031 output_stack_adjust (current_function_pretend_args_size
6032 + save_size + d_rounding
6033 + current_function_args_info.stack_regs * 8,
6034 stack_pointer_rtx, e, NULL);
6036 if (current_function_calls_eh_return)
6037 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
6038 EH_RETURN_STACKADJ_RTX));
6040 /* Switch back to the normal stack if necessary. */
6041 if (sp_switch)
6042 emit_insn (gen_sp_switch_2 ());
6044 /* Tell flow the insn that pops PR isn't dead. */
6045 /* PR_REG will never be live in SHmedia mode, and we don't need to
6046 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6047 by the return pattern. */
6048 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6049 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
6052 static int sh_need_epilogue_known = 0;
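/* Return nonzero if the current function needs an epilogue.  The result
   is computed by expanding the epilogue into a discarded insn sequence
   and is cached in sh_need_epilogue_known (1: needed, -1: not needed).  */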
6055 sh_need_epilogue (void)
6057 if (! sh_need_epilogue_known)
6059 rtx epilogue;
6061 start_sequence ();
6062 sh_expand_epilogue (0);
6063 epilogue = get_insns ();
6064 end_sequence ();
6065 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
6067 return sh_need_epilogue_known > 0;
6070 /* Emit code to change the current function's return address to RA.
6071 TMP is available as a scratch register, if needed. */
6073 void
6074 sh_set_return_address (rtx ra, rtx tmp)
6076 HARD_REG_SET live_regs_mask;
6077 int d;
6078 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6079 int pr_offset;
6081 d = calc_live_regs (&live_regs_mask);
6083 /* If pr_reg isn't live, we can set it (or the register given in
6084 sh_media_register_for_return) directly. */
6085 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6087 rtx rr;
6089 if (TARGET_SHMEDIA)
6091 int rr_regno = sh_media_register_for_return ();
6093 if (rr_regno < 0)
6094 rr_regno = pr_reg;
6096 rr = gen_rtx_REG (DImode, rr_regno);
6098 else
6099 rr = gen_rtx_REG (SImode, pr_reg);
6101 emit_insn (GEN_MOV (rr, ra));
6102 /* Tell flow the register for return isn't dead. */
6103 emit_insn (gen_rtx_USE (VOIDmode, rr));
6104 return;
6107 if (TARGET_SH5)
6109 int offset;
6110 save_schedule schedule;
6111 save_entry *entry;
6113 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
6114 offset = entry[1].offset;
6115 for (; entry->mode != VOIDmode; entry--)
6116 if (entry->reg == pr_reg)
6117 goto found;
6119 /* We can't find the PR register. */
6120 abort ();
6122 found:
6123 offset = entry->offset - offset;
6124 pr_offset = (rounded_frame_size (d) + offset
6125 + SHMEDIA_REGS_STACK_ADJUST ());
6127 else
6128 pr_offset = rounded_frame_size (d);
6130 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6131 emit_insn (GEN_ADD3 (tmp, tmp, frame_pointer_rtx));
6133 tmp = gen_rtx_MEM (Pmode, tmp);
6134 emit_insn (GEN_MOV (tmp, ra));
6137 /* Clear variables at function end. */
6139 static void
6140 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6141 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6143 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
6144 sh_need_epilogue_known = 0;
6145 sp_switch = NULL_RTX;
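/* Expand __builtin_saveregs: save the unnamed argument registers so that
   the varargs / stdarg machinery can find them, and return the address of
   the register save area (on SHcompact the integer registers are handled
   through the call cookie instead).  */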
6148 static rtx
6149 sh_builtin_saveregs (void)
6151 /* First unnamed integer register. */
6152 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
6153 /* Number of integer registers we need to save. */
6154 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6155 /* First unnamed SFmode float reg */
6156 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6157 /* Number of SFmode float regs to save. */
6158 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6159 rtx regbuf, fpregs;
6160 int bufsize, regno;
6161 HOST_WIDE_INT alias_set;
6163 if (TARGET_SH5)
6165 if (n_intregs)
6167 int pushregs = n_intregs;
6169 while (pushregs < NPARM_REGS (SImode) - 1
6170 && (CALL_COOKIE_INT_REG_GET
6171 (current_function_args_info.call_cookie,
6172 NPARM_REGS (SImode) - pushregs)
6173 == 1))
6175 current_function_args_info.call_cookie
6176 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6177 - pushregs, 1);
6178 pushregs++;
6181 if (pushregs == NPARM_REGS (SImode))
6182 current_function_args_info.call_cookie
6183 |= (CALL_COOKIE_INT_REG (0, 1)
6184 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6185 else
6186 current_function_args_info.call_cookie
6187 |= CALL_COOKIE_STACKSEQ (pushregs);
6189 current_function_pretend_args_size += 8 * n_intregs;
6191 if (TARGET_SHCOMPACT)
6192 return const0_rtx;
6195 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
6197 error ("__builtin_saveregs not supported by this subtarget");
6198 return const0_rtx;
6201 if (TARGET_SHMEDIA)
6202 n_floatregs = 0;
6204 /* Allocate block of memory for the regs. */
6205 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6206 Or can assign_stack_local accept a 0 SIZE argument? */
6207 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
6209 if (TARGET_SHMEDIA)
6210 regbuf = gen_rtx_MEM (BLKmode,
6211 gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
6212 else if (n_floatregs & 1)
6214 rtx addr;
6216 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6217 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
6218 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6219 regbuf = change_address (regbuf, BLKmode, addr);
6221 else
6222 regbuf = assign_stack_local (BLKmode, bufsize, 0);
6223 alias_set = get_varargs_alias_set ();
6224 set_mem_alias_set (regbuf, alias_set);
6226 /* Save int args.
6227 This is optimized to only save the regs that are necessary. Explicitly
6228 named args need not be saved. */
6229 if (n_intregs > 0)
6230 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
6231 adjust_address (regbuf, BLKmode,
6232 n_floatregs * UNITS_PER_WORD),
6233 n_intregs);
6235 if (TARGET_SHMEDIA)
6236 /* Return the address of the regbuf. */
6237 return XEXP (regbuf, 0);
6239 /* Save float args.
6240 This is optimized to only save the regs that are necessary. Explicitly
6241 named args need not be saved.
6242 We explicitly build a pointer to the buffer because it halves the insn
6243 count when not optimizing (otherwise the pointer is built for each reg
6244 saved).
6245 We emit the moves in reverse order so that we can use predecrement. */
6247 fpregs = gen_reg_rtx (Pmode);
6248 emit_move_insn (fpregs, XEXP (regbuf, 0));
6249 emit_insn (gen_addsi3 (fpregs, fpregs,
6250 GEN_INT (n_floatregs * UNITS_PER_WORD)));
6251 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
6253 rtx mem;
6254 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
6256 emit_insn (gen_addsi3 (fpregs, fpregs,
6257 GEN_INT (-2 * UNITS_PER_WORD)));
6258 mem = gen_rtx_MEM (DFmode, fpregs);
6259 set_mem_alias_set (mem, alias_set);
6260 emit_move_insn (mem,
6261 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
6263 regno = first_floatreg;
6264 if (regno & 1)
6266 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6267 mem = gen_rtx_MEM (SFmode, fpregs);
6268 set_mem_alias_set (mem, alias_set);
6269 emit_move_insn (mem,
6270 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
6271 - (TARGET_LITTLE_ENDIAN != 0)));
6274 else
6275 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
6277 rtx mem;
6279 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6280 mem = gen_rtx_MEM (SFmode, fpregs);
6281 set_mem_alias_set (mem, alias_set);
6282 emit_move_insn (mem,
6283 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
6286 /* Return the address of the regbuf. */
6287 return XEXP (regbuf, 0);
6290 /* Define the `__builtin_va_list' type for the ABI. */
6292 static tree
6293 sh_build_builtin_va_list (void)
6295 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6296 tree record;
6298 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
6299 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6300 return ptr_type_node;
6302 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6304 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
6305 ptr_type_node);
6306 f_next_o_limit = build_decl (FIELD_DECL,
6307 get_identifier ("__va_next_o_limit"),
6308 ptr_type_node);
6309 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
6310 ptr_type_node);
6311 f_next_fp_limit = build_decl (FIELD_DECL,
6312 get_identifier ("__va_next_fp_limit"),
6313 ptr_type_node);
6314 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
6315 ptr_type_node);
6317 DECL_FIELD_CONTEXT (f_next_o) = record;
6318 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
6319 DECL_FIELD_CONTEXT (f_next_fp) = record;
6320 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
6321 DECL_FIELD_CONTEXT (f_next_stack) = record;
6323 TYPE_FIELDS (record) = f_next_o;
6324 TREE_CHAIN (f_next_o) = f_next_o_limit;
6325 TREE_CHAIN (f_next_o_limit) = f_next_fp;
6326 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
6327 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
6329 layout_type (record);
6331 return record;
6334 /* Implement `va_start' for varargs and stdarg. */
6336 void
6337 sh_va_start (tree valist, rtx nextarg)
6339 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6340 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6341 tree t, u;
6342 int nfp, nint;
6344 if (TARGET_SH5)
6346 expand_builtin_saveregs ();
6347 std_expand_builtin_va_start (valist, nextarg);
6348 return;
6351 if ((! TARGET_SH2E && ! TARGET_SH4)
6352 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6354 std_expand_builtin_va_start (valist, nextarg);
6355 return;
6358 f_next_o = TYPE_FIELDS (va_list_type_node);
6359 f_next_o_limit = TREE_CHAIN (f_next_o);
6360 f_next_fp = TREE_CHAIN (f_next_o_limit);
6361 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6362 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6364 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6365 NULL_TREE);
6366 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6367 valist, f_next_o_limit, NULL_TREE);
6368 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
6369 NULL_TREE);
6370 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6371 valist, f_next_fp_limit, NULL_TREE);
6372 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6373 valist, f_next_stack, NULL_TREE);
6375 /* Call __builtin_saveregs. */
6376 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
6377 t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
6378 TREE_SIDE_EFFECTS (t) = 1;
6379 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6381 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
6382 if (nfp < 8)
6383 nfp = 8 - nfp;
6384 else
6385 nfp = 0;
6386 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6387 build_int_cst (NULL_TREE, UNITS_PER_WORD * nfp)));
6388 t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
6389 TREE_SIDE_EFFECTS (t) = 1;
6390 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6392 t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
6393 TREE_SIDE_EFFECTS (t) = 1;
6394 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6396 nint = current_function_args_info.arg_count[SH_ARG_INT];
6397 if (nint < 4)
6398 nint = 4 - nint;
6399 else
6400 nint = 0;
6401 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6402 build_int_cst (NULL_TREE, UNITS_PER_WORD * nint)));
6403 t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
6404 TREE_SIDE_EFFECTS (t) = 1;
6405 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6407 u = make_tree (ptr_type_node, nextarg);
6408 t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
6409 TREE_SIDE_EFFECTS (t) = 1;
6410 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6413 /* Implement `va_arg'. */
6415 static tree
6416 sh_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p,
6417 tree *post_p ATTRIBUTE_UNUSED)
6419 HOST_WIDE_INT size, rsize;
6420 tree tmp, pptr_type_node;
6421 tree addr, lab_over = NULL, result = NULL;
6422 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
6424 if (pass_by_ref)
6425 type = build_pointer_type (type);
6427 size = int_size_in_bytes (type);
6428 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6429 pptr_type_node = build_pointer_type (ptr_type_node);
6431 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
6432 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
6434 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6435 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6436 int pass_as_float;
6437 tree lab_false;
6439 f_next_o = TYPE_FIELDS (va_list_type_node);
6440 f_next_o_limit = TREE_CHAIN (f_next_o);
6441 f_next_fp = TREE_CHAIN (f_next_o_limit);
6442 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6443 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6445 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6446 NULL_TREE);
6447 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6448 valist, f_next_o_limit, NULL_TREE);
6449 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
6450 valist, f_next_fp, NULL_TREE);
6451 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6452 valist, f_next_fp_limit, NULL_TREE);
6453 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6454 valist, f_next_stack, NULL_TREE);
6456 /* Structures with a single member with a distinct mode are passed
6457 like their member. This is relevant if the latter has a REAL_TYPE
6458 or COMPLEX_TYPE type. */
6459 if (TREE_CODE (type) == RECORD_TYPE
6460 && TYPE_FIELDS (type)
6461 && TREE_CODE (TYPE_FIELDS (type)) == FIELD_DECL
6462 && (TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == REAL_TYPE
6463 || TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == COMPLEX_TYPE)
6464 && TREE_CHAIN (TYPE_FIELDS (type)) == NULL_TREE)
6465 type = TREE_TYPE (TYPE_FIELDS (type));
6467 if (TARGET_SH4)
6469 pass_as_float = ((TREE_CODE (type) == REAL_TYPE && size <= 8)
6470 || (TREE_CODE (type) == COMPLEX_TYPE
6471 && TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
6472 && size <= 16));
6474 else
6476 pass_as_float = (TREE_CODE (type) == REAL_TYPE && size == 4);
6479 addr = create_tmp_var (pptr_type_node, NULL);
6480 lab_false = create_artificial_label ();
6481 lab_over = create_artificial_label ();
6483 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
6485 if (pass_as_float)
6487 int first_floatreg
6488 = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6489 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6491 tmp = build (GE_EXPR, boolean_type_node, next_fp, next_fp_limit);
6492 tmp = build (COND_EXPR, void_type_node, tmp,
6493 build (GOTO_EXPR, void_type_node, lab_false),
6494 NULL);
6495 gimplify_and_add (tmp, pre_p);
6497 if (TYPE_ALIGN (type) > BITS_PER_WORD
6498 || (((TREE_CODE (type) == REAL_TYPE && size == 8) || size == 16)
6499 && (n_floatregs & 1)))
6501 tmp = fold_convert (ptr_type_node, size_int (UNITS_PER_WORD));
6502 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp, tmp);
6503 tmp = build (PLUS_EXPR, ptr_type_node, next_fp, tmp);
6504 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp, tmp);
6505 gimplify_and_add (tmp, pre_p);
6508 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
6509 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6510 gimplify_and_add (tmp, pre_p);
6512 #ifdef FUNCTION_ARG_SCmode_WART
6513 if (TYPE_MODE (type) == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
6515 tree subtype = TREE_TYPE (type);
6516 tree real, imag;
6518 imag = std_gimplify_va_arg_expr (valist, subtype, pre_p, NULL);
6519 imag = get_initialized_tmp_var (imag, pre_p, NULL);
6521 real = std_gimplify_va_arg_expr (valist, subtype, pre_p, NULL);
6522 real = get_initialized_tmp_var (real, pre_p, NULL);
6524 result = build (COMPLEX_EXPR, type, real, imag);
6525 result = get_initialized_tmp_var (result, pre_p, NULL);
6527 #endif /* FUNCTION_ARG_SCmode_WART */
6529 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6530 gimplify_and_add (tmp, pre_p);
6532 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6533 gimplify_and_add (tmp, pre_p);
6535 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6536 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6537 gimplify_and_add (tmp, pre_p);
6539 else
6541 tmp = fold_convert (ptr_type_node, size_int (rsize));
6542 tmp = build (PLUS_EXPR, ptr_type_node, next_o, tmp);
6543 tmp = build (GT_EXPR, boolean_type_node, tmp, next_o_limit);
6544 tmp = build (COND_EXPR, void_type_node, tmp,
6545 build (GOTO_EXPR, void_type_node, lab_false),
6546 NULL);
6547 gimplify_and_add (tmp, pre_p);
6549 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
6550 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6551 gimplify_and_add (tmp, pre_p);
6553 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6554 gimplify_and_add (tmp, pre_p);
6556 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6557 gimplify_and_add (tmp, pre_p);
6559 if (size > 4 && ! TARGET_SH4)
6561 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
6562 gimplify_and_add (tmp, pre_p);
6565 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6566 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6567 gimplify_and_add (tmp, pre_p);
6570 if (!result)
6572 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6573 gimplify_and_add (tmp, pre_p);
6577 /* ??? In va-sh.h, there had been code to make values larger than
6578 size 8 indirect. This does not match the FUNCTION_ARG macros. */
6580 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
6581 if (result)
6583 tmp = build (MODIFY_EXPR, void_type_node, result, tmp);
6584 gimplify_and_add (tmp, pre_p);
6586 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6587 gimplify_and_add (tmp, pre_p);
6589 else
6590 result = tmp;
6592 if (pass_by_ref)
6593 result = build_fold_indirect_ref (result);
6595 return result;
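/* Decide whether arguments should be promoted according to prototypes;
   the Hitachi / Renesas conventions disable this.  */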
6598 bool
6599 sh_promote_prototypes (tree type)
6601 if (TARGET_HITACHI)
6602 return 0;
6603 if (! type)
6604 return 1;
6605 return ! sh_attr_renesas_p (type);
6608 /* Whether an argument must be passed by reference. On SHcompact, we
6609 pretend arguments wider than 32 bits that would have been passed in
6610 registers are passed by reference, so that an SHmedia trampoline
6611 loads them into the full 64-bit registers. */
6613 static int
6614 shcompact_byref (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6615 tree type, bool named)
6617 unsigned HOST_WIDE_INT size;
6619 if (type)
6620 size = int_size_in_bytes (type);
6621 else
6622 size = GET_MODE_SIZE (mode);
6624 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
6625 && (!named
6626 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
6627 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
6628 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
6629 && size > 4
6630 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
6631 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
6632 return size;
6633 else
6634 return 0;
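/* Return true if an argument of the given MODE and TYPE must be passed
   by reference rather than by value.  */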
6637 static bool
6638 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6639 tree type, bool named)
6641 if (targetm.calls.must_pass_in_stack (mode, type))
6642 return true;
6644 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
6645 wants to know about pass-by-reference semantics for incoming
6646 arguments. */
6647 if (! cum)
6648 return false;
6650 if (TARGET_SHCOMPACT)
6652 cum->byref = shcompact_byref (cum, mode, type, named);
6653 return cum->byref != 0;
6656 return false;
6659 static bool
6660 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6661 tree type, bool named ATTRIBUTE_UNUSED)
6663 /* ??? How can it possibly be correct to return true only on the
6664 caller side of the equation? Is there someplace else in the
6665 sh backend that's magically producing the copies? */
6666 return (cum->outgoing
6667 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
6668 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
6671 /* Define where to put the arguments to a function.
6672 Value is zero to push the argument on the stack,
6673 or a hard register in which to store the argument.
6675 MODE is the argument's machine mode.
6676 TYPE is the data type of the argument (as a tree).
6677 This is null for libcalls where that information may
6678 not be available.
6679 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6680 the preceding args and about the function being called.
6681 NAMED is nonzero if this argument is a named parameter
6682 (otherwise it is an extra parameter matching an ellipsis).
6684 On SH the first args are normally in registers
6685 and the rest are pushed. Any arg that starts within the first
6686 NPARM_REGS words is at least partially passed in a register unless
6687 its data type forbids. */
6691 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
6692 tree type, int named)
6694 if (! TARGET_SH5 && mode == VOIDmode)
6695 return GEN_INT (ca->renesas_abi ? 1 : 0);
6697 if (! TARGET_SH5
6698 && PASS_IN_REG_P (*ca, mode, type)
6699 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
6701 int regno;
6703 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
6704 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
6706 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
6707 gen_rtx_REG (SFmode,
6708 BASE_ARG_REG (mode)
6709 + (ROUND_REG (*ca, mode) ^ 1)),
6710 const0_rtx);
6711 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
6712 gen_rtx_REG (SFmode,
6713 BASE_ARG_REG (mode)
6714 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
6715 GEN_INT (4));
6716 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
6719 /* If the alignment of a DF value causes an SF register to be
6720 skipped, we will use that skipped register for the next SF
6721 value. */
6722 if ((TARGET_HITACHI || ca->renesas_abi)
6723 && ca->free_single_fp_reg
6724 && mode == SFmode)
6725 return gen_rtx_REG (mode, ca->free_single_fp_reg);
6727 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
6728 ^ (mode == SFmode && TARGET_SH4
6729 && TARGET_LITTLE_ENDIAN != 0
6730 && ! TARGET_HITACHI && ! ca->renesas_abi);
6731 return gen_rtx_REG (mode, regno);
6735 if (TARGET_SH5)
6737 if (mode == VOIDmode && TARGET_SHCOMPACT)
6738 return GEN_INT (ca->call_cookie);
6740 /* The following test assumes unnamed arguments are promoted to
6741 DFmode. */
6742 if (mode == SFmode && ca->free_single_fp_reg)
6743 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
6745 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
6746 && (named || ! ca->prototype_p)
6747 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
6749 if (! ca->prototype_p && TARGET_SHMEDIA)
6750 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
6752 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
6753 FIRST_FP_PARM_REG
6754 + ca->arg_count[(int) SH_ARG_FLOAT]);
6757 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
6758 && (! TARGET_SHCOMPACT
6759 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
6760 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
6761 type, named))))
6763 return gen_rtx_REG (mode, (FIRST_PARM_REG
6764 + ca->arg_count[(int) SH_ARG_INT]));
6767 return 0;
6770 return 0;
6773 /* Update the data in CUM to advance over an argument
6774 of mode MODE and data type TYPE.
6775 (TYPE is null for libcalls where that information may not be
6776 available.) */
6778 void
6779 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
6780 tree type, int named)
6782 if (ca->force_mem)
6783 ca->force_mem = 0;
6784 else if (TARGET_SH5)
6786 tree type2 = (ca->byref && type
6787 ? TREE_TYPE (type)
6788 : type);
6789 enum machine_mode mode2 = (ca->byref && type
6790 ? TYPE_MODE (type2)
6791 : mode);
6792 int dwords = ((ca->byref
6793 ? ca->byref
6794 : mode2 == BLKmode
6795 ? int_size_in_bytes (type2)
6796 : GET_MODE_SIZE (mode2)) + 7) / 8;
6797 int numregs = MIN (dwords, NPARM_REGS (SImode)
6798 - ca->arg_count[(int) SH_ARG_INT]);
6800 if (numregs)
6802 ca->arg_count[(int) SH_ARG_INT] += numregs;
6803 if (TARGET_SHCOMPACT
6804 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
6806 ca->call_cookie
6807 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6808 - numregs, 1);
6809 /* N.B. We want this also for outgoing. */
6810 ca->stack_regs += numregs;
6812 else if (ca->byref)
6814 if (! ca->outgoing)
6815 ca->stack_regs += numregs;
6816 ca->byref_regs += numregs;
6817 ca->byref = 0;
6819 ca->call_cookie
6820 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6821 - numregs, 2);
6822 while (--numregs);
6823 ca->call_cookie
6824 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6825 - 1, 1);
6827 else if (dwords > numregs)
6829 int pushregs = numregs;
6831 if (TARGET_SHCOMPACT)
6832 ca->stack_regs += numregs;
6833 while (pushregs < NPARM_REGS (SImode) - 1
6834 && (CALL_COOKIE_INT_REG_GET
6835 (ca->call_cookie,
6836 NPARM_REGS (SImode) - pushregs)
6837 == 1))
6839 ca->call_cookie
6840 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6841 - pushregs, 1);
6842 pushregs++;
6844 if (numregs == NPARM_REGS (SImode))
6845 ca->call_cookie
6846 |= CALL_COOKIE_INT_REG (0, 1)
6847 | CALL_COOKIE_STACKSEQ (numregs - 1);
6848 else
6849 ca->call_cookie
6850 |= CALL_COOKIE_STACKSEQ (numregs);
6853 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
6854 && (named || ! ca->prototype_p))
6856 if (mode2 == SFmode && ca->free_single_fp_reg)
6857 ca->free_single_fp_reg = 0;
6858 else if (ca->arg_count[(int) SH_ARG_FLOAT]
6859 < NPARM_REGS (SFmode))
6861 int numfpregs
6862 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
6863 NPARM_REGS (SFmode)
6864 - ca->arg_count[(int) SH_ARG_FLOAT]);
6866 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
6868 if (TARGET_SHCOMPACT && ! ca->prototype_p)
6870 if (ca->outgoing && numregs > 0)
6873 ca->call_cookie
6874 |= (CALL_COOKIE_INT_REG
6875 (ca->arg_count[(int) SH_ARG_INT]
6876 - numregs + ((numfpregs - 2) / 2),
6877 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
6878 - numfpregs) / 2));
6880 while (numfpregs -= 2);
6882 else if (mode2 == SFmode && (named)
6883 && (ca->arg_count[(int) SH_ARG_FLOAT]
6884 < NPARM_REGS (SFmode)))
6885 ca->free_single_fp_reg
6886 = FIRST_FP_PARM_REG - numfpregs
6887 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
6890 return;
6893 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
6895 /* Note that we've used the skipped register. */
6896 if (mode == SFmode && ca->free_single_fp_reg)
6898 ca->free_single_fp_reg = 0;
6899 return;
6901 /* When we have a DF after an SF, there's an SF register that gets
6902 skipped in order to align the DF value. We note this skipped
6903 register, because the next SF value will use it, and not the
6904 SF that follows the DF. */
6905 if (mode == DFmode
6906 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
6908 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
6909 + BASE_ARG_REG (mode));
6913 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
6914 || PASS_IN_REG_P (*ca, mode, type))
6915 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
6916 = (ROUND_REG (*ca, mode)
6917 + (mode == BLKmode
6918 ? ROUND_ADVANCE (int_size_in_bytes (type))
6919 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
6922 /* The Renesas calling convention doesn't quite fit into this scheme since
6923 the address is passed like an invisible argument, but one that is always
6924 passed in memory. */
6925 static rtx
6926 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
6928 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
6929 return 0;
6930 return gen_rtx_REG (Pmode, 2);
6933 /* Worker function for TARGET_RETURN_IN_MEMORY. */
6935 static bool
6936 sh_return_in_memory (tree type, tree fndecl)
6938 if (TARGET_SH5)
6940 if (TYPE_MODE (type) == BLKmode)
6941 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
6942 else
6943 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
6945 else
6947 return (TYPE_MODE (type) == BLKmode
6948 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
6949 && TREE_CODE (type) == RECORD_TYPE));
6953 /* We actually emit the code in sh_expand_prologue. We used to use
6954 a static variable to flag that we need to emit this code, but that
6955 doesn't work when inlining, where functions are deferred and then emitted
6956 later. Fortunately, we already have two flags that are part of struct
6957 function that tell if a function uses varargs or stdarg. */
6958 static void
6959 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
6960 enum machine_mode mode,
6961 tree type,
6962 int *pretend_arg_size,
6963 int second_time ATTRIBUTE_UNUSED)
6965 if (! current_function_stdarg)
6966 abort ();
6967 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6969 int named_parm_regs, anon_parm_regs;
6971 named_parm_regs = (ROUND_REG (*ca, mode)
6972 + (mode == BLKmode
6973 ? ROUND_ADVANCE (int_size_in_bytes (type))
6974 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
6975 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
6976 if (anon_parm_regs > 0)
6977 *pretend_arg_size = anon_parm_regs * 4;
6981 static bool
6982 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
6984 return TARGET_SH5;
6987 static bool
6988 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
6990 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
6994 /* Define the offset between two registers, one to be eliminated, and
6995 the other its replacement, at the start of a routine. */
6998 initial_elimination_offset (int from, int to)
7000 int regs_saved;
7001 int regs_saved_rounding = 0;
7002 int total_saved_regs_space;
7003 int total_auto_space;
7004 int save_flags = target_flags;
7005 int copy_flags;
7006 HARD_REG_SET live_regs_mask;
7008 shmedia_space_reserved_for_target_registers = false;
7009 regs_saved = calc_live_regs (&live_regs_mask);
7010 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
7012 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
7014 shmedia_space_reserved_for_target_registers = true;
7015 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
7018 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
7019 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7020 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
7022 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
7023 copy_flags = target_flags;
7024 target_flags = save_flags;
7026 total_saved_regs_space = regs_saved + regs_saved_rounding;
7028 if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
7029 return total_saved_regs_space + total_auto_space
7030 + current_function_args_info.byref_regs * 8;
7032 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7033 return total_saved_regs_space + total_auto_space
7034 + current_function_args_info.byref_regs * 8;
7036 /* Initial gap between fp and sp is 0. */
7037 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7038 return 0;
7040 if (from == RETURN_ADDRESS_POINTER_REGNUM
7041 && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM))
7043 if (TARGET_SH5)
7045 int n = total_saved_regs_space;
7046 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7047 save_schedule schedule;
7048 save_entry *entry;
7050 n += total_auto_space;
7052 /* If it wasn't saved, there's not much we can do. */
7053 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7054 return n;
7056 target_flags = copy_flags;
7058 sh5_schedule_saves (&live_regs_mask, &schedule, n);
7059 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7060 if (entry->reg == pr_reg)
7062 target_flags = save_flags;
7063 return entry->offset;
7065 abort ();
7067 else
7068 return total_auto_space;
7071 abort ();
7074 /* Handle machine specific pragmas to be semi-compatible with Renesas
7075 compiler. */
7077 void
7078 sh_pr_interrupt (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7080 pragma_interrupt = 1;
7083 void
7084 sh_pr_trapa (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7086 pragma_interrupt = pragma_trapa = 1;
7089 void
7090 sh_pr_nosave_low_regs (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7092 pragma_nosave_low_regs = 1;
7095 /* Generate the 'interrupt_handler' attribute for decls.  */
7097 static void
7098 sh_insert_attributes (tree node, tree *attributes)
7100 if (! pragma_interrupt
7101 || TREE_CODE (node) != FUNCTION_DECL)
7102 return;
7104 /* We are only interested in declarations. */
7105 if (!DECL_P (node))
7106 return;
7108 /* Add an 'interrupt_handler' attribute. */
7109 * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
7111 return;
7114 /* Supported attributes:
7116 interrupt_handler -- specifies this function is an interrupt handler.
7118 sp_switch -- specifies an alternate stack for an interrupt handler
7119 to run on.
7121 trap_exit -- use a trapa to exit an interrupt function instead of
7122 an rte instruction.
7124 renesas -- use Renesas calling/layout conventions (functions and
7125 structures).
7129 const struct attribute_spec sh_attribute_table[] =
7131 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
7132 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7133 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
7134 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
7135 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
7136 #ifdef SYMBIAN
7137 /* Symbian support adds two new attributes:
7138 dllexport - for exporting a function/variable that will live in a dll
7139 dllimport - for importing a function/variable from a dll
7141 Microsoft allows multiple declspecs in one __declspec, separating
7142 them with spaces. We do NOT support this. Instead, use __declspec
7143 multiple times. */
7144 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7145 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7146 #endif
7147 { NULL, 0, 0, false, false, false, NULL }
7150 /* Handle an "interrupt_handler" attribute; arguments as in
7151 struct attribute_spec.handler. */
7152 static tree
7153 sh_handle_interrupt_handler_attribute (tree *node, tree name,
7154 tree args ATTRIBUTE_UNUSED,
7155 int flags ATTRIBUTE_UNUSED,
7156 bool *no_add_attrs)
7158 if (TREE_CODE (*node) != FUNCTION_DECL)
7160 warning ("%qs attribute only applies to functions",
7161 IDENTIFIER_POINTER (name));
7162 *no_add_attrs = true;
7164 else if (TARGET_SHCOMPACT)
7166 error ("attribute interrupt_handler is not compatible with -m5-compact");
7167 *no_add_attrs = true;
7170 return NULL_TREE;
7173 /* Handle an "sp_switch" attribute; arguments as in
7174 struct attribute_spec.handler. */
7175 static tree
7176 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
7177 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7179 if (TREE_CODE (*node) != FUNCTION_DECL)
7181 warning ("%qs attribute only applies to functions",
7182 IDENTIFIER_POINTER (name));
7183 *no_add_attrs = true;
7185 else if (!pragma_interrupt)
7187 /* The sp_switch attribute only has meaning for interrupt functions. */
7188 warning ("%qs attribute only applies to interrupt functions",
7189 IDENTIFIER_POINTER (name));
7190 *no_add_attrs = true;
7192 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
7194 /* The argument must be a constant string. */
7195 warning ("%qs attribute argument not a string constant",
7196 IDENTIFIER_POINTER (name));
7197 *no_add_attrs = true;
7199 else
7201 const char *s = ggc_strdup (TREE_STRING_POINTER (TREE_VALUE (args)));
7202 sp_switch = gen_rtx_SYMBOL_REF (VOIDmode, s);
7205 return NULL_TREE;
7208 /* Handle a "trap_exit" attribute; arguments as in
7209 struct attribute_spec.handler. */
7210 static tree
7211 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
7212 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7214 if (TREE_CODE (*node) != FUNCTION_DECL)
7216 warning ("%qs attribute only applies to functions",
7217 IDENTIFIER_POINTER (name));
7218 *no_add_attrs = true;
7220 else if (!pragma_interrupt)
7222 /* The trap_exit attribute only has meaning for interrupt functions. */
7223 warning ("%qs attribute only applies to interrupt functions",
7224 IDENTIFIER_POINTER (name));
7225 *no_add_attrs = true;
7227 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
7229 /* The argument must be a constant integer. */
7230 warning ("%qs attribute argument not an integer constant",
7231 IDENTIFIER_POINTER (name));
7232 *no_add_attrs = true;
7234 else
7236 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
7239 return NULL_TREE;
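/* Handle a "renesas" attribute; arguments as in
   struct attribute_spec.handler.  No extra checking is needed here.  */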
7242 static tree
7243 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
7244 tree name ATTRIBUTE_UNUSED,
7245 tree args ATTRIBUTE_UNUSED,
7246 int flags ATTRIBUTE_UNUSED,
7247 bool *no_add_attrs ATTRIBUTE_UNUSED)
7249 return NULL_TREE;
7252 /* True if __attribute__((renesas)) or -mrenesas. */
7254 sh_attr_renesas_p (tree td)
7256 if (TARGET_HITACHI)
7257 return 1;
7258 if (td == 0)
7259 return 0;
7260 if (DECL_P (td))
7261 td = TREE_TYPE (td);
7262 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
7263 != NULL_TREE);
7266 /* True if __attribute__((renesas)) or -mrenesas, for the current
7267 function. */
7269 sh_cfun_attr_renesas_p (void)
7271 return sh_attr_renesas_p (current_function_decl);
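/* True if the current function carries the "interrupt_handler"
   attribute.  */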
7275 sh_cfun_interrupt_handler_p (void)
7277 return (lookup_attribute ("interrupt_handler",
7278 DECL_ATTRIBUTES (current_function_decl))
7279 != NULL_TREE);
7282 /* ??? target_switches in toplev.c is static, hence we have to duplicate it. */
7283 static const struct
7285 const char *const name;
7286 const int value;
7287 const char *const description;
7289 sh_target_switches[] = TARGET_SWITCHES;
7290 #define target_switches sh_target_switches
7292 /* Like default_pch_valid_p, but take flag_mask into account. */
7293 const char *
7294 sh_pch_valid_p (const void *data_p, size_t len)
7296 const char *data = (const char *)data_p;
7297 const char *flag_that_differs = NULL;
7298 size_t i;
7299 int old_flags;
7300 int flag_mask
7301 = (SH1_BIT | SH2_BIT | SH3_BIT | SH_E_BIT | HARD_SH4_BIT | FPU_SINGLE_BIT
7302 | SH4_BIT | HITACHI_BIT | LITTLE_ENDIAN_BIT);
7304 /* -fpic and -fpie also usually make a PCH invalid. */
7305 if (data[0] != flag_pic)
7306 return _("created and used with different settings of -fpic");
7307 if (data[1] != flag_pie)
7308 return _("created and used with different settings of -fpie");
7309 data += 2;
7311 /* Check target_flags. */
7312 memcpy (&old_flags, data, sizeof (target_flags));
7313 if (((old_flags ^ target_flags) & flag_mask) != 0)
7315 for (i = 0; i < ARRAY_SIZE (target_switches); i++)
7317 int bits;
7319 bits = target_switches[i].value;
7320 if (bits < 0)
7321 bits = -bits;
7322 bits &= flag_mask;
7323 if ((target_flags & bits) != (old_flags & bits))
7325 flag_that_differs = target_switches[i].name;
7326 goto make_message;
7329 abort ();
7331 data += sizeof (target_flags);
7332 len -= sizeof (target_flags);
7334 /* Check string options. */
7335 #ifdef TARGET_OPTIONS
7336 for (i = 0; i < ARRAY_SIZE (target_options); i++)
7338 const char *str = *target_options[i].variable;
7339 size_t l;
7340 if (! str)
7341 str = "";
7342 l = strlen (str) + 1;
7343 if (len < l || memcmp (data, str, l) != 0)
7345 flag_that_differs = target_options[i].prefix;
7346 goto make_message;
7348 data += l;
7349 len -= l;
7351 #endif
7353 return NULL;
7355 make_message:
7357 char *r;
7358 asprintf (&r, _("created and used with differing settings of '-m%s'"),
7359 flag_that_differs);
7360 if (r == NULL)
7361 return _("out of memory");
7362 return r;
7366 /* Predicates used by the templates. */
7368 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
7369 Used only in general_movsrc_operand. */
7372 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7374 switch (REGNO (op))
7376 case PR_REG:
7377 case MACL_REG:
7378 case MACH_REG:
7379 return 1;
7381 return 0;
7384 /* Returns 1 if OP can be the source of a simple move operation.
7385 Same as general_operand, but a LABEL_REF is valid, PRE_DEC is
7386 invalid as are subregs of system registers. */
7389 general_movsrc_operand (rtx op, enum machine_mode mode)
7391 if (GET_CODE (op) == MEM)
7393 rtx inside = XEXP (op, 0);
7394 if (GET_CODE (inside) == CONST)
7395 inside = XEXP (inside, 0);
7397 if (GET_CODE (inside) == LABEL_REF)
7398 return 1;
7400 if (GET_CODE (inside) == PLUS
7401 && GET_CODE (XEXP (inside, 0)) == LABEL_REF
7402 && GET_CODE (XEXP (inside, 1)) == CONST_INT)
7403 return 1;
7405 /* Only post inc allowed. */
7406 if (GET_CODE (inside) == PRE_DEC)
7407 return 0;
7410 if ((mode == QImode || mode == HImode)
7411 && (GET_CODE (op) == SUBREG
7412 && GET_CODE (XEXP (op, 0)) == REG
7413 && system_reg_operand (XEXP (op, 0), mode)))
7414 return 0;
7416 return general_operand (op, mode);
7419 /* Returns 1 if OP can be a destination of a move.
7420 Same as general_operand, but no preinc allowed. */
7423 general_movdst_operand (rtx op, enum machine_mode mode)
7425 /* Only pre dec allowed. */
7426 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC)
7427 return 0;
7429 return general_operand (op, mode);
7432 /* Returns 1 if OP is a normal arithmetic register. */
7435 arith_reg_operand (rtx op, enum machine_mode mode)
7437 if (register_operand (op, mode))
7439 int regno;
7441 if (GET_CODE (op) == REG)
7442 regno = REGNO (op);
7443 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7444 regno = REGNO (SUBREG_REG (op));
7445 else
7446 return 1;
7448 return (regno != T_REG && regno != PR_REG
7449 && ! TARGET_REGISTER_P (regno)
7450 && (regno != FPUL_REG || TARGET_SH4)
7451 && regno != MACH_REG && regno != MACL_REG);
7453 return 0;
7456 /* Like above, but for DImode destinations: forbid paradoxical DImode subregs,
7457 because this would lead to missing sign extensions when truncating from
7458 DImode to SImode. */
7460 arith_reg_dest (rtx op, enum machine_mode mode)
7462 if (mode == DImode && GET_CODE (op) == SUBREG
7463 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8)
7464 return 0;
7465 return arith_reg_operand (op, mode);
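/* Returns 1 if OP is a general purpose register holding an integer value
   narrower than a word; only meaningful after reload has completed.  */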
7469 int_gpr_dest (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7471 enum machine_mode op_mode = GET_MODE (op);
7473 if (GET_MODE_CLASS (op_mode) != MODE_INT
7474 || GET_MODE_SIZE (op_mode) >= UNITS_PER_WORD)
7475 return 0;
7476 if (! reload_completed)
7477 return 0;
7478 return true_regnum (op) <= LAST_GENERAL_REG;
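/* Returns 1 if OP is a floating point register or a pseudo register.  */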
7482 fp_arith_reg_operand (rtx op, enum machine_mode mode)
7484 if (register_operand (op, mode))
7486 int regno;
7488 if (GET_CODE (op) == REG)
7489 regno = REGNO (op);
7490 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7491 regno = REGNO (SUBREG_REG (op));
7492 else
7493 return 1;
7495 return (regno >= FIRST_PSEUDO_REGISTER
7496 || FP_REGISTER_P (regno));
7498 return 0;
7501 /* Returns 1 if OP is a valid source operand for an arithmetic insn. */
7504 arith_operand (rtx op, enum machine_mode mode)
7506 if (arith_reg_operand (op, mode))
7507 return 1;
7509 if (TARGET_SHMEDIA)
7511 /* FIXME: We should be checking whether the CONST_INT fits in a
7512 CONST_OK_FOR_I16 here, but this causes reload_cse to crash when
7513 attempting to transform a sequence of two 64-bit sets of the
7514 same register from literal constants into a set and an add,
7515 when the difference is too wide for an add. */
7516 if (GET_CODE (op) == CONST_INT
7517 || EXTRA_CONSTRAINT_C16 (op))
7518 return 1;
7519 else
7520 return 0;
7522 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I08 (INTVAL (op)))
7523 return 1;
7525 return 0;
7528 /* Returns 1 if OP is a valid source operand for a compare insn. */
7531 arith_reg_or_0_operand (rtx op, enum machine_mode mode)
7533 if (arith_reg_operand (op, mode))
7534 return 1;
7536 if (EXTRA_CONSTRAINT_Z (op))
7537 return 1;
7539 return 0;
7542 /* Return 1 if OP is a valid source operand for an SHmedia operation
7543 that takes either a register or a 6-bit immediate. */
7546 shmedia_6bit_operand (rtx op, enum machine_mode mode)
7548 return (arith_reg_operand (op, mode)
7549 || (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I06 (INTVAL (op))));
7552 /* Returns 1 if OP is a valid source operand for a logical operation. */
7555 logical_operand (rtx op, enum machine_mode mode)
7557 if (arith_reg_operand (op, mode))
7558 return 1;
7560 if (TARGET_SHMEDIA)
7562 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I10 (INTVAL (op)))
7563 return 1;
7564 else
7565 return 0;
7567 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_K08 (INTVAL (op)))
7568 return 1;
7570 return 0;
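/* Returns 1 if OP is a valid operand for an AND: a logical operand, or,
   on SHmedia, a DImode constant usable via mshflo.l / mshflhi.l.  */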
7574 and_operand (rtx op, enum machine_mode mode)
7576 if (logical_operand (op, mode))
7577 return 1;
7579 /* Check mshflo.l / mshflhi.l opportunities. */
7580 if (TARGET_SHMEDIA
7581 && mode == DImode
7582 && GET_CODE (op) == CONST_INT
7583 && CONST_OK_FOR_J16 (INTVAL (op)))
7584 return 1;
7586 return 0;
7589 /* Nonzero if OP is a floating point value with value 0.0. */
7592 fp_zero_operand (rtx op)
7594 REAL_VALUE_TYPE r;
7596 if (GET_MODE (op) != SFmode)
7597 return 0;
7599 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7600 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
7603 /* Nonzero if OP is a floating point value with value 1.0. */
7606 fp_one_operand (rtx op)
7608 REAL_VALUE_TYPE r;
7610 if (GET_MODE (op) != SFmode)
7611 return 0;
7613 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7614 return REAL_VALUES_EQUAL (r, dconst1);
7617 /* For -m4 and -m4-single-only, mode switching is used. If we are
7618 compiling without -mfmovd, movsf_ie isn't taken into account for
7619 mode switching. We could check in machine_dependent_reorg for
7620 cases where we know we are in single precision mode, but there is
7621 no interface to find that out during reload, so we must avoid
7622 choosing an fldi alternative during reload and thus failing to
7623 allocate a scratch register for the constant loading. */
7625 fldi_ok (void)
7627 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
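/* Returns 1 if OP is a MEM or, on SH4, a CONST_DOUBLE.  */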
7631 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7633 enum rtx_code code = GET_CODE (op);
7634 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
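/* Returns 1 if OP is FPSCR in PSImode, or, before reload, a PSImode
   pseudo.  */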
7638 fpscr_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7640 return (GET_CODE (op) == REG
7641 && (REGNO (op) == FPSCR_REG
7642 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
7643 && !(reload_in_progress || reload_completed)))
7644 && GET_MODE (op) == PSImode);
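/* Returns 1 if OP is FPUL (or a pseudo) in MODE; on SHmedia, any
   floating point arithmetic register is accepted instead.  */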
7648 fpul_operand (rtx op, enum machine_mode mode)
7650 if (TARGET_SHMEDIA)
7651 return fp_arith_reg_operand (op, mode);
7653 return (GET_CODE (op) == REG
7654 && (REGNO (op) == FPUL_REG || REGNO (op) >= FIRST_PSEUDO_REGISTER)
7655 && GET_MODE (op) == mode);
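/* Returns 1 if OP is a SYMBOL_REF.  */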
7659 symbol_ref_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7661 return (GET_CODE (op) == SYMBOL_REF);
7664 /* Return the TLS type for TLS symbols, 0 otherwise. */
7666 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7668 if (GET_CODE (op) != SYMBOL_REF)
7669 return 0;
7670 return SYMBOL_REF_TLS_MODEL (op);
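/* Returns 1 if OP is a commutative floating point operator (PLUS or MULT)
   of the given MODE.  */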
7674 commutative_float_operator (rtx op, enum machine_mode mode)
7676 if (GET_MODE (op) != mode)
7677 return 0;
7678 switch (GET_CODE (op))
7680 case PLUS:
7681 case MULT:
7682 return 1;
7683 default:
7684 break;
7686 return 0;
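/* Returns 1 if OP is a non-commutative floating point operator (MINUS or
   DIV) of the given MODE.  */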
7690 noncommutative_float_operator (rtx op, enum machine_mode mode)
7692 if (GET_MODE (op) != mode)
7693 return 0;
7694 switch (GET_CODE (op))
7696 case MINUS:
7697 case DIV:
7698 return 1;
7699 default:
7700 break;
7702 return 0;
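/* Returns 1 if OP is a unary floating point operator (ABS, NEG or SQRT)
   of the given MODE.  */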
7706 unary_float_operator (rtx op, enum machine_mode mode)
7708 if (GET_MODE (op) != mode)
7709 return 0;
7710 switch (GET_CODE (op))
7712 case ABS:
7713 case NEG:
7714 case SQRT:
7715 return 1;
7716 default:
7717 break;
7719 return 0;
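/* Returns 1 if OP is a binary floating point operator (PLUS, MINUS, MULT
   or DIV) of the given MODE.  */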
7723 binary_float_operator (rtx op, enum machine_mode mode)
7725 if (GET_MODE (op) != mode)
7726 return 0;
7727 switch (GET_CODE (op))
7729 case PLUS:
7730 case MINUS:
7731 case MULT:
7732 case DIV:
7733 return 1;
7734 default:
7735 break;
7737 return 0;
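/* Returns 1 if OP is a binary logical operator (IOR, AND or XOR) of the
   given MODE.  */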
7741 binary_logical_operator (rtx op, enum machine_mode mode)
7743 if (GET_MODE (op) != mode)
7744 return 0;
7745 switch (GET_CODE (op))
7747 case IOR:
7748 case AND:
7749 case XOR:
7750 return 1;
7751 default:
7752 break;
7754 return 0;
7758 equality_comparison_operator (rtx op, enum machine_mode mode)
7760 return ((mode == VOIDmode || GET_MODE (op) == mode)
7761 && (GET_CODE (op) == EQ || GET_CODE (op) == NE));
7765 greater_comparison_operator (rtx op, enum machine_mode mode)
7767   if (mode != VOIDmode && GET_MODE (op) != mode)
7768 return 0;
7769 switch (GET_CODE (op))
7771 case GT:
7772 case GE:
7773 case GTU:
7774 case GEU:
7775 return 1;
7776 default:
7777 return 0;
7782 less_comparison_operator (rtx op, enum machine_mode mode)
7784   if (mode != VOIDmode && GET_MODE (op) != mode)
7785 return 0;
7786 switch (GET_CODE (op))
7788 case LT:
7789 case LE:
7790 case LTU:
7791 case LEU:
7792 return 1;
7793 default:
7794 return 0;
7798 /* Accept pseudos and branch target registers. */
7800 target_reg_operand (rtx op, enum machine_mode mode)
7802 if (mode != DImode
7803 || GET_MODE (op) != DImode)
7804 return 0;
7806 if (GET_CODE (op) == SUBREG)
7807 op = XEXP (op, 0);
7809 if (GET_CODE (op) != REG)
7810 return 0;
7812 /* We must protect ourselves from matching pseudos that are virtual
7813      registers, because they will eventually be replaced with hardware
7814 registers that aren't branch-target registers. */
7815 if (REGNO (op) > LAST_VIRTUAL_REGISTER
7816 || TARGET_REGISTER_P (REGNO (op)))
7817 return 1;
7819 return 0;
7822 /* Same as target_reg_operand, except that label_refs and symbol_refs
7823 are accepted before reload. */
7825 target_operand (rtx op, enum machine_mode mode)
7827 if (mode != DImode)
7828 return 0;
7830 if ((GET_MODE (op) == DImode || GET_MODE (op) == VOIDmode)
7831 && EXTRA_CONSTRAINT_Csy (op))
7832 return ! reload_completed;
7834 return target_reg_operand (op, mode);
7838 mextr_bit_offset (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7840 HOST_WIDE_INT i;
7842 if (GET_CODE (op) != CONST_INT)
7843 return 0;
7844 i = INTVAL (op);
7845 return i >= 1 * 8 && i <= 7 * 8 && (i & 7) == 0;
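/* Illustrative sketch, not part of the original source: the test above
   only accepts byte-aligned offsets from 8 to 56 bits, i.e. the byte
   lanes the SHmedia mextr patterns can select.  */
#if 0
static void
mextr_bit_offset_example (void)
{
  /* 16 is byte aligned and in range, so it is accepted;
     12 is rejected because it is not a multiple of 8.  */
  if (! mextr_bit_offset (GEN_INT (16), DImode)
      || mextr_bit_offset (GEN_INT (12), DImode))
    abort ();
}
#endif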
7849 extend_reg_operand (rtx op, enum machine_mode mode)
7851 return (GET_CODE (op) == TRUNCATE
7852 ? arith_operand
7853 : arith_reg_operand) (op, mode);
7857 trunc_hi_operand (rtx op, enum machine_mode mode)
7859 enum machine_mode op_mode = GET_MODE (op);
7861 if (op_mode != SImode && op_mode != DImode
7862 && op_mode != V4HImode && op_mode != V2SImode)
7863 return 0;
7864 return extend_reg_operand (op, mode);
7868 extend_reg_or_0_operand (rtx op, enum machine_mode mode)
7870 return (GET_CODE (op) == TRUNCATE
7871 ? arith_operand
7872 : arith_reg_or_0_operand) (op, mode);
7876 general_extend_operand (rtx op, enum machine_mode mode)
7878 return (GET_CODE (op) == TRUNCATE
7879 ? arith_operand
7880 : nonimmediate_operand) (op, mode);
7884 inqhi_operand (rtx op, enum machine_mode mode)
7886 if (GET_CODE (op) != TRUNCATE || mode != GET_MODE (op))
7887 return 0;
7888 op = XEXP (op, 0);
7889 /* Can't use true_regnum here because copy_cost wants to know about
7890 SECONDARY_INPUT_RELOAD_CLASS. */
7891 return GET_CODE (op) == REG && FP_REGISTER_P (REGNO (op));
7895 sh_rep_vec (rtx v, enum machine_mode mode)
7897 int i;
7898 rtx x, y;
7900 if ((GET_CODE (v) != CONST_VECTOR && GET_CODE (v) != PARALLEL)
7901 || (GET_MODE (v) != mode && mode != VOIDmode))
7902 return 0;
7903 i = XVECLEN (v, 0) - 2;
7904 x = XVECEXP (v, 0, i + 1);
7905 if (GET_MODE_UNIT_SIZE (mode) == 1)
7907 y = XVECEXP (v, 0, i);
7908 for (i -= 2; i >= 0; i -= 2)
7909 if (! rtx_equal_p (XVECEXP (v, 0, i + 1), x)
7910 || ! rtx_equal_p (XVECEXP (v, 0, i), y))
7911 return 0;
7913 else
7914 for (; i >= 0; i--)
7915 if (XVECEXP (v, 0, i) != x)
7916 return 0;
7917 return 1;
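/* Illustrative sketch, not part of the original source: sh_rep_vec
   recognizes constant vectors whose elements all repeat, e.g. a V4HImode
   vector of four identical values (the value 5 below is arbitrary).  */
#if 0
static void
sh_rep_vec_example (void)
{
  rtx v = gen_rtx_CONST_VECTOR (V4HImode,
				gen_rtvec (4, GEN_INT (5), GEN_INT (5),
					   GEN_INT (5), GEN_INT (5)));
  if (! sh_rep_vec (v, V4HImode))
    abort ();
}
#endif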
7920 /* Determine if V is a constant vector matching MODE with only one element
7921 that is not a sign extension. Two byte-sized elements count as one. */
7923 sh_1el_vec (rtx v, enum machine_mode mode)
7925 int unit_size;
7926 int i, last, least, sign_ix;
7927 rtx sign;
7929 if (GET_CODE (v) != CONST_VECTOR
7930 || (GET_MODE (v) != mode && mode != VOIDmode))
7931 return 0;
7932 /* Determine numbers of last and of least significant elements. */
7933 last = XVECLEN (v, 0) - 1;
7934 least = TARGET_LITTLE_ENDIAN ? 0 : last;
7935 if (GET_CODE (XVECEXP (v, 0, least)) != CONST_INT)
7936 return 0;
7937 sign_ix = least;
7938 if (GET_MODE_UNIT_SIZE (mode) == 1)
7939 sign_ix = TARGET_LITTLE_ENDIAN ? 1 : last - 1;
7940 if (GET_CODE (XVECEXP (v, 0, sign_ix)) != CONST_INT)
7941 return 0;
7942 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (v));
7943 sign = (INTVAL (XVECEXP (v, 0, sign_ix)) >> (unit_size * BITS_PER_UNIT - 1)
7944 ? constm1_rtx : const0_rtx);
7945 i = XVECLEN (v, 0) - 1;
7947 if (i != least && i != sign_ix && XVECEXP (v, 0, i) != sign)
7948 return 0;
7949 while (--i);
7950 return 1;
7954 sh_const_vec (rtx v, enum machine_mode mode)
7956 int i;
7958 if (GET_CODE (v) != CONST_VECTOR
7959 || (GET_MODE (v) != mode && mode != VOIDmode))
7960 return 0;
7961 i = XVECLEN (v, 0) - 1;
7962 for (; i >= 0; i--)
7963 if (GET_CODE (XVECEXP (v, 0, i)) != CONST_INT)
7964 return 0;
7965 return 1;
7968 /* Return the destination address of a branch. */
7970 static int
7971 branch_dest (rtx branch)
7973 rtx dest = SET_SRC (PATTERN (branch));
7974 int dest_uid;
7976 if (GET_CODE (dest) == IF_THEN_ELSE)
7977 dest = XEXP (dest, 1);
7978 dest = XEXP (dest, 0);
7979 dest_uid = INSN_UID (dest);
7980 return INSN_ADDRESSES (dest_uid);
7983 /* Return nonzero if REG is not used after INSN.
7984 We assume REG is a reload reg, and therefore does
7985 not live past labels. It may live past calls or jumps though. */
7987 reg_unused_after (rtx reg, rtx insn)
7989 enum rtx_code code;
7990 rtx set;
7992 /* If the reg is set by this instruction, then it is safe for our
7993 case. Disregard the case where this is a store to memory, since
7994 we are checking a register used in the store address. */
7995 set = single_set (insn);
7996 if (set && GET_CODE (SET_DEST (set)) != MEM
7997 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7998 return 1;
8000 while ((insn = NEXT_INSN (insn)))
8002 rtx set;
8003 if (!INSN_P (insn))
8004 continue;
8006 code = GET_CODE (insn);
8008 #if 0
8009 /* If this is a label that existed before reload, then the register
8010      is dead here.  However, if this is a label added by reorg, then
8011 the register may still be live here. We can't tell the difference,
8012 so we just ignore labels completely. */
8013 if (code == CODE_LABEL)
8014 return 1;
8015 /* else */
8016 #endif
8018 if (code == JUMP_INSN)
8019 return 0;
8021 /* If this is a sequence, we must handle them all at once.
8022 We could have for instance a call that sets the target register,
8023 and an insn in a delay slot that uses the register. In this case,
8024 we must return 0. */
8025 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
8027 int i;
8028 int retval = 0;
8030 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
8032 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
8033 rtx set = single_set (this_insn);
8035 if (GET_CODE (this_insn) == CALL_INSN)
8036 code = CALL_INSN;
8037 else if (GET_CODE (this_insn) == JUMP_INSN)
8039 if (INSN_ANNULLED_BRANCH_P (this_insn))
8040 return 0;
8041 code = JUMP_INSN;
8044 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8045 return 0;
8046 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8048 if (GET_CODE (SET_DEST (set)) != MEM)
8049 retval = 1;
8050 else
8051 return 0;
8053 if (set == 0
8054 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
8055 return 0;
8057 if (retval == 1)
8058 return 1;
8059 else if (code == JUMP_INSN)
8060 return 0;
8063 set = single_set (insn);
8064 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8065 return 0;
8066 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8067 return GET_CODE (SET_DEST (set)) != MEM;
8068 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
8069 return 0;
8071 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
8072 return 1;
8074 return 1;
8077 #include "ggc.h"
8079 static GTY(()) rtx fpscr_rtx;
8081 get_fpscr_rtx (void)
8083 if (! fpscr_rtx)
8085 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
8086 REG_USERVAR_P (fpscr_rtx) = 1;
8087 mark_user_reg (fpscr_rtx);
8089 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
8090 mark_user_reg (fpscr_rtx);
8091 return fpscr_rtx;
8094 void
8095 emit_sf_insn (rtx pat)
8097 emit_insn (pat);
8100 void
8101 emit_df_insn (rtx pat)
8103 emit_insn (pat);
8106 void
8107 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8109 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8112 void
8113 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8115 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
8116 get_fpscr_rtx ()));
8119 void
8120 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8122 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8125 void
8126 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8128 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
8129 get_fpscr_rtx ()));
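/* Illustrative sketch, not part of the original source: these wrappers
   are meant to be called from sh.md expanders with the generator of an
   insn pattern whose final operand is the fpscr use appended by
   get_fpscr_rtx; the pattern name addsf3_i below is only an assumed
   example.  */
#if 0
  expand_sf_binop (&gen_addsf3_i, operands);
#endif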
8132 /* ??? gcc does flow analysis strictly after common subexpression
8133 elimination. As a result, common subexpression elimination fails
8134 when there are some intervening statements setting the same register.
8135 If we did nothing about this, this would hurt the precision switching
8136 for SH4 badly. There is some cse after reload, but it is unable to
8137 undo the extra register pressure from the unused instructions, and
8138 it cannot remove auto-increment loads.
8140 A C code example that shows this flow/cse weakness for (at least) SH
8141 and sparc (as of gcc ss-970706) is this:
8143 double
8144 f(double a)
8146 double d;
8147 d = 0.1;
8148 a += d;
8149 d = 1.1;
8150 d = 0.1;
8151 a *= d;
8152 return a;
8155 So we add another pass before common subexpression elimination, to
8156 remove assignments that are dead due to a following assignment in the
8157 same basic block. */
8159 static void
8160 mark_use (rtx x, rtx *reg_set_block)
8162 enum rtx_code code;
8164 if (! x)
8165 return;
8166 code = GET_CODE (x);
8167 switch (code)
8169 case REG:
8171 int regno = REGNO (x);
8172 int nregs = (regno < FIRST_PSEUDO_REGISTER
8173 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
8174 : 1);
8177 reg_set_block[regno + nregs - 1] = 0;
8179 while (--nregs);
8180 break;
8182 case SET:
8184 rtx dest = SET_DEST (x);
8186 if (GET_CODE (dest) == SUBREG)
8187 dest = SUBREG_REG (dest);
8188 if (GET_CODE (dest) != REG)
8189 mark_use (dest, reg_set_block);
8190 mark_use (SET_SRC (x), reg_set_block);
8191 break;
8193 case CLOBBER:
8194 break;
8195 default:
8197 const char *fmt = GET_RTX_FORMAT (code);
8198 int i, j;
8199 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8201 if (fmt[i] == 'e')
8202 mark_use (XEXP (x, i), reg_set_block);
8203 else if (fmt[i] == 'E')
8204 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8205 mark_use (XVECEXP (x, i, j), reg_set_block);
8207 break;
8212 static rtx get_free_reg (HARD_REG_SET);
8214 /* This function returns a register to use to load the address to load
8215 the fpscr from. Currently it always returns r1 or r7, but when we are
8216 able to use pseudo registers after combine, or have a better mechanism
8217 for choosing a register, it should be done here. */
8218 /* REGS_LIVE is the liveness information for the point for which we
8219 need this allocation. In some bare-bones exit blocks, r1 is live at the
8220 start. We can even have all of r0..r3 being live:
8221 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8222    The insn before which the new insns are placed will clobber the register
8223 we return. If a basic block consists only of setting the return value
8224 register to a pseudo and using that register, the return value is not
8225    live before or after this block, yet we'll insert our insns right in
8226 the middle. */
8228 static rtx
8229 get_free_reg (HARD_REG_SET regs_live)
8231 if (! TEST_HARD_REG_BIT (regs_live, 1))
8232 return gen_rtx_REG (Pmode, 1);
8234 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8235 there shouldn't be anything but a jump before the function end. */
8236 if (! TEST_HARD_REG_BIT (regs_live, 7))
8237 return gen_rtx_REG (Pmode, 7);
8239 abort ();
8242 /* This function will set the fpscr from memory.
8243 MODE is the mode we are setting it to. */
8244 void
8245 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8247 enum attr_fp_mode fp_mode = mode;
8248 rtx addr_reg = get_free_reg (regs_live);
8250 if (fp_mode == (enum attr_fp_mode) ACTUAL_NORMAL_MODE (FP_MODE))
8251 emit_insn (gen_fpu_switch1 (addr_reg));
8252 else
8253 emit_insn (gen_fpu_switch0 (addr_reg));
8256 /* Is the given character a logical line separator for the assembler? */
8257 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8258 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
8259 #endif
8262 sh_insn_length_adjustment (rtx insn)
8264 /* Instructions with unfilled delay slots take up an extra two bytes for
8265 the nop in the delay slot. */
8266 if (((GET_CODE (insn) == INSN
8267 && GET_CODE (PATTERN (insn)) != USE
8268 && GET_CODE (PATTERN (insn)) != CLOBBER)
8269 || GET_CODE (insn) == CALL_INSN
8270 || (GET_CODE (insn) == JUMP_INSN
8271 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8272 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8273 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8274 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8275 return 2;
8277 /* SH2e has a bug that prevents the use of annulled branches, so if
8278 the delay slot is not filled, we'll have to put a NOP in it. */
8279 if (sh_cpu == CPU_SH2E
8280 && GET_CODE (insn) == JUMP_INSN
8281 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8282 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8283 && get_attr_type (insn) == TYPE_CBRANCH
8284 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8285 return 2;
8287 /* sh-dsp parallel processing insn take four bytes instead of two. */
8289 if (GET_CODE (insn) == INSN)
8291 int sum = 0;
8292 rtx body = PATTERN (insn);
8293 const char *template;
8294 char c;
8295 int maybe_label = 1;
8297 if (GET_CODE (body) == ASM_INPUT)
8298 template = XSTR (body, 0);
8299 else if (asm_noperands (body) >= 0)
8300 template
8301 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
8302 else
8303 return 0;
8306 int ppi_adjust = 0;
8309 c = *template++;
8310 while (c == ' ' || c == '\t');
8311 /* all sh-dsp parallel-processing insns start with p.
8312 The only non-ppi sh insn starting with p is pref.
8313 The only ppi starting with pr is prnd. */
8314 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
8315 ppi_adjust = 2;
8316 	  /* The repeat pseudo-insn expands to three insns, a total of
8317 six bytes in size. */
8318 else if ((c == 'r' || c == 'R')
8319 && ! strncasecmp ("epeat", template, 5))
8320 ppi_adjust = 4;
8321 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
8323 /* If this is a label, it is obviously not a ppi insn. */
8324 if (c == ':' && maybe_label)
8326 ppi_adjust = 0;
8327 break;
8329 else if (c == '\'' || c == '"')
8330 maybe_label = 0;
8331 c = *template++;
8333 sum += ppi_adjust;
8334 maybe_label = c != ':';
8336 while (c);
8337 return sum;
8339 return 0;
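/* Illustrative sketch, not part of the original source: for an inline
   asm body like the one below, the scan above adds 2 bytes for the
   parallel-processing insn starting with 'p' and nothing for the plain
   mov (the operand syntax is only meant as an example).  */
#if 0
  asm ("padd   x0,y0,a0\n\t"
       "mov    r1,r2");
#endif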
8342 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8343 isn't protected by a PIC unspec. */
8345 nonpic_symbol_mentioned_p (rtx x)
8347 register const char *fmt;
8348 register int i;
8350 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8351 || GET_CODE (x) == PC)
8352 return 1;
8354 /* We don't want to look into the possible MEM location of a
8355 CONST_DOUBLE, since we're not going to use it, in general. */
8356 if (GET_CODE (x) == CONST_DOUBLE)
8357 return 0;
8359 if (GET_CODE (x) == UNSPEC
8360 && (XINT (x, 1) == UNSPEC_PIC
8361 || XINT (x, 1) == UNSPEC_GOT
8362 || XINT (x, 1) == UNSPEC_GOTOFF
8363 || XINT (x, 1) == UNSPEC_GOTPLT
8364 || XINT (x, 1) == UNSPEC_GOTTPOFF
8365 || XINT (x, 1) == UNSPEC_DTPOFF
8366 || XINT (x, 1) == UNSPEC_PLT))
8367 return 0;
8369 fmt = GET_RTX_FORMAT (GET_CODE (x));
8370 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8372 if (fmt[i] == 'E')
8374 register int j;
8376 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8377 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8378 return 1;
8380 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8381 return 1;
8384 return 0;
8387 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8388 @GOTOFF in `reg'. */
8390 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8391 rtx reg)
8393 if (tls_symbolic_operand (orig, Pmode))
8394 return orig;
8396 if (GET_CODE (orig) == LABEL_REF
8397 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8399 if (reg == 0)
8400 reg = gen_reg_rtx (Pmode);
8402 emit_insn (gen_symGOTOFF2reg (reg, orig));
8403 return reg;
8405 else if (GET_CODE (orig) == SYMBOL_REF)
8407 if (reg == 0)
8408 reg = gen_reg_rtx (Pmode);
8410 emit_insn (gen_symGOT2reg (reg, orig));
8411 return reg;
8413 return orig;
8416 /* Mark the use of a constant in the literal table. If the constant
8417 has multiple labels, make it unique. */
8418 static rtx
8419 mark_constant_pool_use (rtx x)
8421 rtx insn, lab, pattern;
8423 if (x == NULL)
8424 return x;
8426 switch (GET_CODE (x))
8428 case LABEL_REF:
8429 x = XEXP (x, 0);
8430 case CODE_LABEL:
8431 break;
8432 default:
8433 return x;
8436 /* Get the first label in the list of labels for the same constant
8437      and delete the other labels in the list.  */
8438 lab = x;
8439 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8441 if (GET_CODE (insn) != CODE_LABEL
8442 || LABEL_REFS (insn) != NEXT_INSN (insn))
8443 break;
8444 lab = insn;
8447 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8448 INSN_DELETED_P (insn) = 1;
8450 /* Mark constants in a window. */
8451 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8453 if (GET_CODE (insn) != INSN)
8454 continue;
8456 pattern = PATTERN (insn);
8457 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8458 continue;
8460 switch (XINT (pattern, 1))
8462 case UNSPECV_CONST2:
8463 case UNSPECV_CONST4:
8464 case UNSPECV_CONST8:
8465 XVECEXP (pattern, 0, 1) = const1_rtx;
8466 break;
8467 case UNSPECV_WINDOW_END:
8468 if (XVECEXP (pattern, 0, 0) == x)
8469 return lab;
8470 break;
8471 case UNSPECV_CONST_END:
8472 return lab;
8473 default:
8474 break;
8478 return lab;
8481 /* Return true if it's possible to redirect BRANCH1 to the destination
8482 of an unconditional jump BRANCH2. We only want to do this if the
8483 resulting branch will have a short displacement. */
8485 sh_can_redirect_branch (rtx branch1, rtx branch2)
8487 if (flag_expensive_optimizations && simplejump_p (branch2))
8489 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8490 rtx insn;
8491 int distance;
8493 for (distance = 0, insn = NEXT_INSN (branch1);
8494 insn && distance < 256;
8495 insn = PREV_INSN (insn))
8497 if (insn == dest)
8498 return 1;
8499 else
8500 distance += get_attr_length (insn);
8502 for (distance = 0, insn = NEXT_INSN (branch1);
8503 insn && distance < 256;
8504 insn = NEXT_INSN (insn))
8506 if (insn == dest)
8507 return 1;
8508 else
8509 distance += get_attr_length (insn);
8512 return 0;
8515 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8517 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8518 unsigned int new_reg)
8520 /* Interrupt functions can only use registers that have already been
8521 saved by the prologue, even if they would normally be
8522 call-clobbered. */
8524 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
8525 return 0;
8527 return 1;
8530 /* Function to update the integer COST
8531 based on the relationship between INSN that is dependent on
8532 DEP_INSN through the dependence LINK. The default is to make no
8533 adjustment to COST. This can be used for example to specify to
8534 the scheduler that an output- or anti-dependence does not incur
8535 the same cost as a data-dependence. The return value should be
8536 the new value for COST. */
8537 static int
8538 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8540 rtx reg, use_pat;
8542 if (TARGET_SHMEDIA)
8544 /* On SHmedia, if the dependence is an anti-dependence or
8545 output-dependence, there is no cost. */
8546 if (REG_NOTE_KIND (link) != 0)
8547 cost = 0;
8549 if (get_attr_is_mac_media (insn)
8550 && get_attr_is_mac_media (dep_insn))
8551 cost = 1;
8553 else if (REG_NOTE_KIND (link) == 0)
8555 enum attr_type dep_type, type;
8557 if (recog_memoized (insn) < 0
8558 || recog_memoized (dep_insn) < 0)
8559 return cost;
8561 dep_type = get_attr_type (dep_insn);
8562 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
8563 cost--;
8564 if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
8565 && (type = get_attr_type (insn)) != TYPE_CALL
8566 && type != TYPE_SFUNC)
8567 cost--;
8569 /* The only input for a call that is timing-critical is the
8570 function's address. */
8571       if (GET_CODE (insn) == CALL_INSN
8573 rtx call = PATTERN (insn);
8575 if (GET_CODE (call) == PARALLEL)
8576 	    call = XVECEXP (call, 0, 0);
8577 if (GET_CODE (call) == SET)
8578 call = SET_SRC (call);
8579 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
8580 && ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn))
8581 cost = 0;
8583       /* Likewise, the most timing-critical input for an sfunc call
8584 is the function address. However, sfuncs typically start
8585 using their arguments pretty quickly.
8586 Assume a four cycle delay before they are needed. */
8587 /* All sfunc calls are parallels with at least four components.
8588 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8589 else if (GET_CODE (PATTERN (insn)) == PARALLEL
8590 && XVECLEN (PATTERN (insn), 0) >= 4
8591 && (reg = sfunc_uses_reg (insn)))
8593 if (! reg_set_p (reg, dep_insn))
8594 cost -= 4;
8596 /* When the preceding instruction loads the shift amount of
8597 the following SHAD/SHLD, the latency of the load is increased
8598 by 1 cycle. */
8599 else if (TARGET_SH4
8600 && get_attr_type (insn) == TYPE_DYN_SHIFT
8601 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
8602 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
8603 XEXP (SET_SRC (single_set (insn)),
8604 1)))
8605 cost++;
8606 /* When an LS group instruction with a latency of less than
8607 3 cycles is followed by a double-precision floating-point
8608 instruction, FIPR, or FTRV, the latency of the first
8609 instruction is increased to 3 cycles. */
8610 else if (cost < 3
8611 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
8612 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
8613 cost = 3;
8614 /* The lsw register of a double-precision computation is ready one
8615 cycle earlier. */
8616 else if (reload_completed
8617 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
8618 && (use_pat = single_set (insn))
8619 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
8620 SET_SRC (use_pat)))
8621 cost -= 1;
8623 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
8624 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
8625 cost -= 1;
8627 /* An anti-dependence penalty of two applies if the first insn is a double
8628 precision fadd / fsub / fmul. */
8629 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8630 && recog_memoized (dep_insn) >= 0
8631 && get_attr_type (dep_insn) == TYPE_DFP_ARITH
8632 /* A lot of alleged anti-flow dependences are fake,
8633 so check this one is real. */
8634 && flow_dependent_p (dep_insn, insn))
8635 cost = 2;
8638 return cost;
8641 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
8642 if DEP_INSN is anti-flow dependent on INSN. */
8643 static int
8644 flow_dependent_p (rtx insn, rtx dep_insn)
8646 rtx tmp = PATTERN (insn);
8648 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
8649 return tmp == NULL_RTX;
8652 /* A helper function for flow_dependent_p called through note_stores. */
8653 static void
8654 flow_dependent_p_1 (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
8656 rtx * pinsn = (rtx *) data;
8658 if (*pinsn && reg_referenced_p (x, *pinsn))
8659 *pinsn = NULL_RTX;
8662 /* For use by ALLOCATE_INITIAL_VALUE. Note that sh.md contains some
8663 'special function' patterns (type sfunc) that clobber pr, but that
8664 do not look like function calls to leaf_function_p. Hence we must
8665 do this extra check. */
8667 sh_pr_n_sets (void)
8669 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
8672 /* This function returns "2" to indicate dual issue for the SH4
8673 processor. To be used by the DFA pipeline description. */
8674 static int
8675 sh_issue_rate (void)
8677 if (TARGET_SUPERSCALAR)
8678 return 2;
8679 else
8680 return 1;
8683 /* Functions for ready queue reordering for sched1. */
8685 /* Get weight for mode for a set x. */
8686 static short
8687 find_set_regmode_weight (rtx x, enum machine_mode mode)
8689 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
8690 return 1;
8691 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
8693 if (GET_CODE (SET_DEST (x)) == REG)
8695 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
8696 return 1;
8697 else
8698 return 0;
8700 return 1;
8702 return 0;
8705 /* Get regmode weight for insn. */
8706 static short
8707 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
8709 short reg_weight = 0;
8710 rtx x;
8712 /* Increment weight for each register born here. */
8713 x = PATTERN (insn);
8714 reg_weight += find_set_regmode_weight (x, mode);
8715 if (GET_CODE (x) == PARALLEL)
8717 int j;
8718 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
8720 x = XVECEXP (PATTERN (insn), 0, j);
8721 reg_weight += find_set_regmode_weight (x, mode);
8724 /* Decrement weight for each register that dies here. */
8725 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
8727 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
8729 rtx note = XEXP (x, 0);
8730 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
8731 reg_weight--;
8734 return reg_weight;
8737 /* Calculate regmode weights for all insns of a basic block. */
8738 static void
8739 find_regmode_weight (int b, enum machine_mode mode)
8741 rtx insn, next_tail, head, tail;
8743 get_block_head_tail (b, &head, &tail);
8744 next_tail = NEXT_INSN (tail);
8746 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
8748 /* Handle register life information. */
8749 if (!INSN_P (insn))
8750 continue;
8752 if (mode == SFmode)
8753 INSN_REGMODE_WEIGHT (insn, mode) =
8754 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
8755 else if (mode == SImode)
8756 INSN_REGMODE_WEIGHT (insn, mode) =
8757 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
8761 /* Comparison function for ready queue sorting. */
8762 static int
8763 rank_for_reorder (const void *x, const void *y)
8765 rtx tmp = *(const rtx *) y;
8766 rtx tmp2 = *(const rtx *) x;
8768   /* The insn in a schedule group should be issued first.  */
8769 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
8770 return SCHED_GROUP_P (tmp2) ? 1 : -1;
8772   /* If insns are equally good, sort by INSN_LUID (original insn order); this
8773 minimizes instruction movement, thus minimizing sched's effect on
8774 register pressure. */
8775 return INSN_LUID (tmp) - INSN_LUID (tmp2);
8778 /* Resort the array A in which only element at index N may be out of order. */
8779 static void
8780 swap_reorder (rtx *a, int n)
8782 rtx insn = a[n - 1];
8783 int i = n - 2;
8785 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
8787 a[i + 1] = a[i];
8788 i -= 1;
8790 a[i + 1] = insn;
8793 #define SCHED_REORDER(READY, N_READY) \
8794 do \
8796 if ((N_READY) == 2) \
8797 swap_reorder (READY, N_READY); \
8798 else if ((N_READY) > 2) \
8799 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
8801 while (0)
8803 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
8804 macro. */
8805 static void
8806 ready_reorder (rtx *ready, int nready)
8808 SCHED_REORDER (ready, nready);
8811 /* Calculate regmode weights for all insns of all basic block. */
8812 static void
8813 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
8814 int verbose ATTRIBUTE_UNUSED,
8815 int old_max_uid)
8817 basic_block b;
8819 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
8820 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
8822 FOR_EACH_BB_REVERSE (b)
8824 find_regmode_weight (b->index, SImode);
8825 find_regmode_weight (b->index, SFmode);
8828 CURR_REGMODE_PRESSURE (SImode) = 0;
8829 CURR_REGMODE_PRESSURE (SFmode) = 0;
8833 /* Cleanup. */
8834 static void
8835 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
8836 int verbose ATTRIBUTE_UNUSED)
8838 if (regmode_weight[0])
8840 free (regmode_weight[0]);
8841 regmode_weight[0] = NULL;
8843 if (regmode_weight[1])
8845 free (regmode_weight[1]);
8846 regmode_weight[1] = NULL;
8850 /* Cache the can_issue_more so that we can return it from reorder2. Also,
8851 keep count of register pressures on SImode and SFmode. */
8852 static int
8853 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
8854 int sched_verbose ATTRIBUTE_UNUSED,
8855 rtx insn,
8856 int can_issue_more)
8858 if (GET_CODE (PATTERN (insn)) != USE
8859 && GET_CODE (PATTERN (insn)) != CLOBBER)
8860 cached_can_issue_more = can_issue_more - 1;
8861 else
8862 cached_can_issue_more = can_issue_more;
8864 if (reload_completed)
8865 return cached_can_issue_more;
8867 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
8868 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
8870 return cached_can_issue_more;
8873 static void
8874 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
8875 int verbose ATTRIBUTE_UNUSED,
8876 int veclen ATTRIBUTE_UNUSED)
8878 CURR_REGMODE_PRESSURE (SImode) = 0;
8879 CURR_REGMODE_PRESSURE (SFmode) = 0;
8882 /* Some magic numbers. */
8883 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8884 functions that already have high pressure on r0. */
8885 #define R0_MAX_LIFE_REGIONS 2
8886 #define R0_MAX_LIVE_LENGTH 12
8887 /* Register Pressure thresholds for SImode and SFmode registers. */
8888 #define SIMODE_MAX_WEIGHT 5
8889 #define SFMODE_MAX_WEIGHT 10
8891 /* Return true if the pressure is high for MODE. */
8892 static short
8893 high_pressure (enum machine_mode mode)
8895   /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8896 functions that already have high pressure on r0. */
8897 if ((REG_N_SETS (0) - REG_N_DEATHS (0)) >= R0_MAX_LIFE_REGIONS
8898 && REG_LIVE_LENGTH (0) >= R0_MAX_LIVE_LENGTH)
8899 return 1;
8901 if (mode == SFmode)
8902 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
8903 else
8904 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
8907 /* Reorder ready queue if register pressure is high. */
8908 static int
8909 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
8910 int sched_verbose ATTRIBUTE_UNUSED,
8911 rtx *ready,
8912 int *n_readyp,
8913 int clock_var ATTRIBUTE_UNUSED)
8915 if (reload_completed)
8916 return sh_issue_rate ();
8918 if (high_pressure (SFmode) || high_pressure (SImode))
8920 ready_reorder (ready, *n_readyp);
8923 return sh_issue_rate ();
8926 /* Skip cycles if the current register pressure is high. */
8927 static int
8928 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
8929 int sched_verbose ATTRIBUTE_UNUSED,
8930 rtx *ready ATTRIBUTE_UNUSED,
8931 int *n_readyp ATTRIBUTE_UNUSED,
8932 int clock_var ATTRIBUTE_UNUSED)
8934 if (reload_completed)
8935 return cached_can_issue_more;
8937 if (high_pressure(SFmode) || high_pressure (SImode))
8938 skip_cycles = 1;
8940 return cached_can_issue_more;
8943 /* Skip cycles without sorting the ready queue.  This will move insns from
8944    Q->R.  If this is the last cycle we are skipping, allow sorting of the
8945    ready queue by sh_reorder.  */
8947 /* Generally, skipping this many cycles is sufficient for all insns to move
8948 from Q -> R. */
8949 #define MAX_SKIPS 8
8951 static int
8952 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
8953 int sched_verbose ATTRIBUTE_UNUSED,
8954 rtx insn ATTRIBUTE_UNUSED,
8955 int last_clock_var,
8956 int clock_var,
8957 int *sort_p)
8959 if (reload_completed)
8960 return 0;
8962 if (skip_cycles)
8964 if ((clock_var - last_clock_var) < MAX_SKIPS)
8966 *sort_p = 0;
8967 return 1;
8969 /* If this is the last cycle we are skipping, allow reordering of R. */
8970 if ((clock_var - last_clock_var) == MAX_SKIPS)
8972 *sort_p = 1;
8973 return 1;
8977 skip_cycles = 0;
8979 return 0;
8982 /* SHmedia requires registers for branches, so we can't generate new
8983 branches past reload. */
8984 static bool
8985 sh_cannot_modify_jumps_p (void)
8987 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
8990 static int
8991 sh_target_reg_class (void)
8993 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
8996 static bool
8997 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
8999 return (shmedia_space_reserved_for_target_registers
9000 && (! after_prologue_epilogue_gen || TARGET_SAVE_ALL_TARGET_REGS));
9003 static bool
9004 sh_ms_bitfield_layout_p (tree record_type ATTRIBUTE_UNUSED)
9006 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
9010 On the SH1..SH4, the trampoline looks like
9011 2 0002 D202 mov.l l2,r2
9012 1 0000 D301 mov.l l1,r3
9013 3 0004 422B jmp @r2
9014 4 0006 0009 nop
9015 5 0008 00000000 l1: .long area
9016 6 000c 00000000 l2: .long function
9018 SH5 (compact) uses r1 instead of r3 for the static chain. */
9021 /* Emit RTL insns to initialize the variable parts of a trampoline.
9022 FNADDR is an RTX for the address of the function's pure code.
9023 CXT is an RTX for the static chain value for the function. */
9025 void
9026 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
9028 if (TARGET_SHMEDIA64)
9030 rtx tramp_templ;
9031 int fixed_len;
9033 rtx movi1 = GEN_INT (0xcc000010);
9034 rtx shori1 = GEN_INT (0xc8000010);
9035 rtx src, dst;
9037 /* The following trampoline works within a +- 128 KB range for cxt:
9038 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
9039 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
9040 gettr tr1,r1; blink tr0,r63 */
9041 /* Address rounding makes it hard to compute the exact bounds of the
9042 offset for this trampoline, but we have a rather generous offset
9043 range, so frame_offset should do fine as an upper bound. */
9044 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
9046 /* ??? could optimize this trampoline initialization
9047 by writing DImode words with two insns each. */
9048 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
9049 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
9050 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
9051 insn = gen_rtx_AND (DImode, insn, mask);
9052 /* Or in ptb/u .,tr1 pattern */
9053 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
9054 insn = force_operand (insn, NULL_RTX);
9055 insn = gen_lowpart (SImode, insn);
9056 emit_move_insn (gen_rtx_MEM (SImode, tramp), insn);
9057 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
9058 insn = gen_rtx_AND (DImode, insn, mask);
9059 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
9060 insn = gen_lowpart (SImode, insn);
9061 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)), insn);
9062 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
9063 insn = gen_rtx_AND (DImode, insn, mask);
9064 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9065 insn = gen_lowpart (SImode, insn);
9066 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)), insn);
9067 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
9068 insn = gen_rtx_AND (DImode, insn, mask);
9069 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9070 insn = gen_lowpart (SImode, insn);
9071 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
9072 insn);
9073 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
9074 insn = gen_rtx_AND (DImode, insn, mask);
9075 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9076 insn = gen_lowpart (SImode, insn);
9077 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 16)),
9078 insn);
9079 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 20)),
9080 GEN_INT (0x6bf10600));
9081 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 24)),
9082 GEN_INT (0x4415fc10));
9083 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 28)),
9084 GEN_INT (0x4401fff0));
9085 emit_insn (gen_ic_invalidate_line (tramp));
9086 return;
9088 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
9089 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
9091 tramp_templ = gen_datalabel_ref (tramp_templ);
9092 dst = gen_rtx_MEM (BLKmode, tramp);
9093 src = gen_rtx_MEM (BLKmode, tramp_templ);
9094 set_mem_align (dst, 256);
9095 set_mem_align (src, 64);
9096 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
9098 emit_move_insn (gen_rtx_MEM (Pmode, plus_constant (tramp, fixed_len)),
9099 fnaddr);
9100 emit_move_insn (gen_rtx_MEM (Pmode,
9101 plus_constant (tramp,
9102 fixed_len
9103 + GET_MODE_SIZE (Pmode))),
9104 cxt);
9105 emit_insn (gen_ic_invalidate_line (tramp));
9106 return;
9108 else if (TARGET_SHMEDIA)
9110 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
9111 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
9112 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
9113 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
9114 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
9115 	 rotated right by 10, with the higher 16 bits of every 32 selected.  */
9116 rtx movishori
9117 = force_reg (V2HImode, (simplify_gen_subreg
9118 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
9119 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
9120 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
9122 tramp = force_reg (Pmode, tramp);
9123 fnaddr = force_reg (SImode, fnaddr);
9124 cxt = force_reg (SImode, cxt);
9125 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
9126 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
9127 movishori));
9128 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
9129 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9130 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
9131 emit_move_insn (gen_rtx_MEM (DImode, tramp), quad0);
9132 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
9133 gen_rtx_SUBREG (V2HImode, cxt, 0),
9134 movishori));
9135 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
9136 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9137 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
9138 if (TARGET_LITTLE_ENDIAN)
9140 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
9141 emit_insn (gen_mextr4 (quad2, cxtload, blink));
9143 else
9145 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
9146 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
9148 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 8)), quad1);
9149 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), quad2);
9150 emit_insn (gen_ic_invalidate_line (tramp));
9151 return;
9153 else if (TARGET_SHCOMPACT)
9155 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
9156 return;
9158 emit_move_insn (gen_rtx_MEM (SImode, tramp),
9159 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
9160 SImode));
9161 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
9162 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
9163 SImode));
9164 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
9165 cxt);
9166 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
9167 fnaddr);
9168 if (TARGET_HARVARD)
9170 if (TARGET_USERMODE)
9171 emit_library_call (function_symbol ("__ic_invalidate"),
9172 0, VOIDmode, 1, tramp, SImode);
9173 else
9174 emit_insn (gen_ic_invalidate_line (tramp));
9178 /* FIXME: This is overly conservative. A SHcompact function that
9179 receives arguments ``by reference'' will have them stored in its
9180 own stack frame, so it must not pass pointers or references to
9181 these arguments to other functions by means of sibling calls. */
9182 static bool
9183 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9185 return (decl
9186 && (! TARGET_SHCOMPACT
9187 || current_function_args_info.stack_regs == 0)
9188 && ! sh_cfun_interrupt_handler_p ());
9191 /* Machine specific built-in functions. */
9193 struct builtin_description
9195 const enum insn_code icode;
9196 const char *const name;
9197 int signature;
9200 /* describe number and signedness of arguments; arg[0] == result
9201    (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument).  */
9202 static const char signature_args[][4] =
9204 #define SH_BLTIN_V2SI2 0
9205 { 4, 4 },
9206 #define SH_BLTIN_V4HI2 1
9207 { 4, 4 },
9208 #define SH_BLTIN_V2SI3 2
9209 { 4, 4, 4 },
9210 #define SH_BLTIN_V4HI3 3
9211 { 4, 4, 4 },
9212 #define SH_BLTIN_V8QI3 4
9213 { 4, 4, 4 },
9214 #define SH_BLTIN_MAC_HISI 5
9215 { 1, 4, 4, 1 },
9216 #define SH_BLTIN_SH_HI 6
9217 { 4, 4, 1 },
9218 #define SH_BLTIN_SH_SI 7
9219 { 4, 4, 1 },
9220 #define SH_BLTIN_V4HI2V2SI 8
9221 { 4, 4, 4 },
9222 #define SH_BLTIN_V4HI2V8QI 9
9223 { 4, 4, 4 },
9224 #define SH_BLTIN_SISF 10
9225 { 4, 2 },
9226 #define SH_BLTIN_LDUA_L 11
9227 { 2, 8 },
9228 #define SH_BLTIN_LDUA_Q 12
9229 { 1, 8 },
9230 #define SH_BLTIN_STUA_L 13
9231 { 0, 8, 2 },
9232 #define SH_BLTIN_STUA_Q 14
9233 { 0, 8, 1 },
9234 #define SH_BLTIN_UDI 15
9235 { 0, 8, 1 },
9236 #define SH_BLTIN_NUM_SHARED_SIGNATURES 16
9237 #define SH_BLTIN_2 16
9238 #define SH_BLTIN_SU 16
9239 { 1, 2 },
9240 #define SH_BLTIN_3 17
9241 #define SH_BLTIN_SUS 17
9242 { 2, 2, 1 },
9243 #define SH_BLTIN_PSSV 18
9244 { 0, 8, 2, 2 },
9245 #define SH_BLTIN_XXUU 19
9246 #define SH_BLTIN_UUUU 19
9247 { 1, 1, 1, 1 },
9248 #define SH_BLTIN_PV 20
9249 { 0, 8 },
9251 /* mcmv: operands considered unsigned. */
9252 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9253 /* mperm: control value considered unsigned int. */
9254 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9255 /* mshards_q: returns signed short. */
9256 /* nsb: takes long long arg, returns unsigned char. */
9257 static const struct builtin_description bdesc[] =
9259 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
9260 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
9261 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
9262 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
9263 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
9264 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
9265 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
9266 #if 0
9267 { CODE_FOR_alloco32, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9268 { CODE_FOR_alloco64, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9269 #endif
9270 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
9271 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
9272 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
9273 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
9274 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
9275 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
9276 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
9277 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
9278 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
9279 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
9280 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_UDI },
9281 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_UDI },
9282 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_UDI },
9283 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_UDI },
9284 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_UDI },
9285 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_UDI },
9286 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_UDI },
9287 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
9288 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
9289 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
9290 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
9291 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
9292 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
9293 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
9294 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
9295 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
9296 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
9297 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
9298 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
9299 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
9300 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
9301 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
9302 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
9303 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
9304 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
9305 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
9306 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
9307 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
9308 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
9309 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
9310 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
9311 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
9312 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
9313 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
9314 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
9315 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
9316 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
9317 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
9318 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
9319 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
9320 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
9321 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
9322 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
9323 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
9324 #if 0
9325 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9326 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9327 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9328 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9329 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9330 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9331 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9332 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9333 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9334 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9335 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9336 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9337 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9338 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9339 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9340 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9341 #endif
9342 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
9343 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
9344 #if 0
9345 { CODE_FOR_prefetch32,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
9346 { CODE_FOR_prefetch64,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV }
9347 #endif
9350 static void
9351 sh_media_init_builtins (void)
9353 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
9354 const struct builtin_description *d;
9356 memset (shared, 0, sizeof shared);
9357 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
9359 tree type, arg_type;
9360 int signature = d->signature;
9361 int i;
9363 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
9364 type = shared[signature];
9365 else
9367 int has_result = signature_args[signature][0] != 0;
9369 if (signature_args[signature][1] == 8
9370 && (insn_data[d->icode].operand[has_result].mode != Pmode))
9371 continue;
9372 if (! TARGET_FPU_ANY
9373 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
9374 continue;
9375 type = void_list_node;
9376 for (i = 3; ; i--)
9378 int arg = signature_args[signature][i];
9379 int opno = i - 1 + has_result;
9381 if (arg == 8)
9382 arg_type = ptr_type_node;
9383 else if (arg)
9384 arg_type = ((*lang_hooks.types.type_for_mode)
9385 (insn_data[d->icode].operand[opno].mode,
9386 (arg & 1)));
9387 else if (i)
9388 continue;
9389 else
9390 arg_type = void_type_node;
9391 if (i == 0)
9392 break;
9393 type = tree_cons (NULL_TREE, arg_type, type);
9395 type = build_function_type (arg_type, type);
9396 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
9397 shared[signature] = type;
9399 lang_hooks.builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
9400 NULL, NULL_TREE);
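/* Illustrative sketch, not part of the original source: with the bdesc[]
   table above, __builtin_addv4hi3 is registered with the SH_BLTIN_V4HI3
   signature (two V4HImode operands, V4HImode result), so SHmedia user
   code could use it roughly as below; the vector typedef is only an
   assumption about typical usage.  */
#if 0
  typedef short v4hi __attribute__ ((vector_size (8)));
  v4hi add_v4hi (v4hi a, v4hi b) { return __builtin_addv4hi3 (a, b); }
#endif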
9404 /* Implements target hook vector_mode_supported_p. */
9405 bool
9406 sh_vector_mode_supported_p (enum machine_mode mode)
9408 if (TARGET_FPU_ANY
9409 && ((mode == V2SFmode)
9410 || (mode == V4SFmode)
9411 || (mode == V16SFmode)))
9412 return true;
9414 else if (TARGET_SHMEDIA
9415 && ((mode == V8QImode)
9416 || (mode == V2HImode)
9417 || (mode == V4HImode)
9418 || (mode == V2SImode)))
9419 return true;
9421 return false;
9424 /* Implements target hook dwarf_calling_convention. Return an enum
9425 of dwarf_calling_convention. */
9427 sh_dwarf_calling_convention (tree func)
9429 if (sh_attr_renesas_p (func))
9430 return DW_CC_GNU_renesas_sh;
9432 return DW_CC_normal;
9435 static void
9436 sh_init_builtins (void)
9438 if (TARGET_SHMEDIA)
9439 sh_media_init_builtins ();
9442 /* Expand an expression EXP that calls a built-in function,
9443 with result going to TARGET if that's convenient
9444 (and in mode MODE if that's convenient).
9445 SUBTARGET may be used as the target for computing one of EXP's operands.
9446 IGNORE is nonzero if the value is to be ignored. */
9448 static rtx
9449 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9450 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
9452 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
9453 tree arglist = TREE_OPERAND (exp, 1);
9454 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9455 const struct builtin_description *d = &bdesc[fcode];
9456 enum insn_code icode = d->icode;
9457 int signature = d->signature;
9458 enum machine_mode tmode = VOIDmode;
9459 int nop = 0, i;
9460 rtx op[4];
9461 rtx pat;
9463 if (signature_args[signature][0])
9465 if (ignore)
9466 return 0;
9468 tmode = insn_data[icode].operand[0].mode;
9469 if (! target
9470 || GET_MODE (target) != tmode
9471 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9472 target = gen_reg_rtx (tmode);
9473 op[nop++] = target;
9475 else
9476 target = 0;
9478 for (i = 1; i <= 3; i++, nop++)
9480 tree arg;
9481 enum machine_mode opmode, argmode;
9483 if (! signature_args[signature][i])
9484 break;
9485 arg = TREE_VALUE (arglist);
9486 if (arg == error_mark_node)
9487 return const0_rtx;
9488 arglist = TREE_CHAIN (arglist);
9489 opmode = insn_data[icode].operand[nop].mode;
9490 argmode = TYPE_MODE (TREE_TYPE (arg));
9491 if (argmode != opmode)
9492 arg = build1 (NOP_EXPR,
9493 (*lang_hooks.types.type_for_mode) (opmode, 0), arg);
9494 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
9495 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
9496 op[nop] = copy_to_mode_reg (opmode, op[nop]);
9499 switch (nop)
9501 case 1:
9502 pat = (*insn_data[d->icode].genfun) (op[0]);
9503 break;
9504 case 2:
9505 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
9506 break;
9507 case 3:
9508 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
9509 break;
9510 case 4:
9511 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
9512 break;
9513 default:
9514 abort ();
9516 if (! pat)
9517 return 0;
9518 emit_insn (pat);
9519 return target;
9522 void
9523 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
9525 rtx sel0 = const0_rtx;
9526 rtx sel1 = const1_rtx;
9527 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
9528 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
9530 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
9531 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
9534 void
9535 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
9537 rtx sel0 = const0_rtx;
9538 rtx sel1 = const1_rtx;
9539 rtx (*fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
9540 = gen_binary_sf_op;
9541 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
9543 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
9544 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
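/* Illustrative sketch, not part of the original source: a V2SFmode
   addition expander would typically invoke the helper above as shown
   below, emitting one gen_binary_sf_op insn per SF element via the
   sel0/sel1 selectors.  */
#if 0
  sh_expand_binop_v2sf (PLUS, operands[0], operands[1], operands[2]);
#endif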
9547 /* Return the class of registers for which a mode change from FROM to TO
9548 is invalid. */
9549 bool
9550 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9551 enum reg_class class)
9553 /* We want to enable the use of SUBREGs as a means to
9554 VEC_SELECT a single element of a vector. */
9555 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
9556 return (reg_classes_intersect_p (GENERAL_REGS, class));
9558 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
9560 if (TARGET_LITTLE_ENDIAN)
9562 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
9563 return reg_classes_intersect_p (DF_REGS, class);
9565 else
9567 if (GET_MODE_SIZE (from) < 8)
9568 return reg_classes_intersect_p (DF_HI_REGS, class);
9571 return 0;
9575 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
9576 that label is used. */
9578 void
9579 sh_mark_label (rtx address, int nuses)
9581 if (GOTOFF_P (address))
9583 /* Extract the label or symbol. */
9584 address = XEXP (address, 0);
9585 if (GET_CODE (address) == PLUS)
9586 address = XEXP (address, 0);
9587 address = XVECEXP (address, 0, 0);
9589 if (GET_CODE (address) == LABEL_REF
9590 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
9591 LABEL_NUSES (XEXP (address, 0)) += nuses;
9594 /* Compute extra cost of moving data between one register class
9595 and another. */
9597 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
9598 uses this information. Hence, the general register <-> floating point
9599 register information here is not used for SFmode. */
9602 sh_register_move_cost (enum machine_mode mode,
9603 enum reg_class srcclass, enum reg_class dstclass)
9605 if (dstclass == T_REGS || dstclass == PR_REGS)
9606 return 10;
9608 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
9609 return 4;
9611 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
9612 && REGCLASS_HAS_FP_REG (srcclass)
9613 && REGCLASS_HAS_FP_REG (dstclass))
9614 return 4;
9616 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
9617 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
9618 return 9;
9620 if ((REGCLASS_HAS_FP_REG (dstclass)
9621 && REGCLASS_HAS_GENERAL_REG (srcclass))
9622 || (REGCLASS_HAS_GENERAL_REG (dstclass)
9623 && REGCLASS_HAS_FP_REG (srcclass)))
9624 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
9625 * ((GET_MODE_SIZE (mode) + 7) / 8U));
9627 if ((dstclass == FPUL_REGS
9628 && REGCLASS_HAS_GENERAL_REG (srcclass))
9629 || (srcclass == FPUL_REGS
9630 && REGCLASS_HAS_GENERAL_REG (dstclass)))
9631 return 5;
9633 if ((dstclass == FPUL_REGS
9634 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
9635 || (srcclass == FPUL_REGS
9636 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
9637 return 7;
9639 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9640 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9641 return 20;
9643 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9644 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9645 return 4;
9647 if (TARGET_SHMEDIA
9648 || (TARGET_FMOVD
9649 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
9650 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
9651 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
9653 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
9656 /* Like register_operand, but take into account that SHMEDIA can use
9657 the constant zero like a general register. */
9658 int
9659 sh_register_operand (rtx op, enum machine_mode mode)
9661 if (op == CONST0_RTX (mode) && TARGET_SHMEDIA)
9662 return 1;
9663 return register_operand (op, mode);
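/* Like arith_operand, but also accept the T register as an SImode
   comparison operand.  */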
9666 int
9667 cmpsi_operand (rtx op, enum machine_mode mode)
9669 if (GET_CODE (op) == REG && REGNO (op) == T_REG
9670 && GET_MODE (op) == SImode)
9671 return 1;
9672 return arith_operand (op, mode);
9675 static rtx emit_load_ptr (rtx, rtx);
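/* Load a ptr_mode value from address ADDR into REG, sign-extending it to
   Pmode if Pmode is wider than ptr_mode.  */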
9677 static rtx
9678 emit_load_ptr (rtx reg, rtx addr)
9680 rtx mem = gen_rtx_MEM (ptr_mode, addr);
9682 if (Pmode != ptr_mode)
9683 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
9684 return emit_move_insn (reg, mem);
9687 void
9688 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
9689 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
9690 tree function)
9692 CUMULATIVE_ARGS cum;
9693 int structure_value_byref = 0;
9694 rtx this, this_value, sibcall, insns, funexp;
9695 tree funtype = TREE_TYPE (function);
9696 int simple_add = CONST_OK_FOR_ADD (delta);
9697 int did_load = 0;
9698 rtx scratch0, scratch1, scratch2;
9700 reload_completed = 1;
9701 epilogue_completed = 1;
9702 no_new_pseudos = 1;
9703 current_function_uses_only_leaf_regs = 1;
9704 reset_block_changes ();
9706 emit_note (NOTE_INSN_PROLOGUE_END);
9708 /* Find the "this" pointer. We have such a wide range of ABIs for the
9709 SH that it's best to do this completely machine independently.
9710 "this" is passed as first argument, unless a structure return pointer
9711 comes first, in which case "this" comes second. */
9712 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
9713 #ifndef PCC_STATIC_STRUCT_RETURN
9714 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
9715 structure_value_byref = 1;
9716 #endif /* not PCC_STATIC_STRUCT_RETURN */
9717 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
9719 tree ptype = build_pointer_type (TREE_TYPE (funtype));
9721 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
9723 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
9725 /* For SHcompact, we only have r0 for a scratch register: r1 is the
9726 static chain pointer (even if you can't have nested virtual functions
9727 right now, someone might implement them sometime), and the rest of the
9728 registers are used for argument passing, are callee-saved, or are reserved. */
9729 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
9730 if (! TARGET_SH5)
9732 scratch1 = gen_rtx_REG (ptr_mode, 1);
9733 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
9734 to the location where struct values are to be returned. */
9735 scratch2 = gen_rtx_REG (Pmode, 3);
9737 else if (TARGET_SHMEDIA)
9739 scratch1 = gen_rtx_REG (ptr_mode, 21);
9740 scratch2 = gen_rtx_REG (Pmode, TR0_REG);
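  /* Roughly, the insns emitted below compute

       this += delta;
       if (vcall_offset)
         this += *(ptrdiff_t *) (*(char **) this + vcall_offset);

     and then tail-call FUNCTION with the adjusted "this" pointer.  */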
9743 this_value = plus_constant (this, delta);
9744 if (vcall_offset
9745 && (simple_add || scratch0 != scratch1)
9746 && strict_memory_address_p (ptr_mode, this_value))
9748 emit_load_ptr (scratch0, this_value);
9749 did_load = 1;
9752 if (!delta)
9753 ; /* Do nothing. */
9754 else if (simple_add)
9755 emit_move_insn (this, this_value);
9756 else
9758 emit_move_insn (scratch1, GEN_INT (delta));
9759 emit_insn (gen_add2_insn (this, scratch1));
9762 if (vcall_offset)
9764 rtx offset_addr;
9766 if (!did_load)
9767 emit_load_ptr (scratch0, this);
9769 offset_addr = plus_constant (scratch0, vcall_offset);
9770 if (strict_memory_address_p (ptr_mode, offset_addr))
9771 ; /* Do nothing. */
9772 else if (! TARGET_SH5)
9774 /* scratch0 != scratch1, and we have indexed loads.  Get a better
9775 schedule by loading the offset into r1 and using an indexed
9776 load - then the load of r1 can issue before the load from
9777 (this + delta) finishes. */
9778 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9779 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
9781 else if (CONST_OK_FOR_ADD (vcall_offset))
9783 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
9784 offset_addr = scratch0;
9786 else if (scratch0 != scratch1)
9788 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9789 emit_insn (gen_add2_insn (scratch0, scratch1));
9790 offset_addr = scratch0;
9792 else
9793 abort (); /* FIXME */
9794 emit_load_ptr (scratch0, offset_addr);
9796 if (Pmode != ptr_mode)
9797 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
9798 emit_insn (gen_add2_insn (this, scratch0));
9801 /* Generate a tail call to the target function. */
9802 if (! TREE_USED (function))
9804 assemble_external (function);
9805 TREE_USED (function) = 1;
9807 funexp = XEXP (DECL_RTL (function), 0);
9808 emit_move_insn (scratch2, funexp);
9809 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
9810 sibcall = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
9811 SIBLING_CALL_P (sibcall) = 1;
9812 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
9813 emit_barrier ();
9815 /* Run just enough of rest_of_compilation to do scheduling and get
9816 the insns emitted. Note that use_thunk calls
9817 assemble_start_function and assemble_end_function. */
9819 insn_locators_initialize ();
9820 insns = get_insns ();
9822 if (optimize > 0 && flag_schedule_insns_after_reload)
9824 if (! basic_block_info)
9825 init_flow ();
9826 rtl_register_cfg_hooks ();
9827 find_basic_blocks (insns, max_reg_num (), dump_file);
9828 life_analysis (dump_file, PROP_FINAL);
9830 split_all_insns (1);
9832 schedule_insns (dump_file);
9835 sh_reorg ();
9837 if (optimize > 0 && flag_delayed_branch)
9838 dbr_schedule (insns, dump_file);
9839 shorten_branches (insns);
9840 final_start_function (insns, file, 1);
9841 final (insns, file, 1, 0);
9842 final_end_function ();
9844 if (optimize > 0 && flag_schedule_insns_after_reload)
9846 /* Release all memory allocated by flow. */
9847 free_basic_block_vars ();
9850 reload_completed = 0;
9851 epilogue_completed = 0;
9852 no_new_pseudos = 0;
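/* Return a SYMBOL_REF rtx for NAME, marked as referring to a function.  */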
9855 rtx
9856 function_symbol (const char *name)
9858 rtx sym = gen_rtx_SYMBOL_REF (Pmode, name);
9859 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
9860 return sym;
9863 /* Find the number of a general purpose register in S. */
9864 static int
9865 scavenge_reg (HARD_REG_SET *s)
9867 int r;
9868 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
9869 if (TEST_HARD_REG_BIT (*s, r))
9870 return r;
9871 return -1;
9874 rtx
9875 sh_get_pr_initial_val (void)
9877 rtx val;
9879 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
9880 PR register on SHcompact, because it might be clobbered by the prologue.
9881 We check first if that is known to be the case. */
9882 if (TARGET_SHCOMPACT
9883 && ((current_function_args_info.call_cookie
9884 & ~ CALL_COOKIE_RET_TRAMP (1))
9885 || current_function_has_nonlocal_label))
9886 return gen_rtx_MEM (SImode, return_address_pointer_rtx);
9888 /* If we haven't finished rtl generation, there might be a nonlocal label
9889 that we haven't seen yet.
9890 ??? get_hard_reg_initial_val fails if it is called while no_new_pseudos
9891 is set, unless it has been called before for the same register. And even
9892 then, we end up in trouble if we didn't use the register in the same
9893 basic block before. So call get_hard_reg_initial_val now and wrap it
9894 in an unspec if we might need to replace it. */
9895 /* ??? We must also do this for TARGET_SH1 in general, because otherwise
9896 combine can put the pseudo returned by get_hard_reg_initial_val into
9897 instructions that need a general purpose register, which will fail to
9898 be recognized when the pseudo becomes allocated to PR. */
9899 val
9900 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
9901 if (TARGET_SH1)
9902 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
9903 return val;
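/* Try to expand an scc whose first operand (sh_compare_op0) is the T
   register and whose second (sh_compare_op1) is a constant: store T in
   TARGET (via movt), its complement (via the subc/add sequence below),
   or the statically known result.  Return 1 on success, 0 if the
   operands do not have this form.  */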
9906 int
9907 sh_expand_t_scc (enum rtx_code code, rtx target)
9909 rtx result = target;
9910 HOST_WIDE_INT val;
9912 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
9913 || GET_CODE (sh_compare_op1) != CONST_INT)
9914 return 0;
9915 if (GET_CODE (result) != REG)
9916 result = gen_reg_rtx (SImode);
9917 val = INTVAL (sh_compare_op1);
9918 if ((code == EQ && val == 1) || (code == NE && val == 0))
9919 emit_insn (gen_movt (result));
9920 else if ((code == EQ && val == 0) || (code == NE && val == 1))
9922 emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
9923 emit_insn (gen_subc (result, result, result));
9924 emit_insn (gen_addsi3 (result, result, const1_rtx));
9926 else if (code == EQ || code == NE)
9927 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
9928 else
9929 return 0;
9930 if (result != target)
9931 emit_move_insn (target, result);
9932 return 1;
9935 /* INSN is an sfunc; return the rtx that describes the address used. */
9936 static rtx
9937 extract_sfunc_addr (rtx insn)
9939 rtx pattern, part = NULL_RTX;
9940 int len, i;
9942 pattern = PATTERN (insn);
9943 len = XVECLEN (pattern, 0);
9944 for (i = 0; i < len; i++)
9946 part = XVECEXP (pattern, 0, i);
9947 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
9948 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
9949 return XEXP (part, 0);
9951 if (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE)
9952 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
9953 abort ();
9956 /* Verify that the register in use_sfunc_addr still agrees with the address
9957 used in the sfunc. This prevents fill_slots_from_thread from changing
9958 use_sfunc_addr.
9959 INSN is the use_sfunc_addr instruction, and REG is the register it
9960 guards. */
9961 int
9962 check_use_sfunc_addr (rtx insn, rtx reg)
9964 /* Search for the sfunc. It should really come right after INSN. */
9965 while ((insn = NEXT_INSN (insn)))
9967 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
9968 break;
9969 if (! INSN_P (insn))
9970 continue;
9972 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
9973 insn = XVECEXP (PATTERN (insn), 0, 0);
9974 if (GET_CODE (PATTERN (insn)) != PARALLEL
9975 || get_attr_type (insn) != TYPE_SFUNC)
9976 continue;
9977 return rtx_equal_p (extract_sfunc_addr (insn), reg);
9979 abort ();
9982 /* Returns 1 if OP is a MEM that can be the source of a simple move operation. */
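/* Both (mem:MODE (reg ...)) and (mem:MODE (post_inc (reg ...))) are
   accepted.  */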
9984 int
9985 unaligned_load_operand (rtx op, enum machine_mode mode)
9987 rtx inside;
9989 if (GET_CODE (op) != MEM || GET_MODE (op) != mode)
9990 return 0;
9992 inside = XEXP (op, 0);
9994 if (GET_CODE (inside) == POST_INC)
9995 inside = XEXP (inside, 0);
9997 if (GET_CODE (inside) == REG)
9998 return 1;
10000 return 0;
10003 /* This function returns a constant rtx that represents 2**15 / pi in
10004 SFmode.  It's used to scale SFmode angles, in radians, to a
10005 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
10006 maps to 0x10000).  */
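/* For instance, an angle of pi/4 radians scales to pi/4 * 2**15/pi
   = 0x2000, i.e. one eighth of a full circle.  */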
10008 static GTY(()) rtx sh_fsca_sf2int_rtx;
10010 rtx
10011 sh_fsca_sf2int (void)
10013 if (! sh_fsca_sf2int_rtx)
10015 REAL_VALUE_TYPE rv;
10017 real_from_string (&rv, "10430.378350470453");
10018 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
10021 return sh_fsca_sf2int_rtx;
10024 /* This function returns a constant rtx that represents 2**15 / pi in
10025 DFmode.  It's used to scale DFmode angles, in radians, to a
10026 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
10027 maps to 0x10000).  */
10029 static GTY(()) rtx sh_fsca_df2int_rtx;
10031 rtx
10032 sh_fsca_df2int (void)
10034 if (! sh_fsca_df2int_rtx)
10036 REAL_VALUE_TYPE rv;
10038 real_from_string (&rv, "10430.378350470453");
10039 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
10042 return sh_fsca_df2int_rtx;
10045 /* This function returns a constant rtx that represents pi / 2**15 in
10046 SFmode.  It's used to scale a fixed-point signed 16.16-bit fraction
10047 of a full circle back to an SFmode value (i.e., 0x10000 maps to
10048 2*pi).  */
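/* For instance, the fixed-point value 0x2000 (one eighth of a circle)
   scales back to 0x2000 * pi/2**15 = pi/4, roughly 0.7854 radians.  */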
10050 static GTY(()) rtx sh_fsca_int2sf_rtx;
10052 rtx
10053 sh_fsca_int2sf (void)
10055 if (! sh_fsca_int2sf_rtx)
10057 REAL_VALUE_TYPE rv;
10059 real_from_string (&rv, "9.587379924285257e-5");
10060 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
10063 return sh_fsca_int2sf_rtx;
10066 /* Initialize the CUMULATIVE_ARGS structure. */
10068 void
10069 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
10070 tree fntype,
10071 rtx libname ATTRIBUTE_UNUSED,
10072 tree fndecl,
10073 signed int n_named_args,
10074 enum machine_mode mode)
10076 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
10077 pcum->free_single_fp_reg = 0;
10078 pcum->stack_regs = 0;
10079 pcum->byref_regs = 0;
10080 pcum->byref = 0;
10081 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
10083 /* XXX - Should we check TARGET_HITACHI here ??? */
10084 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
10086 if (fntype)
10088 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
10089 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
10090 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
10091 pcum->arg_count [(int) SH_ARG_INT]
10092 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
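  /* On SHcompact, a return value that is wider than 4 bytes, is not
     returned in memory, and comes back starting in FIRST_RET_REG goes
     through the return trampoline; record that in the call cookie.  */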
10094 pcum->call_cookie
10095 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10096 && pcum->arg_count [(int) SH_ARG_INT] == 0
10097 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
10098 ? int_size_in_bytes (TREE_TYPE (fntype))
10099 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
10100 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
10101 == FIRST_RET_REG));
10103 else
10105 pcum->arg_count [(int) SH_ARG_INT] = 0;
10106 pcum->prototype_p = FALSE;
10107 if (mode != VOIDmode)
10109 pcum->call_cookie =
10110 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10111 && GET_MODE_SIZE (mode) > 4
10112 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
10114 /* If the default ABI is the Renesas ABI then all library
10115 calls must assume that the library will be using the
10116 Renesas ABI. So if the function would return its result
10117 in memory then we must force the address of this memory
10118 block onto the stack. Ideally we would like to call
10119 targetm.calls.return_in_memory() here but we do not have
10120 the TYPE or the FNDECL available so we synthesize the
10121 contents of that function as best we can. */
10122 pcum->force_mem =
10123 (TARGET_DEFAULT & HITACHI_BIT)
10124 && (mode == BLKmode
10125 || (GET_MODE_SIZE (mode) > 4
10126 && !(mode == DFmode
10127 && TARGET_FPU_DOUBLE)));
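  /* For example, with a Renesas-ABI default a DImode libcall result
     (8 bytes) is forced to be returned via memory, while a DFmode result
     is not when TARGET_FPU_DOUBLE is in effect.  */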
10129 else
10131 pcum->call_cookie = 0;
10132 pcum->force_mem = FALSE;
10137 #include "gt-sh.h"