2004-09-30 Eric Christopher <echristo@redhat.com>
[official-gcc.git] / gcc / config / sh / sh.c
blob 349d981ccc98a18e46e65ca52678e1cb8c2a393f
1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "insn-config.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "flags.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "function.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "output.h"
38 #include "insn-attr.h"
39 #include "toplev.h"
40 #include "recog.h"
41 #include "c-pragma.h"
42 #include "integrate.h"
43 #include "dwarf2.h"
44 #include "tm_p.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "real.h"
48 #include "langhooks.h"
49 #include "basic-block.h"
50 #include "ra.h"
51 #include "cfglayout.h"
52 #include "intl.h"
53 #include "sched-int.h"
54 #include "ggc.h"
55 #include "tree-gimple.h"
58 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
60 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
61 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
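/* For example, with a DImode value held in the register pair r4/r5 on a
   little-endian target, REGNO + LSW selects r4 (the low word) and
   REGNO + MSW selects r5; on a big-endian target the choices swap.  */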
63 /* These are some macros to abstract register modes. */
64 #define CONST_OK_FOR_ADD(size) \
65 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
66 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
67 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
68 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
70 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
71 int current_function_interrupt;
73 /* ??? The pragma interrupt support will not work for SH3. */
74 /* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
75 output code for the next function appropriate for an interrupt handler. */
76 int pragma_interrupt;
78 /* This is set by the trap_exit attribute for functions. It specifies
79 a trap number to be used in a trapa instruction at function exit
80 (instead of an rte instruction). */
81 int trap_exit;
83 /* This is used by the sp_switch attribute for functions. It specifies
84 a variable holding the address of the stack the interrupt function
85 should switch to/from at entry/exit. */
86 rtx sp_switch;
88 /* This is set by #pragma trapa, and is similar to the above, except that
89 the compiler doesn't emit code to preserve all registers. */
90 static int pragma_trapa;
92 /* This is set by #pragma nosave_low_regs. This is useful on the SH3,
93 which has a separate set of low regs for User and Supervisor modes.
94 This should only be used for the lowest level of interrupts. Higher levels
95 of interrupts must save the registers in case they themselves are
96 interrupted. */
97 int pragma_nosave_low_regs;
99 /* This is used for communication between TARGET_SETUP_INCOMING_VARARGS and
100 sh_expand_prologue. */
101 int current_function_anonymous_args;
103 /* Global variables for machine-dependent things. */
105 /* Which CPU we are scheduling for. */
106 enum processor_type sh_cpu;
108 /* Definitions used in ready queue reordering for first scheduling pass. */
110 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
111 static short *regmode_weight[2];
113 /* Total SFmode and SImode weights of scheduled insns. */
114 static int curr_regmode_pressure[2];
116 /* If true, skip cycles for Q -> R movement. */
117 static int skip_cycles = 0;
119 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
120 and returned from sh_reorder2. */
121 static short cached_can_issue_more;
123 /* Saved operands from the last compare to use when we generate an scc
124 or bcc insn. */
126 rtx sh_compare_op0;
127 rtx sh_compare_op1;
129 /* Provides the class number of the smallest class containing
130 a given register number. */
132 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
134 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
137 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
138 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
139 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
140 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
141 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
142 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
143 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
144 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
145 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
146 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
147 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
148 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
149 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
150 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
153 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
154 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
155 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
156 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
157 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
158 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
159 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
160 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
161 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
162 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
163 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
164 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
165 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
166 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
167 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
168 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
169 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
170 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
171 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
172 GENERAL_REGS,
175 char sh_register_names[FIRST_PSEUDO_REGISTER] \
176 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
178 char sh_additional_register_names[ADDREGNAMES_SIZE] \
179 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
180 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
182 /* Provide reg_class from a letter such as appears in the machine
183 description. *: target independently reserved letter.
184 reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */
186 enum reg_class reg_class_from_letter[] =
188 /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
189 /* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
190 /* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
191 /* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
192 /* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
193 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
194 /* y */ FPUL_REGS, /* z */ R0_REGS
197 int assembler_dialect;
199 static bool shmedia_space_reserved_for_target_registers;
201 static void split_branches (rtx);
202 static int branch_dest (rtx);
203 static void force_into (rtx, rtx);
204 static void print_slot (rtx);
205 static rtx add_constant (rtx, enum machine_mode, rtx);
206 static void dump_table (rtx, rtx);
207 static int hi_const (rtx);
208 static int broken_move (rtx);
209 static int mova_p (rtx);
210 static rtx find_barrier (int, rtx, rtx);
211 static int noncall_uses_reg (rtx, rtx, rtx *);
212 static rtx gen_block_redirect (rtx, int, int);
213 static void sh_reorg (void);
214 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
215 static rtx frame_insn (rtx);
216 static rtx push (int);
217 static void pop (int);
218 static void push_regs (HARD_REG_SET *, int);
219 static int calc_live_regs (HARD_REG_SET *);
220 static void mark_use (rtx, rtx *);
221 static HOST_WIDE_INT rounded_frame_size (int);
222 static rtx mark_constant_pool_use (rtx);
223 const struct attribute_spec sh_attribute_table[];
224 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
225 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
226 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
227 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
228 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
229 static void sh_insert_attributes (tree, tree *);
230 static int sh_adjust_cost (rtx, rtx, rtx, int);
231 static int sh_issue_rate (void);
232 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
233 static short find_set_regmode_weight (rtx, enum machine_mode);
234 static short find_insn_regmode_weight (rtx, enum machine_mode);
235 static void find_regmode_weight (int, enum machine_mode);
236 static void sh_md_init_global (FILE *, int, int);
237 static void sh_md_finish_global (FILE *, int);
238 static int rank_for_reorder (const void *, const void *);
239 static void swap_reorder (rtx *, int);
240 static void ready_reorder (rtx *, int);
241 static short high_pressure (enum machine_mode);
242 static int sh_reorder (FILE *, int, rtx *, int *, int);
243 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
244 static void sh_md_init (FILE *, int, int);
245 static int sh_variable_issue (FILE *, int, rtx, int);
247 static bool sh_function_ok_for_sibcall (tree, tree);
249 static bool sh_cannot_modify_jumps_p (void);
250 static int sh_target_reg_class (void);
251 static bool sh_optimize_target_register_callee_saved (bool);
252 static bool sh_ms_bitfield_layout_p (tree);
254 static void sh_init_builtins (void);
255 static void sh_media_init_builtins (void);
256 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
257 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
258 static void sh_file_start (void);
259 static int flow_dependent_p (rtx, rtx);
260 static void flow_dependent_p_1 (rtx, rtx, void *);
261 static int shiftcosts (rtx);
262 static int andcosts (rtx);
263 static int addsubcosts (rtx);
264 static int multcosts (rtx);
265 static bool unspec_caller_rtx_p (rtx);
266 static bool sh_cannot_copy_insn_p (rtx);
267 static bool sh_rtx_costs (rtx, int, int, int *);
268 static int sh_address_cost (rtx);
269 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
270 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
271 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
272 static int scavenge_reg (HARD_REG_SET *s);
273 struct save_schedule_s;
274 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
275 struct save_schedule_s *, int);
277 static rtx sh_struct_value_rtx (tree, int);
278 static bool sh_return_in_memory (tree, tree);
279 static rtx sh_builtin_saveregs (void);
280 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
281 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
282 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
283 static tree sh_build_builtin_va_list (void);
284 static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
285 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
286 tree, bool);
287 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
288 tree, bool);
289 static int sh_dwarf_calling_convention (tree);
292 /* Initialize the GCC target structure. */
293 #undef TARGET_ATTRIBUTE_TABLE
294 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
296 /* The next two are used for debug info when compiling with -gdwarf. */
297 #undef TARGET_ASM_UNALIGNED_HI_OP
298 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
299 #undef TARGET_ASM_UNALIGNED_SI_OP
300 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
302 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
303 #undef TARGET_ASM_UNALIGNED_DI_OP
304 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
305 #undef TARGET_ASM_ALIGNED_DI_OP
306 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
308 #undef TARGET_ASM_FUNCTION_EPILOGUE
309 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
311 #undef TARGET_ASM_OUTPUT_MI_THUNK
312 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
314 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
315 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
317 #undef TARGET_ASM_FILE_START
318 #define TARGET_ASM_FILE_START sh_file_start
319 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
320 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
322 #undef TARGET_INSERT_ATTRIBUTES
323 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
325 #undef TARGET_SCHED_ADJUST_COST
326 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
328 #undef TARGET_SCHED_ISSUE_RATE
329 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
331 /* The next 5 hooks have been implemented for reenabling sched1. With the
332 help of these macros we are limiting the movement of insns in sched1 to
333 reduce the register pressure. The overall idea is to keep count of SImode
334 and SFmode regs required by already scheduled insns. When these counts
335 cross some threshold values, give priority to insns that free registers.
336 The insn that frees registers is most likely to be the insn with the lowest
337 LUID (original insn order), but such an insn might be sitting in the stalled
338 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
339 up to a max of 8 cycles so that such insns may move from Q -> R.
341 The hooks are described below:
343 TARGET_SCHED_INIT_GLOBAL: A new target hook in the generic
344 scheduler; it is called inside the sched_init function just after
345 the find_insn_reg_weights function call. It is used to calculate the SImode
346 and SFmode weights of the insns of each basic block, much like what
347 find_insn_reg_weights does.
348 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
350 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
351 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
352 (Q)->(R).
354 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
355 high, reorder the ready queue so that the insn with the lowest LUID will be
356 issued next.
358 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
359 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
361 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
362 can be returned from TARGET_SCHED_REORDER2.
364 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
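/* As a rough sketch (reusing only the helpers declared above, with the
   details simplified), the pressure check behind TARGET_SCHED_REORDER
   amounts to something like

     if (high_pressure (SImode) || high_pressure (SFmode))
       ready_reorder (ready, n_ready);

   i.e. once either weight crosses its threshold, the ready queue is
   re-sorted so that the lowest-LUID insn is issued first.  */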
366 #undef TARGET_SCHED_DFA_NEW_CYCLE
367 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
369 #undef TARGET_SCHED_INIT_GLOBAL
370 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
372 #undef TARGET_SCHED_FINISH_GLOBAL
373 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
375 #undef TARGET_SCHED_VARIABLE_ISSUE
376 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
378 #undef TARGET_SCHED_REORDER
379 #define TARGET_SCHED_REORDER sh_reorder
381 #undef TARGET_SCHED_REORDER2
382 #define TARGET_SCHED_REORDER2 sh_reorder2
384 #undef TARGET_SCHED_INIT
385 #define TARGET_SCHED_INIT sh_md_init
387 #undef TARGET_CANNOT_MODIFY_JUMPS_P
388 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
389 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
390 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
391 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
392 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
393 sh_optimize_target_register_callee_saved
395 #undef TARGET_MS_BITFIELD_LAYOUT_P
396 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
398 #undef TARGET_INIT_BUILTINS
399 #define TARGET_INIT_BUILTINS sh_init_builtins
400 #undef TARGET_EXPAND_BUILTIN
401 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
403 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
404 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
406 #undef TARGET_CANNOT_COPY_INSN_P
407 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
408 #undef TARGET_RTX_COSTS
409 #define TARGET_RTX_COSTS sh_rtx_costs
410 #undef TARGET_ADDRESS_COST
411 #define TARGET_ADDRESS_COST sh_address_cost
413 #undef TARGET_MACHINE_DEPENDENT_REORG
414 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
416 #ifdef HAVE_AS_TLS
417 #undef TARGET_HAVE_TLS
418 #define TARGET_HAVE_TLS true
419 #endif
421 #undef TARGET_PROMOTE_PROTOTYPES
422 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
423 #undef TARGET_PROMOTE_FUNCTION_ARGS
424 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
425 #undef TARGET_PROMOTE_FUNCTION_RETURN
426 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
428 #undef TARGET_STRUCT_VALUE_RTX
429 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
430 #undef TARGET_RETURN_IN_MEMORY
431 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
433 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
434 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
435 #undef TARGET_SETUP_INCOMING_VARARGS
436 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
437 #undef TARGET_STRICT_ARGUMENT_NAMING
438 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
439 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
440 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
441 #undef TARGET_MUST_PASS_IN_STACK
442 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
443 #undef TARGET_PASS_BY_REFERENCE
444 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
445 #undef TARGET_CALLEE_COPIES
446 #define TARGET_CALLEE_COPIES sh_callee_copies
448 #undef TARGET_BUILD_BUILTIN_VA_LIST
449 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
450 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
451 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
453 #undef TARGET_VECTOR_MODE_SUPPORTED_P
454 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
456 #undef TARGET_PCH_VALID_P
457 #define TARGET_PCH_VALID_P sh_pch_valid_p
459 #undef TARGET_DWARF_CALLING_CONVENTION
460 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
462 /* Return regmode weight for insn. */
463 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
465 /* Return current register pressure for regmode. */
466 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
468 #ifdef SYMBIAN
470 #undef TARGET_ENCODE_SECTION_INFO
471 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
472 #undef TARGET_STRIP_NAME_ENCODING
473 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
474 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
475 #define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class
477 #endif /* SYMBIAN */
479 struct gcc_target targetm = TARGET_INITIALIZER;
481 /* Print the operand address in x to the stream. */
483 void
484 print_operand_address (FILE *stream, rtx x)
486 switch (GET_CODE (x))
488 case REG:
489 case SUBREG:
490 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
491 break;
493 case PLUS:
495 rtx base = XEXP (x, 0);
496 rtx index = XEXP (x, 1);
498 switch (GET_CODE (index))
500 case CONST_INT:
501 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
502 reg_names[true_regnum (base)]);
503 break;
505 case REG:
506 case SUBREG:
508 int base_num = true_regnum (base);
509 int index_num = true_regnum (index);
511 fprintf (stream, "@(r0,%s)",
512 reg_names[MAX (base_num, index_num)]);
513 break;
516 default:
517 debug_rtx (x);
518 abort ();
521 break;
523 case PRE_DEC:
524 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
525 break;
527 case POST_INC:
528 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
529 break;
531 default:
532 x = mark_constant_pool_use (x);
533 output_addr_const (stream, x);
534 break;
538 /* Print operand x (an rtx) in assembler syntax to file stream
539 according to modifier code.
541 '.' print a .s if insn needs delay slot
542 ',' print LOCAL_LABEL_PREFIX
543 '@' print trap, rte or rts depending upon pragma interruptness
544 '#' output a nop if there is nothing to put in the delay slot
545 ''' print likelihood suffix (/u for unlikely).
546 'O' print a constant without the #
547 'R' print the LSW of a dp value - changes if in little endian
548 'S' print the MSW of a dp value - changes if in little endian
549 'T' print the next word of a dp value - same as 'R' in big endian mode.
550 'M' print an `x' if `m' will print `base,index'.
551 'N' print 'r63' if the operand is (const_int 0).
552 'd' print a V2SF reg as dN instead of fpN.
553 'm' print a pair `base,offset' or `base,index', for LD and ST.
554 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
555 'o' output an operator. */
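/* For example, in a hypothetical output template such as
   "mov.l %1,%R0\n\tmov.l %T1,%S0" (shown only to illustrate the codes),
   %R0 and %S0 name the least and most significant word registers of
   operand 0, while %T1 addresses the second word of operand 1.  */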
557 void
558 print_operand (FILE *stream, rtx x, int code)
560 switch (code)
562 case '.':
563 if (final_sequence
564 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
565 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
566 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
567 break;
568 case ',':
569 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
570 break;
571 case '@':
572 if (trap_exit)
573 fprintf (stream, "trapa #%d", trap_exit);
574 else if (sh_cfun_interrupt_handler_p ())
575 fprintf (stream, "rte");
576 else
577 fprintf (stream, "rts");
578 break;
579 case '#':
580 /* Output a nop if there's nothing in the delay slot. */
581 if (dbr_sequence_length () == 0)
582 fprintf (stream, "\n\tnop");
583 break;
584 case '\'':
586 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
588 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
589 fputs ("/u", stream);
590 break;
592 case 'O':
593 x = mark_constant_pool_use (x);
594 output_addr_const (stream, x);
595 break;
596 case 'R':
597 fputs (reg_names[REGNO (x) + LSW], (stream));
598 break;
599 case 'S':
600 fputs (reg_names[REGNO (x) + MSW], (stream));
601 break;
602 case 'T':
603 /* Next word of a double. */
604 switch (GET_CODE (x))
606 case REG:
607 fputs (reg_names[REGNO (x) + 1], (stream));
608 break;
609 case MEM:
610 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
611 && GET_CODE (XEXP (x, 0)) != POST_INC)
612 x = adjust_address (x, SImode, 4);
613 print_operand_address (stream, XEXP (x, 0));
614 break;
615 default:
616 break;
618 break;
619 case 'o':
620 switch (GET_CODE (x))
622 case PLUS: fputs ("add", stream); break;
623 case MINUS: fputs ("sub", stream); break;
624 case MULT: fputs ("mul", stream); break;
625 case DIV: fputs ("div", stream); break;
626 case EQ: fputs ("eq", stream); break;
627 case NE: fputs ("ne", stream); break;
628 case GT: case LT: fputs ("gt", stream); break;
629 case GE: case LE: fputs ("ge", stream); break;
630 case GTU: case LTU: fputs ("gtu", stream); break;
631 case GEU: case LEU: fputs ("geu", stream); break;
632 default:
633 break;
635 break;
636 case 'M':
637 if (GET_CODE (x) == MEM
638 && GET_CODE (XEXP (x, 0)) == PLUS
639 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
640 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
641 fputc ('x', stream);
642 break;
644 case 'm':
645 if (GET_CODE (x) != MEM)
646 abort ();
647 x = XEXP (x, 0);
648 switch (GET_CODE (x))
650 case REG:
651 case SUBREG:
652 print_operand (stream, x, 0);
653 fputs (", 0", stream);
654 break;
656 case PLUS:
657 print_operand (stream, XEXP (x, 0), 0);
658 fputs (", ", stream);
659 print_operand (stream, XEXP (x, 1), 0);
660 break;
662 default:
663 abort ();
665 break;
667 case 'd':
668 if (GET_CODE (x) != REG || GET_MODE (x) != V2SFmode)
669 abort ();
671 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
672 break;
674 case 'N':
675 if (x == CONST0_RTX (GET_MODE (x)))
677 fprintf ((stream), "r63");
678 break;
680 goto default_output;
681 case 'u':
682 if (GET_CODE (x) == CONST_INT)
684 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
685 break;
687 /* Fall through. */
689 default_output:
690 default:
691 switch (GET_CODE (x))
693 /* FIXME: We need this on SHmedia32 because reload generates
694 some sign-extended HI or QI loads into DImode registers
695 but, because Pmode is SImode, the address ends up with a
696 subreg:SI of the DImode register. Maybe reload should be
697 fixed so as to apply alter_subreg to such loads? */
698 case SUBREG:
699 if (SUBREG_BYTE (x) != 0
700 || GET_CODE (SUBREG_REG (x)) != REG)
701 abort ();
703 x = SUBREG_REG (x);
704 /* Fall through. */
706 case REG:
707 if (FP_REGISTER_P (REGNO (x))
708 && GET_MODE (x) == V16SFmode)
709 fprintf ((stream), "mtrx%s", reg_names[REGNO (x)] + 2);
710 else if (FP_REGISTER_P (REGNO (x))
711 && GET_MODE (x) == V4SFmode)
712 fprintf ((stream), "fv%s", reg_names[REGNO (x)] + 2);
713 else if (GET_CODE (x) == REG
714 && GET_MODE (x) == V2SFmode)
715 fprintf ((stream), "fp%s", reg_names[REGNO (x)] + 2);
716 else if (FP_REGISTER_P (REGNO (x))
717 && GET_MODE_SIZE (GET_MODE (x)) > 4)
718 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
719 else
720 fputs (reg_names[REGNO (x)], (stream));
721 break;
723 case MEM:
724 output_address (XEXP (x, 0));
725 break;
727 case CONST:
728 if (TARGET_SHMEDIA
729 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
730 && GET_MODE (XEXP (x, 0)) == DImode
731 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
732 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
734 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
736 fputc ('(', stream);
737 if (GET_CODE (val) == ASHIFTRT)
739 fputc ('(', stream);
740 if (GET_CODE (XEXP (val, 0)) == CONST)
741 fputc ('(', stream);
742 output_addr_const (stream, XEXP (val, 0));
743 if (GET_CODE (XEXP (val, 0)) == CONST)
744 fputc (')', stream);
745 fputs (" >> ", stream);
746 output_addr_const (stream, XEXP (val, 1));
747 fputc (')', stream);
749 else
751 if (GET_CODE (val) == CONST)
752 fputc ('(', stream);
753 output_addr_const (stream, val);
754 if (GET_CODE (val) == CONST)
755 fputc (')', stream);
757 fputs (" & 65535)", stream);
758 break;
761 /* Fall through. */
762 default:
763 if (TARGET_SH1)
764 fputc ('#', stream);
765 output_addr_const (stream, x);
766 break;
768 break;
772 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
773 static void
774 force_into (rtx value, rtx target)
776 value = force_operand (value, target);
777 if (! rtx_equal_p (value, target))
778 emit_insn (gen_move_insn (target, value));
781 /* Emit code to perform a block move. Choose the best method.
783 OPERANDS[0] is the destination.
784 OPERANDS[1] is the source.
785 OPERANDS[2] is the size.
786 OPERANDS[3] is the alignment safe to use. */
789 expand_block_move (rtx *operands)
791 int align = INTVAL (operands[3]);
792 int constp = (GET_CODE (operands[2]) == CONST_INT);
793 int bytes = (constp ? INTVAL (operands[2]) : 0);
795 if (! constp)
796 return 0;
798 /* If we could use mov.l to move words and dest is word-aligned, we
799 can use movua.l for loads and still generate a relatively short
800 and efficient sequence. */
801 if (TARGET_SH4A_ARCH && align < 4
802 && MEM_ALIGN (operands[0]) >= 32
803 && can_move_by_pieces (bytes, 32))
805 rtx dest = copy_rtx (operands[0]);
806 rtx src = copy_rtx (operands[1]);
807 /* We could use different pseudos for each copied word, but
808 since movua can only load into r0, it's kind of
809 pointless. */
810 rtx temp = gen_reg_rtx (SImode);
811 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
812 int copied = 0;
814 while (copied + 4 <= bytes)
816 rtx to = adjust_address (dest, SImode, copied);
817 rtx from = adjust_automodify_address (src, SImode, src_addr, copied);
819 emit_insn (gen_movua (temp, from));
820 emit_move_insn (src_addr, plus_constant (src_addr, 4));
821 emit_move_insn (to, temp);
822 copied += 4;
825 if (copied < bytes)
826 move_by_pieces (adjust_address (dest, BLKmode, copied),
827 adjust_automodify_address (src, BLKmode,
828 src_addr, copied),
829 bytes - copied, align, 0);
831 return 1;
834 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
835 alignment, or if it isn't a multiple of 4 bytes, then fail. */
836 if (align < 4 || (bytes % 4 != 0))
837 return 0;
839 if (TARGET_HARD_SH4)
841 if (bytes < 12)
842 return 0;
843 else if (bytes == 12)
845 tree entry_name;
846 rtx sym;
847 rtx func_addr_rtx;
848 rtx r4 = gen_rtx_REG (SImode, 4);
849 rtx r5 = gen_rtx_REG (SImode, 5);
851 entry_name = get_identifier ("__movmemSI12_i4");
853 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
854 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
855 force_into (XEXP (operands[0], 0), r4);
856 force_into (XEXP (operands[1], 0), r5);
857 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
858 return 1;
860 else if (! TARGET_SMALLCODE)
862 tree entry_name;
863 rtx sym;
864 rtx func_addr_rtx;
865 int dwords;
866 rtx r4 = gen_rtx_REG (SImode, 4);
867 rtx r5 = gen_rtx_REG (SImode, 5);
868 rtx r6 = gen_rtx_REG (SImode, 6);
870 entry_name = get_identifier (bytes & 4
871 ? "__movmem_i4_odd"
872 : "__movmem_i4_even");
873 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
874 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
875 force_into (XEXP (operands[0], 0), r4);
876 force_into (XEXP (operands[1], 0), r5);
878 dwords = bytes >> 3;
879 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
880 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
881 return 1;
883 else
884 return 0;
886 if (bytes < 64)
888 char entry[30];
889 tree entry_name;
890 rtx sym;
891 rtx func_addr_rtx;
892 rtx r4 = gen_rtx_REG (SImode, 4);
893 rtx r5 = gen_rtx_REG (SImode, 5);
895 sprintf (entry, "__movmemSI%d", bytes);
896 entry_name = get_identifier (entry);
897 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
898 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
899 force_into (XEXP (operands[0], 0), r4);
900 force_into (XEXP (operands[1], 0), r5);
901 emit_insn (gen_block_move_real (func_addr_rtx));
902 return 1;
905 /* This is the same number of bytes as a memcpy call, but to a different,
906 less common function name, so this will occasionally use more space. */
907 if (! TARGET_SMALLCODE)
909 tree entry_name;
910 rtx sym;
911 rtx func_addr_rtx;
912 int final_switch, while_loop;
913 rtx r4 = gen_rtx_REG (SImode, 4);
914 rtx r5 = gen_rtx_REG (SImode, 5);
915 rtx r6 = gen_rtx_REG (SImode, 6);
917 entry_name = get_identifier ("__movmem");
918 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
919 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
920 force_into (XEXP (operands[0], 0), r4);
921 force_into (XEXP (operands[1], 0), r5);
923 /* r6 controls the size of the move. 16 is decremented from it
924 for each 64 bytes moved. Then the negative bit left over is used
925 as an index into a list of move instructions. e.g., a 72 byte move
926 would be set up with size(r6) = 14, for one iteration through the
927 big while loop, and a switch of -2 for the last part. */
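/* As an illustrative check of the formulas below (not a separate code
   path): a 136 byte move has 34 longwords, so final_switch becomes
   16 - (34 % 16) = 14 and while_loop becomes (34 / 16 - 1) * 16 = 16,
   giving r6 = 30; two passes of the big loop copy 128 bytes and the
   remaining -2 selects the final 8 byte tail.  */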
929 final_switch = 16 - ((bytes / 4) % 16);
930 while_loop = ((bytes / 4) / 16 - 1) * 16;
931 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
932 emit_insn (gen_block_lump_real (func_addr_rtx));
933 return 1;
936 return 0;
939 /* Prepare operands for a move define_expand; specifically, one of the
940 operands must be in a register. */
943 prepare_move_operands (rtx operands[], enum machine_mode mode)
945 if ((mode == SImode || mode == DImode)
946 && flag_pic
947 && ! ((mode == Pmode || mode == ptr_mode)
948 && tls_symbolic_operand (operands[1], Pmode) != 0))
950 rtx temp;
951 if (SYMBOLIC_CONST_P (operands[1]))
953 if (GET_CODE (operands[0]) == MEM)
954 operands[1] = force_reg (Pmode, operands[1]);
955 else if (TARGET_SHMEDIA
956 && GET_CODE (operands[1]) == LABEL_REF
957 && target_reg_operand (operands[0], mode))
958 /* It's ok. */;
959 else
961 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
962 operands[1] = legitimize_pic_address (operands[1], mode, temp);
965 else if (GET_CODE (operands[1]) == CONST
966 && GET_CODE (XEXP (operands[1], 0)) == PLUS
967 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
969 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
970 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
971 mode, temp);
972 operands[1] = expand_binop (mode, add_optab, temp,
973 XEXP (XEXP (operands[1], 0), 1),
974 no_new_pseudos ? temp
975 : gen_reg_rtx (Pmode),
976 0, OPTAB_LIB_WIDEN);
980 if (! reload_in_progress && ! reload_completed)
982 /* Copy the source to a register if neither operand is a register. */
983 if (! register_operand (operands[0], mode)
984 && ! sh_register_operand (operands[1], mode))
985 operands[1] = copy_to_mode_reg (mode, operands[1]);
987 if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
989 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
990 except that we can't use that function because it is static. */
991 rtx new = change_address (operands[0], mode, 0);
992 MEM_COPY_ATTRIBUTES (new, operands[0]);
993 operands[0] = new;
996 /* This case can happen while generating code to move the result
997 of a library call to the target. Reject `st r0,@(rX,rY)' because
998 reload will fail to find a spill register for rX, since r0 is already
999 being used for the source. */
1000 else if (refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1001 && GET_CODE (operands[0]) == MEM
1002 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1003 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
1004 operands[1] = copy_to_mode_reg (mode, operands[1]);
1007 if (mode == Pmode || mode == ptr_mode)
1009 rtx op0, op1;
1010 enum tls_model tls_kind;
1012 op0 = operands[0];
1013 op1 = operands[1];
1014 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
1016 rtx tga_op1, tga_ret, tmp, tmp2;
1019 switch (tls_kind)
1021 case TLS_MODEL_GLOBAL_DYNAMIC:
1022 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1023 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1024 op1 = tga_ret;
1025 break;
1027 case TLS_MODEL_LOCAL_DYNAMIC:
1028 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1029 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1031 tmp = gen_reg_rtx (Pmode);
1032 emit_move_insn (tmp, tga_ret);
1034 if (register_operand (op0, Pmode))
1035 tmp2 = op0;
1036 else
1037 tmp2 = gen_reg_rtx (Pmode);
1039 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1040 op1 = tmp2;
1041 break;
1043 case TLS_MODEL_INITIAL_EXEC:
1044 if (! flag_pic)
1045 emit_insn (gen_GOTaddr2picreg ());
1046 tga_op1 = gen_reg_rtx (Pmode);
1047 tmp = gen_sym2GOTTPOFF (op1);
1048 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1049 op1 = tga_op1;
1050 break;
1052 case TLS_MODEL_LOCAL_EXEC:
1053 tmp2 = gen_reg_rtx (Pmode);
1054 emit_insn (gen_load_gbr (tmp2));
1055 tmp = gen_reg_rtx (Pmode);
1056 emit_insn (gen_symTPOFF2reg (tmp, op1));
1058 if (register_operand (op0, Pmode))
1059 op1 = op0;
1060 else
1061 op1 = gen_reg_rtx (Pmode);
1063 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1064 break;
1066 default:
1067 abort ();
1069 operands[1] = op1;
1073 return 0;
1076 /* Prepare the operands for an scc instruction; make sure that the
1077 compare has been done. */
1079 prepare_scc_operands (enum rtx_code code)
1081 rtx t_reg = gen_rtx_REG (SImode, T_REG);
1082 enum rtx_code oldcode = code;
1083 enum machine_mode mode;
1085 /* First need a compare insn. */
1086 switch (code)
1088 case NE:
1089 /* It isn't possible to handle this case. */
1090 abort ();
1091 case LT:
1092 code = GT;
1093 break;
1094 case LE:
1095 code = GE;
1096 break;
1097 case LTU:
1098 code = GTU;
1099 break;
1100 case LEU:
1101 code = GEU;
1102 break;
1103 default:
1104 break;
1106 if (code != oldcode)
1108 rtx tmp = sh_compare_op0;
1109 sh_compare_op0 = sh_compare_op1;
1110 sh_compare_op1 = tmp;
1113 mode = GET_MODE (sh_compare_op0);
1114 if (mode == VOIDmode)
1115 mode = GET_MODE (sh_compare_op1);
1117 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1118 if ((code != EQ && code != NE
1119 && (sh_compare_op1 != const0_rtx
1120 || code == GTU || code == GEU || code == LTU || code == LEU))
1121 || (mode == DImode && sh_compare_op1 != const0_rtx)
1122 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1123 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1125 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1126 (mode == SFmode ? emit_sf_insn : emit_df_insn)
1127 (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
1128 gen_rtx_SET (VOIDmode, t_reg,
1129 gen_rtx_fmt_ee (code, SImode,
1130 sh_compare_op0, sh_compare_op1)),
1131 gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
1132 else
1133 emit_insn (gen_rtx_SET (VOIDmode, t_reg,
1134 gen_rtx_fmt_ee (code, SImode,
1135 sh_compare_op0, sh_compare_op1)));
1137 return t_reg;
1140 /* Called from the md file, set up the operands of a compare instruction. */
1142 void
1143 from_compare (rtx *operands, int code)
1145 enum machine_mode mode = GET_MODE (sh_compare_op0);
1146 rtx insn;
1147 if (mode == VOIDmode)
1148 mode = GET_MODE (sh_compare_op1);
1149 if (code != EQ
1150 || mode == DImode
1151 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1153 /* Force args into regs, since we can't use constants here. */
1154 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1155 if (sh_compare_op1 != const0_rtx
1156 || code == GTU || code == GEU
1157 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1158 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1160 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
1162 from_compare (operands, GT);
1163 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
1165 else
1166 insn = gen_rtx_SET (VOIDmode,
1167 gen_rtx_REG (SImode, T_REG),
1168 gen_rtx_fmt_ee (code, SImode,
1169 sh_compare_op0, sh_compare_op1));
1170 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1172 insn = gen_rtx_PARALLEL (VOIDmode,
1173 gen_rtvec (2, insn,
1174 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
1175 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
1177 else
1178 emit_insn (insn);
1181 /* Functions to output assembly code. */
1183 /* Return a sequence of instructions to perform a DI or DF move.
1185 Since the SH cannot move a DI or DF in one instruction, we have
1186 to take care when we see overlapping source and dest registers. */
1188 const char *
1189 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
1190 enum machine_mode mode)
1192 rtx dst = operands[0];
1193 rtx src = operands[1];
1195 if (GET_CODE (dst) == MEM
1196 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
1197 return "mov.l %T1,%0\n\tmov.l %1,%0";
1199 if (register_operand (dst, mode)
1200 && register_operand (src, mode))
1202 if (REGNO (src) == MACH_REG)
1203 return "sts mach,%S0\n\tsts macl,%R0";
1205 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1206 when mov.d r1,r0 do r1->r0 then r2->r1. */
1208 if (REGNO (src) + 1 == REGNO (dst))
1209 return "mov %T1,%T0\n\tmov %1,%0";
1210 else
1211 return "mov %1,%0\n\tmov %T1,%T0";
1213 else if (GET_CODE (src) == CONST_INT)
1215 if (INTVAL (src) < 0)
1216 output_asm_insn ("mov #-1,%S0", operands);
1217 else
1218 output_asm_insn ("mov #0,%S0", operands);
1220 return "mov %1,%R0";
1222 else if (GET_CODE (src) == MEM)
1224 int ptrreg = -1;
1225 int dreg = REGNO (dst);
1226 rtx inside = XEXP (src, 0);
1228 if (GET_CODE (inside) == REG)
1229 ptrreg = REGNO (inside);
1230 else if (GET_CODE (inside) == SUBREG)
1231 ptrreg = subreg_regno (inside);
1232 else if (GET_CODE (inside) == PLUS)
1234 ptrreg = REGNO (XEXP (inside, 0));
1235 /* ??? A r0+REG address shouldn't be possible here, because it isn't
1236 an offsettable address. Unfortunately, offsettable addresses use
1237 QImode to check the offset, and a QImode offsettable address
1238 requires r0 for the other operand, which is not currently
1239 supported, so we can't use the 'o' constraint.
1240 Thus we must check for and handle r0+REG addresses here.
1241 We punt for now, since this is likely very rare. */
1242 if (GET_CODE (XEXP (inside, 1)) == REG)
1243 abort ();
1245 else if (GET_CODE (inside) == LABEL_REF)
1246 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1247 else if (GET_CODE (inside) == POST_INC)
1248 return "mov.l %1,%0\n\tmov.l %1,%T0";
1249 else
1250 abort ();
1252 /* Work out the safe way to copy. Copy into the second half first. */
1253 if (dreg == ptrreg)
1254 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1257 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1260 /* Print an instruction which would have gone into a delay slot after
1261 another instruction, but couldn't because the other instruction expanded
1262 into a sequence where putting the slot insn at the end wouldn't work. */
1264 static void
1265 print_slot (rtx insn)
1267 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1, NULL);
1269 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
1272 const char *
1273 output_far_jump (rtx insn, rtx op)
1275 struct { rtx lab, reg, op; } this;
1276 rtx braf_base_lab = NULL_RTX;
1277 const char *jump;
1278 int far;
1279 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1280 rtx prev;
1282 this.lab = gen_label_rtx ();
1284 if (TARGET_SH2
1285 && offset >= -32764
1286 && offset - get_attr_length (insn) <= 32766)
1288 far = 0;
1289 jump = "mov.w %O0,%1; braf %1";
1291 else
1293 far = 1;
1294 if (flag_pic)
1296 if (TARGET_SH2)
1297 jump = "mov.l %O0,%1; braf %1";
1298 else
1299 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1301 else
1302 jump = "mov.l %O0,%1; jmp @%1";
1304 /* If we have a scratch register available, use it. */
1305 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1306 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1308 this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1309 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1310 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1311 output_asm_insn (jump, &this.lab);
1312 if (dbr_sequence_length ())
1313 print_slot (final_sequence);
1314 else
1315 output_asm_insn ("nop", 0);
1317 else
1319 /* Output the delay slot insn first if any. */
1320 if (dbr_sequence_length ())
1321 print_slot (final_sequence);
1323 this.reg = gen_rtx_REG (SImode, 13);
1324 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1325 Fortunately, MACL is fixed and call-clobbered, and we never
1326 need its value across jumps, so save r13 in it instead of on
1327 the stack. */
1328 if (TARGET_SH5)
1329 output_asm_insn ("lds r13, macl", 0);
1330 else
1331 output_asm_insn ("mov.l r13,@-r15", 0);
1332 output_asm_insn (jump, &this.lab);
1333 if (TARGET_SH5)
1334 output_asm_insn ("sts macl, r13", 0);
1335 else
1336 output_asm_insn ("mov.l @r15+,r13", 0);
1338 if (far && flag_pic && TARGET_SH2)
1340 braf_base_lab = gen_label_rtx ();
1341 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1342 CODE_LABEL_NUMBER (braf_base_lab));
1344 if (far)
1345 output_asm_insn (".align 2", 0);
1346 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1347 this.op = op;
1348 if (far && flag_pic)
1350 if (TARGET_SH2)
1351 this.lab = braf_base_lab;
1352 output_asm_insn (".long %O2-%O0", &this.lab);
1354 else
1355 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
1356 return "";
1359 /* Local label counter, used for constants in the pool and inside
1360 pattern branches. */
1362 static int lf = 100;
1364 /* Output code for ordinary branches. */
1366 const char *
1367 output_branch (int logic, rtx insn, rtx *operands)
1369 switch (get_attr_length (insn))
1371 case 6:
1372 /* This can happen if filling the delay slot has caused a forward
1373 branch to exceed its range (we could reverse it, but only
1374 when we know we won't overextend other branches; this should
1375 best be handled by relaxation).
1376 It can also happen when other condbranches hoist delay slot insns
1377 from their destinations, thus leading to a code size increase.
1378 But the branch will still be in the range -4092..+4098 bytes. */
1380 if (! TARGET_RELAX)
1382 int label = lf++;
1383 /* The call to print_slot will clobber the operands. */
1384 rtx op0 = operands[0];
1386 /* If the instruction in the delay slot is annulled (true), then
1387 there is no delay slot where we can put it now. The only safe
1388 place for it is after the label. final will do that by default. */
1390 if (final_sequence
1391 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1392 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1394 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
1395 ASSEMBLER_DIALECT ? "/" : ".", label);
1396 print_slot (final_sequence);
1398 else
1399 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
1401 output_asm_insn ("bra\t%l0", &op0);
1402 fprintf (asm_out_file, "\tnop\n");
1403 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1405 return "";
1407 /* When relaxing, handle this like a short branch. The linker
1408 will fix it up if it still doesn't fit after relaxation. */
1409 case 2:
1410 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
1412 /* These are for SH2e, in which we have to account for the
1413 extra nop because of the hardware bug in annulled branches. */
1414 case 8:
1415 if (! TARGET_RELAX)
1417 int label = lf++;
1419 if (final_sequence
1420 && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
1421 abort ();
1422 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
1423 logic ? "f" : "t",
1424 ASSEMBLER_DIALECT ? "/" : ".", label);
1425 fprintf (asm_out_file, "\tnop\n");
1426 output_asm_insn ("bra\t%l0", operands);
1427 fprintf (asm_out_file, "\tnop\n");
1428 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1430 return "";
1432 /* When relaxing, fall through. */
1433 case 4:
1435 char buffer[10];
1437 sprintf (buffer, "b%s%ss\t%%l0",
1438 logic ? "t" : "f",
1439 ASSEMBLER_DIALECT ? "/" : ".");
1440 output_asm_insn (buffer, &operands[0]);
1441 return "nop";
1444 default:
1445 /* There should be no longer branches now - that would
1446 indicate that something has destroyed the branches set
1447 up in machine_dependent_reorg. */
1448 abort ();
1452 const char *
1453 output_branchy_insn (enum rtx_code code, const char *template,
1454 rtx insn, rtx *operands)
1456 rtx next_insn = NEXT_INSN (insn);
1458 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
1460 rtx src = SET_SRC (PATTERN (next_insn));
1461 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
1463 /* Following branch not taken */
1464 operands[9] = gen_label_rtx ();
1465 emit_label_after (operands[9], next_insn);
1466 INSN_ADDRESSES_NEW (operands[9],
1467 INSN_ADDRESSES (INSN_UID (next_insn))
1468 + get_attr_length (next_insn));
1469 return template;
1471 else
1473 int offset = (branch_dest (next_insn)
1474 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
1475 if (offset >= -252 && offset <= 258)
1477 if (GET_CODE (src) == IF_THEN_ELSE)
1478 /* branch_true */
1479 src = XEXP (src, 1);
1480 operands[9] = src;
1481 return template;
1485 operands[9] = gen_label_rtx ();
1486 emit_label_after (operands[9], insn);
1487 INSN_ADDRESSES_NEW (operands[9],
1488 INSN_ADDRESSES (INSN_UID (insn))
1489 + get_attr_length (insn));
1490 return template;
1493 const char *
1494 output_ieee_ccmpeq (rtx insn, rtx *operands)
1496 return output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);
1499 /* Output the start of the assembler file. */
1501 static void
1502 sh_file_start (void)
1504 default_file_start ();
1506 #ifdef SYMBIAN
1507 /* Declare the .directive section before it is used. */
1508 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
1509 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
1510 #endif
1512 if (TARGET_ELF)
1513 /* We need to show the text section with the proper
1514 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
1515 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
1516 will complain. We can teach GAS specifically about the
1517 default attributes for our choice of text section, but
1518 then we would have to change GAS again if/when we change
1519 the text section name. */
1520 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
1521 else
1522 /* Switch to the data section so that the coffsem symbol
1523 isn't in the text section. */
1524 data_section ();
1526 if (TARGET_LITTLE_ENDIAN)
1527 fputs ("\t.little\n", asm_out_file);
1529 if (!TARGET_ELF)
1531 if (TARGET_SHCOMPACT)
1532 fputs ("\t.mode\tSHcompact\n", asm_out_file);
1533 else if (TARGET_SHMEDIA)
1534 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1535 TARGET_SHMEDIA64 ? 64 : 32);
1539 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
1541 static bool
1542 unspec_caller_rtx_p (rtx pat)
1544 switch (GET_CODE (pat))
1546 case CONST:
1547 return unspec_caller_rtx_p (XEXP (pat, 0));
1548 case PLUS:
1549 case MINUS:
1550 if (unspec_caller_rtx_p (XEXP (pat, 0)))
1551 return true;
1552 return unspec_caller_rtx_p (XEXP (pat, 1));
1553 case UNSPEC:
1554 if (XINT (pat, 1) == UNSPEC_CALLER)
1555 return true;
1556 default:
1557 break;
1560 return false;
1563 /* Indicate that INSN cannot be duplicated. This is true for insns
1564 that generate a unique label.
1566 static bool
1567 sh_cannot_copy_insn_p (rtx insn)
1569 rtx pat;
1571 if (!reload_completed || !flag_pic)
1572 return false;
1574 if (GET_CODE (insn) != INSN)
1575 return false;
1576 if (asm_noperands (insn) >= 0)
1577 return false;
1579 pat = PATTERN (insn);
1580 if (GET_CODE (pat) != SET)
1581 return false;
1582 pat = SET_SRC (pat);
1584 if (unspec_caller_rtx_p (pat))
1585 return true;
1587 return false;
1590 /* Actual number of instructions used to make a shift by N. */
1591 static const char ashiftrt_insns[] =
1592 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
1594 /* Left shift and logical right shift are the same. */
1595 static const char shift_insns[] =
1596 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1598 /* Individual shift amounts needed to get the above length sequences.
1599 One bit right shifts clobber the T bit, so when possible, put one bit
1600 shifts in the middle of the sequence, so the ends are eligible for
1601 branch delay slots. */
1602 static const short shift_amounts[32][5] = {
1603 {0}, {1}, {2}, {2, 1},
1604 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
1605 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1606 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
1607 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1608 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1609 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1610 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
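/* For example, a shift by 13 uses shift_amounts[13] = {8, 2, 1, 2}:
   four partial shifts totalling 13, matching shift_insns[13] == 4,
   with the one-bit shift (which clobbers the T bit) kept in the middle
   as noted above.  */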
1612 /* Likewise, but for shift amounts < 16, up to three highmost bits
1613 might be clobbered. This is typically used when combined with some
1614 kind of sign or zero extension. */
1616 static const char ext_shift_insns[] =
1617 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1619 static const short ext_shift_amounts[32][4] = {
1620 {0}, {1}, {2}, {2, 1},
1621 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
1622 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1623 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
1624 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1625 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1626 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1627 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1629 /* Assuming we have a value that has been sign-extended by at least one bit,
1630 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
1631 to shift it by N without data loss, and quicker than by other means? */
1632 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
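/* In other words, EXT_SHIFT_SIGNED is true only for n == 7 and n == 15;
   their ext_shift_amounts sequences ({8, -1} and {16, -1}) end in a
   one-bit right shift that can be turned into an arithmetic shift.  */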
1634 /* This is used in length attributes in sh.md to help compute the length
1635 of arbitrary constant shift instructions. */
1638 shift_insns_rtx (rtx insn)
1640 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1641 int shift_count = INTVAL (XEXP (set_src, 1));
1642 enum rtx_code shift_code = GET_CODE (set_src);
1644 switch (shift_code)
1646 case ASHIFTRT:
1647 return ashiftrt_insns[shift_count];
1648 case LSHIFTRT:
1649 case ASHIFT:
1650 return shift_insns[shift_count];
1651 default:
1652 abort ();
1656 /* Return the cost of a shift. */
1658 static inline int
1659 shiftcosts (rtx x)
1661 int value;
1663 if (TARGET_SHMEDIA)
1664 return 1;
1666 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
1668 if (GET_MODE (x) == DImode
1669 && GET_CODE (XEXP (x, 1)) == CONST_INT
1670 && INTVAL (XEXP (x, 1)) == 1)
1671 return 2;
1673 /* Everything else is invalid, because there is no pattern for it. */
1674 return 10000;
1676 /* If shifting by a non-constant amount, this will be expensive. */
1677 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1678 return SH_DYNAMIC_SHIFT_COST;
1680 value = INTVAL (XEXP (x, 1));
1682 /* Otherwise, return the true cost in instructions. */
1683 if (GET_CODE (x) == ASHIFTRT)
1685 int cost = ashiftrt_insns[value];
1686 /* If SH3, then we put the constant in a reg and use shad. */
1687 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1688 cost = 1 + SH_DYNAMIC_SHIFT_COST;
1689 return cost;
1691 else
1692 return shift_insns[value];
1695 /* Return the cost of an AND operation. */
1697 static inline int
1698 andcosts (rtx x)
1700 int i;
1702 /* ANDing with a register is a single-cycle `and' instruction. */
1703 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1704 return 1;
1706 i = INTVAL (XEXP (x, 1));
1708 if (TARGET_SHMEDIA)
1710 if ((GET_CODE (XEXP (x, 1)) == CONST_INT
1711 && CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1712 || EXTRA_CONSTRAINT_C16 (XEXP (x, 1)))
1713 return 1;
1714 else
1715 return 2;
1718 /* These constants are single cycle extu.[bw] instructions. */
1719 if (i == 0xff || i == 0xffff)
1720 return 1;
1721 /* Constants that can be used in an and immediate instruction in a single
1722 cycle, but this requires r0, so make it a little more expensive. */
1723 if (CONST_OK_FOR_K08 (i))
1724 return 2;
1725 /* Constants that can be loaded with a mov immediate and an and.
1726 This case is probably unnecessary. */
1727 if (CONST_OK_FOR_I08 (i))
1728 return 2;
1729 /* Any other constant requires a 2 cycle pc-relative load plus an and.
1730 This case is probably unnecessary. */
1731 return 3;
1734 /* Return the cost of an addition or a subtraction. */
1736 static inline int
1737 addsubcosts (rtx x)
1739 /* Adding a register is a single cycle insn. */
1740 if (GET_CODE (XEXP (x, 1)) == REG
1741 || GET_CODE (XEXP (x, 1)) == SUBREG)
1742 return 1;
1744 /* Likewise for small constants. */
1745 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1746 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
1747 return 1;
1749 if (TARGET_SHMEDIA)
1750 switch (GET_CODE (XEXP (x, 1)))
1752 case CONST:
1753 case LABEL_REF:
1754 case SYMBOL_REF:
1755 return TARGET_SHMEDIA64 ? 5 : 3;
1757 case CONST_INT:
1758 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1759 return 2;
1760 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
1761 return 3;
1762 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
1763 return 4;
1765 /* Fall through. */
1766 default:
1767 return 5;
1770 /* Any other constant requires a 2 cycle pc-relative load plus an
1771 addition. */
1772 return 3;
1775 /* Return the cost of a multiply. */
1776 static inline int
1777 multcosts (rtx x ATTRIBUTE_UNUSED)
1779 if (TARGET_SHMEDIA)
1780 return 3;
1782 if (TARGET_SH2)
1784 /* We have a mul insn, so we can never take more than the mul and the
1785 read of the mac reg, but count more because of the latency and extra
1786 reg usage. */
1787 if (TARGET_SMALLCODE)
1788 return 2;
1789 return 3;
1792 /* If we're aiming at small code, then just count the number of
1793 insns in a multiply call sequence. */
1794 if (TARGET_SMALLCODE)
1795 return 5;
1797 /* Otherwise count all the insns in the routine we'd be calling too. */
1798 return 20;
1801 /* Compute a (partial) cost for rtx X. Return true if the complete
1802 cost has been computed, and false if subexpressions should be
1803 scanned. In either case, *TOTAL contains the cost result. */
1805 static bool
1806 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
1808 switch (code)
1810 case CONST_INT:
1811 if (TARGET_SHMEDIA)
1813 if (INTVAL (x) == 0)
1814 *total = 0;
1815 else if (outer_code == AND && and_operand ((x), DImode))
1816 *total = 0;
1817 else if ((outer_code == IOR || outer_code == XOR
1818 || outer_code == PLUS)
1819 && CONST_OK_FOR_I10 (INTVAL (x)))
1820 *total = 0;
1821 else if (CONST_OK_FOR_I16 (INTVAL (x)))
1822 *total = COSTS_N_INSNS (outer_code != SET);
1823 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
1824 *total = COSTS_N_INSNS (2);
1825 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
1826 *total = COSTS_N_INSNS (3);
1827 else
1828 *total = COSTS_N_INSNS (4);
1829 return true;
1831 if (CONST_OK_FOR_I08 (INTVAL (x)))
1832 *total = 0;
1833 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
1834 && CONST_OK_FOR_K08 (INTVAL (x)))
1835 *total = 1;
1836 else
1837 *total = 8;
1838 return true;
1840 case CONST:
1841 case LABEL_REF:
1842 case SYMBOL_REF:
1843 if (TARGET_SHMEDIA64)
1844 *total = COSTS_N_INSNS (4);
1845 else if (TARGET_SHMEDIA32)
1846 *total = COSTS_N_INSNS (2);
1847 else
1848 *total = 5;
1849 return true;
1851 case CONST_DOUBLE:
1852 if (TARGET_SHMEDIA)
1853 *total = COSTS_N_INSNS (4);
1854 else
1855 *total = 10;
1856 return true;
1858 case PLUS:
1859 *total = COSTS_N_INSNS (addsubcosts (x));
1860 return true;
1862 case AND:
1863 *total = COSTS_N_INSNS (andcosts (x));
1864 return true;
1866 case MULT:
1867 *total = COSTS_N_INSNS (multcosts (x));
1868 return true;
1870 case ASHIFT:
1871 case ASHIFTRT:
1872 case LSHIFTRT:
1873 *total = COSTS_N_INSNS (shiftcosts (x));
1874 return true;
1876 case DIV:
1877 case UDIV:
1878 case MOD:
1879 case UMOD:
1880 *total = COSTS_N_INSNS (20);
1881 return true;
1883 case FLOAT:
1884 case FIX:
1885 *total = 100;
1886 return true;
1888 default:
1889 return false;
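/* As a rough worked example of the CONST_INT costs above (non-SHmedia
   path): a constant such as 5 satisfies CONST_OK_FOR_I08 and is costed
   at 0, since it fits a mov #imm,Rn, while a constant such as 300 used
   as a SET source fails both the I08 check and the K08 logical-operand
   check and is costed at 8, steering it towards a constant pool load.  */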
1893 /* Compute the cost of an address. For the SH, all valid addresses are
1894 the same cost. Use a slightly higher cost for reg + reg addressing,
1895 since it increases pressure on r0. */
1897 static int
1898 sh_address_cost (rtx X)
1900 return (GET_CODE (X) == PLUS
1901 && ! CONSTANT_P (XEXP (X, 1))
1902 && ! TARGET_SHMEDIA ? 1 : 0);
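/* For instance, the indexed form @(r0,rn) is the reg + reg address that is
   penalized above, while @(disp,rn) and plain @rn stay at cost 0; the
   indexed form is made costlier because it ties up r0.  */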
1905 /* Code to expand a shift. */
1907 void
1908 gen_ashift (int type, int n, rtx reg)
1910 /* Negative values here come from the shift_amounts array. */
1911 if (n < 0)
1913 if (type == ASHIFT)
1914 type = LSHIFTRT;
1915 else
1916 type = ASHIFT;
1917 n = -n;
1920 switch (type)
1922 case ASHIFTRT:
1923 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
1924 break;
1925 case LSHIFTRT:
1926 if (n == 1)
1927 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
1928 else
1929 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
1930 break;
1931 case ASHIFT:
1932 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
1933 break;
1937 /* Same for HImode */
1939 void
1940 gen_ashift_hi (int type, int n, rtx reg)
1942 /* Negative values here come from the shift_amounts array. */
1943 if (n < 0)
1945 if (type == ASHIFT)
1946 type = LSHIFTRT;
1947 else
1948 type = ASHIFT;
1949 n = -n;
1952 switch (type)
1954 case ASHIFTRT:
1955 case LSHIFTRT:
1956 /* We don't have HImode right shift operations because using the
1957 ordinary 32 bit shift instructions for that doesn't generate proper
1958 zero/sign extension.
1959 gen_ashift_hi is only called in contexts where we know that the
1960 sign extension works out correctly. */
1962 int offset = 0;
1963 if (GET_CODE (reg) == SUBREG)
1965 offset = SUBREG_BYTE (reg);
1966 reg = SUBREG_REG (reg);
1968 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
1969 break;
1971 case ASHIFT:
1972 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
1973 break;
1977 /* Output RTL to split a constant shift into its component SH constant
1978 shift instructions. */
1980 void
1981 gen_shifty_op (int code, rtx *operands)
1983 int value = INTVAL (operands[2]);
1984 int max, i;
1986 /* Truncate the shift count in case it is out of bounds. */
1987 value = value & 0x1f;
1989 if (value == 31)
1991 if (code == LSHIFTRT)
1993 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
1994 emit_insn (gen_movt (operands[0]));
1995 return;
1997 else if (code == ASHIFT)
1999 /* There is a two instruction sequence for 31 bit left shifts,
2000 but it requires r0. */
2001 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
2003 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
2004 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
2005 return;
2009 else if (value == 0)
2011 /* This can happen when not optimizing. We must output something here
2012 to prevent the compiler from aborting in final.c after the try_split
2013 call. */
2014 emit_insn (gen_nop ());
2015 return;
2018 max = shift_insns[value];
2019 for (i = 0; i < max; i++)
2020 gen_ashift (code, shift_amounts[value][i], operands[0]);
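/* For example, a constant left shift by 10 is expected to be split via
   shift_amounts[10] into an 8-bit and a 2-bit shift (shll8 followed by
   shll2), since the SH immediate shift instructions only handle counts
   of 1, 2, 8 and 16.  */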
2023 /* Same as above, but optimized for values where the topmost bits don't
2024 matter. */
2026 void
2027 gen_shifty_hi_op (int code, rtx *operands)
2029 int value = INTVAL (operands[2]);
2030 int max, i;
2031 void (*gen_fun) (int, int, rtx);
2033 /* This operation is used by and_shl for SImode values with a few
2034 high bits known to be cleared. */
2035 value &= 31;
2036 if (value == 0)
2038 emit_insn (gen_nop ());
2039 return;
2042 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2043 if (code == ASHIFT)
2045 max = ext_shift_insns[value];
2046 for (i = 0; i < max; i++)
2047 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2049 else
2050 /* When shifting right, emit the shifts in reverse order, so that
2051 solitary negative values come first. */
2052 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2053 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2056 /* Output RTL for an arithmetic right shift. */
2058 /* ??? Rewrite to use super-optimizer sequences. */
2060 int
2061 expand_ashiftrt (rtx *operands)
2063 rtx sym;
2064 rtx wrk;
2065 char func[18];
2066 tree func_name;
2067 int value;
2069 if (TARGET_SH3)
2071 if (GET_CODE (operands[2]) != CONST_INT)
2073 rtx count = copy_to_mode_reg (SImode, operands[2]);
2074 emit_insn (gen_negsi2 (count, count));
2075 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2076 return 1;
2078 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2079 > 1 + SH_DYNAMIC_SHIFT_COST)
2081 rtx count
2082 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2083 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2084 return 1;
2087 if (GET_CODE (operands[2]) != CONST_INT)
2088 return 0;
2090 value = INTVAL (operands[2]) & 31;
2092 if (value == 31)
2094 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2095 return 1;
2097 else if (value >= 16 && value <= 19)
2099 wrk = gen_reg_rtx (SImode);
2100 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2101 value -= 16;
2102 while (value--)
2103 gen_ashift (ASHIFTRT, 1, wrk);
2104 emit_move_insn (operands[0], wrk);
2105 return 1;
2107 /* Expand a short sequence inline; for longer ones, call a magic routine. */
2108 else if (value <= 5)
2110 wrk = gen_reg_rtx (SImode);
2111 emit_move_insn (wrk, operands[1]);
2112 while (value--)
2113 gen_ashift (ASHIFTRT, 1, wrk);
2114 emit_move_insn (operands[0], wrk);
2115 return 1;
2118 wrk = gen_reg_rtx (Pmode);
2120 /* Load the value into an arg reg and call a helper. */
2121 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2122 sprintf (func, "__ashiftrt_r4_%d", value);
2123 func_name = get_identifier (func);
2124 sym = function_symbol (IDENTIFIER_POINTER (func_name));
2125 emit_move_insn (wrk, sym);
2126 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2127 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
2128 return 1;
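/* For instance, on targets without dynamic shifts an arithmetic right
   shift by 24 matches none of the inline cases above, so the value is
   moved into r4, the helper __ashiftrt_r4_24 is called, and the shifted
   result is read back from r4.  */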
2131 int
2132 sh_dynamicalize_shift_p (rtx count)
2134 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
2137 /* Try to find a good way to implement the combiner pattern
2138 [(set (match_operand:SI 0 "register_operand" "r")
2139 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2140 (match_operand:SI 2 "const_int_operand" "n"))
2141 (match_operand:SI 3 "const_int_operand" "n"))) .
2142 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2143 return 0 for simple right / left or left/right shift combination.
2144 return 1 for a combination of shifts with zero_extend.
2145 return 2 for a combination of shifts with an AND that needs r0.
2146 return 3 for a combination of shifts with an AND that needs an extra
2147 scratch register, when the three highmost bits of the AND mask are clear.
2148 return 4 for a combination of shifts with an AND that needs an extra
2149 scratch register, when any of the three highmost bits of the AND mask
2150 is set.
2151 If ATTRP is set, store an initial right shift width in ATTRP[0],
2152 and the instruction length in ATTRP[1] . These values are not valid
2153 when returning 0.
2154 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2155 shift_amounts for the last shift value that is to be used before the
2156 sign extend. */
2157 int
2158 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2160 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2161 int left = INTVAL (left_rtx), right;
2162 int best = 0;
2163 int cost, best_cost = 10000;
2164 int best_right = 0, best_len = 0;
2165 int i;
2166 int can_ext;
2168 if (left < 0 || left > 31)
2169 return 0;
2170 if (GET_CODE (mask_rtx) == CONST_INT)
2171 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2172 else
2173 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2174 /* Can this be expressed as a right shift / left shift pair? */
2175 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2176 right = exact_log2 (lsb);
2177 mask2 = ~(mask + lsb - 1);
2178 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2179 /* mask has no zeroes other than trailing zeroes <==> ! mask2 */
2180 if (! mask2)
2181 best_cost = shift_insns[right] + shift_insns[right + left];
2182 /* mask has no trailing zeroes <==> ! right */
2183 else if (! right && mask2 == ~(lsb2 - 1))
2185 int late_right = exact_log2 (lsb2);
2186 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2188 /* Try to use zero extend. */
2189 if (mask2 == ~(lsb2 - 1))
2191 int width, first;
2193 for (width = 8; width <= 16; width += 8)
2195 /* Can we zero-extend right away? */
2196 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2198 cost
2199 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2200 if (cost < best_cost)
2202 best = 1;
2203 best_cost = cost;
2204 best_right = right;
2205 best_len = cost;
2206 if (attrp)
2207 attrp[2] = -1;
2209 continue;
2211 /* ??? Could try to put zero extend into initial right shift,
2212 or even shift a bit left before the right shift. */
2213 /* Determine value of first part of left shift, to get to the
2214 zero extend cut-off point. */
2215 first = width - exact_log2 (lsb2) + right;
2216 if (first >= 0 && right + left - first >= 0)
2218 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2219 + ext_shift_insns[right + left - first];
2220 if (cost < best_cost)
2222 best = 1;
2223 best_cost = cost;
2224 best_right = right;
2225 best_len = cost;
2226 if (attrp)
2227 attrp[2] = first;
2232 /* Try to use r0 AND pattern */
2233 for (i = 0; i <= 2; i++)
2235 if (i > right)
2236 break;
2237 if (! CONST_OK_FOR_K08 (mask >> i))
2238 continue;
2239 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2240 if (cost < best_cost)
2242 best = 2;
2243 best_cost = cost;
2244 best_right = i;
2245 best_len = cost - 1;
2248 /* Try to use a scratch register to hold the AND operand. */
2249 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2250 for (i = 0; i <= 2; i++)
2252 if (i > right)
2253 break;
2254 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2255 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2256 if (cost < best_cost)
2258 best = 4 - can_ext;
2259 best_cost = cost;
2260 best_right = i;
2261 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2265 if (attrp)
2267 attrp[0] = best_right;
2268 attrp[1] = best_len;
2270 return best;
2273 /* This is used in length attributes of the unnamed instructions
2274 corresponding to shl_and_kind return values of 1 and 2. */
2275 int
2276 shl_and_length (rtx insn)
2278 rtx set_src, left_rtx, mask_rtx;
2279 int attributes[3];
2281 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2282 left_rtx = XEXP (XEXP (set_src, 0), 1);
2283 mask_rtx = XEXP (set_src, 1);
2284 shl_and_kind (left_rtx, mask_rtx, attributes);
2285 return attributes[1];
2288 /* This is used in length attribute of the and_shl_scratch instruction. */
2290 int
2291 shl_and_scr_length (rtx insn)
2293 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2294 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2295 rtx op = XEXP (set_src, 0);
2296 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2297 op = XEXP (XEXP (op, 0), 0);
2298 return len + shift_insns[INTVAL (XEXP (op, 1))];
2301 /* Generate rtl for instructions for which shl_and_kind advised a particular
2302 method of generating them, i.e. returned a nonzero value. */
2304 int
2305 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2307 int attributes[3];
2308 unsigned HOST_WIDE_INT mask;
2309 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2310 int right, total_shift;
2311 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2313 right = attributes[0];
2314 total_shift = INTVAL (left_rtx) + right;
2315 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2316 switch (kind)
2318 default:
2319 return -1;
2320 case 1:
2322 int first = attributes[2];
2323 rtx operands[3];
2325 if (first < 0)
2327 emit_insn ((mask << right) <= 0xff
2328 ? gen_zero_extendqisi2 (dest,
2329 gen_lowpart (QImode, source))
2330 : gen_zero_extendhisi2 (dest,
2331 gen_lowpart (HImode, source)));
2332 source = dest;
2334 if (source != dest)
2335 emit_insn (gen_movsi (dest, source));
2336 operands[0] = dest;
2337 if (right)
2339 operands[2] = GEN_INT (right);
2340 gen_shifty_hi_op (LSHIFTRT, operands);
2342 if (first > 0)
2344 operands[2] = GEN_INT (first);
2345 gen_shifty_hi_op (ASHIFT, operands);
2346 total_shift -= first;
2347 mask <<= first;
2349 if (first >= 0)
2350 emit_insn (mask <= 0xff
2351 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
2352 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2353 if (total_shift > 0)
2355 operands[2] = GEN_INT (total_shift);
2356 gen_shifty_hi_op (ASHIFT, operands);
2358 break;
2360 case 4:
2361 shift_gen_fun = gen_shifty_op;
2362 case 3:
2363 /* If the topmost bit that matters is set, set the topmost bits
2364 that don't matter. This way, we might be able to get a shorter
2365 signed constant. */
2366 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
2367 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
2368 case 2:
2369 /* Don't expand fine-grained when combining, because that will
2370 make the pattern fail. */
2371 if (currently_expanding_to_rtl
2372 || reload_in_progress || reload_completed)
2374 rtx operands[3];
2376 /* Cases 3 and 4 should be handled by this split
2377 only while combining */
2378 if (kind > 2)
2379 abort ();
2380 if (right)
2382 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2383 source = dest;
2385 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
2386 if (total_shift)
2388 operands[0] = dest;
2389 operands[1] = dest;
2390 operands[2] = GEN_INT (total_shift);
2391 shift_gen_fun (ASHIFT, operands);
2393 break;
2395 else
2397 int neg = 0;
2398 if (kind != 4 && total_shift < 16)
2400 neg = -ext_shift_amounts[total_shift][1];
2401 if (neg > 0)
2402 neg -= ext_shift_amounts[total_shift][2];
2403 else
2404 neg = 0;
2406 emit_insn (gen_and_shl_scratch (dest, source,
2407 GEN_INT (right),
2408 GEN_INT (mask),
2409 GEN_INT (total_shift + neg),
2410 GEN_INT (neg)));
2411 emit_insn (gen_movsi (dest, dest));
2412 break;
2415 return 0;
2418 /* Try to find a good way to implement the combiner pattern
2419 [(set (match_operand:SI 0 "register_operand" "=r")
2420 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2421 (match_operand:SI 2 "const_int_operand" "n")
2422 (match_operand:SI 3 "const_int_operand" "n")
2423 (const_int 0)))
2424 (clobber (reg:SI T_REG))]
2425 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2426 return 0 for simple left / right shift combination.
2427 return 1 for left shift / 8 bit sign extend / left shift.
2428 return 2 for left shift / 16 bit sign extend / left shift.
2429 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2430 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2431 return 5 for left shift / 16 bit sign extend / right shift
2432 return 6 for < 8 bit sign extend / left shift.
2433 return 7 for < 8 bit sign extend / left shift / single right shift.
2434 If COSTP is nonzero, assign the calculated cost to *COSTP. */
2436 int
2437 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
2439 int left, size, insize, ext;
2440 int cost = 0, best_cost;
2441 int kind;
2443 left = INTVAL (left_rtx);
2444 size = INTVAL (size_rtx);
2445 insize = size - left;
2446 if (insize <= 0)
2447 abort ();
2448 /* Default to left / right shift. */
2449 kind = 0;
2450 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
2451 if (size <= 16)
2453 /* 16 bit shift / sign extend / 16 bit shift */
2454 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2455 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2456 below, by alternative 3 or something even better. */
2457 if (cost < best_cost)
2459 kind = 5;
2460 best_cost = cost;
2463 /* Try a plain sign extend between two shifts. */
2464 for (ext = 16; ext >= insize; ext -= 8)
2466 if (ext <= size)
2468 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2469 if (cost < best_cost)
2471 kind = ext / (unsigned) 8;
2472 best_cost = cost;
2475 /* Check if we can do a sloppy shift with a final signed shift
2476 restoring the sign. */
2477 if (EXT_SHIFT_SIGNED (size - ext))
2478 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2479 /* If not, maybe it's still cheaper to do the second shift sloppy,
2480 and do a final sign extend? */
2481 else if (size <= 16)
2482 cost = ext_shift_insns[ext - insize] + 1
2483 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2484 else
2485 continue;
2486 if (cost < best_cost)
2488 kind = ext / (unsigned) 8 + 2;
2489 best_cost = cost;
2492 /* Check if we can sign extend in r0 */
2493 if (insize < 8)
2495 cost = 3 + shift_insns[left];
2496 if (cost < best_cost)
2498 kind = 6;
2499 best_cost = cost;
2501 /* Try the same with a final signed shift. */
2502 if (left < 31)
2504 cost = 3 + ext_shift_insns[left + 1] + 1;
2505 if (cost < best_cost)
2507 kind = 7;
2508 best_cost = cost;
2512 if (TARGET_SH3)
2514 /* Try to use a dynamic shift. */
2515 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2516 if (cost < best_cost)
2518 kind = 0;
2519 best_cost = cost;
2522 if (costp)
2523 *costp = cost;
2524 return kind;
2527 /* Function to be used in the length attribute of the instructions
2528 implementing this pattern. */
2530 int
2531 shl_sext_length (rtx insn)
2533 rtx set_src, left_rtx, size_rtx;
2534 int cost;
2536 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2537 left_rtx = XEXP (XEXP (set_src, 0), 1);
2538 size_rtx = XEXP (set_src, 1);
2539 shl_sext_kind (left_rtx, size_rtx, &cost);
2540 return cost;
2543 /* Generate rtl for this pattern */
2545 int
2546 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
2548 int kind;
2549 int left, size, insize, cost;
2550 rtx operands[3];
2552 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2553 left = INTVAL (left_rtx);
2554 size = INTVAL (size_rtx);
2555 insize = size - left;
2556 switch (kind)
2558 case 1:
2559 case 2:
2560 case 3:
2561 case 4:
2563 int ext = kind & 1 ? 8 : 16;
2564 int shift2 = size - ext;
2566 /* Don't expand fine-grained when combining, because that will
2567 make the pattern fail. */
2568 if (! currently_expanding_to_rtl
2569 && ! reload_in_progress && ! reload_completed)
2571 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2572 emit_insn (gen_movsi (dest, source));
2573 break;
2575 if (dest != source)
2576 emit_insn (gen_movsi (dest, source));
2577 operands[0] = dest;
2578 if (ext - insize)
2580 operands[2] = GEN_INT (ext - insize);
2581 gen_shifty_hi_op (ASHIFT, operands);
2583 emit_insn (kind & 1
2584 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2585 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2586 if (kind <= 2)
2588 if (shift2)
2590 operands[2] = GEN_INT (shift2);
2591 gen_shifty_op (ASHIFT, operands);
2594 else
2596 if (shift2 > 0)
2598 if (EXT_SHIFT_SIGNED (shift2))
2600 operands[2] = GEN_INT (shift2 + 1);
2601 gen_shifty_op (ASHIFT, operands);
2602 operands[2] = const1_rtx;
2603 gen_shifty_op (ASHIFTRT, operands);
2604 break;
2606 operands[2] = GEN_INT (shift2);
2607 gen_shifty_hi_op (ASHIFT, operands);
2609 else if (shift2)
2611 operands[2] = GEN_INT (-shift2);
2612 gen_shifty_hi_op (LSHIFTRT, operands);
2614 emit_insn (size <= 8
2615 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2616 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2618 break;
2620 case 5:
2622 int i = 16 - size;
2623 if (! currently_expanding_to_rtl
2624 && ! reload_in_progress && ! reload_completed)
2625 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2626 else
2628 operands[0] = dest;
2629 operands[2] = GEN_INT (16 - insize);
2630 gen_shifty_hi_op (ASHIFT, operands);
2631 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2633 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2634 while (--i >= 0)
2635 gen_ashift (ASHIFTRT, 1, dest);
2636 break;
2638 case 6:
2639 case 7:
2640 /* Don't expand fine-grained when combining, because that will
2641 make the pattern fail. */
2642 if (! currently_expanding_to_rtl
2643 && ! reload_in_progress && ! reload_completed)
2645 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2646 emit_insn (gen_movsi (dest, source));
2647 break;
2649 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2650 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2651 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2652 operands[0] = dest;
2653 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2654 gen_shifty_op (ASHIFT, operands);
2655 if (kind == 7)
2656 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
2657 break;
2658 default:
2659 return -1;
2661 return 0;
2664 /* Prefix a symbol_ref name with "datalabel". */
2666 rtx
2667 gen_datalabel_ref (rtx sym)
2669 if (GET_CODE (sym) == LABEL_REF)
2670 return gen_rtx_CONST (GET_MODE (sym),
2671 gen_rtx_UNSPEC (GET_MODE (sym),
2672 gen_rtvec (1, sym),
2673 UNSPEC_DATALABEL));
2675 if (GET_CODE (sym) != SYMBOL_REF)
2676 abort ();
2678 return sym;
2682 /* The SH cannot load a large constant into a register; constants have to
2683 come from a pc relative load. The reference of a pc relative load
2684 instruction must be less than 1k in front of the instruction. This
2685 means that we often have to dump a constant inside a function, and
2686 generate code to branch around it.
2688 It is important to minimize this, since the branches will slow things
2689 down and make things bigger.
2691 Worst case code looks like:
2693 mov.l L1,rn
2694 bra L2
2695 nop
2696 align
2697 L1: .long value
2698 L2:
2701 mov.l L3,rn
2702 bra L4
2703 nop
2704 align
2705 L3: .long value
2706 L4:
2709 We fix this by performing a scan before scheduling, which notices which
2710 instructions need to have their operands fetched from the constant table
2711 and builds the table.
2713 The algorithm is:
2715 scan, find an instruction which needs a pcrel move. Look forward, find the
2716 last barrier which is within MAX_COUNT bytes of the requirement.
2717 If there isn't one, make one. Process all the instructions between
2718 the find and the barrier.
2720 In the above example, we can tell that L3 is within 1k of L1, so
2721 the first move can be shrunk from the 3 insn+constant sequence into
2722 just 1 insn, and the constant moved to L3 to make:
2724 mov.l L1,rn
2726 mov.l L3,rn
2727 bra L4
2728 nop
2729 align
2730 L3:.long value
2731 L4:.long value
2733 Then the second move becomes the target for the shortening process. */
2735 typedef struct
2737 rtx value; /* Value in table. */
2738 rtx label; /* Label of value. */
2739 rtx wend; /* End of window. */
2740 enum machine_mode mode; /* Mode of value. */
2742 /* True if this constant is accessed as part of a post-increment
2743 sequence. Note that HImode constants are never accessed in this way. */
2744 bool part_of_sequence_p;
2745 } pool_node;
2747 /* The maximum number of constants that can fit into one pool, since
2748 the pc relative range is 0...1020 bytes and constants are at least 4
2749 bytes long. */
2751 #define MAX_POOL_SIZE (1020/4)
2752 static pool_node pool_vector[MAX_POOL_SIZE];
2753 static int pool_size;
2754 static rtx pool_window_label;
2755 static int pool_window_last;
2757 /* ??? If we need a constant in HImode which is the truncated value of a
2758 constant we need in SImode, we could combine the two entries thus saving
2759 two bytes. Is this common enough to be worth the effort of implementing
2760 it? */
2762 /* ??? This stuff should be done at the same time that we shorten branches.
2763 As it is now, we must assume that all branches are the maximum size, and
2764 this causes us to almost always output constant pools sooner than
2765 necessary. */
2767 /* Add a constant to the pool and return its label. */
2769 static rtx
2770 add_constant (rtx x, enum machine_mode mode, rtx last_value)
2772 int i;
2773 rtx lab, new, ref, newref;
2775 /* First see if we've already got it. */
2776 for (i = 0; i < pool_size; i++)
2778 if (x->code == pool_vector[i].value->code
2779 && mode == pool_vector[i].mode)
2781 if (x->code == CODE_LABEL)
2783 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
2784 continue;
2786 if (rtx_equal_p (x, pool_vector[i].value))
2788 lab = new = 0;
2789 if (! last_value
2790 || ! i
2791 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
2793 new = gen_label_rtx ();
2794 LABEL_REFS (new) = pool_vector[i].label;
2795 pool_vector[i].label = lab = new;
2797 if (lab && pool_window_label)
2799 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2800 ref = pool_vector[pool_window_last].wend;
2801 LABEL_NEXTREF (newref) = ref;
2802 pool_vector[pool_window_last].wend = newref;
2804 if (new)
2805 pool_window_label = new;
2806 pool_window_last = i;
2807 return lab;
2812 /* Need a new one. */
2813 pool_vector[pool_size].value = x;
2814 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
2816 lab = 0;
2817 pool_vector[pool_size - 1].part_of_sequence_p = true;
2819 else
2820 lab = gen_label_rtx ();
2821 pool_vector[pool_size].mode = mode;
2822 pool_vector[pool_size].label = lab;
2823 pool_vector[pool_size].wend = NULL_RTX;
2824 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
2825 if (lab && pool_window_label)
2827 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2828 ref = pool_vector[pool_window_last].wend;
2829 LABEL_NEXTREF (newref) = ref;
2830 pool_vector[pool_window_last].wend = newref;
2832 if (lab)
2833 pool_window_label = lab;
2834 pool_window_last = pool_size;
2835 pool_size++;
2836 return lab;
2839 /* Output the literal table. START, if nonzero, is the first instruction
2840 this table is needed for, and also indicates that there is at least one
2841 casesi_worker_2 instruction; we have to emit the operand3 labels from
2842 these insns at a 4-byte aligned position. BARRIER is the barrier
2843 after which we are to place the table. */
2845 static void
2846 dump_table (rtx start, rtx barrier)
2848 rtx scan = barrier;
2849 int i;
2850 int need_align = 1;
2851 rtx lab, ref;
2852 int have_df = 0;
2854 /* Do two passes, first time dump out the HI sized constants. */
2856 for (i = 0; i < pool_size; i++)
2858 pool_node *p = &pool_vector[i];
2860 if (p->mode == HImode)
2862 if (need_align)
2864 scan = emit_insn_after (gen_align_2 (), scan);
2865 need_align = 0;
2867 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2868 scan = emit_label_after (lab, scan);
2869 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
2870 scan);
2871 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2873 lab = XEXP (ref, 0);
2874 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2877 else if (p->mode == DFmode)
2878 have_df = 1;
2881 need_align = 1;
2883 if (start)
2885 scan = emit_insn_after (gen_align_4 (), scan);
2886 need_align = 0;
2887 for (; start != barrier; start = NEXT_INSN (start))
2888 if (GET_CODE (start) == INSN
2889 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
2891 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
2892 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
2894 scan = emit_label_after (lab, scan);
2897 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
2899 rtx align_insn = NULL_RTX;
2901 scan = emit_label_after (gen_label_rtx (), scan);
2902 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2903 need_align = 0;
2905 for (i = 0; i < pool_size; i++)
2907 pool_node *p = &pool_vector[i];
2909 switch (p->mode)
2911 case HImode:
2912 break;
2913 case SImode:
2914 case SFmode:
2915 if (align_insn && !p->part_of_sequence_p)
2917 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2918 emit_label_before (lab, align_insn);
2919 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
2920 align_insn);
2921 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2923 lab = XEXP (ref, 0);
2924 emit_insn_before (gen_consttable_window_end (lab),
2925 align_insn);
2927 delete_insn (align_insn);
2928 align_insn = NULL_RTX;
2929 continue;
2931 else
2933 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2934 scan = emit_label_after (lab, scan);
2935 scan = emit_insn_after (gen_consttable_4 (p->value,
2936 const0_rtx), scan);
2937 need_align = ! need_align;
2939 break;
2940 case DFmode:
2941 if (need_align)
2943 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2944 align_insn = scan;
2945 need_align = 0;
2947 case DImode:
2948 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2949 scan = emit_label_after (lab, scan);
2950 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2951 scan);
2952 break;
2953 default:
2954 abort ();
2955 break;
2958 if (p->mode != HImode)
2960 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2962 lab = XEXP (ref, 0);
2963 scan = emit_insn_after (gen_consttable_window_end (lab),
2964 scan);
2969 pool_size = 0;
2972 for (i = 0; i < pool_size; i++)
2974 pool_node *p = &pool_vector[i];
2976 switch (p->mode)
2978 case HImode:
2979 break;
2980 case SImode:
2981 case SFmode:
2982 if (need_align)
2984 need_align = 0;
2985 scan = emit_label_after (gen_label_rtx (), scan);
2986 scan = emit_insn_after (gen_align_4 (), scan);
2988 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2989 scan = emit_label_after (lab, scan);
2990 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
2991 scan);
2992 break;
2993 case DFmode:
2994 case DImode:
2995 if (need_align)
2997 need_align = 0;
2998 scan = emit_label_after (gen_label_rtx (), scan);
2999 scan = emit_insn_after (gen_align_4 (), scan);
3001 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3002 scan = emit_label_after (lab, scan);
3003 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3004 scan);
3005 break;
3006 default:
3007 abort ();
3008 break;
3011 if (p->mode != HImode)
3013 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3015 lab = XEXP (ref, 0);
3016 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3021 scan = emit_insn_after (gen_consttable_end (), scan);
3022 scan = emit_barrier_after (scan);
3023 pool_size = 0;
3024 pool_window_label = NULL_RTX;
3025 pool_window_last = 0;
3028 /* Return nonzero if constant would be an ok source for a
3029 mov.w instead of a mov.l. */
3031 static int
3032 hi_const (rtx src)
3034 return (GET_CODE (src) == CONST_INT
3035 && INTVAL (src) >= -32768
3036 && INTVAL (src) <= 32767);
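/* So, for example, 1000 or -3 would qualify for a mov.w load, while
   0x12345 is outside the signed 16-bit range and needs a full mov.l.  */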
3039 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3041 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
3042 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
3043 need to fix it if the input value is CONST_OK_FOR_I08. */
3045 static int
3046 broken_move (rtx insn)
3048 if (GET_CODE (insn) == INSN)
3050 rtx pat = PATTERN (insn);
3051 if (GET_CODE (pat) == PARALLEL)
3052 pat = XVECEXP (pat, 0, 0);
3053 if (GET_CODE (pat) == SET
3054 /* We can load any 8 bit value if we don't care what the high
3055 order bits end up as. */
3056 && GET_MODE (SET_DEST (pat)) != QImode
3057 && (CONSTANT_P (SET_SRC (pat))
3058 /* Match mova_const. */
3059 || (GET_CODE (SET_SRC (pat)) == UNSPEC
3060 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3061 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
3062 && ! (TARGET_SH2E
3063 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3064 && (fp_zero_operand (SET_SRC (pat))
3065 || fp_one_operand (SET_SRC (pat)))
3066 /* ??? If this is a -m4 or -m4-single compilation, in general
3067 we don't know the current setting of fpscr, so disable fldi.
3068 There is an exception if this was a register-register move
3069 before reload - and hence it was ascertained that we have
3070 single precision setting - and in a post-reload optimization
3071 we changed this to do a constant load. In that case
3072 we don't have an r0 clobber, hence we must use fldi. */
3073 && (! TARGET_SH4 || TARGET_FMOVD
3074 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3075 == SCRATCH))
3076 && GET_CODE (SET_DEST (pat)) == REG
3077 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3078 && ! (TARGET_SH2A
3079 && GET_MODE (SET_DEST (pat)) == SImode
3080 && GET_CODE (SET_SRC (pat)) == CONST_INT
3081 && CONST_OK_FOR_I20 (INTVAL (SET_SRC (pat))))
3082 && (GET_CODE (SET_SRC (pat)) != CONST_INT
3083 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
3084 return 1;
3087 return 0;
3090 static int
3091 mova_p (rtx insn)
3093 return (GET_CODE (insn) == INSN
3094 && GET_CODE (PATTERN (insn)) == SET
3095 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3096 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3097 /* Don't match mova_const. */
3098 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
3101 /* Fix up a mova from a switch that went out of range. */
3102 static void
3103 fixup_mova (rtx mova)
3105 if (! flag_pic)
3107 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3108 INSN_CODE (mova) = -1;
3110 else
3112 rtx worker = mova;
3113 rtx lab = gen_label_rtx ();
3114 rtx wpat, wpat0, wpat1, wsrc, diff;
3118 worker = NEXT_INSN (worker);
3119 if (! worker
3120 || GET_CODE (worker) == CODE_LABEL
3121 || GET_CODE (worker) == JUMP_INSN)
3122 abort ();
3123 } while (recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3124 wpat = PATTERN (worker);
3125 wpat0 = XVECEXP (wpat, 0, 0);
3126 wpat1 = XVECEXP (wpat, 0, 1);
3127 wsrc = SET_SRC (wpat0);
3128 PATTERN (worker) = (gen_casesi_worker_2
3129 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3130 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3131 XEXP (wpat1, 0)));
3132 INSN_CODE (worker) = -1;
3133 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3134 gen_rtx_LABEL_REF (Pmode, lab));
3135 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3136 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3137 INSN_CODE (mova) = -1;
3141 /* Find the last barrier from insn FROM which is close enough to hold the
3142 constant pool. If we can't find one, then create one near the end of
3143 the range. */
3145 static rtx
3146 find_barrier (int num_mova, rtx mova, rtx from)
3148 int count_si = 0;
3149 int count_hi = 0;
3150 int found_hi = 0;
3151 int found_si = 0;
3152 int found_di = 0;
3153 int hi_align = 2;
3154 int si_align = 2;
3155 int leading_mova = num_mova;
3156 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3157 int si_limit;
3158 int hi_limit;
3160 /* For HImode: range is 510, add 4 because pc counts from address of
3161 second instruction after this one, subtract 2 for the jump instruction
3162 that we may need to emit before the table, subtract 2 for the instruction
3163 that fills the jump delay slot (in very rare cases, reorg will take an
3164 instruction from after the constant pool or will leave the delay slot
3165 empty). This gives 510.
3166 For SImode: range is 1020, add 4 because pc counts from address of
3167 second instruction after this one, subtract 2 in case pc is 2 byte
3168 aligned, subtract 2 for the jump instruction that we may need to emit
3169 before the table, subtract 2 for the instruction that fills the jump
3170 delay slot. This gives 1018. */
3172 /* The branch will always be shortened now that the reference address for
3173 forward branches is the successor address, thus we need no longer make
3174 adjustments to the [sh]i_limit for -O0. */
3176 si_limit = 1018;
3177 hi_limit = 510;
3179 while (from && count_si < si_limit && count_hi < hi_limit)
3181 int inc = get_attr_length (from);
3182 int new_align = 1;
3184 if (GET_CODE (from) == CODE_LABEL)
3186 if (optimize)
3187 new_align = 1 << label_to_alignment (from);
3188 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3189 new_align = 1 << barrier_align (from);
3190 else
3191 new_align = 1;
3192 inc = 0;
3195 if (GET_CODE (from) == BARRIER)
3198 found_barrier = from;
3200 /* If we are at the end of the function, or in front of an alignment
3201 instruction, we need not insert an extra alignment. We prefer
3202 this kind of barrier. */
3203 if (barrier_align (from) > 2)
3204 good_barrier = from;
3207 if (broken_move (from))
3209 rtx pat, src, dst;
3210 enum machine_mode mode;
3212 pat = PATTERN (from);
3213 if (GET_CODE (pat) == PARALLEL)
3214 pat = XVECEXP (pat, 0, 0);
3215 src = SET_SRC (pat);
3216 dst = SET_DEST (pat);
3217 mode = GET_MODE (dst);
3219 /* We must explicitly check the mode, because sometimes the
3220 front end will generate code to load unsigned constants into
3221 HImode targets without properly sign extending them. */
3222 if (mode == HImode
3223 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3225 found_hi += 2;
3226 /* We put the short constants before the long constants, so
3227 we must count the length of short constants in the range
3228 for the long constants. */
3229 /* ??? This isn't optimal, but is easy to do. */
3230 si_limit -= 2;
3232 else
3234 /* We dump DF/DI constants before SF/SI ones, because
3235 the limit is the same, but the alignment requirements
3236 are higher. We may waste up to 4 additional bytes
3237 for alignment, and the DF/DI constant may have
3238 another SF/SI constant placed before it. */
3239 if (TARGET_SHCOMPACT
3240 && ! found_di
3241 && (mode == DFmode || mode == DImode))
3243 found_di = 1;
3244 si_limit -= 8;
3246 while (si_align > 2 && found_si + si_align - 2 > count_si)
3247 si_align >>= 1;
3248 if (found_si > count_si)
3249 count_si = found_si;
3250 found_si += GET_MODE_SIZE (mode);
3251 if (num_mova)
3252 si_limit -= GET_MODE_SIZE (mode);
3255 /* See the code in machine_dependent_reorg, which has a similar if
3256 statement that generates a new mova insn in many cases. */
3257 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
3258 inc += 2;
3261 if (mova_p (from))
3263 if (! num_mova++)
3265 leading_mova = 0;
3266 mova = from;
3267 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
3269 if (found_si > count_si)
3270 count_si = found_si;
3272 else if (GET_CODE (from) == JUMP_INSN
3273 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3274 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3276 if (num_mova)
3277 num_mova--;
3278 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3280 /* We have just passed the barrier in front of the
3281 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3282 the ADDR_DIFF_VEC is accessed as data, just like our pool
3283 constants, this is a good opportunity to accommodate what
3284 we have gathered so far.
3285 If we waited any longer, we could end up at a barrier in
3286 front of code, which gives worse cache usage for separated
3287 instruction / data caches. */
3288 good_barrier = found_barrier;
3289 break;
3291 else
3293 rtx body = PATTERN (from);
3294 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
3297 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3298 else if (GET_CODE (from) == JUMP_INSN
3299 && ! TARGET_SH2
3300 && ! TARGET_SMALLCODE)
3301 new_align = 4;
3303 if (found_si)
3305 count_si += inc;
3306 if (new_align > si_align)
3308 si_limit -= (count_si - 1) & (new_align - si_align);
3309 si_align = new_align;
3311 count_si = (count_si + new_align - 1) & -new_align;
3313 if (found_hi)
3315 count_hi += inc;
3316 if (new_align > hi_align)
3318 hi_limit -= (count_hi - 1) & (new_align - hi_align);
3319 hi_align = new_align;
3321 count_hi = (count_hi + new_align - 1) & -new_align;
3323 from = NEXT_INSN (from);
3326 if (num_mova)
3328 if (leading_mova)
3330 /* Try as we might, the leading mova is out of range. Change
3331 it into a load (which will become a pcload) and retry. */
3332 fixup_mova (mova);
3333 return find_barrier (0, 0, mova);
3335 else
3337 /* Insert the constant pool table before the mova instruction,
3338 to prevent the mova label reference from going out of range. */
3339 from = mova;
3340 good_barrier = found_barrier = barrier_before_mova;
3344 if (found_barrier)
3346 if (good_barrier && next_real_insn (found_barrier))
3347 found_barrier = good_barrier;
3349 else
3351 /* We didn't find a barrier in time to dump our stuff,
3352 so we'll make one. */
3353 rtx label = gen_label_rtx ();
3355 /* If we exceeded the range, then we must back up over the last
3356 instruction we looked at. Otherwise, we just need to undo the
3357 NEXT_INSN at the end of the loop. */
3358 if (count_hi > hi_limit || count_si > si_limit)
3359 from = PREV_INSN (PREV_INSN (from));
3360 else
3361 from = PREV_INSN (from);
3363 /* Walk back to be just before any jump or label.
3364 Putting it before a label reduces the number of times the branch
3365 around the constant pool table will be hit. Putting it before
3366 a jump makes it more likely that the bra delay slot will be
3367 filled. */
3368 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
3369 || GET_CODE (from) == CODE_LABEL)
3370 from = PREV_INSN (from);
3372 from = emit_jump_insn_after (gen_jump (label), from);
3373 JUMP_LABEL (from) = label;
3374 LABEL_NUSES (label) = 1;
3375 found_barrier = emit_barrier_after (from);
3376 emit_label_after (label, found_barrier);
3379 return found_barrier;
3382 /* If the instruction INSN is implemented by a special function, and we can
3383 positively find the register that is used to call the sfunc, and this
3384 register is not used anywhere else in this instruction - except as the
3385 destination of a set, return this register; else, return 0. */
3386 rtx
3387 sfunc_uses_reg (rtx insn)
3389 int i;
3390 rtx pattern, part, reg_part, reg;
3392 if (GET_CODE (insn) != INSN)
3393 return 0;
3394 pattern = PATTERN (insn);
3395 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
3396 return 0;
3398 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3400 part = XVECEXP (pattern, 0, i);
3401 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
3402 reg_part = part;
3404 if (! reg_part)
3405 return 0;
3406 reg = XEXP (reg_part, 0);
3407 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
3409 part = XVECEXP (pattern, 0, i);
3410 if (part == reg_part || GET_CODE (part) == CLOBBER)
3411 continue;
3412 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
3413 && GET_CODE (SET_DEST (part)) == REG)
3414 ? SET_SRC (part) : part)))
3415 return 0;
3417 return reg;
3420 /* See if the only way in which INSN uses REG is by calling it, or by
3421 setting it while calling it. Set *SET to a SET rtx if the register
3422 is set by INSN. */
3424 static int
3425 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
3427 rtx pattern, reg2;
3429 *set = NULL_RTX;
3431 reg2 = sfunc_uses_reg (insn);
3432 if (reg2 && REGNO (reg2) == REGNO (reg))
3434 pattern = single_set (insn);
3435 if (pattern
3436 && GET_CODE (SET_DEST (pattern)) == REG
3437 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3438 *set = pattern;
3439 return 0;
3441 if (GET_CODE (insn) != CALL_INSN)
3443 /* We don't use rtx_equal_p because we don't care if the mode is
3444 different. */
3445 pattern = single_set (insn);
3446 if (pattern
3447 && GET_CODE (SET_DEST (pattern)) == REG
3448 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3450 rtx par, part;
3451 int i;
3453 *set = pattern;
3454 par = PATTERN (insn);
3455 if (GET_CODE (par) == PARALLEL)
3456 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
3458 part = XVECEXP (par, 0, i);
3459 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
3460 return 1;
3462 return reg_mentioned_p (reg, SET_SRC (pattern));
3465 return 1;
3468 pattern = PATTERN (insn);
3470 if (GET_CODE (pattern) == PARALLEL)
3472 int i;
3474 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3475 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
3476 return 1;
3477 pattern = XVECEXP (pattern, 0, 0);
3480 if (GET_CODE (pattern) == SET)
3482 if (reg_mentioned_p (reg, SET_DEST (pattern)))
3484 /* We don't use rtx_equal_p, because we don't care if the
3485 mode is different. */
3486 if (GET_CODE (SET_DEST (pattern)) != REG
3487 || REGNO (reg) != REGNO (SET_DEST (pattern)))
3488 return 1;
3490 *set = pattern;
3493 pattern = SET_SRC (pattern);
3496 if (GET_CODE (pattern) != CALL
3497 || GET_CODE (XEXP (pattern, 0)) != MEM
3498 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
3499 return 1;
3501 return 0;
3504 /* Given a X, a pattern of an insn or a part of it, return a mask of used
3505 general registers. Bits 0..15 mean that the respective registers
3506 are used as inputs in the instruction. Bits 16..31 mean that the
3507 registers 0..15, respectively, are used as outputs, or are clobbered.
3508 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
3510 regs_used (rtx x, int is_dest)
3512 enum rtx_code code;
3513 const char *fmt;
3514 int i, used = 0;
3516 if (! x)
3517 return used;
3518 code = GET_CODE (x);
3519 switch (code)
3521 case REG:
3522 if (REGNO (x) < 16)
3523 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3524 << (REGNO (x) + is_dest));
3525 return 0;
3526 case SUBREG:
3528 rtx y = SUBREG_REG (x);
3530 if (GET_CODE (y) != REG)
3531 break;
3532 if (REGNO (y) < 16)
3533 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3534 << (REGNO (y) +
3535 subreg_regno_offset (REGNO (y),
3536 GET_MODE (y),
3537 SUBREG_BYTE (x),
3538 GET_MODE (x)) + is_dest));
3539 return 0;
3541 case SET:
3542 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
3543 case RETURN:
3544 /* If there was a return value, it must have been indicated with USE. */
3545 return 0x00ffff00;
3546 case CLOBBER:
3547 is_dest = 1;
3548 break;
3549 case MEM:
3550 is_dest = 0;
3551 break;
3552 case CALL:
3553 used |= 0x00ff00f0;
3554 break;
3555 default:
3556 break;
3559 fmt = GET_RTX_FORMAT (code);
3561 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3563 if (fmt[i] == 'E')
3565 register int j;
3566 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3567 used |= regs_used (XVECEXP (x, i, j), is_dest);
3569 else if (fmt[i] == 'e')
3570 used |= regs_used (XEXP (x, i), is_dest);
3572 return used;
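/* As a worked example (assuming SImode occupies a single hard register
   here): for (set (reg:SI 2) (reg:SI 3)), the source contributes bit 3
   and the destination, shifted by IS_DEST == 16, contributes bit 18,
   giving a mask of 0x00040008.  */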
3575 /* Create an instruction that prevents redirection of a conditional branch
3576 to the destination of the JUMP with address ADDR.
3577 If the branch needs to be implemented as an indirect jump, try to find
3578 a scratch register for it.
3579 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3580 If any preceding insn that doesn't fit into a delay slot is good enough,
3581 pass 1. Pass 2 if a definite blocking insn is needed.
3582 -1 is used internally to avoid deep recursion.
3583 If a blocking instruction is made or recognized, return it. */
3585 static rtx
3586 gen_block_redirect (rtx jump, int addr, int need_block)
3588 int dead = 0;
3589 rtx prev = prev_nonnote_insn (jump);
3590 rtx dest;
3592 /* First, check if we already have an instruction that satisfies our need. */
3593 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3595 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3596 return prev;
3597 if (GET_CODE (PATTERN (prev)) == USE
3598 || GET_CODE (PATTERN (prev)) == CLOBBER
3599 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3600 prev = jump;
3601 else if ((need_block &= ~1) < 0)
3602 return prev;
3603 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
3604 need_block = 0;
3606 if (GET_CODE (PATTERN (jump)) == RETURN)
3608 if (! need_block)
3609 return prev;
3610 /* Reorg even does nasty things with return insns that cause branches
3611 to go out of range - see find_end_label and callers. */
3612 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
3614 /* We can't use JUMP_LABEL here because it might be undefined
3615 when not optimizing. */
3616 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
3617 /* If the branch is out of range, try to find a scratch register for it. */
3618 if (optimize
3619 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3620 > 4092 + 4098))
3622 rtx scan;
3623 /* Don't look for the stack pointer as a scratch register,
3624 it would cause trouble if an interrupt occurred. */
3625 unsigned try = 0x7fff, used;
3626 int jump_left = flag_expensive_optimizations + 1;
3628 /* It is likely that the most recent eligible instruction is wanted for
3629 the delay slot. Therefore, find out which registers it uses, and
3630 try to avoid using them. */
3632 for (scan = jump; (scan = PREV_INSN (scan)); )
3634 enum rtx_code code;
3636 if (INSN_DELETED_P (scan))
3637 continue;
3638 code = GET_CODE (scan);
3639 if (code == CODE_LABEL || code == JUMP_INSN)
3640 break;
3641 if (code == INSN
3642 && GET_CODE (PATTERN (scan)) != USE
3643 && GET_CODE (PATTERN (scan)) != CLOBBER
3644 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
3646 try &= ~regs_used (PATTERN (scan), 0);
3647 break;
3650 for (used = dead = 0, scan = JUMP_LABEL (jump);
3651 (scan = NEXT_INSN (scan)); )
3653 enum rtx_code code;
3655 if (INSN_DELETED_P (scan))
3656 continue;
3657 code = GET_CODE (scan);
3658 if (INSN_P (scan))
3660 used |= regs_used (PATTERN (scan), 0);
3661 if (code == CALL_INSN)
3662 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
3663 dead |= (used >> 16) & ~used;
3664 if (dead & try)
3666 dead &= try;
3667 break;
3669 if (code == JUMP_INSN)
3671 if (jump_left-- && simplejump_p (scan))
3672 scan = JUMP_LABEL (scan);
3673 else
3674 break;
3678 /* Mask out the stack pointer again, in case it was
3679 the only 'free' register we have found. */
3680 dead &= 0x7fff;
3682 /* If the immediate destination is still in range, check for possible
3683 threading with a jump beyond the delay slot insn.
3684 Don't check if we are called recursively; the jump has been or will be
3685 checked in a different invocation then. */
3687 else if (optimize && need_block >= 0)
3689 rtx next = next_active_insn (next_active_insn (dest));
3690 if (next && GET_CODE (next) == JUMP_INSN
3691 && GET_CODE (PATTERN (next)) == SET
3692 && recog_memoized (next) == CODE_FOR_jump_compact)
3694 dest = JUMP_LABEL (next);
3695 if (dest
3696 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3697 > 4092 + 4098))
3698 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
3702 if (dead)
3704 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
3706 /* It would be nice if we could convert the jump into an indirect
3707 jump / far branch right now, and thus expose all constituent
3708 instructions to further optimization. However, reorg uses
3709 simplejump_p to determine if there is an unconditional jump where
3710 it should try to schedule instructions from the target of the
3711 branch; simplejump_p fails for indirect jumps even if they have
3712 a JUMP_LABEL. */
3713 rtx insn = emit_insn_before (gen_indirect_jump_scratch
3714 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
3715 , jump);
3716 /* ??? We would like this to have the scope of the jump, but that
3717 scope will change when a delay slot insn of an inner scope is added.
3718 Hence, after delay slot scheduling, we'll have to expect
3719 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
3720 the jump. */
3722 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
3723 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
3724 return insn;
3726 else if (need_block)
3727 /* We can't use JUMP_LABEL here because it might be undefined
3728 when not optimizing. */
3729 return emit_insn_before (gen_block_branch_redirect
3730 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
3731 , jump);
3732 return prev;
3735 #define CONDJUMP_MIN -252
3736 #define CONDJUMP_MAX 262
3737 struct far_branch
3739 /* A label (to be placed) in front of the jump
3740 that jumps to our ultimate destination. */
3741 rtx near_label;
3742 /* Where we are going to insert it if we cannot move the jump any farther,
3743 or the jump itself if we have picked up an existing jump. */
3744 rtx insert_place;
3745 /* The ultimate destination. */
3746 rtx far_label;
3747 struct far_branch *prev;
3748 /* If the branch has already been created, its address;
3749 else the address of its first prospective user. */
3750 int address;
3753 static void gen_far_branch (struct far_branch *);
3754 enum mdep_reorg_phase_e mdep_reorg_phase;
3755 static void
3756 gen_far_branch (struct far_branch *bp)
3758 rtx insn = bp->insert_place;
3759 rtx jump;
3760 rtx label = gen_label_rtx ();
3762 emit_label_after (label, insn);
3763 if (bp->far_label)
3765 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
3766 LABEL_NUSES (bp->far_label)++;
3768 else
3769 jump = emit_jump_insn_after (gen_return (), insn);
3770 /* Emit a barrier so that reorg knows that any following instructions
3771 are not reachable via a fall-through path.
3772 But don't do this when not optimizing, since we wouldn't suppress the
3773 alignment for the barrier then, and could end up with out-of-range
3774 pc-relative loads. */
3775 if (optimize)
3776 emit_barrier_after (jump);
3777 emit_label_after (bp->near_label, insn);
3778 JUMP_LABEL (jump) = bp->far_label;
3779 if (! invert_jump (insn, label, 1))
3780 abort ();
3781 /* If we are branching around a jump (rather than a return), prevent
3782 reorg from using an insn from the jump target as the delay slot insn -
3783 when reorg did this, it pessimized code (we'd rather hide the delay slot)
3784 and it could cause branches to go out of range. */
3785 if (bp->far_label)
3786 (emit_insn_after
3787 (gen_stuff_delay_slot
3788 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
3789 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
3790 insn));
3791 /* Prevent reorg from undoing our splits. */
3792 gen_block_redirect (jump, bp->address += 2, 2);
3795 /* Fix up ADDR_DIFF_VECs. */
3796 void
3797 fixup_addr_diff_vecs (rtx first)
3799 rtx insn;
3801 for (insn = first; insn; insn = NEXT_INSN (insn))
3803 rtx vec_lab, pat, prev, prevpat, x, braf_label;
3805 if (GET_CODE (insn) != JUMP_INSN
3806 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
3807 continue;
3808 pat = PATTERN (insn);
3809 vec_lab = XEXP (XEXP (pat, 0), 0);
3811 /* Search the matching casesi_jump_2. */
3812 for (prev = vec_lab; ; prev = PREV_INSN (prev))
3814 if (GET_CODE (prev) != JUMP_INSN)
3815 continue;
3816 prevpat = PATTERN (prev);
3817 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
3818 continue;
3819 x = XVECEXP (prevpat, 0, 1);
3820 if (GET_CODE (x) != USE)
3821 continue;
3822 x = XEXP (x, 0);
3823 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
3824 break;
3826 /* FIXME: This is a bug in the optimizer, but it seems harmless
3827 to just avoid panicking. */
3828 if (!prev)
3829 continue;
3831 /* Emit the reference label of the braf where it belongs, right after
3832 the casesi_jump_2 (i.e. braf). */
3833 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
3834 emit_label_after (braf_label, prev);
3836 /* Fix up the ADDR_DIFF_VEC to be relative
3837 to the reference address of the braf. */
3838 XEXP (XEXP (pat, 0), 0) = braf_label;
3842 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
3843 a barrier. Return the base 2 logarithm of the desired alignment. */
3844 int
3845 barrier_align (rtx barrier_or_label)
3847 rtx next = next_real_insn (barrier_or_label), pat, prev;
3848 int slot, credit, jump_to_next = 0;
3850 if (! next)
3851 return 0;
3853 pat = PATTERN (next);
3855 if (GET_CODE (pat) == ADDR_DIFF_VEC)
3856 return 2;
3858 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
3859 /* This is a barrier in front of a constant table. */
3860 return 0;
3862 prev = prev_real_insn (barrier_or_label);
3863 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
3865 pat = PATTERN (prev);
3866 /* If this is a very small table, we want to keep the alignment after
3867 the table to the minimum for proper code alignment. */
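/* A rough illustration (assuming CACHE_LOG were 4, i.e. 16-byte cache
lines): a two-entry HImode table occupies 4 bytes, which is
<= 1 << (CACHE_LOG - 2) == 4, so the code after it gets only the minimal
alignment returned below (2^1 == 2 bytes on non-SHmedia, 2^2 == 4 on
SHmedia); a larger table falls back to align_jumps_log. */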
3868 return ((TARGET_SMALLCODE
3869 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
3870 <= (unsigned) 1 << (CACHE_LOG - 2)))
3871 ? 1 << TARGET_SHMEDIA : align_jumps_log);
3874 if (TARGET_SMALLCODE)
3875 return 0;
3877 if (! TARGET_SH2 || ! optimize)
3878 return align_jumps_log;
3880 /* When fixing up pcloads, a constant table might be inserted just before
3881 the basic block that ends with the barrier. Thus, we can't trust the
3882 instruction lengths before that. */
3883 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
3885 /* Check if there is an immediately preceding branch to the insn beyond
3886 the barrier. We must weigh the cost of discarding useful information
3887 from the current cache line when executing this branch and there is
3888 an alignment, against that of fetching unneeded insns in front of the
3889 branch target when there is no alignment. */
3891 /* There are two delay_slot cases to consider. One is the simple case
3892 where the preceding branch is to the insn beyond the barrier (simple
3893 delay slot filling), and the other is where the preceding branch has
3894 a delay slot that is a duplicate of the insn after the barrier
3895 (fill_eager_delay_slots) and the branch is to the insn after the insn
3896 after the barrier. */
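/* Concretely (with CACHE_LOG assumed to be 4 purely for illustration):
CREDIT starts at (1 << 2) + 2 == 6 bytes; each preceding insn charged in
the loop below shrinks it, and the alignment is only dropped (return 0)
when a branch to just past the barrier is found while enough credit
remains. */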
3898 /* PREV is presumed to be the JUMP_INSN for the barrier under
3899 investigation. Skip to the insn before it. */
3900 prev = prev_real_insn (prev);
3902 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
3903 credit >= 0 && prev && GET_CODE (prev) == INSN;
3904 prev = prev_real_insn (prev))
3906 jump_to_next = 0;
3907 if (GET_CODE (PATTERN (prev)) == USE
3908 || GET_CODE (PATTERN (prev)) == CLOBBER)
3909 continue;
3910 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
3912 prev = XVECEXP (PATTERN (prev), 0, 1);
3913 if (INSN_UID (prev) == INSN_UID (next))
3915 /* Delay slot was filled with insn at jump target. */
3916 jump_to_next = 1;
3917 continue;
3921 if (slot &&
3922 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3923 slot = 0;
3924 credit -= get_attr_length (prev);
3926 if (prev
3927 && GET_CODE (prev) == JUMP_INSN
3928 && JUMP_LABEL (prev))
3930 rtx x;
3931 if (jump_to_next
3932 || next_real_insn (JUMP_LABEL (prev)) == next
3933 /* If relax_delay_slots() decides NEXT was redundant
3934 with some previous instruction, it will have
3935 redirected PREV's jump to the following insn. */
3936 || JUMP_LABEL (prev) == next_nonnote_insn (next)
3937 /* There is no upper bound on redundant instructions
3938 that might have been skipped, but we must not put an
3939 alignment where none had been before. */
3940 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
3941 (INSN_P (x)
3942 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
3943 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
3944 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
3946 rtx pat = PATTERN (prev);
3947 if (GET_CODE (pat) == PARALLEL)
3948 pat = XVECEXP (pat, 0, 0);
3949 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
3950 return 0;
3955 return align_jumps_log;
3958 /* If we are inside a phony loop, almost any kind of label can turn up as the
3959 first one in the loop. Aligning a braf label causes incorrect switch
3960 destination addresses; we can detect braf labels because they are
3961 followed by a BARRIER.
3962 Applying loop alignment to small constant or switch tables is a waste
3963 of space, so we suppress this too. */
3964 int
3965 sh_loop_align (rtx label)
3967 rtx next = label;
3970 next = next_nonnote_insn (next);
3971 while (next && GET_CODE (next) == CODE_LABEL);
3973 if (! next
3974 || ! INSN_P (next)
3975 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
3976 || recog_memoized (next) == CODE_FOR_consttable_2)
3977 return 0;
3979 return align_loops_log;
3982 /* Do a final pass over the function, just before delayed branch
3983 scheduling. */
3985 static void
3986 sh_reorg (void)
3988 rtx first, insn, mova = NULL_RTX;
3989 int num_mova;
3990 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
3991 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
3993 first = get_insns ();
3995 /* We must split call insns before introducing `mova's. If we're
3996 optimizing, they'll have already been split. Otherwise, make
3997 sure we don't split them too late. */
3998 if (! optimize)
3999 split_all_insns_noflow ();
4001 if (TARGET_SHMEDIA)
4002 return;
4004 /* If relaxing, generate pseudo-ops to associate function calls with
4005 the symbols they call. It does no harm to not generate these
4006 pseudo-ops. However, when we can generate them, it enables the
4007 linker to potentially relax the jsr to a bsr, and eliminate the
4008 register load and, possibly, the constant pool entry. */
4010 mdep_reorg_phase = SH_INSERT_USES_LABELS;
4011 if (TARGET_RELAX)
4013 /* Remove all REG_LABEL notes. We want to use them for our own
4014 purposes. This works because none of the remaining passes
4015 need to look at them.
4017 ??? But it may break in the future. We should use a machine
4018 dependent REG_NOTE, or some other approach entirely. */
4019 for (insn = first; insn; insn = NEXT_INSN (insn))
4021 if (INSN_P (insn))
4023 rtx note;
4025 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
4026 remove_note (insn, note);
4030 for (insn = first; insn; insn = NEXT_INSN (insn))
4032 rtx pattern, reg, link, set, scan, dies, label;
4033 int rescan = 0, foundinsn = 0;
4035 if (GET_CODE (insn) == CALL_INSN)
4037 pattern = PATTERN (insn);
4039 if (GET_CODE (pattern) == PARALLEL)
4040 pattern = XVECEXP (pattern, 0, 0);
4041 if (GET_CODE (pattern) == SET)
4042 pattern = SET_SRC (pattern);
4044 if (GET_CODE (pattern) != CALL
4045 || GET_CODE (XEXP (pattern, 0)) != MEM)
4046 continue;
4048 reg = XEXP (XEXP (pattern, 0), 0);
4050 else
4052 reg = sfunc_uses_reg (insn);
4053 if (! reg)
4054 continue;
4057 if (GET_CODE (reg) != REG)
4058 continue;
4060 /* This is a function call via REG. If the only uses of REG
4061 between the time that it is set and the time that it dies
4062 are in function calls, then we can associate all the
4063 function calls with the setting of REG. */
4065 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
4067 if (REG_NOTE_KIND (link) != 0)
4068 continue;
4069 set = single_set (XEXP (link, 0));
4070 if (set && rtx_equal_p (reg, SET_DEST (set)))
4072 link = XEXP (link, 0);
4073 break;
4077 if (! link)
4079 /* ??? Sometimes global register allocation will have
4080 deleted the insn pointed to by LOG_LINKS. Try
4081 scanning backward to find where the register is set. */
4082 for (scan = PREV_INSN (insn);
4083 scan && GET_CODE (scan) != CODE_LABEL;
4084 scan = PREV_INSN (scan))
4086 if (! INSN_P (scan))
4087 continue;
4089 if (! reg_mentioned_p (reg, scan))
4090 continue;
4092 if (noncall_uses_reg (reg, scan, &set))
4093 break;
4095 if (set)
4097 link = scan;
4098 break;
4103 if (! link)
4104 continue;
4106 /* The register is set at LINK. */
4108 /* We can only optimize the function call if the register is
4109 being set to a symbol. In theory, we could sometimes
4110 optimize calls to a constant location, but the assembler
4111 and linker do not support that at present. */
4112 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4113 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4114 continue;
4116 /* Scan forward from LINK to the place where REG dies, and
4117 make sure that the only insns which use REG are
4118 themselves function calls. */
4120 /* ??? This doesn't work for call targets that were allocated
4121 by reload, since there may not be a REG_DEAD note for the
4122 register. */
4124 dies = NULL_RTX;
4125 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4127 rtx scanset;
4129 /* Don't try to trace forward past a CODE_LABEL if we haven't
4130 seen INSN yet. Ordinarily, we will only find the setting insn
4131 in LOG_LINKS if it is in the same basic block. However,
4132 cross-jumping can insert code labels in between the load and
4133 the call, and can result in situations where a single call
4134 insn may have two targets depending on where we came from. */
4136 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4137 break;
4139 if (! INSN_P (scan))
4140 continue;
4142 /* Don't try to trace forward past a JUMP. To optimize
4143 safely, we would have to check that all the
4144 instructions at the jump destination did not use REG. */
4146 if (GET_CODE (scan) == JUMP_INSN)
4147 break;
4149 if (! reg_mentioned_p (reg, scan))
4150 continue;
4152 if (noncall_uses_reg (reg, scan, &scanset))
4153 break;
4155 if (scan == insn)
4156 foundinsn = 1;
4158 if (scan != insn
4159 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4161 /* There is a function call to this register other
4162 than the one we are checking. If we optimize
4163 this call, we need to rescan again below. */
4164 rescan = 1;
4167 /* ??? We shouldn't have to worry about SCANSET here.
4168 We should just be able to check for a REG_DEAD note
4169 on a function call. However, the REG_DEAD notes are
4170 apparently not dependable around libcalls; c-torture
4171 execute/920501-2 is a test case. If SCANSET is set,
4172 then this insn sets the register, so it must have
4173 died earlier. Unfortunately, this will only handle
4174 the cases in which the register is, in fact, set in a
4175 later insn. */
4177 /* ??? We shouldn't have to use FOUNDINSN here.
4178 However, the LOG_LINKS fields are apparently not
4179 entirely reliable around libcalls;
4180 newlib/libm/math/e_pow.c is a test case. Sometimes
4181 an insn will appear in LOG_LINKS even though it is
4182 not the most recent insn which sets the register. */
4184 if (foundinsn
4185 && (scanset
4186 || find_reg_note (scan, REG_DEAD, reg)))
4188 dies = scan;
4189 break;
4193 if (! dies)
4195 /* Either there was a branch, or some insn used REG
4196 other than as a function call address. */
4197 continue;
4200 /* Create a code label, and put it in a REG_LABEL note on
4201 the insn which sets the register, and on each call insn
4202 which uses the register. In final_prescan_insn we look
4203 for the REG_LABEL notes, and output the appropriate label
4204 or pseudo-op. */
4206 label = gen_label_rtx ();
4207 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
4208 REG_NOTES (link));
4209 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
4210 REG_NOTES (insn));
4211 if (rescan)
4213 scan = link;
4216 rtx reg2;
4218 scan = NEXT_INSN (scan);
4219 if (scan != insn
4220 && ((GET_CODE (scan) == CALL_INSN
4221 && reg_mentioned_p (reg, scan))
4222 || ((reg2 = sfunc_uses_reg (scan))
4223 && REGNO (reg2) == REGNO (reg))))
4224 REG_NOTES (scan)
4225 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
4227 while (scan != dies);
4232 if (TARGET_SH2)
4233 fixup_addr_diff_vecs (first);
4235 if (optimize)
4237 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4238 shorten_branches (first);
4240 /* Scan the function looking for move instructions which have to be
4241 changed to pc-relative loads and insert the literal tables. */
4243 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4244 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4246 if (mova_p (insn))
4248 /* ??? basic block reordering can move a switch table dispatch
4249 below the switch table. Check if that has happened.
4250 We only have the addresses available when optimizing; but then,
4251 this check shouldn't be needed when not optimizing. */
4252 rtx label_ref = XVECEXP (SET_SRC (PATTERN (insn)), 0, 0);
4253 if (optimize
4254 && (INSN_ADDRESSES (INSN_UID (insn))
4255 > INSN_ADDRESSES (INSN_UID (XEXP (label_ref, 0)))))
4257 /* Change the mova into a load.
4258 broken_move will then return true for it. */
4259 fixup_mova (insn);
4261 else if (! num_mova++)
4262 mova = insn;
4264 else if (GET_CODE (insn) == JUMP_INSN
4265 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4266 && num_mova)
4268 rtx scan;
4269 int total;
4271 num_mova--;
4273 /* Some code might have been inserted between the mova and
4274 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4275 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4276 total += get_attr_length (scan);
4278 /* range of mova is 1020, add 4 because pc counts from address of
4279 second instruction after this one, subtract 2 in case pc is 2
4280 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
4281 cancels out with alignment effects of the mova itself. */
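/* That is, 1020 + 4 - 2 == 1022, the threshold tested below. */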
4282 if (total > 1022)
4284 /* Change the mova into a load, and restart scanning
4285 there. broken_move will then return true for mova. */
4286 fixup_mova (mova);
4287 insn = mova;
4290 if (broken_move (insn)
4291 || (GET_CODE (insn) == INSN
4292 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
4294 rtx scan;
4295 /* Scan ahead looking for a barrier to stick the constant table
4296 behind. */
4297 rtx barrier = find_barrier (num_mova, mova, insn);
4298 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4299 int need_aligned_label = 0;
4301 if (num_mova && ! mova_p (mova))
4303 /* find_barrier had to change the first mova into a
4304 pcload; thus, we have to start with this new pcload. */
4305 insn = mova;
4306 num_mova = 0;
4308 /* Now find all the moves between the points and modify them. */
4309 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
4311 if (GET_CODE (scan) == CODE_LABEL)
4312 last_float = 0;
4313 if (GET_CODE (scan) == INSN
4314 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
4315 need_aligned_label = 1;
4316 if (broken_move (scan))
4318 rtx *patp = &PATTERN (scan), pat = *patp;
4319 rtx src, dst;
4320 rtx lab;
4321 rtx newsrc;
4322 enum machine_mode mode;
4324 if (GET_CODE (pat) == PARALLEL)
4325 patp = &XVECEXP (pat, 0, 0), pat = *patp;
4326 src = SET_SRC (pat);
4327 dst = SET_DEST (pat);
4328 mode = GET_MODE (dst);
4330 if (mode == SImode && hi_const (src)
4331 && REGNO (dst) != FPUL_REG)
4333 int offset = 0;
4335 mode = HImode;
4336 while (GET_CODE (dst) == SUBREG)
4338 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
4339 GET_MODE (SUBREG_REG (dst)),
4340 SUBREG_BYTE (dst),
4341 GET_MODE (dst));
4342 dst = SUBREG_REG (dst);
4344 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
4346 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
4348 /* This must be an insn that clobbers r0. */
4349 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
4350 XVECLEN (PATTERN (scan), 0)
4351 - 1);
4352 rtx clobber = *clobberp;
4354 if (GET_CODE (clobber) != CLOBBER
4355 || ! rtx_equal_p (XEXP (clobber, 0), r0_rtx))
4356 abort ();
4358 if (last_float
4359 && reg_set_between_p (r0_rtx, last_float_move, scan))
4360 last_float = 0;
4361 if (last_float
4362 && TARGET_SHCOMPACT
4363 && GET_MODE_SIZE (mode) != 4
4364 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
4365 last_float = 0;
4366 lab = add_constant (src, mode, last_float);
4367 if (lab)
4368 emit_insn_before (gen_mova (lab), scan);
4369 else
4371 /* There will be a REG_UNUSED note for r0 on
4372 LAST_FLOAT_MOVE; we have to change it to REG_INC;
4373 otherwise reorg's mark_target_live_regs will not
4374 consider r0 to be used, and we could end up with a delay
4375 slot insn in front of SCAN that clobbers r0. */
4376 rtx note
4377 = find_regno_note (last_float_move, REG_UNUSED, 0);
4379 /* If we are not optimizing, then there may not be
4380 a note. */
4381 if (note)
4382 PUT_MODE (note, REG_INC);
4384 *last_float_addr = r0_inc_rtx;
4386 last_float_move = scan;
4387 last_float = src;
4388 newsrc = gen_rtx_MEM (mode,
4389 (((TARGET_SH4 && ! TARGET_FMOVD)
4390 || REGNO (dst) == FPUL_REG)
4391 ? r0_inc_rtx
4392 : r0_rtx));
4393 last_float_addr = &XEXP (newsrc, 0);
4395 /* Remove the clobber of r0. */
4396 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
4397 gen_rtx_SCRATCH (Pmode));
4399 /* This is a mova needing a label. Create it. */
4400 else if (GET_CODE (src) == UNSPEC
4401 && XINT (src, 1) == UNSPEC_MOVA
4402 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
4404 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
4405 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4406 newsrc = gen_rtx_UNSPEC (SImode,
4407 gen_rtvec (1, newsrc),
4408 UNSPEC_MOVA);
4410 else
4412 lab = add_constant (src, mode, 0);
4413 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4414 newsrc = gen_const_mem (mode, newsrc);
4416 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
4417 INSN_CODE (scan) = -1;
4420 dump_table (need_aligned_label ? insn : 0, barrier);
4421 insn = barrier;
4425 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
4426 INSN_ADDRESSES_FREE ();
4427 split_branches (first);
4429 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4430 also has an effect on the register that holds the address of the sfunc.
4431 Insert an extra dummy insn in front of each sfunc that pretends to
4432 use this register. */
4433 if (flag_delayed_branch)
4435 for (insn = first; insn; insn = NEXT_INSN (insn))
4437 rtx reg = sfunc_uses_reg (insn);
4439 if (! reg)
4440 continue;
4441 emit_insn_before (gen_use_sfunc_addr (reg), insn);
4444 #if 0
4445 /* fpscr is not actually a user variable, but we pretend it is for the
4446 sake of the previous optimization passes, since we want it handled like
4447 one. However, we don't have any debugging information for it, so turn
4448 it into a non-user variable now. */
4449 if (TARGET_SH4)
4450 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4451 #endif
4452 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
4455 static int
4456 get_dest_uid (rtx label, int max_uid)
4458 rtx dest = next_real_insn (label);
4459 int dest_uid;
4460 if (! dest)
4461 /* This can happen for an undefined label. */
4462 return 0;
4463 dest_uid = INSN_UID (dest);
4464 /* If this is a newly created branch redirection blocking instruction,
4465 we cannot index the branch_uid or insn_addresses arrays with its
4466 uid. But then, we won't need to, because the actual destination is
4467 the following branch. */
4468 while (dest_uid >= max_uid)
4470 dest = NEXT_INSN (dest);
4471 dest_uid = INSN_UID (dest);
4473 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
4474 return 0;
4475 return dest_uid;
4478 /* Split condbranches that are out of range. Also add clobbers for
4479 scratch registers that are needed in far jumps.
4480 We do this before delay slot scheduling, so that it can take our
4481 newly created instructions into account. It also allows us to
4482 find branches with common targets more easily. */
4484 static void
4485 split_branches (rtx first)
4487 rtx insn;
4488 struct far_branch **uid_branch, *far_branch_list = 0;
4489 int max_uid = get_max_uid ();
4491 /* Find out which branches are out of range. */
4492 shorten_branches (first);
4494 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
4495 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
4497 for (insn = first; insn; insn = NEXT_INSN (insn))
4498 if (! INSN_P (insn))
4499 continue;
4500 else if (INSN_DELETED_P (insn))
4502 /* Shorten_branches would split this instruction again,
4503 so transform it into a note. */
4504 PUT_CODE (insn, NOTE);
4505 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4506 NOTE_SOURCE_FILE (insn) = 0;
4508 else if (GET_CODE (insn) == JUMP_INSN
4509 /* Don't mess with ADDR_DIFF_VEC */
4510 && (GET_CODE (PATTERN (insn)) == SET
4511 || GET_CODE (PATTERN (insn)) == RETURN))
4513 enum attr_type type = get_attr_type (insn);
4514 if (type == TYPE_CBRANCH)
4516 rtx next, beyond;
4518 if (get_attr_length (insn) > 4)
4520 rtx src = SET_SRC (PATTERN (insn));
4521 rtx olabel = XEXP (XEXP (src, 1), 0);
4522 int addr = INSN_ADDRESSES (INSN_UID (insn));
4523 rtx label = 0;
4524 int dest_uid = get_dest_uid (olabel, max_uid);
4525 struct far_branch *bp = uid_branch[dest_uid];
4527 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4528 the label if the LABEL_NUSES count drops to zero. There is
4529 always a jump_optimize pass that sets these values, but it
4530 proceeds to delete unreferenced code, and then if not
4531 optimizing, to un-delete the deleted instructions, thus
4532 leaving labels with use counts that are too low. */
4533 if (! optimize)
4535 JUMP_LABEL (insn) = olabel;
4536 LABEL_NUSES (olabel)++;
4538 if (! bp)
4540 bp = (struct far_branch *) alloca (sizeof *bp);
4541 uid_branch[dest_uid] = bp;
4542 bp->prev = far_branch_list;
4543 far_branch_list = bp;
4544 bp->far_label
4545 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
4546 LABEL_NUSES (bp->far_label)++;
4548 else
4550 label = bp->near_label;
4551 if (! label && bp->address - addr >= CONDJUMP_MIN)
4553 rtx block = bp->insert_place;
4555 if (GET_CODE (PATTERN (block)) == RETURN)
4556 block = PREV_INSN (block);
4557 else
4558 block = gen_block_redirect (block,
4559 bp->address, 2);
4560 label = emit_label_after (gen_label_rtx (),
4561 PREV_INSN (block));
4562 bp->near_label = label;
4564 else if (label && ! NEXT_INSN (label))
4566 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4567 bp->insert_place = insn;
4568 else
4569 gen_far_branch (bp);
4572 if (! label
4573 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4575 bp->near_label = label = gen_label_rtx ();
4576 bp->insert_place = insn;
4577 bp->address = addr;
4579 if (! redirect_jump (insn, label, 1))
4580 abort ();
4582 else
4584 /* get_attr_length (insn) == 2 */
4585 /* Check if we have a pattern where reorg wants to redirect
4586 the branch to a label from an unconditional branch that
4587 is too far away. */
4588 /* We can't use JUMP_LABEL here because it might be undefined
4589 when not optimizing. */
4590 /* A syntax error might cause beyond to be NULL_RTX. */
4591 beyond
4592 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4593 0));
4595 if (beyond
4596 && (GET_CODE (beyond) == JUMP_INSN
4597 || ((beyond = next_active_insn (beyond))
4598 && GET_CODE (beyond) == JUMP_INSN))
4599 && GET_CODE (PATTERN (beyond)) == SET
4600 && recog_memoized (beyond) == CODE_FOR_jump_compact
4601 && ((INSN_ADDRESSES
4602 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4603 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4604 > 252 + 258 + 2))
4605 gen_block_redirect (beyond,
4606 INSN_ADDRESSES (INSN_UID (beyond)), 1);
4609 next = next_active_insn (insn);
4611 if ((GET_CODE (next) == JUMP_INSN
4612 || ((next = next_active_insn (next))
4613 && GET_CODE (next) == JUMP_INSN))
4614 && GET_CODE (PATTERN (next)) == SET
4615 && recog_memoized (next) == CODE_FOR_jump_compact
4616 && ((INSN_ADDRESSES
4617 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
4618 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4619 > 252 + 258 + 2))
4620 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
4622 else if (type == TYPE_JUMP || type == TYPE_RETURN)
4624 int addr = INSN_ADDRESSES (INSN_UID (insn));
4625 rtx far_label = 0;
4626 int dest_uid = 0;
4627 struct far_branch *bp;
4629 if (type == TYPE_JUMP)
4631 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
4632 dest_uid = get_dest_uid (far_label, max_uid);
4633 if (! dest_uid)
4635 /* Parse errors can lead to labels outside
4636 the insn stream. */
4637 if (! NEXT_INSN (far_label))
4638 continue;
4640 if (! optimize)
4642 JUMP_LABEL (insn) = far_label;
4643 LABEL_NUSES (far_label)++;
4645 redirect_jump (insn, NULL_RTX, 1);
4646 far_label = 0;
4649 bp = uid_branch[dest_uid];
4650 if (! bp)
4652 bp = (struct far_branch *) alloca (sizeof *bp);
4653 uid_branch[dest_uid] = bp;
4654 bp->prev = far_branch_list;
4655 far_branch_list = bp;
4656 bp->near_label = 0;
4657 bp->far_label = far_label;
4658 if (far_label)
4659 LABEL_NUSES (far_label)++;
4661 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
4662 if (addr - bp->address <= CONDJUMP_MAX)
4663 emit_label_after (bp->near_label, PREV_INSN (insn));
4664 else
4666 gen_far_branch (bp);
4667 bp->near_label = 0;
4669 else
4670 bp->near_label = 0;
4671 bp->address = addr;
4672 bp->insert_place = insn;
4673 if (! far_label)
4674 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
4675 else
4676 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
4679 /* Generate all pending far branches,
4680 and free our references to the far labels. */
4681 while (far_branch_list)
4683 if (far_branch_list->near_label
4684 && ! NEXT_INSN (far_branch_list->near_label))
4685 gen_far_branch (far_branch_list);
4686 if (optimize
4687 && far_branch_list->far_label
4688 && ! --LABEL_NUSES (far_branch_list->far_label))
4689 delete_insn (far_branch_list->far_label);
4690 far_branch_list = far_branch_list->prev;
4693 /* Instruction length information is no longer valid due to the new
4694 instructions that have been generated. */
4695 init_insn_lengths ();
4698 /* Dump out instruction addresses, which is useful for debugging the
4699 constant pool table stuff.
4701 If relaxing, output the label and pseudo-ops used to link together
4702 calls and the instruction which set the registers. */
4704 /* ??? The addresses printed by this routine for insns are nonsense for
4705 insns which are inside of a sequence where none of the inner insns have
4706 variable length. This is because the second pass of shorten_branches
4707 does not bother to update them. */
4709 void
4710 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
4711 int noperands ATTRIBUTE_UNUSED)
4713 if (TARGET_DUMPISIZE)
4714 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
4716 if (TARGET_RELAX)
4718 rtx note;
4720 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
4721 if (note)
4723 rtx pattern;
4725 pattern = PATTERN (insn);
4726 if (GET_CODE (pattern) == PARALLEL)
4727 pattern = XVECEXP (pattern, 0, 0);
4728 if (GET_CODE (pattern) == CALL
4729 || (GET_CODE (pattern) == SET
4730 && (GET_CODE (SET_SRC (pattern)) == CALL
4731 || get_attr_type (insn) == TYPE_SFUNC)))
4732 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
4733 CODE_LABEL_NUMBER (XEXP (note, 0)));
4734 else if (GET_CODE (pattern) == SET)
4735 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4736 CODE_LABEL_NUMBER (XEXP (note, 0)));
4737 else
4738 abort ();
4743 /* Dump out any constants accumulated in the final pass. These will
4744 only be labels. */
4746 const char *
4747 output_jump_label_table (void)
4749 int i;
4751 if (pool_size)
4753 fprintf (asm_out_file, "\t.align 2\n");
4754 for (i = 0; i < pool_size; i++)
4756 pool_node *p = &pool_vector[i];
4758 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4759 CODE_LABEL_NUMBER (p->label));
4760 output_asm_insn (".long %O0", &p->value);
4762 pool_size = 0;
4765 return "";
4768 /* A full frame looks like:
4770 arg-5
4771 arg-4
4772 [ if current_function_anonymous_args
4773 arg-3
4774 arg-2
4775 arg-1
4776 arg-0 ]
4777 saved-fp
4778 saved-r10
4779 saved-r11
4780 saved-r12
4781 saved-pr
4782 local-n
4784 local-1
4785 local-0 <- fp points here. */
4787 /* Number of bytes pushed for anonymous args, used to pass information
4788 between expand_prologue and expand_epilogue. */
4790 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
4791 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
4792 for an epilogue and a negative value means that it's for a sibcall
4793 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
4794 all the registers that are about to be restored, and hence dead. */
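/* Note that SIZE is simply added to REG, so with the downward-growing
stack a negative SIZE allocates space (as in the prologue calls below)
and a positive SIZE releases it. */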
4796 static void
4797 output_stack_adjust (int size, rtx reg, int epilogue_p,
4798 HARD_REG_SET *live_regs_mask)
4800 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
4801 if (size)
4803 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
4805 /* This test is bogus, as output_stack_adjust is used to re-align the
4806 stack. */
4807 #if 0
4808 if (size % align)
4809 abort ();
4810 #endif
4812 if (CONST_OK_FOR_ADD (size))
4813 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
4814 /* Try to do it with two partial adjustments; however, we must make
4815 sure that the stack is properly aligned at all times, in case
4816 an interrupt occurs between the two partial adjustments. */
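/* For illustration, assume an 8-byte alignment and that CONST_OK_FOR_ADD
accepts signed 8-bit immediates: SIZE == -192 is out of range, but
size / 2 & -align == -96 and size - (-96) == -96 both fit, and each
partial add of -96 keeps the stack pointer 8-byte aligned. */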
4817 else if (CONST_OK_FOR_ADD (size / 2 & -align)
4818 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
4820 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
4821 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
4823 else
4825 rtx const_reg;
4826 rtx insn;
4827 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
4828 int i;
4830 /* If TEMP is invalid, we could temporarily save a general
4831 register to MACL. However, there is currently no need
4832 to handle this case, so just abort when we see it. */
4833 if (epilogue_p < 0
4834 || current_function_interrupt
4835 || ! call_really_used_regs[temp] || fixed_regs[temp])
4836 temp = -1;
4837 if (temp < 0 && ! current_function_interrupt
4838 && (TARGET_SHMEDIA || epilogue_p >= 0))
4840 HARD_REG_SET temps;
4841 COPY_HARD_REG_SET (temps, call_used_reg_set);
4842 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
4843 if (epilogue_p > 0)
4845 int nreg = 0;
4846 if (current_function_return_rtx)
4848 enum machine_mode mode;
4849 mode = GET_MODE (current_function_return_rtx);
4850 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
4851 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
4853 for (i = 0; i < nreg; i++)
4854 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
4855 if (current_function_calls_eh_return)
4857 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
4858 for (i = 0; i <= 3; i++)
4859 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
4862 if (TARGET_SHMEDIA && epilogue_p < 0)
4863 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
4864 CLEAR_HARD_REG_BIT (temps, i);
4865 if (epilogue_p <= 0)
4867 for (i = FIRST_PARM_REG;
4868 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
4869 CLEAR_HARD_REG_BIT (temps, i);
4870 if (cfun->static_chain_decl != NULL)
4871 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
4873 temp = scavenge_reg (&temps);
4875 if (temp < 0 && live_regs_mask)
4876 temp = scavenge_reg (live_regs_mask);
4877 if (temp < 0)
4879 /* If we reached here, the most likely case is the (sibcall)
4880 epilogue for non-SHmedia. Put a special push/pop sequence
4881 for such a case as the last resort. This looks lengthy but
4882 would not be a problem because it seems to be very rare. */
4883 if (! TARGET_SHMEDIA && epilogue_p)
4885 rtx adj_reg, tmp_reg, mem;
4887 /* ??? There is still the slight possibility that r4 or r5
4888 have been reserved as fixed registers or assigned as
4889 global registers, and they change during an interrupt.
4890 There are possible ways to handle this:
4891 - If we are adjusting the frame pointer (r14), we can do
4892 with a single temp register and an ordinary push / pop
4893 on the stack.
4894 - Grab any call-used or call-saved registers (i.e. not
4895 fixed or globals) for the temps we need. We might
4896 also grab r14 if we are adjusting the stack pointer.
4897 If we can't find enough available registers, issue
4898 a diagnostic and abort - the user must have reserved
4899 way too many registers.
4900 But since all this is rather unlikely to happen and
4901 would require extra testing, we just abort if r4 / r5
4902 are not available. */
4903 if (fixed_regs[4] || fixed_regs[5]
4904 || global_regs[4] || global_regs[5])
4905 abort ();
4907 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
4908 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
4909 emit_move_insn (gen_rtx_MEM (Pmode, reg), adj_reg);
4910 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
4911 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
4912 mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
4913 emit_move_insn (mem, tmp_reg);
4914 emit_move_insn (tmp_reg, gen_rtx_MEM (Pmode, reg));
4915 mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
4916 emit_move_insn (mem, tmp_reg);
4917 emit_move_insn (reg, adj_reg);
4918 mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
4919 emit_move_insn (adj_reg, mem);
4920 mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
4921 emit_move_insn (tmp_reg, mem);
4922 return;
4924 else
4925 abort ();
4927 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
4929 /* If SIZE is negative, subtract the positive value.
4930 This sometimes allows a constant pool entry to be shared
4931 between prologue and epilogue code. */
4932 if (size < 0)
4934 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
4935 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
4937 else
4939 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
4940 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
4942 if (! epilogue_p)
4943 REG_NOTES (insn)
4944 = (gen_rtx_EXPR_LIST
4945 (REG_FRAME_RELATED_EXPR,
4946 gen_rtx_SET (VOIDmode, reg,
4947 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
4948 REG_NOTES (insn)));
4953 static rtx
4954 frame_insn (rtx x)
4956 x = emit_insn (x);
4957 RTX_FRAME_RELATED_P (x) = 1;
4958 return x;
4961 /* Output RTL to push register RN onto the stack. */
4963 static rtx
4964 push (int rn)
4966 rtx x;
4967 if (rn == FPUL_REG)
4968 x = gen_push_fpul ();
4969 else if (rn == FPSCR_REG)
4970 x = gen_push_fpscr ();
4971 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4972 && FP_OR_XD_REGISTER_P (rn))
4974 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4975 return NULL_RTX;
4976 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
4978 else if (TARGET_SH2E && FP_REGISTER_P (rn))
4979 x = gen_push_e (gen_rtx_REG (SFmode, rn));
4980 else
4981 x = gen_push (gen_rtx_REG (SImode, rn));
4983 x = frame_insn (x);
4984 REG_NOTES (x)
4985 = gen_rtx_EXPR_LIST (REG_INC,
4986 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4987 return x;
4990 /* Output RTL to pop register RN from the stack. */
4992 static void
4993 pop (int rn)
4995 rtx x;
4996 if (rn == FPUL_REG)
4997 x = gen_pop_fpul ();
4998 else if (rn == FPSCR_REG)
4999 x = gen_pop_fpscr ();
5000 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5001 && FP_OR_XD_REGISTER_P (rn))
5003 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5004 return;
5005 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
5007 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5008 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
5009 else
5010 x = gen_pop (gen_rtx_REG (SImode, rn));
5012 x = emit_insn (x);
5013 REG_NOTES (x)
5014 = gen_rtx_EXPR_LIST (REG_INC,
5015 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5018 /* Generate code to push the regs specified in the mask. */
5020 static void
5021 push_regs (HARD_REG_SET *mask, int interrupt_handler)
5023 int i;
5024 int skip_fpscr = 0;
5026 /* Push PR last; this gives better latencies after the prologue, and
5027 candidates for the return delay slot when there are no general
5028 registers pushed. */
5029 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5031 /* If this is an interrupt handler, and the SZ bit varies,
5032 and we have to push any floating point register, we need
5033 to switch to the correct precision first. */
5034 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
5035 && hard_regs_intersect_p (mask, &reg_class_contents[DF_REGS]))
5037 HARD_REG_SET unsaved;
5039 push (FPSCR_REG);
5040 COMPL_HARD_REG_SET (unsaved, *mask);
5041 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
5042 skip_fpscr = 1;
5044 if (i != PR_REG
5045 && (i != FPSCR_REG || ! skip_fpscr)
5046 && TEST_HARD_REG_BIT (*mask, i))
5047 push (i);
5049 if (TEST_HARD_REG_BIT (*mask, PR_REG))
5050 push (PR_REG);
5053 /* Calculate how much extra space is needed to save all callee-saved
5054 target registers.
5055 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5057 static int
5058 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
5060 int reg;
5061 int stack_space = 0;
5062 int interrupt_handler = sh_cfun_interrupt_handler_p ();
5064 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5065 if ((! call_really_used_regs[reg] || interrupt_handler)
5066 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5067 /* Leave space to save this target register on the stack,
5068 in case target register allocation wants to use it. */
5069 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5070 return stack_space;
5073 /* Decide whether we should reserve space for callee-save target registers,
5074 in case target register allocation wants to use them. REGS_SAVED is
5075 the space, in bytes, that is already required for register saves.
5076 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5078 static int
5079 shmedia_reserve_space_for_target_registers_p (int regs_saved,
5080 HARD_REG_SET *live_regs_mask)
5082 if (optimize_size)
5083 return 0;
5084 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
5087 /* Decide how much space to reserve for callee-save target registers
5088 in case target register allocation wants to use them.
5089 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5091 static int
5092 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5094 if (shmedia_space_reserved_for_target_registers)
5095 return shmedia_target_regs_stack_space (live_regs_mask);
5096 else
5097 return 0;
5100 /* Work out the registers which need to be saved, both as a mask and a
5101 count of saved words. Return the count.
5103 If doing a pragma interrupt function, then push all regs used by the
5104 function, and if we call another function (we can tell by looking at PR),
5105 make sure that all the regs it clobbers are safe too. */
5107 static int
5108 calc_live_regs (HARD_REG_SET *live_regs_mask)
5110 int reg;
5111 int count;
5112 int interrupt_handler;
5113 int pr_live, has_call;
5115 interrupt_handler = sh_cfun_interrupt_handler_p ();
5117 CLEAR_HARD_REG_SET (*live_regs_mask);
5118 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
5119 && regs_ever_live[FPSCR_REG])
5120 target_flags &= ~FPU_SINGLE_BIT;
5121 /* If switching to double mode lets us avoid a lot of save instructions, do that. */
5122 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
5123 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5124 if (regs_ever_live[reg] && regs_ever_live[reg+1]
5125 && (! call_really_used_regs[reg]
5126 || (interrupt_handler && ! pragma_trapa))
5127 && ++count > 2)
5129 target_flags &= ~FPU_SINGLE_BIT;
5130 break;
5132 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5133 knows how to use it. That means the pseudo originally allocated for
5134 the initial value can become the PR_MEDIA_REG hard register, as seen for
5135 execute/20010122-1.c:test9. */
5136 if (TARGET_SHMEDIA)
5137 /* ??? this function is called from initial_elimination_offset, hence we
5138 can't use the result of sh_media_register_for_return here. */
5139 pr_live = sh_pr_n_sets ();
5140 else
5142 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5143 pr_live = (pr_initial
5144 ? (GET_CODE (pr_initial) != REG
5145 || REGNO (pr_initial) != (PR_REG))
5146 : regs_ever_live[PR_REG]);
5147 /* For SHcompact, if not optimizing, we end up with a memory reference
5148 using the return address pointer for __builtin_return_address even
5149 though there is no actual need to put the PR register on the stack. */
5150 pr_live |= regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM];
5152 /* Force PR to be live if the prologue has to call the SHmedia
5153 argument decoder or register saver. */
5154 if (TARGET_SHCOMPACT
5155 && ((current_function_args_info.call_cookie
5156 & ~ CALL_COOKIE_RET_TRAMP (1))
5157 || current_function_has_nonlocal_label))
5158 pr_live = 1;
5159 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
5160 for (count = 0, reg = FIRST_PSEUDO_REGISTER - 1; reg >= 0; reg--)
5162 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5163 ? pr_live
5164 : (interrupt_handler && ! pragma_trapa)
5165 ? (/* Need to save all the regs ever live. */
5166 (regs_ever_live[reg]
5167 || (call_really_used_regs[reg]
5168 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
5169 || reg == PIC_OFFSET_TABLE_REGNUM)
5170 && has_call)
5171 || (has_call && REGISTER_NATURAL_MODE (reg) == SImode
5172 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5173 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5174 && reg != RETURN_ADDRESS_POINTER_REGNUM
5175 && reg != T_REG && reg != GBR_REG
5176 /* Push fpscr only on targets which have FPU */
5177 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5178 : (/* Only push those regs which are used and need to be saved. */
5179 (TARGET_SHCOMPACT
5180 && flag_pic
5181 && current_function_args_info.call_cookie
5182 && reg == (int) PIC_OFFSET_TABLE_REGNUM)
5183 || (regs_ever_live[reg] && ! call_really_used_regs[reg])
5184 || (current_function_calls_eh_return
5185 && (reg == (int) EH_RETURN_DATA_REGNO (0)
5186 || reg == (int) EH_RETURN_DATA_REGNO (1)
5187 || reg == (int) EH_RETURN_DATA_REGNO (2)
5188 || reg == (int) EH_RETURN_DATA_REGNO (3)))
5189 || ((reg == MACL_REG || reg == MACH_REG)
5190 && regs_ever_live[reg]
5191 && sh_cfun_attr_renesas_p ())
5194 SET_HARD_REG_BIT (*live_regs_mask, reg);
5195 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5197 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
5198 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
5200 if (FP_REGISTER_P (reg))
5202 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
5204 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5205 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
5208 else if (XD_REGISTER_P (reg))
5210 /* Must switch to double mode to access these registers. */
5211 target_flags &= ~FPU_SINGLE_BIT;
5216 /* If we have a target register optimization pass after prologue / epilogue
5217 threading, we need to assume all target registers will be live even if
5218 they aren't now. */
5219 if (flag_branch_target_load_optimize2
5220 && TARGET_SAVE_ALL_TARGET_REGS
5221 && shmedia_space_reserved_for_target_registers)
5222 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5223 if ((! call_really_used_regs[reg] || interrupt_handler)
5224 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5226 SET_HARD_REG_BIT (*live_regs_mask, reg);
5227 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5229 /* If this is an interrupt handler, we don't have any call-clobbered
5230 registers we can conveniently use for target register save/restore.
5231 Make sure we save at least one general purpose register when we need
5232 to save target registers. */
5233 if (interrupt_handler
5234 && hard_regs_intersect_p (live_regs_mask,
5235 &reg_class_contents[TARGET_REGS])
5236 && ! hard_regs_intersect_p (live_regs_mask,
5237 &reg_class_contents[GENERAL_REGS]))
5239 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
5240 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
5243 return count;
5246 /* Code to generate prologue and epilogue sequences */
5248 /* PUSHED is the number of bytes that are being pushed on the
5249 stack for register saves. Return the frame size, padded
5250 appropriately so that the stack stays properly aligned. */
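/* For example (assuming an 8-byte STACK_BOUNDARY for illustration):
with PUSHED == 20 and get_frame_size () == 30, this returns
((30 + 20 + 7) & -8) - 20 == 56 - 20 == 36, so the 56 bytes of pushed
registers plus frame remain a multiple of 8. */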
5251 static HOST_WIDE_INT
5252 rounded_frame_size (int pushed)
5254 HOST_WIDE_INT size = get_frame_size ();
5255 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5257 return ((size + pushed + align - 1) & -align) - pushed;
5260 /* Choose a call-clobbered target-branch register that remains
5261 unchanged along the whole function. We set it up as the return
5262 value in the prologue. */
5263 int
5264 sh_media_register_for_return (void)
5266 int regno;
5267 int tr0_used;
5269 if (! current_function_is_leaf)
5270 return -1;
5271 if (lookup_attribute ("interrupt_handler",
5272 DECL_ATTRIBUTES (current_function_decl)))
5273 return -1;
5275 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
5277 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
5278 if (call_really_used_regs[regno] && ! regs_ever_live[regno])
5279 return regno;
5281 return -1;
5284 /* The maximum registers we need to save are:
5285 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
5286 - 32 floating point registers (for each pair, we save none,
5287 one single precision value, or a double precision value).
5288 - 8 target registers
5289 - add 1 entry for a delimiter. */
5290 #define MAX_SAVED_REGS (62+32+8)
5292 typedef struct save_entry_s
5294 unsigned char reg;
5295 unsigned char mode;
5296 short offset;
5297 } save_entry;
5299 #define MAX_TEMPS 4
5301 /* There will be a delimiter entry with VOIDmode both at the start and the
5302 end of a filled in schedule. The end delimiter has the offset of the
5303 save with the smallest (i.e. most negative) offset. */
5304 typedef struct save_schedule_s
5306 save_entry entries[MAX_SAVED_REGS + 2];
5307 int temps[MAX_TEMPS+1];
5308 } save_schedule;
5310 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
5311 use reverse order. Returns the last entry written to (not counting
5312 the delimiter). OFFSET_BASE is a number to be added to all offset
5313 entries. */
5315 static save_entry *
5316 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
5317 int offset_base)
5319 int align, i;
5320 save_entry *entry = schedule->entries;
5321 int tmpx = 0;
5322 int offset;
5324 if (! current_function_interrupt)
5325 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
5326 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
5327 && ! FUNCTION_ARG_REGNO_P (i)
5328 && i != FIRST_RET_REG
5329 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
5330 && ! (current_function_calls_eh_return
5331 && (i == EH_RETURN_STACKADJ_REGNO
5332 || ((unsigned) i <= EH_RETURN_DATA_REGNO (0)
5333 && (unsigned) i >= EH_RETURN_DATA_REGNO (3)))))
5334 schedule->temps[tmpx++] = i;
5335 entry->reg = -1;
5336 entry->mode = VOIDmode;
5337 entry->offset = offset_base;
5338 entry++;
5339 /* We loop twice: first, we save 8-byte aligned registers in the
5340 higher addresses, which are known to be aligned. Then, we
5341 proceed to saving 32-bit registers that don't need 8-byte
5342 alignment.
5343 If this is an interrupt function, all registers that need saving
5344 need to be saved in full. Moreover, we need to postpone saving
5345 target registers till we have saved some general purpose registers
5346 we can then use as scratch registers. */
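/* For instance, on the first (aligned) pass DImode and DFmode saves land
at the higher, 8-byte aligned offsets; a lone SFmode or SImode save,
needing only 4-byte alignment, is deferred to the second pass. */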
5347 offset = offset_base;
5348 for (align = 1; align >= 0; align--)
5350 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
5351 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5353 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5354 int reg = i;
5356 if (current_function_interrupt)
5358 if (TARGET_REGISTER_P (i))
5359 continue;
5360 if (GENERAL_REGISTER_P (i))
5361 mode = DImode;
5363 if (mode == SFmode && (i % 2) == 1
5364 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5365 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
5367 mode = DFmode;
5368 i--;
5369 reg--;
5372 /* If we're doing the aligned pass and this is not aligned,
5373 or we're doing the unaligned pass and this is aligned,
5374 skip it. */
5375 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
5376 != align)
5377 continue;
5379 if (current_function_interrupt
5380 && GENERAL_REGISTER_P (i)
5381 && tmpx < MAX_TEMPS)
5382 schedule->temps[tmpx++] = i;
5384 offset -= GET_MODE_SIZE (mode);
5385 entry->reg = i;
5386 entry->mode = mode;
5387 entry->offset = offset;
5388 entry++;
5390 if (align && current_function_interrupt)
5391 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
5392 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5394 offset -= GET_MODE_SIZE (DImode);
5395 entry->reg = i;
5396 entry->mode = DImode;
5397 entry->offset = offset;
5398 entry++;
5401 entry->reg = -1;
5402 entry->mode = VOIDmode;
5403 entry->offset = offset;
5404 schedule->temps[tmpx] = -1;
5405 return entry - 1;
5408 void
5409 sh_expand_prologue (void)
5411 HARD_REG_SET live_regs_mask;
5412 int d, i;
5413 int d_rounding = 0;
5414 int save_flags = target_flags;
5415 int pretend_args;
5417 current_function_interrupt = sh_cfun_interrupt_handler_p ();
5419 /* We have pretend args if we had an object sent partially in registers
5420 and partially on the stack, e.g. a large structure. */
5421 pretend_args = current_function_pretend_args_size;
5422 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
5423 && (NPARM_REGS(SImode)
5424 > current_function_args_info.arg_count[(int) SH_ARG_INT]))
5425 pretend_args = 0;
5426 output_stack_adjust (-pretend_args
5427 - current_function_args_info.stack_regs * 8,
5428 stack_pointer_rtx, 0, NULL);
5430 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
5431 /* We're going to use the PIC register to load the address of the
5432 incoming-argument decoder and/or of the return trampoline from
5433 the GOT, so make sure the PIC register is preserved and
5434 initialized. */
5435 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5437 if (TARGET_SHCOMPACT
5438 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5440 int reg;
5442 /* First, make all registers with incoming arguments that will
5443 be pushed onto the stack live, so that register renaming
5444 doesn't overwrite them. */
5445 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
5446 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
5447 >= NPARM_REGS (SImode) - reg)
5448 for (; reg < NPARM_REGS (SImode); reg++)
5449 emit_insn (gen_shcompact_preserve_incoming_args
5450 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5451 else if (CALL_COOKIE_INT_REG_GET
5452 (current_function_args_info.call_cookie, reg) == 1)
5453 emit_insn (gen_shcompact_preserve_incoming_args
5454 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5456 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
5457 stack_pointer_rtx);
5458 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
5459 GEN_INT (current_function_args_info.call_cookie));
5460 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
5461 gen_rtx_REG (SImode, R0_REG));
5463 else if (TARGET_SHMEDIA)
5465 int tr = sh_media_register_for_return ();
5467 if (tr >= 0)
5469 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
5470 gen_rtx_REG (DImode, PR_MEDIA_REG));
5472 /* ??? We should suppress saving pr when we don't need it, but this
5473 is tricky because of builtin_return_address. */
5475 /* If this function only exits with sibcalls, this copy
5476 will be flagged as dead. */
5477 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5478 const0_rtx,
5479 REG_NOTES (insn));
5483 /* Emit the code for SETUP_VARARGS. */
5484 if (current_function_stdarg)
5486 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
5488 /* Push arg regs as if they'd been provided by the caller on the stack. */
5489 for (i = 0; i < NPARM_REGS(SImode); i++)
5491 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
5492 rtx insn;
5494 if (i >= (NPARM_REGS(SImode)
5495 - current_function_args_info.arg_count[(int) SH_ARG_INT]
5497 break;
5498 insn = push (rn);
5499 RTX_FRAME_RELATED_P (insn) = 0;
5504 /* If we're supposed to switch stacks at function entry, do so now. */
5505 if (sp_switch)
5506 emit_insn (gen_sp_switch_1 ());
5508 d = calc_live_regs (&live_regs_mask);
5509 /* ??? Maybe we could save some switching if we can move a mode switch
5510 that already happens to be at the function start into the prologue. */
5511 if (target_flags != save_flags && ! current_function_interrupt)
5512 emit_insn (gen_toggle_sz ());
5514 if (TARGET_SH5)
5516 int offset_base, offset;
5517 rtx r0 = NULL_RTX;
5518 int offset_in_r0 = -1;
5519 int sp_in_r0 = 0;
5520 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5521 int total_size, save_size;
5522 save_schedule schedule;
5523 save_entry *entry;
5524 int *tmp_pnt;
5526 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
5527 && ! current_function_interrupt)
5528 r0 = gen_rtx_REG (Pmode, R0_REG);
5530 /* D is the actual number of bytes that we need for saving registers;
5531 however, in initial_elimination_offset we have committed to using
5532 an additional TREGS_SPACE bytes. In order to keep both the
5533 addresses of arguments supplied by the caller and local variables
5534 valid, we must keep this gap. Place it between the incoming
5535 arguments and the actually saved registers in a bid to optimize
5536 locality of reference. */
5537 total_size = d + tregs_space;
5538 total_size += rounded_frame_size (total_size);
5539 save_size = total_size - rounded_frame_size (d);
5540 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
5541 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5542 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
5544 /* If adjusting the stack in a single step costs nothing extra, do so.
5545 I.e. either if a single addi is enough, or we need a movi anyway,
5546 and we don't exceed the maximum offset range (the test for the
5547 latter is conservative for simplicity). */
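/* E.g. (assuming CONST_OK_FOR_I10 means a signed 10-bit addi immediate,
i.e. -512..511): a total_size of 480 fits in one addi, so the whole
adjustment is done at once; if save_size + d_rounding were 560, a movi
would be needed anyway, so the fold is also done as long as
total_size <= 2044. */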
5548 if (TARGET_SHMEDIA
5549 && (CONST_OK_FOR_I10 (-total_size)
5550 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
5551 && total_size <= 2044)))
5552 d_rounding = total_size - save_size;
5554 offset_base = d + d_rounding;
5556 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
5557 0, NULL);
5559 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
5560 tmp_pnt = schedule.temps;
5561 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
5563 enum machine_mode mode = entry->mode;
5564 int reg = entry->reg;
5565 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
5566 rtx orig_reg_rtx;
5568 offset = entry->offset;
5570 reg_rtx = gen_rtx_REG (mode, reg);
5572 mem_rtx = gen_rtx_MEM (mode,
5573 gen_rtx_PLUS (Pmode,
5574 stack_pointer_rtx,
5575 GEN_INT (offset)));
5577 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
5579 if (! r0)
5580 abort ();
5581 mem_rtx = NULL_RTX;
5583 try_pre_dec:
5585 if (HAVE_PRE_DECREMENT
5586 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
5587 || mem_rtx == NULL_RTX
5588 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5590 pre_dec = gen_rtx_MEM (mode,
5591 gen_rtx_PRE_DEC (Pmode, r0));
5593 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
5594 pre_dec_ok);
5596 pre_dec = NULL_RTX;
5598 break;
5600 pre_dec_ok:
5601 mem_rtx = NULL_RTX;
5602 offset += GET_MODE_SIZE (mode);
5604 while (0);
5606 if (mem_rtx != NULL_RTX)
5607 goto addr_ok;
5609 if (offset_in_r0 == -1)
5611 emit_move_insn (r0, GEN_INT (offset));
5612 offset_in_r0 = offset;
5614 else if (offset != offset_in_r0)
5616 emit_move_insn (r0,
5617 gen_rtx_PLUS
5618 (Pmode, r0,
5619 GEN_INT (offset - offset_in_r0)));
5620 offset_in_r0 += offset - offset_in_r0;
5623 if (pre_dec != NULL_RTX)
5625 if (! sp_in_r0)
5627 emit_move_insn (r0,
5628 gen_rtx_PLUS
5629 (Pmode, r0, stack_pointer_rtx));
5630 sp_in_r0 = 1;
5633 offset -= GET_MODE_SIZE (mode);
5634 offset_in_r0 -= GET_MODE_SIZE (mode);
5636 mem_rtx = pre_dec;
5638 else if (sp_in_r0)
5639 mem_rtx = gen_rtx_MEM (mode, r0);
5640 else
5641 mem_rtx = gen_rtx_MEM (mode,
5642 gen_rtx_PLUS (Pmode,
5643 stack_pointer_rtx,
5644 r0));
5646 /* We must not use an r0-based address for target-branch
5647 registers or for special registers without pre-dec
5648 memory addresses, since we store their values in r0
5649 first. */
5650 if (TARGET_REGISTER_P (reg)
5651 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5652 && mem_rtx != pre_dec))
5653 abort ();
5655 addr_ok:
5656 orig_reg_rtx = reg_rtx;
5657 if (TARGET_REGISTER_P (reg)
5658 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5659 && mem_rtx != pre_dec))
5661 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
5663 emit_move_insn (tmp_reg, reg_rtx);
5665 if (REGNO (tmp_reg) == R0_REG)
5667 offset_in_r0 = -1;
5668 sp_in_r0 = 0;
5669 if (refers_to_regno_p (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0))
5670 abort ();
5673 if (*++tmp_pnt <= 0)
5674 tmp_pnt = schedule.temps;
5676 reg_rtx = tmp_reg;
5679 rtx insn;
5681 /* Mark as interesting for dwarf cfi generator */
5682 insn = emit_move_insn (mem_rtx, reg_rtx);
5683 RTX_FRAME_RELATED_P (insn) = 1;
5684 /* If we use an intermediate register for the save, we can't
5685 describe this exactly in cfi as a copy of the to-be-saved
5686 register into the temporary register and then the temporary
5687 register on the stack, because the temporary register can
5688 have a different natural size than the to-be-saved register.
5689 Thus, we gloss over the intermediate copy and pretend we do
5690 a direct save from the to-be-saved register. */
5691 if (REGNO (reg_rtx) != reg)
5693 rtx set, note_rtx;
5695 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
5696 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
5697 REG_NOTES (insn));
5698 REG_NOTES (insn) = note_rtx;
5701 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
5703 rtx reg_rtx = gen_rtx_REG (mode, reg);
5704 rtx set, note_rtx;
5705 rtx mem_rtx = gen_rtx_MEM (mode,
5706 gen_rtx_PLUS (Pmode,
5707 stack_pointer_rtx,
5708 GEN_INT (offset)));
5710 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
5711 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
5712 REG_NOTES (insn));
5713 REG_NOTES (insn) = note_rtx;
5718 if (entry->offset != d_rounding)
5719 abort ();
5721 else
5722 push_regs (&live_regs_mask, current_function_interrupt);
5724 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
5726 rtx insn = get_last_insn ();
5727 rtx last = emit_insn (gen_GOTaddr2picreg ());
5729 /* Mark these insns as possibly dead. Sometimes, flow2 may
5730 delete all uses of the PIC register. In this case, let it
5731 delete the initialization too. */
5734 insn = NEXT_INSN (insn);
5736 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5737 const0_rtx,
5738 REG_NOTES (insn));
5740 while (insn != last);
5743 if (SHMEDIA_REGS_STACK_ADJUST ())
5745 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5746 function_symbol (TARGET_FPU_ANY
5747 ? "__GCC_push_shmedia_regs"
5748 : "__GCC_push_shmedia_regs_nofpu"));
5749 /* This must NOT go through the PLT, otherwise mach and macl
5750 may be clobbered. */
5751 emit_insn (gen_shmedia_save_restore_regs_compact
5752 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
5755 if (target_flags != save_flags && ! current_function_interrupt)
5757 rtx insn = emit_insn (gen_toggle_sz ());
5759 /* If we're lucky, a mode switch in the function body will
5760 overwrite fpscr, turning this insn dead. Tell flow this
5761 insn is ok to delete. */
5762 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5763 const0_rtx,
5764 REG_NOTES (insn));
5767 target_flags = save_flags;
5769 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
5770 stack_pointer_rtx, 0, NULL);
5772 if (frame_pointer_needed)
5773 frame_insn (GEN_MOV (frame_pointer_rtx, stack_pointer_rtx));
5775 if (TARGET_SHCOMPACT
5776 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5778 /* This must NOT go through the PLT, otherwise mach and macl
5779 may be clobbered. */
5780 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5781 function_symbol ("__GCC_shcompact_incoming_args"));
5782 emit_insn (gen_shcompact_incoming_args ());
5786 void
5787 sh_expand_epilogue (bool sibcall_p)
5789 HARD_REG_SET live_regs_mask;
5790 int d, i;
5791 int d_rounding = 0;
5793 int save_flags = target_flags;
5794 int frame_size, save_size;
5795 int fpscr_deferred = 0;
5796 int e = sibcall_p ? -1 : 1;
5798 d = calc_live_regs (&live_regs_mask);
5800 save_size = d;
5801 frame_size = rounded_frame_size (d);
5803 if (TARGET_SH5)
5805 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5806 int total_size;
5807 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
5808 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5809 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
5811 total_size = d + tregs_space;
5812 total_size += rounded_frame_size (total_size);
5813 save_size = total_size - frame_size;
5815 /* If adjusting the stack in a single step costs nothing extra, do so.
5816 I.e. either if a single addi is enough, or we need a movi anyway,
5817 and we don't exceed the maximum offset range (the test for the
5818 latter is conservative for simplicity). */
5819 if (TARGET_SHMEDIA
5820 && ! frame_pointer_needed
5821 && (CONST_OK_FOR_I10 (total_size)
5822 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
5823 && total_size <= 2044)))
5824 d_rounding = frame_size;
5826 frame_size -= d_rounding;
5829 if (frame_pointer_needed)
5831 output_stack_adjust (frame_size, frame_pointer_rtx, e, &live_regs_mask);
5833 /* We must avoid moving the stack pointer adjustment past code
5834 which reads from the local frame, else an interrupt could
5835 occur after the SP adjustment and clobber data in the local
5836 frame. */
5837 emit_insn (gen_blockage ());
5838 emit_insn (GEN_MOV (stack_pointer_rtx, frame_pointer_rtx));
5840 else if (frame_size)
5842 /* We must avoid moving the stack pointer adjustment past code
5843 which reads from the local frame, else an interrupt could
5844 occur after the SP adjustment and clobber data in the local
5845 frame. */
5846 emit_insn (gen_blockage ());
5847 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
5850 if (SHMEDIA_REGS_STACK_ADJUST ())
5852 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5853 function_symbol (TARGET_FPU_ANY
5854 ? "__GCC_pop_shmedia_regs"
5855 : "__GCC_pop_shmedia_regs_nofpu"));
5856 /* This must NOT go through the PLT, otherwise mach and macl
5857 may be clobbered. */
5858 emit_insn (gen_shmedia_save_restore_regs_compact
5859 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
5862 /* Pop all the registers. */
5864 if (target_flags != save_flags && ! current_function_interrupt)
5865 emit_insn (gen_toggle_sz ());
5866 if (TARGET_SH5)
5868 int offset_base, offset;
5869 int offset_in_r0 = -1;
5870 int sp_in_r0 = 0;
5871 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
5872 save_schedule schedule;
5873 save_entry *entry;
5874 int *tmp_pnt;
5876 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
5877 offset_base = -entry[1].offset + d_rounding;
5878 tmp_pnt = schedule.temps;
5879 for (; entry->mode != VOIDmode; entry--)
5881 enum machine_mode mode = entry->mode;
5882 int reg = entry->reg;
5883 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
5885 offset = offset_base + entry->offset;
5886 reg_rtx = gen_rtx_REG (mode, reg);
5888 mem_rtx = gen_rtx_MEM (mode,
5889 gen_rtx_PLUS (Pmode,
5890 stack_pointer_rtx,
5891 GEN_INT (offset)));
5893 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
5895 mem_rtx = NULL_RTX;
5897 try_post_inc:
5899 if (HAVE_POST_INCREMENT
5900 && (offset == offset_in_r0
5901 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
5902 && mem_rtx == NULL_RTX)
5903 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5905 post_inc = gen_rtx_MEM (mode,
5906 gen_rtx_POST_INC (Pmode, r0));
5908 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
5909 post_inc_ok);
5911 post_inc = NULL_RTX;
5913 break;
5915 post_inc_ok:
5916 mem_rtx = NULL_RTX;
5918 while (0);
5920 if (mem_rtx != NULL_RTX)
5921 goto addr_ok;
5923 if (offset_in_r0 == -1)
5925 emit_move_insn (r0, GEN_INT (offset));
5926 offset_in_r0 = offset;
5928 else if (offset != offset_in_r0)
5930 emit_move_insn (r0,
5931 gen_rtx_PLUS
5932 (Pmode, r0,
5933 GEN_INT (offset - offset_in_r0)));
5934 offset_in_r0 += offset - offset_in_r0;
5937 if (post_inc != NULL_RTX)
5939 if (! sp_in_r0)
5941 emit_move_insn (r0,
5942 gen_rtx_PLUS
5943 (Pmode, r0, stack_pointer_rtx));
5944 sp_in_r0 = 1;
5947 mem_rtx = post_inc;
5949 offset_in_r0 += GET_MODE_SIZE (mode);
5951 else if (sp_in_r0)
5952 mem_rtx = gen_rtx_MEM (mode, r0);
5953 else
5954 mem_rtx = gen_rtx_MEM (mode,
5955 gen_rtx_PLUS (Pmode,
5956 stack_pointer_rtx,
5957 r0));
5959 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5960 && mem_rtx != post_inc)
5961 abort ();
5963 addr_ok:
5964 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5965 && mem_rtx != post_inc)
5967 insn = emit_move_insn (r0, mem_rtx);
5968 mem_rtx = r0;
5970 else if (TARGET_REGISTER_P (reg))
5972 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
5974 /* Give the scheduler a bit of freedom by using up to
5975 MAX_TEMPS registers in a round-robin fashion. */
5976 insn = emit_move_insn (tmp_reg, mem_rtx);
5977 mem_rtx = tmp_reg;
5978 if (*++tmp_pnt < 0)
5979 tmp_pnt = schedule.temps;
5982 insn = emit_move_insn (reg_rtx, mem_rtx);
5983 if (reg == PR_MEDIA_REG && sh_media_register_for_return () >= 0)
5984 /* This is dead, unless we return with a sibcall. */
5985 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5986 const0_rtx,
5987 REG_NOTES (insn));
5990 if (entry->offset + offset_base != d + d_rounding)
5991 abort ();
5993 else /* ! TARGET_SH5 */
5995 save_size = 0;
5996 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
5997 pop (PR_REG);
5998 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
6000 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
6002 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
6003 && hard_regs_intersect_p (&live_regs_mask,
6004 &reg_class_contents[DF_REGS]))
6005 fpscr_deferred = 1;
6006 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
6007 pop (j);
6008 if (j == FIRST_FP_REG && fpscr_deferred)
6009 pop (FPSCR_REG);
6013 if (target_flags != save_flags && ! current_function_interrupt)
6014 emit_insn (gen_toggle_sz ());
6015 target_flags = save_flags;
6017 output_stack_adjust (current_function_pretend_args_size
6018 + save_size + d_rounding
6019 + current_function_args_info.stack_regs * 8,
6020 stack_pointer_rtx, e, NULL);
6022 if (current_function_calls_eh_return)
6023 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
6024 EH_RETURN_STACKADJ_RTX));
6026 /* Switch back to the normal stack if necessary. */
6027 if (sp_switch)
6028 emit_insn (gen_sp_switch_2 ());
6030 /* Tell flow the insn that pops PR isn't dead. */
6031 /* PR_REG will never be live in SHmedia mode, and we don't need to
6032 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6033 by the return pattern. */
6034 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6035 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
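/* To summarize the epilogue sequence emitted above: the local frame is
   deallocated first (through the frame pointer when one is in use), with
   a blockage insn so the adjustment cannot be scheduled past reads of the
   frame; the SHmedia call-saved registers are then restored via the
   __GCC_pop_shmedia_regs helpers, the registers saved by the prologue are
   reloaded (bracketed by fpscr precision switches where needed), the
   pretend-args area and any eh_return adjustment are popped, an sp_switch
   handler switches back to its normal stack, and a final USE of PR keeps
   its restore from being deleted by flow.  */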
6038 static int sh_need_epilogue_known = 0;
6041 sh_need_epilogue (void)
6043 if (! sh_need_epilogue_known)
6045 rtx epilogue;
6047 start_sequence ();
6048 sh_expand_epilogue (0);
6049 epilogue = get_insns ();
6050 end_sequence ();
6051 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
6053 return sh_need_epilogue_known > 0;
6056 /* Emit code to change the current function's return address to RA.
6057 TEMP is available as a scratch register, if needed. */
6059 void
6060 sh_set_return_address (rtx ra, rtx tmp)
6062 HARD_REG_SET live_regs_mask;
6063 int d;
6064 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6065 int pr_offset;
6067 d = calc_live_regs (&live_regs_mask);
6069 /* If pr_reg isn't live, we can set it (or the register given in
6070 sh_media_register_for_return) directly. */
6071 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6073 rtx rr;
6075 if (TARGET_SHMEDIA)
6077 int rr_regno = sh_media_register_for_return ();
6079 if (rr_regno < 0)
6080 rr_regno = pr_reg;
6082 rr = gen_rtx_REG (DImode, rr_regno);
6084 else
6085 rr = gen_rtx_REG (SImode, pr_reg);
6087 emit_insn (GEN_MOV (rr, ra));
6088 /* Tell flow the register for return isn't dead. */
6089 emit_insn (gen_rtx_USE (VOIDmode, rr));
6090 return;
6093 if (TARGET_SH5)
6095 int offset;
6096 save_schedule schedule;
6097 save_entry *entry;
6099 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
6100 offset = entry[1].offset;
6101 for (; entry->mode != VOIDmode; entry--)
6102 if (entry->reg == pr_reg)
6103 goto found;
6105 /* We can't find the PR register.  */
6106 abort ();
6108 found:
6109 offset = entry->offset - offset;
6110 pr_offset = (rounded_frame_size (d) + offset
6111 + SHMEDIA_REGS_STACK_ADJUST ());
6113 else
6114 pr_offset = rounded_frame_size (d);
6116 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6117 emit_insn (GEN_ADD3 (tmp, tmp, frame_pointer_rtx));
6119 tmp = gen_rtx_MEM (Pmode, tmp);
6120 emit_insn (GEN_MOV (tmp, ra));
6123 /* Clear variables at function end. */
6125 static void
6126 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6127 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6129 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
6130 sh_need_epilogue_known = 0;
6131 sp_switch = NULL_RTX;
6134 static rtx
6135 sh_builtin_saveregs (void)
6137 /* First unnamed integer register. */
6138 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
6139 /* Number of integer registers we need to save. */
6140 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6141 /* First unnamed SFmode float reg */
6142 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6143 /* Number of SFmode float regs to save. */
6144 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6145 rtx regbuf, fpregs;
6146 int bufsize, regno;
6147 HOST_WIDE_INT alias_set;
6149 if (TARGET_SH5)
6151 if (n_intregs)
6153 int pushregs = n_intregs;
6155 while (pushregs < NPARM_REGS (SImode) - 1
6156 && (CALL_COOKIE_INT_REG_GET
6157 (current_function_args_info.call_cookie,
6158 NPARM_REGS (SImode) - pushregs)
6159 == 1))
6161 current_function_args_info.call_cookie
6162 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6163 - pushregs, 1);
6164 pushregs++;
6167 if (pushregs == NPARM_REGS (SImode))
6168 current_function_args_info.call_cookie
6169 |= (CALL_COOKIE_INT_REG (0, 1)
6170 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6171 else
6172 current_function_args_info.call_cookie
6173 |= CALL_COOKIE_STACKSEQ (pushregs);
6175 current_function_pretend_args_size += 8 * n_intregs;
6177 if (TARGET_SHCOMPACT)
6178 return const0_rtx;
6181 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
6183 error ("__builtin_saveregs not supported by this subtarget");
6184 return const0_rtx;
6187 if (TARGET_SHMEDIA)
6188 n_floatregs = 0;
6190 /* Allocate block of memory for the regs. */
6191 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6192 Or can assign_stack_local accept a 0 SIZE argument? */
6193 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
6195 if (TARGET_SHMEDIA)
6196 regbuf = gen_rtx_MEM (BLKmode,
6197 gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
6198 else if (n_floatregs & 1)
6200 rtx addr;
6202 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6203 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
6204 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6205 regbuf = change_address (regbuf, BLKmode, addr);
6207 else
6208 regbuf = assign_stack_local (BLKmode, bufsize, 0);
6209 alias_set = get_varargs_alias_set ();
6210 set_mem_alias_set (regbuf, alias_set);
6212 /* Save int args.
6213 This is optimized to only save the regs that are necessary. Explicitly
6214 named args need not be saved. */
6215 if (n_intregs > 0)
6216 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
6217 adjust_address (regbuf, BLKmode,
6218 n_floatregs * UNITS_PER_WORD),
6219 n_intregs);
6221 if (TARGET_SHMEDIA)
6222 /* Return the address of the regbuf. */
6223 return XEXP (regbuf, 0);
6225 /* Save float args.
6226 This is optimized to only save the regs that are necessary. Explicitly
6227 named args need not be saved.
6228 We explicitly build a pointer to the buffer because it halves the insn
6229 count when not optimizing (otherwise the pointer is built for each reg
6230 saved).
6231 We emit the moves in reverse order so that we can use predecrement. */
6233 fpregs = gen_reg_rtx (Pmode);
6234 emit_move_insn (fpregs, XEXP (regbuf, 0));
6235 emit_insn (gen_addsi3 (fpregs, fpregs,
6236 GEN_INT (n_floatregs * UNITS_PER_WORD)));
6237 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
6239 rtx mem;
6240 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
6242 emit_insn (gen_addsi3 (fpregs, fpregs,
6243 GEN_INT (-2 * UNITS_PER_WORD)));
6244 mem = gen_rtx_MEM (DFmode, fpregs);
6245 set_mem_alias_set (mem, alias_set);
6246 emit_move_insn (mem,
6247 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
6249 regno = first_floatreg;
6250 if (regno & 1)
6252 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6253 mem = gen_rtx_MEM (SFmode, fpregs);
6254 set_mem_alias_set (mem, alias_set);
6255 emit_move_insn (mem,
6256 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
6257 - (TARGET_LITTLE_ENDIAN != 0)));
6260 else
6261 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
6263 rtx mem;
6265 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6266 mem = gen_rtx_MEM (SFmode, fpregs);
6267 set_mem_alias_set (mem, alias_set);
6268 emit_move_insn (mem,
6269 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
6272 /* Return the address of the regbuf. */
6273 return XEXP (regbuf, 0);
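/* A sketch of the save block laid out above for the non-SHmedia case
   (illustrative only): the buffer holds n_floatregs words of unnamed FP
   argument registers at its start, stored downwards from offset
   n_floatregs * UNITS_PER_WORD, followed by n_intregs words of unnamed
   integer argument registers.  sh_va_start below points __va_next_fp at
   the returned address, so the FP area is consumed upwards and the
   integer area follows it.  */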
6276 /* Define the `__builtin_va_list' type for the ABI. */
6278 static tree
6279 sh_build_builtin_va_list (void)
6281 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6282 tree record;
6284 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
6285 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6286 return ptr_type_node;
6288 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6290 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
6291 ptr_type_node);
6292 f_next_o_limit = build_decl (FIELD_DECL,
6293 get_identifier ("__va_next_o_limit"),
6294 ptr_type_node);
6295 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
6296 ptr_type_node);
6297 f_next_fp_limit = build_decl (FIELD_DECL,
6298 get_identifier ("__va_next_fp_limit"),
6299 ptr_type_node);
6300 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
6301 ptr_type_node);
6303 DECL_FIELD_CONTEXT (f_next_o) = record;
6304 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
6305 DECL_FIELD_CONTEXT (f_next_fp) = record;
6306 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
6307 DECL_FIELD_CONTEXT (f_next_stack) = record;
6309 TYPE_FIELDS (record) = f_next_o;
6310 TREE_CHAIN (f_next_o) = f_next_o_limit;
6311 TREE_CHAIN (f_next_o_limit) = f_next_fp;
6312 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
6313 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
6315 layout_type (record);
6317 return record;
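/* For reference, the record built above corresponds roughly to the
   following C declaration (an illustrative sketch; the compiler builds
   the type directly as trees):

     typedef struct {
       void *__va_next_o;         next unnamed integer-register slot
       void *__va_next_o_limit;   end of the integer-register save area
       void *__va_next_fp;        next unnamed FP-register slot
       void *__va_next_fp_limit;  end of the FP-register save area
       void *__va_next_stack;     next stack-passed argument
     } __builtin_va_list;  */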
6320 /* Implement `va_start' for varargs and stdarg. */
6322 void
6323 sh_va_start (tree valist, rtx nextarg)
6325 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6326 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6327 tree t, u;
6328 int nfp, nint;
6330 if (TARGET_SH5)
6332 expand_builtin_saveregs ();
6333 std_expand_builtin_va_start (valist, nextarg);
6334 return;
6337 if ((! TARGET_SH2E && ! TARGET_SH4)
6338 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6340 std_expand_builtin_va_start (valist, nextarg);
6341 return;
6344 f_next_o = TYPE_FIELDS (va_list_type_node);
6345 f_next_o_limit = TREE_CHAIN (f_next_o);
6346 f_next_fp = TREE_CHAIN (f_next_o_limit);
6347 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6348 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6350 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6351 NULL_TREE);
6352 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6353 valist, f_next_o_limit, NULL_TREE);
6354 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
6355 NULL_TREE);
6356 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6357 valist, f_next_fp_limit, NULL_TREE);
6358 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6359 valist, f_next_stack, NULL_TREE);
6361 /* Call __builtin_saveregs. */
6362 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
6363 t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
6364 TREE_SIDE_EFFECTS (t) = 1;
6365 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6367 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
6368 if (nfp < 8)
6369 nfp = 8 - nfp;
6370 else
6371 nfp = 0;
6372 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6373 build_int_cst (NULL_TREE, UNITS_PER_WORD * nfp)));
6374 t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
6375 TREE_SIDE_EFFECTS (t) = 1;
6376 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6378 t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
6379 TREE_SIDE_EFFECTS (t) = 1;
6380 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6382 nint = current_function_args_info.arg_count[SH_ARG_INT];
6383 if (nint < 4)
6384 nint = 4 - nint;
6385 else
6386 nint = 0;
6387 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6388 build_int_cst (NULL_TREE, UNITS_PER_WORD * nint)));
6389 t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
6390 TREE_SIDE_EFFECTS (t) = 1;
6391 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6393 u = make_tree (ptr_type_node, nextarg);
6394 t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
6395 TREE_SIDE_EFFECTS (t) = 1;
6396 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
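/* The expansion above amounts to the following pseudo-C, where ap is the
   va_list being initialized, NFP and NINT are the counts of unnamed FP
   and integer argument registers, and SAVE is the buffer returned by
   __builtin_saveregs (a sketch, not literal source):

     ap.__va_next_fp       = SAVE;
     ap.__va_next_fp_limit = SAVE + UNITS_PER_WORD * NFP;
     ap.__va_next_o        = ap.__va_next_fp_limit;
     ap.__va_next_o_limit  = ap.__va_next_o + UNITS_PER_WORD * NINT;
     ap.__va_next_stack    = nextarg;  */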
6399 /* Implement `va_arg'. */
6401 static tree
6402 sh_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p,
6403 tree *post_p ATTRIBUTE_UNUSED)
6405 HOST_WIDE_INT size, rsize;
6406 tree tmp, pptr_type_node;
6407 tree addr, lab_over, result = NULL;
6408 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
6410 if (pass_by_ref)
6411 type = build_pointer_type (type);
6413 size = int_size_in_bytes (type);
6414 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6415 pptr_type_node = build_pointer_type (ptr_type_node);
6417 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
6418 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
6420 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6421 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6422 int pass_as_float;
6423 tree lab_false;
6425 f_next_o = TYPE_FIELDS (va_list_type_node);
6426 f_next_o_limit = TREE_CHAIN (f_next_o);
6427 f_next_fp = TREE_CHAIN (f_next_o_limit);
6428 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6429 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6431 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6432 NULL_TREE);
6433 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6434 valist, f_next_o_limit, NULL_TREE);
6435 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
6436 valist, f_next_fp, NULL_TREE);
6437 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6438 valist, f_next_fp_limit, NULL_TREE);
6439 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6440 valist, f_next_stack, NULL_TREE);
6442 /* Structures with a single member with a distinct mode are passed
6443 like their member. This is relevant if the latter has a REAL_TYPE
6444 or COMPLEX_TYPE type. */
6445 if (TREE_CODE (type) == RECORD_TYPE
6446 && TYPE_FIELDS (type)
6447 && TREE_CODE (TYPE_FIELDS (type)) == FIELD_DECL
6448 && (TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == REAL_TYPE
6449 || TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == COMPLEX_TYPE)
6450 && TREE_CHAIN (TYPE_FIELDS (type)) == NULL_TREE)
6451 type = TREE_TYPE (TYPE_FIELDS (type));
6453 if (TARGET_SH4)
6455 pass_as_float = ((TREE_CODE (type) == REAL_TYPE && size <= 8)
6456 || (TREE_CODE (type) == COMPLEX_TYPE
6457 && TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
6458 && size <= 16));
6460 else
6462 pass_as_float = (TREE_CODE (type) == REAL_TYPE && size == 4);
6465 addr = create_tmp_var (pptr_type_node, NULL);
6466 lab_false = create_artificial_label ();
6467 lab_over = create_artificial_label ();
6469 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
6471 if (pass_as_float)
6473 int first_floatreg
6474 = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6475 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6477 tmp = build (GE_EXPR, boolean_type_node, next_fp, next_fp_limit);
6478 tmp = build (COND_EXPR, void_type_node, tmp,
6479 build (GOTO_EXPR, void_type_node, lab_false),
6480 NULL);
6481 gimplify_and_add (tmp, pre_p);
6483 if (TYPE_ALIGN (type) > BITS_PER_WORD
6484 || (((TREE_CODE (type) == REAL_TYPE && size == 8) || size == 16)
6485 && (n_floatregs & 1)))
6487 tmp = fold_convert (ptr_type_node, size_int (UNITS_PER_WORD));
6488 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp, tmp);
6489 tmp = build (PLUS_EXPR, ptr_type_node, next_fp, tmp);
6490 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp, tmp);
6491 gimplify_and_add (tmp, pre_p);
6494 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
6495 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6496 gimplify_and_add (tmp, pre_p);
6498 #ifdef FUNCTION_ARG_SCmode_WART
6499 if (TYPE_MODE (type) == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
6501 tree subtype = TREE_TYPE (type);
6502 tree real, imag;
6504 imag = std_gimplify_va_arg_expr (valist, subtype, pre_p, NULL);
6505 imag = get_initialized_tmp_var (imag, pre_p, NULL);
6507 real = std_gimplify_va_arg_expr (valist, subtype, pre_p, NULL);
6508 real = get_initialized_tmp_var (real, pre_p, NULL);
6510 result = build (COMPLEX_EXPR, type, real, imag);
6511 result = get_initialized_tmp_var (result, pre_p, NULL);
6513 #endif /* FUNCTION_ARG_SCmode_WART */
6515 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6516 gimplify_and_add (tmp, pre_p);
6518 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6519 gimplify_and_add (tmp, pre_p);
6521 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6522 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6523 gimplify_and_add (tmp, pre_p);
6525 else
6527 tmp = fold_convert (ptr_type_node, size_int (rsize));
6528 tmp = build (PLUS_EXPR, ptr_type_node, next_o, tmp);
6529 tmp = build (GT_EXPR, boolean_type_node, tmp, next_o_limit);
6530 tmp = build (COND_EXPR, void_type_node, tmp,
6531 build (GOTO_EXPR, void_type_node, lab_false),
6532 NULL);
6533 gimplify_and_add (tmp, pre_p);
6535 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
6536 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6537 gimplify_and_add (tmp, pre_p);
6539 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6540 gimplify_and_add (tmp, pre_p);
6542 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6543 gimplify_and_add (tmp, pre_p);
6545 if (size > 4 && ! TARGET_SH4)
6547 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
6548 gimplify_and_add (tmp, pre_p);
6551 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6552 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6553 gimplify_and_add (tmp, pre_p);
6556 if (!result)
6558 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6559 gimplify_and_add (tmp, pre_p);
6563 /* ??? In va-sh.h, there had been code to make values larger than
6564 size 8 indirect. This does not match the FUNCTION_ARG macros. */
6566 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
6567 if (result)
6569 tmp = build (MODIFY_EXPR, void_type_node, result, tmp);
6570 gimplify_and_add (tmp, pre_p);
6572 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6573 gimplify_and_add (tmp, pre_p);
6575 else
6576 result = tmp;
6578 if (pass_by_ref)
6579 result = build_fold_indirect_ref (result);
6581 return result;
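/* Roughly, the gimplification above implements the following pseudo-C for
   the SH2E/SH4 non-Renesas case (a sketch; pass-by-reference and the
   little-endian SCmode fix-up are handled as in the code above):

     if (the argument is passed in FP registers)
       if (ap.__va_next_fp >= ap.__va_next_fp_limit)
         addr = &ap.__va_next_stack;
       else
         addr = &ap.__va_next_fp;     after 8-byte alignment if needed
     else
       if (ap.__va_next_o + rsize > ap.__va_next_o_limit)
         addr = &ap.__va_next_stack;  for size > 4 on non-SH4, the
                                      register area is also marked used up
       else
         addr = &ap.__va_next_o;
     fetch the value with the standard va_arg expansion through *addr;  */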
6584 bool
6585 sh_promote_prototypes (tree type)
6587 if (TARGET_HITACHI)
6588 return 0;
6589 if (! type)
6590 return 1;
6591 return ! sh_attr_renesas_p (type);
6594 /* Whether an argument must be passed by reference. On SHcompact, we
6595 pretend arguments wider than 32-bits that would have been passed in
6596 registers are passed by reference, so that an SHmedia trampoline
6597 loads them into the full 64-bits registers. */
6599 static int
6600 shcompact_byref (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6601 tree type, bool named)
6603 unsigned HOST_WIDE_INT size;
6605 if (type)
6606 size = int_size_in_bytes (type);
6607 else
6608 size = GET_MODE_SIZE (mode);
6610 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
6611 && (!named
6612 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
6613 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
6614 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
6615 && size > 4
6616 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
6617 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
6618 return size;
6619 else
6620 return 0;
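/* For instance (a sketch of the test above): an 8-byte DImode argument
   that still has integer argument registers available is reported as
   passed by reference with size 8, so an SHmedia trampoline can widen it
   to a full 64-bit register, whereas arguments of 4 bytes or less are
   never passed by reference here.  */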
6623 static bool
6624 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6625 tree type, bool named)
6627 if (targetm.calls.must_pass_in_stack (mode, type))
6628 return true;
6630 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
6631 wants to know about pass-by-reference semantics for incoming
6632 arguments. */
6633 if (! cum)
6634 return false;
6636 if (TARGET_SHCOMPACT)
6638 cum->byref = shcompact_byref (cum, mode, type, named);
6639 return cum->byref != 0;
6642 return false;
6645 static bool
6646 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6647 tree type, bool named ATTRIBUTE_UNUSED)
6649 /* ??? How can it possibly be correct to return true only on the
6650 caller side of the equation? Is there someplace else in the
6651 sh backend that's magically producing the copies? */
6652 return (cum->outgoing
6653 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
6654 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
6657 /* Define where to put the arguments to a function.
6658 Value is zero to push the argument on the stack,
6659 or a hard register in which to store the argument.
6661 MODE is the argument's machine mode.
6662 TYPE is the data type of the argument (as a tree).
6663 This is null for libcalls where that information may
6664 not be available.
6665 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6666 the preceding args and about the function being called.
6667 NAMED is nonzero if this argument is a named parameter
6668 (otherwise it is an extra parameter matching an ellipsis).
6670 On SH the first args are normally in registers
6671 and the rest are pushed. Any arg that starts within the first
6672 NPARM_REGS words is at least partially passed in a register unless
6673 its data type forbids. */
6677 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
6678 tree type, int named)
6680 if (! TARGET_SH5 && mode == VOIDmode)
6681 return GEN_INT (ca->renesas_abi ? 1 : 0);
6683 if (! TARGET_SH5
6684 && PASS_IN_REG_P (*ca, mode, type)
6685 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
6687 int regno;
6689 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
6690 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
6692 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
6693 gen_rtx_REG (SFmode,
6694 BASE_ARG_REG (mode)
6695 + (ROUND_REG (*ca, mode) ^ 1)),
6696 const0_rtx);
6697 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
6698 gen_rtx_REG (SFmode,
6699 BASE_ARG_REG (mode)
6700 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
6701 GEN_INT (4));
6702 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
6705 /* If the alignment of a DF value causes an SF register to be
6706 skipped, we will use that skipped register for the next SF
6707 value. */
6708 if ((TARGET_HITACHI || ca->renesas_abi)
6709 && ca->free_single_fp_reg
6710 && mode == SFmode)
6711 return gen_rtx_REG (mode, ca->free_single_fp_reg);
6713 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
6714 ^ (mode == SFmode && TARGET_SH4
6715 && TARGET_LITTLE_ENDIAN != 0
6716 && ! TARGET_HITACHI && ! ca->renesas_abi);
6717 return gen_rtx_REG (mode, regno);
6721 if (TARGET_SH5)
6723 if (mode == VOIDmode && TARGET_SHCOMPACT)
6724 return GEN_INT (ca->call_cookie);
6726 /* The following test assumes unnamed arguments are promoted to
6727 DFmode. */
6728 if (mode == SFmode && ca->free_single_fp_reg)
6729 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
6731 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
6732 && (named || ! ca->prototype_p)
6733 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
6735 if (! ca->prototype_p && TARGET_SHMEDIA)
6736 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
6738 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
6739 FIRST_FP_PARM_REG
6740 + ca->arg_count[(int) SH_ARG_FLOAT]);
6743 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
6744 && (! TARGET_SHCOMPACT
6745 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
6746 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
6747 type, named))))
6749 return gen_rtx_REG (mode, (FIRST_PARM_REG
6750 + ca->arg_count[(int) SH_ARG_INT]));
6753 return 0;
6756 return 0;
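/* To illustrate the SFmode twist above (assuming the usual SH4 numbering
   in which FP arguments start at fr4): in little-endian mode with the
   default ABI the computed register number is XORed with 1, so
   consecutive single-precision arguments fill each even/odd register
   pair in reverse order (fr5 before fr4, fr7 before fr6, and so on),
   mirroring the FUNCTION_ARG_SCmode_WART handling at the top of the
   function.  */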
6759 /* Update the data in CUM to advance over an argument
6760 of mode MODE and data type TYPE.
6761 (TYPE is null for libcalls where that information may not be
6762 available.) */
6764 void
6765 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
6766 tree type, int named)
6768 if (ca->force_mem)
6769 ca->force_mem = 0;
6770 else if (TARGET_SH5)
6772 tree type2 = (ca->byref && type
6773 ? TREE_TYPE (type)
6774 : type);
6775 enum machine_mode mode2 = (ca->byref && type
6776 ? TYPE_MODE (type2)
6777 : mode);
6778 int dwords = ((ca->byref
6779 ? ca->byref
6780 : mode2 == BLKmode
6781 ? int_size_in_bytes (type2)
6782 : GET_MODE_SIZE (mode2)) + 7) / 8;
6783 int numregs = MIN (dwords, NPARM_REGS (SImode)
6784 - ca->arg_count[(int) SH_ARG_INT]);
6786 if (numregs)
6788 ca->arg_count[(int) SH_ARG_INT] += numregs;
6789 if (TARGET_SHCOMPACT
6790 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
6792 ca->call_cookie
6793 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6794 - numregs, 1);
6795 /* N.B. We want this also for outgoing. */
6796 ca->stack_regs += numregs;
6798 else if (ca->byref)
6800 if (! ca->outgoing)
6801 ca->stack_regs += numregs;
6802 ca->byref_regs += numregs;
6803 ca->byref = 0;
6805 ca->call_cookie
6806 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6807 - numregs, 2);
6808 while (--numregs);
6809 ca->call_cookie
6810 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6811 - 1, 1);
6813 else if (dwords > numregs)
6815 int pushregs = numregs;
6817 if (TARGET_SHCOMPACT)
6818 ca->stack_regs += numregs;
6819 while (pushregs < NPARM_REGS (SImode) - 1
6820 && (CALL_COOKIE_INT_REG_GET
6821 (ca->call_cookie,
6822 NPARM_REGS (SImode) - pushregs)
6823 == 1))
6825 ca->call_cookie
6826 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6827 - pushregs, 1);
6828 pushregs++;
6830 if (numregs == NPARM_REGS (SImode))
6831 ca->call_cookie
6832 |= CALL_COOKIE_INT_REG (0, 1)
6833 | CALL_COOKIE_STACKSEQ (numregs - 1);
6834 else
6835 ca->call_cookie
6836 |= CALL_COOKIE_STACKSEQ (numregs);
6839 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
6840 && (named || ! ca->prototype_p))
6842 if (mode2 == SFmode && ca->free_single_fp_reg)
6843 ca->free_single_fp_reg = 0;
6844 else if (ca->arg_count[(int) SH_ARG_FLOAT]
6845 < NPARM_REGS (SFmode))
6847 int numfpregs
6848 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
6849 NPARM_REGS (SFmode)
6850 - ca->arg_count[(int) SH_ARG_FLOAT]);
6852 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
6854 if (TARGET_SHCOMPACT && ! ca->prototype_p)
6856 if (ca->outgoing && numregs > 0)
6859 ca->call_cookie
6860 |= (CALL_COOKIE_INT_REG
6861 (ca->arg_count[(int) SH_ARG_INT]
6862 - numregs + ((numfpregs - 2) / 2),
6863 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
6864 - numfpregs) / 2));
6866 while (numfpregs -= 2);
6868 else if (mode2 == SFmode && (named)
6869 && (ca->arg_count[(int) SH_ARG_FLOAT]
6870 < NPARM_REGS (SFmode)))
6871 ca->free_single_fp_reg
6872 = FIRST_FP_PARM_REG - numfpregs
6873 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
6876 return;
6879 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
6881 /* Note that we've used the skipped register. */
6882 if (mode == SFmode && ca->free_single_fp_reg)
6884 ca->free_single_fp_reg = 0;
6885 return;
6887 /* When we have a DF after an SF, there's an SF register that gets
6888 skipped in order to align the DF value. We note this skipped
6889 register, because the next SF value will use it, and not the
6890 SF that follows the DF. */
6891 if (mode == DFmode
6892 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
6894 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
6895 + BASE_ARG_REG (mode));
6899 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
6900 || PASS_IN_REG_P (*ca, mode, type))
6901 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
6902 = (ROUND_REG (*ca, mode)
6903 + (mode == BLKmode
6904 ? ROUND_ADVANCE (int_size_in_bytes (type))
6905 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
6908 /* The Renesas calling convention doesn't quite fit into this scheme,
6909 since the structure return address is passed like an invisible argument,
6910 but one that is always passed in memory.  */
6911 static rtx
6912 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
6914 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
6915 return 0;
6916 return gen_rtx_REG (Pmode, 2);
6919 /* Worker function for TARGET_RETURN_IN_MEMORY. */
6921 static bool
6922 sh_return_in_memory (tree type, tree fndecl)
6924 if (TARGET_SH5)
6926 if (TYPE_MODE (type) == BLKmode)
6927 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
6928 else
6929 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
6931 else
6933 return (TYPE_MODE (type) == BLKmode
6934 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
6935 && TREE_CODE (type) == RECORD_TYPE));
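/* For example, under the rules above a 16-byte structure is returned in
   memory on SH5 (its size exceeds 8 bytes) while a plain double comes
   back in registers; on the other targets any BLKmode value is returned
   in memory, as is any RECORD_TYPE under the Renesas conventions.  */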
6939 /* We actually emit the code in sh_expand_prologue. We used to use
6940 a static variable to flag that we need to emit this code, but that
6941 doesn't work when inlining, when functions are deferred and then emitted
6942 later. Fortunately, we already have two flags that are part of struct
6943 function that tell if a function uses varargs or stdarg. */
6944 static void
6945 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
6946 enum machine_mode mode,
6947 tree type,
6948 int *pretend_arg_size,
6949 int second_time ATTRIBUTE_UNUSED)
6951 if (! current_function_stdarg)
6952 abort ();
6953 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6955 int named_parm_regs, anon_parm_regs;
6957 named_parm_regs = (ROUND_REG (*ca, mode)
6958 + (mode == BLKmode
6959 ? ROUND_ADVANCE (int_size_in_bytes (type))
6960 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
6961 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
6962 if (anon_parm_regs > 0)
6963 *pretend_arg_size = anon_parm_regs * 4;
6967 static bool
6968 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
6970 return TARGET_SH5;
6973 static bool
6974 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
6976 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
6980 /* Define the offset between two registers, one to be eliminated, and
6981 the other its replacement, at the start of a routine. */
6984 initial_elimination_offset (int from, int to)
6986 int regs_saved;
6987 int regs_saved_rounding = 0;
6988 int total_saved_regs_space;
6989 int total_auto_space;
6990 int save_flags = target_flags;
6991 int copy_flags;
6992 HARD_REG_SET live_regs_mask;
6994 shmedia_space_reserved_for_target_registers = false;
6995 regs_saved = calc_live_regs (&live_regs_mask);
6996 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
6998 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
7000 shmedia_space_reserved_for_target_registers = true;
7001 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
7004 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
7005 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7006 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
7008 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
7009 copy_flags = target_flags;
7010 target_flags = save_flags;
7012 total_saved_regs_space = regs_saved + regs_saved_rounding;
7014 if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
7015 return total_saved_regs_space + total_auto_space
7016 + current_function_args_info.byref_regs * 8;
7018 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7019 return total_saved_regs_space + total_auto_space
7020 + current_function_args_info.byref_regs * 8;
7022 /* Initial gap between fp and sp is 0. */
7023 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7024 return 0;
7026 if (from == RETURN_ADDRESS_POINTER_REGNUM
7027 && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM))
7029 if (TARGET_SH5)
7031 int n = total_saved_regs_space;
7032 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7033 save_schedule schedule;
7034 save_entry *entry;
7036 n += total_auto_space;
7038 /* If it wasn't saved, there's not much we can do. */
7039 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7040 return n;
7042 target_flags = copy_flags;
7044 sh5_schedule_saves (&live_regs_mask, &schedule, n);
7045 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7046 if (entry->reg == pr_reg)
7048 target_flags = save_flags;
7049 return entry->offset;
7051 abort ();
7053 else
7054 return total_auto_space;
7057 abort ();
7060 /* Handle machine-specific pragmas to be semi-compatible with the Renesas
7061 compiler. */
7063 void
7064 sh_pr_interrupt (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7066 pragma_interrupt = 1;
7069 void
7070 sh_pr_trapa (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7072 pragma_interrupt = pragma_trapa = 1;
7075 void
7076 sh_pr_nosave_low_regs (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7078 pragma_nosave_low_regs = 1;
7081 /* Generate the 'interrupt_handler' attribute for decls.  */
7083 static void
7084 sh_insert_attributes (tree node, tree *attributes)
7086 if (! pragma_interrupt
7087 || TREE_CODE (node) != FUNCTION_DECL)
7088 return;
7090 /* We are only interested in fields. */
7091 if (!DECL_P (node))
7092 return;
7094 /* Add an 'interrupt_handler' attribute.  */
7095 * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
7097 return;
7100 /* Supported attributes:
7102 interrupt_handler -- specifies this function is an interrupt handler.
7104 sp_switch -- specifies an alternate stack for an interrupt handler
7105 to run on.
7107 trap_exit -- use a trapa to exit an interrupt function instead of
7108 an rte instruction.
7110 renesas -- use Renesas calling/layout conventions (functions and
7111 structures).
7115 const struct attribute_spec sh_attribute_table[] =
7117 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
7118 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7119 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
7120 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
7121 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
7122 #ifdef SYMBIAN
7123 /* Symbian support adds two new attributes:
7124 dllexport - for exporting a function/variable that will live in a dll
7125 dllimport - for importing a function/variable from a dll
7127 Microsoft allows multiple declspecs in one __declspec, separating
7128 them with spaces. We do NOT support this. Instead, use __declspec
7129 multiple times. */
7130 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7131 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7132 #endif
7133 { NULL, 0, 0, false, false, false, NULL }
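/* Illustrative use of the attributes registered above (a sketch, not
   taken from this file; the names, the stack variable and the trap
   number are made up):

     #pragma interrupt
     void __attribute__ ((sp_switch ("alt_stack"), trap_exit (4)))
     handler (void);

     void __attribute__ ((interrupt_handler)) simple_handler (void);

     int __attribute__ ((renesas)) renesas_abi_func (int x);

   sp_switch takes a string constant and trap_exit an integer constant,
   as enforced by the handlers below; both only make sense for interrupt
   functions.  */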
7136 /* Handle an "interrupt_handler" attribute; arguments as in
7137 struct attribute_spec.handler. */
7138 static tree
7139 sh_handle_interrupt_handler_attribute (tree *node, tree name,
7140 tree args ATTRIBUTE_UNUSED,
7141 int flags ATTRIBUTE_UNUSED,
7142 bool *no_add_attrs)
7144 if (TREE_CODE (*node) != FUNCTION_DECL)
7146 warning ("`%s' attribute only applies to functions",
7147 IDENTIFIER_POINTER (name));
7148 *no_add_attrs = true;
7150 else if (TARGET_SHCOMPACT)
7152 error ("attribute interrupt_handler is not compatible with -m5-compact");
7153 *no_add_attrs = true;
7156 return NULL_TREE;
7159 /* Handle an "sp_switch" attribute; arguments as in
7160 struct attribute_spec.handler. */
7161 static tree
7162 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
7163 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7165 if (TREE_CODE (*node) != FUNCTION_DECL)
7167 warning ("`%s' attribute only applies to functions",
7168 IDENTIFIER_POINTER (name));
7169 *no_add_attrs = true;
7171 else if (!pragma_interrupt)
7173 /* The sp_switch attribute only has meaning for interrupt functions. */
7174 warning ("`%s' attribute only applies to interrupt functions",
7175 IDENTIFIER_POINTER (name));
7176 *no_add_attrs = true;
7178 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
7180 /* The argument must be a constant string. */
7181 warning ("`%s' attribute argument not a string constant",
7182 IDENTIFIER_POINTER (name));
7183 *no_add_attrs = true;
7185 else
7187 char *s = ggc_strdup (TREE_STRING_POINTER (TREE_VALUE (args)));
7188 sp_switch = gen_rtx_SYMBOL_REF (VOIDmode, s);
7191 return NULL_TREE;
7194 /* Handle a "trap_exit" attribute; arguments as in
7195 struct attribute_spec.handler. */
7196 static tree
7197 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
7198 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7200 if (TREE_CODE (*node) != FUNCTION_DECL)
7202 warning ("`%s' attribute only applies to functions",
7203 IDENTIFIER_POINTER (name));
7204 *no_add_attrs = true;
7206 else if (!pragma_interrupt)
7208 /* The trap_exit attribute only has meaning for interrupt functions. */
7209 warning ("`%s' attribute only applies to interrupt functions",
7210 IDENTIFIER_POINTER (name));
7211 *no_add_attrs = true;
7213 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
7215 /* The argument must be a constant integer. */
7216 warning ("`%s' attribute argument not an integer constant",
7217 IDENTIFIER_POINTER (name));
7218 *no_add_attrs = true;
7220 else
7222 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
7225 return NULL_TREE;
7228 static tree
7229 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
7230 tree name ATTRIBUTE_UNUSED,
7231 tree args ATTRIBUTE_UNUSED,
7232 int flags ATTRIBUTE_UNUSED,
7233 bool *no_add_attrs ATTRIBUTE_UNUSED)
7235 return NULL_TREE;
7238 /* True if __attribute__((renesas)) or -mrenesas. */
7240 sh_attr_renesas_p (tree td)
7242 if (TARGET_HITACHI)
7243 return 1;
7244 if (td == 0)
7245 return 0;
7246 if (DECL_P (td))
7247 td = TREE_TYPE (td);
7248 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
7249 != NULL_TREE);
7252 /* True if __attribute__((renesas)) or -mrenesas, for the current
7253 function. */
7255 sh_cfun_attr_renesas_p (void)
7257 return sh_attr_renesas_p (current_function_decl);
7261 sh_cfun_interrupt_handler_p (void)
7263 return (lookup_attribute ("interrupt_handler",
7264 DECL_ATTRIBUTES (current_function_decl))
7265 != NULL_TREE);
7268 /* ??? target_switches in toplev.c is static, hence we have to duplicate it. */
7269 static const struct
7271 const char *const name;
7272 const int value;
7273 const char *const description;
7275 sh_target_switches[] = TARGET_SWITCHES;
7276 #define target_switches sh_target_switches
7278 /* Like default_pch_valid_p, but take flag_mask into account. */
7279 const char *
7280 sh_pch_valid_p (const void *data_p, size_t len)
7282 const char *data = (const char *)data_p;
7283 const char *flag_that_differs = NULL;
7284 size_t i;
7285 int old_flags;
7286 int flag_mask
7287 = (SH1_BIT | SH2_BIT | SH3_BIT | SH_E_BIT | HARD_SH4_BIT | FPU_SINGLE_BIT
7288 | SH4_BIT | HITACHI_BIT | LITTLE_ENDIAN_BIT);
7290 /* -fpic and -fpie also usually make a PCH invalid. */
7291 if (data[0] != flag_pic)
7292 return _("created and used with different settings of -fpic");
7293 if (data[1] != flag_pie)
7294 return _("created and used with different settings of -fpie");
7295 data += 2;
7297 /* Check target_flags. */
7298 memcpy (&old_flags, data, sizeof (target_flags));
7299 if (((old_flags ^ target_flags) & flag_mask) != 0)
7301 for (i = 0; i < ARRAY_SIZE (target_switches); i++)
7303 int bits;
7305 bits = target_switches[i].value;
7306 if (bits < 0)
7307 bits = -bits;
7308 bits &= flag_mask;
7309 if ((target_flags & bits) != (old_flags & bits))
7311 flag_that_differs = target_switches[i].name;
7312 goto make_message;
7315 abort ();
7317 data += sizeof (target_flags);
7318 len -= sizeof (target_flags);
7320 /* Check string options. */
7321 #ifdef TARGET_OPTIONS
7322 for (i = 0; i < ARRAY_SIZE (target_options); i++)
7324 const char *str = *target_options[i].variable;
7325 size_t l;
7326 if (! str)
7327 str = "";
7328 l = strlen (str) + 1;
7329 if (len < l || memcmp (data, str, l) != 0)
7331 flag_that_differs = target_options[i].prefix;
7332 goto make_message;
7334 data += l;
7335 len -= l;
7337 #endif
7339 return NULL;
7341 make_message:
7343 char *r;
7344 asprintf (&r, _("created and used with differing settings of `-m%s'"),
7345 flag_that_differs);
7346 if (r == NULL)
7347 return _("out of memory");
7348 return r;
7352 /* Predicates used by the templates. */
7354 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
7355 Used only in general_movsrc_operand. */
7358 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7360 switch (REGNO (op))
7362 case PR_REG:
7363 case MACL_REG:
7364 case MACH_REG:
7365 return 1;
7367 return 0;
7370 /* Returns 1 if OP can be source of a simple move operation.
7371 Same as general_operand, but a LABEL_REF is valid, PRE_DEC is
7372 invalid, as are subregs of system registers.  */
7375 general_movsrc_operand (rtx op, enum machine_mode mode)
7377 if (GET_CODE (op) == MEM)
7379 rtx inside = XEXP (op, 0);
7380 if (GET_CODE (inside) == CONST)
7381 inside = XEXP (inside, 0);
7383 if (GET_CODE (inside) == LABEL_REF)
7384 return 1;
7386 if (GET_CODE (inside) == PLUS
7387 && GET_CODE (XEXP (inside, 0)) == LABEL_REF
7388 && GET_CODE (XEXP (inside, 1)) == CONST_INT)
7389 return 1;
7391 /* Only post inc allowed. */
7392 if (GET_CODE (inside) == PRE_DEC)
7393 return 0;
7396 if ((mode == QImode || mode == HImode)
7397 && (GET_CODE (op) == SUBREG
7398 && GET_CODE (XEXP (op, 0)) == REG
7399 && system_reg_operand (XEXP (op, 0), mode)))
7400 return 0;
7402 return general_operand (op, mode);
7405 /* Returns 1 if OP can be a destination of a move.
7406 Same as general_operand, but no post-increment allowed.  */
7409 general_movdst_operand (rtx op, enum machine_mode mode)
7411 /* Only pre dec allowed. */
7412 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC)
7413 return 0;
7415 return general_operand (op, mode);
7418 /* Returns 1 if OP is a normal arithmetic register. */
7421 arith_reg_operand (rtx op, enum machine_mode mode)
7423 if (register_operand (op, mode))
7425 int regno;
7427 if (GET_CODE (op) == REG)
7428 regno = REGNO (op);
7429 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7430 regno = REGNO (SUBREG_REG (op));
7431 else
7432 return 1;
7434 return (regno != T_REG && regno != PR_REG
7435 && ! TARGET_REGISTER_P (regno)
7436 && (regno != FPUL_REG || TARGET_SH4)
7437 && regno != MACH_REG && regno != MACL_REG);
7439 return 0;
7442 /* Like above, but for DImode destinations: forbid paradoxical DImode subregs,
7443 because this would lead to missing sign extensions when truncating from
7444 DImode to SImode. */
7446 arith_reg_dest (rtx op, enum machine_mode mode)
7448 if (mode == DImode && GET_CODE (op) == SUBREG
7449 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8)
7450 return 0;
7451 return arith_reg_operand (op, mode);
7455 int_gpr_dest (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7457 enum machine_mode op_mode = GET_MODE (op);
7459 if (GET_MODE_CLASS (op_mode) != MODE_INT
7460 || GET_MODE_SIZE (op_mode) >= UNITS_PER_WORD)
7461 return 0;
7462 if (! reload_completed)
7463 return 0;
7464 return true_regnum (op) <= LAST_GENERAL_REG;
7468 fp_arith_reg_operand (rtx op, enum machine_mode mode)
7470 if (register_operand (op, mode))
7472 int regno;
7474 if (GET_CODE (op) == REG)
7475 regno = REGNO (op);
7476 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7477 regno = REGNO (SUBREG_REG (op));
7478 else
7479 return 1;
7481 return (regno >= FIRST_PSEUDO_REGISTER
7482 || FP_REGISTER_P (regno));
7484 return 0;
7487 /* Returns 1 if OP is a valid source operand for an arithmetic insn. */
7490 arith_operand (rtx op, enum machine_mode mode)
7492 if (arith_reg_operand (op, mode))
7493 return 1;
7495 if (TARGET_SHMEDIA)
7497 /* FIXME: We should be checking whether the CONST_INT fits in a
7498 CONST_OK_FOR_I16 here, but this causes reload_cse to crash when
7499 attempting to transform a sequence of two 64-bit sets of the
7500 same register from literal constants into a set and an add,
7501 when the difference is too wide for an add. */
7502 if (GET_CODE (op) == CONST_INT
7503 || EXTRA_CONSTRAINT_C16 (op))
7504 return 1;
7505 else
7506 return 0;
7508 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I08 (INTVAL (op)))
7509 return 1;
7511 return 0;
7514 /* Returns 1 if OP is a valid source operand for a compare insn. */
7517 arith_reg_or_0_operand (rtx op, enum machine_mode mode)
7519 if (arith_reg_operand (op, mode))
7520 return 1;
7522 if (EXTRA_CONSTRAINT_Z (op))
7523 return 1;
7525 return 0;
7528 /* Return 1 if OP is a valid source operand for an SHmedia operation
7529 that takes either a register or a 6-bit immediate. */
7532 shmedia_6bit_operand (rtx op, enum machine_mode mode)
7534 return (arith_reg_operand (op, mode)
7535 || (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I06 (INTVAL (op))));
7538 /* Returns 1 if OP is a valid source operand for a logical operation. */
7541 logical_operand (rtx op, enum machine_mode mode)
7543 if (arith_reg_operand (op, mode))
7544 return 1;
7546 if (TARGET_SHMEDIA)
7548 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I10 (INTVAL (op)))
7549 return 1;
7550 else
7551 return 0;
7553 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_K08 (INTVAL (op)))
7554 return 1;
7556 return 0;
7560 and_operand (rtx op, enum machine_mode mode)
7562 if (logical_operand (op, mode))
7563 return 1;
7565 /* Check mshflo.l / mshflhi.l opportunities. */
7566 if (TARGET_SHMEDIA
7567 && mode == DImode
7568 && GET_CODE (op) == CONST_INT
7569 && CONST_OK_FOR_J16 (INTVAL (op)))
7570 return 1;
7572 return 0;
7575 /* Nonzero if OP is a floating point value with value 0.0. */
7578 fp_zero_operand (rtx op)
7580 REAL_VALUE_TYPE r;
7582 if (GET_MODE (op) != SFmode)
7583 return 0;
7585 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7586 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
7589 /* Nonzero if OP is a floating point value with value 1.0. */
7592 fp_one_operand (rtx op)
7594 REAL_VALUE_TYPE r;
7596 if (GET_MODE (op) != SFmode)
7597 return 0;
7599 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7600 return REAL_VALUES_EQUAL (r, dconst1);
7603 /* For -m4 and -m4-single-only, mode switching is used. If we are
7604 compiling without -mfmovd, movsf_ie isn't taken into account for
7605 mode switching. We could check in machine_dependent_reorg for
7606 cases where we know we are in single precision mode, but there is
7607 no interface to find that out during reload, so we must avoid
7608 choosing an fldi alternative during reload and thus failing to
7609 allocate a scratch register for the constant loading. */
7611 fldi_ok (void)
7613 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
7617 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7619 enum rtx_code code = GET_CODE (op);
7620 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
7624 fpscr_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7626 return (GET_CODE (op) == REG
7627 && (REGNO (op) == FPSCR_REG
7628 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
7629 && !(reload_in_progress || reload_completed)))
7630 && GET_MODE (op) == PSImode);
7634 fpul_operand (rtx op, enum machine_mode mode)
7636 if (TARGET_SHMEDIA)
7637 return fp_arith_reg_operand (op, mode);
7639 return (GET_CODE (op) == REG
7640 && (REGNO (op) == FPUL_REG || REGNO (op) >= FIRST_PSEUDO_REGISTER)
7641 && GET_MODE (op) == mode);
7645 symbol_ref_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7647 return (GET_CODE (op) == SYMBOL_REF);
7650 /* Return the TLS type for TLS symbols, 0 for otherwise. */
7652 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7654 if (GET_CODE (op) != SYMBOL_REF)
7655 return 0;
7656 return SYMBOL_REF_TLS_MODEL (op);
7660 commutative_float_operator (rtx op, enum machine_mode mode)
7662 if (GET_MODE (op) != mode)
7663 return 0;
7664 switch (GET_CODE (op))
7666 case PLUS:
7667 case MULT:
7668 return 1;
7669 default:
7670 break;
7672 return 0;
7676 noncommutative_float_operator (rtx op, enum machine_mode mode)
7678 if (GET_MODE (op) != mode)
7679 return 0;
7680 switch (GET_CODE (op))
7682 case MINUS:
7683 case DIV:
7684 return 1;
7685 default:
7686 break;
7688 return 0;
7692 unary_float_operator (rtx op, enum machine_mode mode)
7694 if (GET_MODE (op) != mode)
7695 return 0;
7696 switch (GET_CODE (op))
7698 case ABS:
7699 case NEG:
7700 case SQRT:
7701 return 1;
7702 default:
7703 break;
7705 return 0;
7709 binary_float_operator (rtx op, enum machine_mode mode)
7711 if (GET_MODE (op) != mode)
7712 return 0;
7713 switch (GET_CODE (op))
7715 case PLUS:
7716 case MINUS:
7717 case MULT:
7718 case DIV:
7719 return 1;
7720 default:
7721 break;
7723 return 0;
7727 binary_logical_operator (rtx op, enum machine_mode mode)
7729 if (GET_MODE (op) != mode)
7730 return 0;
7731 switch (GET_CODE (op))
7733 case IOR:
7734 case AND:
7735 case XOR:
7736 return 1;
7737 default:
7738 break;
7740 return 0;
7744 equality_comparison_operator (rtx op, enum machine_mode mode)
7746 return ((mode == VOIDmode || GET_MODE (op) == mode)
7747 && (GET_CODE (op) == EQ || GET_CODE (op) == NE));
7751 greater_comparison_operator (rtx op, enum machine_mode mode)
7753 if (mode != VOIDmode && GET_MODE (op) != mode)
7754 return 0;
7755 switch (GET_CODE (op))
7757 case GT:
7758 case GE:
7759 case GTU:
7760 case GEU:
7761 return 1;
7762 default:
7763 return 0;
7768 less_comparison_operator (rtx op, enum machine_mode mode)
7770 if (mode != VOIDmode && GET_MODE (op) != mode)
7771 return 0;
7772 switch (GET_CODE (op))
7774 case LT:
7775 case LE:
7776 case LTU:
7777 case LEU:
7778 return 1;
7779 default:
7780 return 0;
7784 /* Accept pseudos and branch target registers. */
7786 target_reg_operand (rtx op, enum machine_mode mode)
7788 if (mode != DImode
7789 || GET_MODE (op) != DImode)
7790 return 0;
7792 if (GET_CODE (op) == SUBREG)
7793 op = XEXP (op, 0);
7795 if (GET_CODE (op) != REG)
7796 return 0;
7798 /* We must protect ourselves from matching pseudos that are virtual
7799 register, because they will eventually be replaced with hardware
7800 registers that aren't branch-target registers. */
7801 if (REGNO (op) > LAST_VIRTUAL_REGISTER
7802 || TARGET_REGISTER_P (REGNO (op)))
7803 return 1;
7805 return 0;
7808 /* Same as target_reg_operand, except that label_refs and symbol_refs
7809 are accepted before reload. */
7811 target_operand (rtx op, enum machine_mode mode)
7813 if (mode != DImode)
7814 return 0;
7816 if ((GET_MODE (op) == DImode || GET_MODE (op) == VOIDmode)
7817 && EXTRA_CONSTRAINT_Csy (op))
7818 return ! reload_completed;
7820 return target_reg_operand (op, mode);
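/* Nonzero if OP is a CONST_INT that is a multiple of 8 in the range 8..56,
   i.e. a byte-aligned bit offset as required by the mextr patterns. */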
7824 mextr_bit_offset (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7826 HOST_WIDE_INT i;
7828 if (GET_CODE (op) != CONST_INT)
7829 return 0;
7830 i = INTVAL (op);
7831 return i >= 1 * 8 && i <= 7 * 8 && (i & 7) == 0;
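/* Nonzero if OP is valid as the source of an extension: a TRUNCATE is
   checked with arith_operand, anything else with arith_reg_operand. */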
7835 extend_reg_operand (rtx op, enum machine_mode mode)
7837 return (GET_CODE (op) == TRUNCATE
7838 ? arith_operand
7839 : arith_reg_operand) (op, mode);
7843 trunc_hi_operand (rtx op, enum machine_mode mode)
7845 enum machine_mode op_mode = GET_MODE (op);
7847 if (op_mode != SImode && op_mode != DImode
7848 && op_mode != V4HImode && op_mode != V2SImode)
7849 return 0;
7850 return extend_reg_operand (op, mode);
7854 extend_reg_or_0_operand (rtx op, enum machine_mode mode)
7856 return (GET_CODE (op) == TRUNCATE
7857 ? arith_operand
7858 : arith_reg_or_0_operand) (op, mode);
7862 general_extend_operand (rtx op, enum machine_mode mode)
7864 return (GET_CODE (op) == TRUNCATE
7865 ? arith_operand
7866 : nonimmediate_operand) (op, mode);
7870 inqhi_operand (rtx op, enum machine_mode mode)
7872 if (GET_CODE (op) != TRUNCATE || mode != GET_MODE (op))
7873 return 0;
7874 op = XEXP (op, 0);
7875 /* Can't use true_regnum here because copy_cost wants to know about
7876 SECONDARY_INPUT_RELOAD_CLASS. */
7877 return GET_CODE (op) == REG && FP_REGISTER_P (REGNO (op));
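/* Nonzero if V is a CONST_VECTOR or PARALLEL of mode MODE whose elements all
   repeat the same value; for byte-sized elements, the same pair of values must repeat. */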
7881 sh_rep_vec (rtx v, enum machine_mode mode)
7883 int i;
7884 rtx x, y;
7886 if ((GET_CODE (v) != CONST_VECTOR && GET_CODE (v) != PARALLEL)
7887 || (GET_MODE (v) != mode && mode != VOIDmode))
7888 return 0;
7889 i = XVECLEN (v, 0) - 2;
7890 x = XVECEXP (v, 0, i + 1);
7891 if (GET_MODE_UNIT_SIZE (mode) == 1)
7893 y = XVECEXP (v, 0, i);
7894 for (i -= 2; i >= 0; i -= 2)
7895 if (! rtx_equal_p (XVECEXP (v, 0, i + 1), x)
7896 || ! rtx_equal_p (XVECEXP (v, 0, i), y))
7897 return 0;
7899 else
7900 for (; i >= 0; i--)
7901 if (XVECEXP (v, 0, i) != x)
7902 return 0;
7903 return 1;
7906 /* Determine if V is a constant vector matching MODE with only one element
7907 that is not a sign extension. Two byte-sized elements count as one. */
7909 sh_1el_vec (rtx v, enum machine_mode mode)
7911 int unit_size;
7912 int i, last, least, sign_ix;
7913 rtx sign;
7915 if (GET_CODE (v) != CONST_VECTOR
7916 || (GET_MODE (v) != mode && mode != VOIDmode))
7917 return 0;
7918 /* Determine numbers of last and of least significant elements. */
7919 last = XVECLEN (v, 0) - 1;
7920 least = TARGET_LITTLE_ENDIAN ? 0 : last;
7921 if (GET_CODE (XVECEXP (v, 0, least)) != CONST_INT)
7922 return 0;
7923 sign_ix = least;
7924 if (GET_MODE_UNIT_SIZE (mode) == 1)
7925 sign_ix = TARGET_LITTLE_ENDIAN ? 1 : last - 1;
7926 if (GET_CODE (XVECEXP (v, 0, sign_ix)) != CONST_INT)
7927 return 0;
7928 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (v));
7929 sign = (INTVAL (XVECEXP (v, 0, sign_ix)) >> (unit_size * BITS_PER_UNIT - 1)
7930 ? constm1_rtx : const0_rtx);
7931 i = XVECLEN (v, 0) - 1;
7933 if (i != least && i != sign_ix && XVECEXP (v, 0, i) != sign)
7934 return 0;
7935 while (--i);
7936 return 1;
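/* Nonzero if V is a CONST_VECTOR of mode MODE whose elements are all CONST_INTs. */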
7940 sh_const_vec (rtx v, enum machine_mode mode)
7942 int i;
7944 if (GET_CODE (v) != CONST_VECTOR
7945 || (GET_MODE (v) != mode && mode != VOIDmode))
7946 return 0;
7947 i = XVECLEN (v, 0) - 1;
7948 for (; i >= 0; i--)
7949 if (GET_CODE (XVECEXP (v, 0, i)) != CONST_INT)
7950 return 0;
7951 return 1;
7954 /* Return the destination address of a branch. */
7956 static int
7957 branch_dest (rtx branch)
7959 rtx dest = SET_SRC (PATTERN (branch));
7960 int dest_uid;
7962 if (GET_CODE (dest) == IF_THEN_ELSE)
7963 dest = XEXP (dest, 1);
7964 dest = XEXP (dest, 0);
7965 dest_uid = INSN_UID (dest);
7966 return INSN_ADDRESSES (dest_uid);
7969 /* Return nonzero if REG is not used after INSN.
7970 We assume REG is a reload reg, and therefore does
7971 not live past labels. It may live past calls or jumps though. */
7973 reg_unused_after (rtx reg, rtx insn)
7975 enum rtx_code code;
7976 rtx set;
7978 /* If the reg is set by this instruction, then it is safe for our
7979 case. Disregard the case where this is a store to memory, since
7980 we are checking a register used in the store address. */
7981 set = single_set (insn);
7982 if (set && GET_CODE (SET_DEST (set)) != MEM
7983 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7984 return 1;
7986 while ((insn = NEXT_INSN (insn)))
7988 rtx set;
7989 if (!INSN_P (insn))
7990 continue;
7992 code = GET_CODE (insn);
7994 #if 0
7995 /* If this is a label that existed before reload, then the register
7996 is dead here. However, if this is a label added by reorg, then
7997 the register may still be live here. We can't tell the difference,
7998 so we just ignore labels completely. */
7999 if (code == CODE_LABEL)
8000 return 1;
8001 /* else */
8002 #endif
8004 if (code == JUMP_INSN)
8005 return 0;
8007 /* If this is a sequence, we must handle them all at once.
8008 We could have for instance a call that sets the target register,
8009 and an insn in a delay slot that uses the register. In this case,
8010 we must return 0. */
8011 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
8013 int i;
8014 int retval = 0;
8016 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
8018 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
8019 rtx set = single_set (this_insn);
8021 if (GET_CODE (this_insn) == CALL_INSN)
8022 code = CALL_INSN;
8023 else if (GET_CODE (this_insn) == JUMP_INSN)
8025 if (INSN_ANNULLED_BRANCH_P (this_insn))
8026 return 0;
8027 code = JUMP_INSN;
8030 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8031 return 0;
8032 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8034 if (GET_CODE (SET_DEST (set)) != MEM)
8035 retval = 1;
8036 else
8037 return 0;
8039 if (set == 0
8040 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
8041 return 0;
8043 if (retval == 1)
8044 return 1;
8045 else if (code == JUMP_INSN)
8046 return 0;
8049 set = single_set (insn);
8050 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8051 return 0;
8052 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8053 return GET_CODE (SET_DEST (set)) != MEM;
8054 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
8055 return 0;
8057 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
8058 return 1;
8060 return 1;
8063 #include "ggc.h"
8065 static GTY(()) rtx fpscr_rtx;
8067 get_fpscr_rtx (void)
8069 if (! fpscr_rtx)
8071 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
8072 REG_USERVAR_P (fpscr_rtx) = 1;
8073 mark_user_reg (fpscr_rtx);
8075 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
8076 mark_user_reg (fpscr_rtx);
8077 return fpscr_rtx;
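/* Helpers to emit single and double precision floating point patterns;
   the expanders below supply the FPSCR register as the final operand. */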
8080 void
8081 emit_sf_insn (rtx pat)
8083 emit_insn (pat);
8086 void
8087 emit_df_insn (rtx pat)
8089 emit_insn (pat);
8092 void
8093 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8095 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8098 void
8099 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8101 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
8102 get_fpscr_rtx ()));
8105 void
8106 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8108 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8111 void
8112 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8114 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
8115 get_fpscr_rtx ()));
8118 /* ??? gcc does flow analysis strictly after common subexpression
8119 elimination. As a result, common subexpression elimination fails
8120 when there are some intervening statements setting the same register.
8121 If we did nothing about this, this would hurt the precision switching
8122 for SH4 badly. There is some cse after reload, but it is unable to
8123 undo the extra register pressure from the unused instructions, and
8124 it cannot remove auto-increment loads.
8126 A C code example that shows this flow/cse weakness for (at least) SH
8127 and sparc (as of gcc ss-970706) is this:
8129 double
8130 f(double a)
8132 double d;
8133 d = 0.1;
8134 a += d;
8135 d = 1.1;
8136 d = 0.1;
8137 a *= d;
8138 return a;
8141 So we add another pass before common subexpression elimination, to
8142 remove assignments that are dead due to a following assignment in the
8143 same basic block. */
8145 static void
8146 mark_use (rtx x, rtx *reg_set_block)
8148 enum rtx_code code;
8150 if (! x)
8151 return;
8152 code = GET_CODE (x);
8153 switch (code)
8155 case REG:
8157 int regno = REGNO (x);
8158 int nregs = (regno < FIRST_PSEUDO_REGISTER
8159 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
8160 : 1);
8163 reg_set_block[regno + nregs - 1] = 0;
8165 while (--nregs);
8166 break;
8168 case SET:
8170 rtx dest = SET_DEST (x);
8172 if (GET_CODE (dest) == SUBREG)
8173 dest = SUBREG_REG (dest);
8174 if (GET_CODE (dest) != REG)
8175 mark_use (dest, reg_set_block);
8176 mark_use (SET_SRC (x), reg_set_block);
8177 break;
8179 case CLOBBER:
8180 break;
8181 default:
8183 const char *fmt = GET_RTX_FORMAT (code);
8184 int i, j;
8185 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8187 if (fmt[i] == 'e')
8188 mark_use (XEXP (x, i), reg_set_block);
8189 else if (fmt[i] == 'E')
8190 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8191 mark_use (XVECEXP (x, i, j), reg_set_block);
8193 break;
8198 static rtx get_free_reg (HARD_REG_SET);
8200 /* This function returns a register to use for loading the address from
8201 which the fpscr is loaded. Currently it always returns r1 or r7, but when we are
8202 able to use pseudo registers after combine, or have a better mechanism
8203 for choosing a register, it should be done here. */
8204 /* REGS_LIVE is the liveness information for the point for which we
8205 need this allocation. In some bare-bones exit blocks, r1 is live at the
8206 start. We can even have all of r0..r3 being live:
8207 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8208 The INSN before which new insns are placed will clobber the register
8209 we return. If a basic block consists only of setting the return value
8210 register to a pseudo and using that register, the return value is not
8211 live before or after this block, yet we'll insert our insns right in
8212 the middle. */
8214 static rtx
8215 get_free_reg (HARD_REG_SET regs_live)
8217 if (! TEST_HARD_REG_BIT (regs_live, 1))
8218 return gen_rtx_REG (Pmode, 1);
8220 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8221 there shouldn't be anything but a jump before the function end. */
8222 if (! TEST_HARD_REG_BIT (regs_live, 7))
8223 return gen_rtx_REG (Pmode, 7);
8225 abort ();
8228 /* This function will set the fpscr from memory.
8229 MODE is the mode we are setting it to. */
8230 void
8231 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8233 enum attr_fp_mode fp_mode = mode;
8234 rtx addr_reg = get_free_reg (regs_live);
8236 if (fp_mode == (enum attr_fp_mode) ACTUAL_NORMAL_MODE (FP_MODE))
8237 emit_insn (gen_fpu_switch1 (addr_reg));
8238 else
8239 emit_insn (gen_fpu_switch0 (addr_reg));
8242 /* Is the given character a logical line separator for the assembler? */
8243 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8244 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
8245 #endif
8248 sh_insn_length_adjustment (rtx insn)
8250 /* Instructions with unfilled delay slots take up an extra two bytes for
8251 the nop in the delay slot. */
8252 if (((GET_CODE (insn) == INSN
8253 && GET_CODE (PATTERN (insn)) != USE
8254 && GET_CODE (PATTERN (insn)) != CLOBBER)
8255 || GET_CODE (insn) == CALL_INSN
8256 || (GET_CODE (insn) == JUMP_INSN
8257 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8258 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8259 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8260 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8261 return 2;
8263 /* SH2e has a bug that prevents the use of annulled branches, so if
8264 the delay slot is not filled, we'll have to put a NOP in it. */
8265 if (sh_cpu == CPU_SH2E
8266 && GET_CODE (insn) == JUMP_INSN
8267 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8268 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8269 && get_attr_type (insn) == TYPE_CBRANCH
8270 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8271 return 2;
8273 /* sh-dsp parallel processing insns take four bytes instead of two. */
8275 if (GET_CODE (insn) == INSN)
8277 int sum = 0;
8278 rtx body = PATTERN (insn);
8279 const char *template;
8280 char c;
8281 int maybe_label = 1;
8283 if (GET_CODE (body) == ASM_INPUT)
8284 template = XSTR (body, 0);
8285 else if (asm_noperands (body) >= 0)
8286 template
8287 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
8288 else
8289 return 0;
8292 int ppi_adjust = 0;
8295 c = *template++;
8296 while (c == ' ' || c == '\t');
8297 /* All sh-dsp parallel-processing insns start with p.
8298 The only non-ppi sh insn starting with p is pref.
8299 The only ppi starting with pr is prnd. */
8300 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
8301 ppi_adjust = 2;
8302 /* The repeat pseudo-insn expands to three insns, a total of
8303 six bytes in size. */
8304 else if ((c == 'r' || c == 'R')
8305 && ! strncasecmp ("epeat", template, 5))
8306 ppi_adjust = 4;
8307 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
8309 /* If this is a label, it is obviously not a ppi insn. */
8310 if (c == ':' && maybe_label)
8312 ppi_adjust = 0;
8313 break;
8315 else if (c == '\'' || c == '"')
8316 maybe_label = 0;
8317 c = *template++;
8319 sum += ppi_adjust;
8320 maybe_label = c != ':';
8322 while (c);
8323 return sum;
8325 return 0;
8328 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8329 isn't protected by a PIC unspec. */
8331 nonpic_symbol_mentioned_p (rtx x)
8333 register const char *fmt;
8334 register int i;
8336 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8337 || GET_CODE (x) == PC)
8338 return 1;
8340 /* We don't want to look into the possible MEM location of a
8341 CONST_DOUBLE, since we're not going to use it, in general. */
8342 if (GET_CODE (x) == CONST_DOUBLE)
8343 return 0;
8345 if (GET_CODE (x) == UNSPEC
8346 && (XINT (x, 1) == UNSPEC_PIC
8347 || XINT (x, 1) == UNSPEC_GOT
8348 || XINT (x, 1) == UNSPEC_GOTOFF
8349 || XINT (x, 1) == UNSPEC_GOTPLT
8350 || XINT (x, 1) == UNSPEC_GOTTPOFF
8351 || XINT (x, 1) == UNSPEC_DTPOFF
8352 || XINT (x, 1) == UNSPEC_PLT))
8353 return 0;
8355 fmt = GET_RTX_FORMAT (GET_CODE (x));
8356 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8358 if (fmt[i] == 'E')
8360 register int j;
8362 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8363 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8364 return 1;
8366 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8367 return 1;
8370 return 0;
8373 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8374 @GOTOFF in `reg'. */
8376 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8377 rtx reg)
8379 if (tls_symbolic_operand (orig, Pmode))
8380 return orig;
8382 if (GET_CODE (orig) == LABEL_REF
8383 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8385 if (reg == 0)
8386 reg = gen_reg_rtx (Pmode);
8388 emit_insn (gen_symGOTOFF2reg (reg, orig));
8389 return reg;
8391 else if (GET_CODE (orig) == SYMBOL_REF)
8393 if (reg == 0)
8394 reg = gen_reg_rtx (Pmode);
8396 emit_insn (gen_symGOT2reg (reg, orig));
8397 return reg;
8399 return orig;
8402 /* Mark the use of a constant in the literal table. If the constant
8403 has multiple labels, make it unique. */
8404 static rtx
8405 mark_constant_pool_use (rtx x)
8407 rtx insn, lab, pattern;
8409 if (x == NULL)
8410 return x;
8412 switch (GET_CODE (x))
8414 case LABEL_REF:
8415 x = XEXP (x, 0);
8416 case CODE_LABEL:
8417 break;
8418 default:
8419 return x;
8422 /* Get the first label in the list of labels for the same constant
8423 and delete the other labels in the list. */
8424 lab = x;
8425 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8427 if (GET_CODE (insn) != CODE_LABEL
8428 || LABEL_REFS (insn) != NEXT_INSN (insn))
8429 break;
8430 lab = insn;
8433 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8434 INSN_DELETED_P (insn) = 1;
8436 /* Mark constants in a window. */
8437 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8439 if (GET_CODE (insn) != INSN)
8440 continue;
8442 pattern = PATTERN (insn);
8443 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8444 continue;
8446 switch (XINT (pattern, 1))
8448 case UNSPECV_CONST2:
8449 case UNSPECV_CONST4:
8450 case UNSPECV_CONST8:
8451 XVECEXP (pattern, 0, 1) = const1_rtx;
8452 break;
8453 case UNSPECV_WINDOW_END:
8454 if (XVECEXP (pattern, 0, 0) == x)
8455 return lab;
8456 break;
8457 case UNSPECV_CONST_END:
8458 return lab;
8459 default:
8460 break;
8464 return lab;
8467 /* Return true if it's possible to redirect BRANCH1 to the destination
8468 of an unconditional jump BRANCH2. We only want to do this if the
8469 resulting branch will have a short displacement. */
8471 sh_can_redirect_branch (rtx branch1, rtx branch2)
8473 if (flag_expensive_optimizations && simplejump_p (branch2))
8475 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8476 rtx insn;
8477 int distance;
8479 for (distance = 0, insn = NEXT_INSN (branch1);
8480 insn && distance < 256;
8481 insn = PREV_INSN (insn))
8483 if (insn == dest)
8484 return 1;
8485 else
8486 distance += get_attr_length (insn);
8488 for (distance = 0, insn = NEXT_INSN (branch1);
8489 insn && distance < 256;
8490 insn = NEXT_INSN (insn))
8492 if (insn == dest)
8493 return 1;
8494 else
8495 distance += get_attr_length (insn);
8498 return 0;
8501 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8503 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8504 unsigned int new_reg)
8506 /* Interrupt functions can only use registers that have already been
8507 saved by the prologue, even if they would normally be
8508 call-clobbered. */
8510 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
8511 return 0;
8513 return 1;
8516 /* Function to update the integer COST
8517 based on the relationship between INSN that is dependent on
8518 DEP_INSN through the dependence LINK. The default is to make no
8519 adjustment to COST. This can be used for example to specify to
8520 the scheduler that an output- or anti-dependence does not incur
8521 the same cost as a data-dependence. The return value should be
8522 the new value for COST. */
8523 static int
8524 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8526 rtx reg, use_pat;
8528 if (TARGET_SHMEDIA)
8530 /* On SHmedia, if the dependence is an anti-dependence or
8531 output-dependence, there is no cost. */
8532 if (REG_NOTE_KIND (link) != 0)
8533 cost = 0;
8535 if (get_attr_is_mac_media (insn)
8536 && get_attr_is_mac_media (dep_insn))
8537 cost = 1;
8539 else if (REG_NOTE_KIND (link) == 0)
8541 enum attr_type dep_type, type;
8543 if (recog_memoized (insn) < 0
8544 || recog_memoized (dep_insn) < 0)
8545 return cost;
8547 dep_type = get_attr_type (dep_insn);
8548 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
8549 cost--;
8550 if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
8551 && (type = get_attr_type (insn)) != TYPE_CALL
8552 && type != TYPE_SFUNC)
8553 cost--;
8555 /* The only input for a call that is timing-critical is the
8556 function's address. */
8557 if (GET_CODE (insn) == CALL_INSN)
8559 rtx call = PATTERN (insn);
8561 if (GET_CODE (call) == PARALLEL)
8562 call = XVECEXP (call, 0, 0);
8563 if (GET_CODE (call) == SET)
8564 call = SET_SRC (call);
8565 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
8566 && ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn))
8567 cost = 0;
8569 /* Likewise, the most timing-critical input for an sfunc call
8570 is the function address. However, sfuncs typically start
8571 using their arguments pretty quickly.
8572 Assume a four cycle delay before they are needed. */
8573 /* All sfunc calls are parallels with at least four components.
8574 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8575 else if (GET_CODE (PATTERN (insn)) == PARALLEL
8576 && XVECLEN (PATTERN (insn), 0) >= 4
8577 && (reg = sfunc_uses_reg (insn)))
8579 if (! reg_set_p (reg, dep_insn))
8580 cost -= 4;
8582 /* When the preceding instruction loads the shift amount of
8583 the following SHAD/SHLD, the latency of the load is increased
8584 by 1 cycle. */
8585 else if (TARGET_SH4
8586 && get_attr_type (insn) == TYPE_DYN_SHIFT
8587 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
8588 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
8589 XEXP (SET_SRC (single_set (insn)),
8590 1)))
8591 cost++;
8592 /* When an LS group instruction with a latency of less than
8593 3 cycles is followed by a double-precision floating-point
8594 instruction, FIPR, or FTRV, the latency of the first
8595 instruction is increased to 3 cycles. */
8596 else if (cost < 3
8597 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
8598 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
8599 cost = 3;
8600 /* The lsw register of a double-precision computation is ready one
8601 cycle earlier. */
8602 else if (reload_completed
8603 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
8604 && (use_pat = single_set (insn))
8605 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
8606 SET_SRC (use_pat)))
8607 cost -= 1;
8609 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
8610 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
8611 cost -= 1;
8613 /* An anti-dependence penalty of two applies if the first insn is a double
8614 precision fadd / fsub / fmul. */
8615 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8616 && recog_memoized (dep_insn) >= 0
8617 && get_attr_type (dep_insn) == TYPE_DFP_ARITH
8618 /* A lot of alleged anti-flow dependences are fake,
8619 so check this one is real. */
8620 && flow_dependent_p (dep_insn, insn))
8621 cost = 2;
8624 return cost;
8627 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
8628 if DEP_INSN is anti-flow dependent on INSN. */
8629 static int
8630 flow_dependent_p (rtx insn, rtx dep_insn)
8632 rtx tmp = PATTERN (insn);
8634 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
8635 return tmp == NULL_RTX;
8638 /* A helper function for flow_dependent_p called through note_stores. */
8639 static void
8640 flow_dependent_p_1 (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
8642 rtx * pinsn = (rtx *) data;
8644 if (*pinsn && reg_referenced_p (x, *pinsn))
8645 *pinsn = NULL_RTX;
8648 /* For use by ALLOCATE_INITIAL_VALUE. Note that sh.md contains some
8649 'special function' patterns (type sfunc) that clobber pr, but that
8650 do not look like function calls to leaf_function_p. Hence we must
8651 do this extra check. */
8653 sh_pr_n_sets (void)
8655 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
8658 /* This function returns "2" to indicate dual issue for the SH4
8659 processor. To be used by the DFA pipeline description. */
8660 static int
8661 sh_issue_rate (void)
8663 if (TARGET_SUPERSCALAR)
8664 return 2;
8665 else
8666 return 1;
8669 /* Functions for ready queue reordering for sched1. */
8671 /* Get weight for mode for a set x. */
8672 static short
8673 find_set_regmode_weight (rtx x, enum machine_mode mode)
8675 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
8676 return 1;
8677 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
8679 if (GET_CODE (SET_DEST (x)) == REG)
8681 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
8682 return 1;
8683 else
8684 return 0;
8686 return 1;
8688 return 0;
8691 /* Get regmode weight for insn. */
8692 static short
8693 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
8695 short reg_weight = 0;
8696 rtx x;
8698 /* Increment weight for each register born here. */
8699 x = PATTERN (insn);
8700 reg_weight += find_set_regmode_weight (x, mode);
8701 if (GET_CODE (x) == PARALLEL)
8703 int j;
8704 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
8706 x = XVECEXP (PATTERN (insn), 0, j);
8707 reg_weight += find_set_regmode_weight (x, mode);
8710 /* Decrement weight for each register that dies here. */
8711 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
8713 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
8715 rtx note = XEXP (x, 0);
8716 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
8717 reg_weight--;
8720 return reg_weight;
8723 /* Calculate regmode weights for all insns of a basic block. */
8724 static void
8725 find_regmode_weight (int b, enum machine_mode mode)
8727 rtx insn, next_tail, head, tail;
8729 get_block_head_tail (b, &head, &tail);
8730 next_tail = NEXT_INSN (tail);
8732 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
8734 /* Handle register life information. */
8735 if (!INSN_P (insn))
8736 continue;
8738 if (mode == SFmode)
8739 INSN_REGMODE_WEIGHT (insn, mode) =
8740 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
8741 else if (mode == SImode)
8742 INSN_REGMODE_WEIGHT (insn, mode) =
8743 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
8747 /* Comparison function for ready queue sorting. */
8748 static int
8749 rank_for_reorder (const void *x, const void *y)
8751 rtx tmp = *(const rtx *) y;
8752 rtx tmp2 = *(const rtx *) x;
8754 /* The insn in a schedule group should be issued first. */
8755 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
8756 return SCHED_GROUP_P (tmp2) ? 1 : -1;
8758 /* If insns are equally good, sort by INSN_LUID (original insn order); this
8759 minimizes instruction movement, thus minimizing sched's effect on
8760 register pressure. */
8761 return INSN_LUID (tmp) - INSN_LUID (tmp2);
8764 /* Resort the array A in which only the element at index N may be out of order. */
8765 static void
8766 swap_reorder (rtx *a, int n)
8768 rtx insn = a[n - 1];
8769 int i = n - 2;
8771 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
8773 a[i + 1] = a[i];
8774 i -= 1;
8776 a[i + 1] = insn;
8779 #define SCHED_REORDER(READY, N_READY) \
8780 do \
8782 if ((N_READY) == 2) \
8783 swap_reorder (READY, N_READY); \
8784 else if ((N_READY) > 2) \
8785 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
8787 while (0)
8789 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
8790 macro. */
8791 static void
8792 ready_reorder (rtx *ready, int nready)
8794 SCHED_REORDER (ready, nready);
8797 /* Calculate regmode weights for all insns of all basic blocks. */
8798 static void
8799 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
8800 int verbose ATTRIBUTE_UNUSED,
8801 int old_max_uid)
8803 basic_block b;
8805 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
8806 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
8808 FOR_EACH_BB_REVERSE (b)
8810 find_regmode_weight (b->index, SImode);
8811 find_regmode_weight (b->index, SFmode);
8814 CURR_REGMODE_PRESSURE (SImode) = 0;
8815 CURR_REGMODE_PRESSURE (SFmode) = 0;
8819 /* Cleanup. */
8820 static void
8821 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
8822 int verbose ATTRIBUTE_UNUSED)
8824 if (regmode_weight[0])
8826 free (regmode_weight[0]);
8827 regmode_weight[0] = NULL;
8829 if (regmode_weight[1])
8831 free (regmode_weight[1]);
8832 regmode_weight[1] = NULL;
8836 /* Cache the can_issue_more so that we can return it from reorder2. Also,
8837 keep count of register pressures on SImode and SFmode. */
8838 static int
8839 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
8840 int sched_verbose ATTRIBUTE_UNUSED,
8841 rtx insn,
8842 int can_issue_more)
8844 if (GET_CODE (PATTERN (insn)) != USE
8845 && GET_CODE (PATTERN (insn)) != CLOBBER)
8846 cached_can_issue_more = can_issue_more - 1;
8847 else
8848 cached_can_issue_more = can_issue_more;
8850 if (reload_completed)
8851 return cached_can_issue_more;
8853 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
8854 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
8856 return cached_can_issue_more;
8859 static void
8860 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
8861 int verbose ATTRIBUTE_UNUSED,
8862 int veclen ATTRIBUTE_UNUSED)
8864 CURR_REGMODE_PRESSURE (SImode) = 0;
8865 CURR_REGMODE_PRESSURE (SFmode) = 0;
8868 /* Some magic numbers. */
8869 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8870 functions that already have high pressure on r0. */
8871 #define R0_MAX_LIFE_REGIONS 2
8872 #define R0_MAX_LIVE_LENGTH 12
8873 /* Register Pressure thresholds for SImode and SFmode registers. */
8874 #define SIMODE_MAX_WEIGHT 5
8875 #define SFMODE_MAX_WEIGHT 10
8877 /* Return true if the pressure is high for MODE. */
8878 static short
8879 high_pressure (enum machine_mode mode)
8881 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8882 functions that already have high pressure on r0. */
8883 if ((REG_N_SETS (0) - REG_N_DEATHS (0)) >= R0_MAX_LIFE_REGIONS
8884 && REG_LIVE_LENGTH (0) >= R0_MAX_LIVE_LENGTH)
8885 return 1;
8887 if (mode == SFmode)
8888 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
8889 else
8890 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
8893 /* Reorder ready queue if register pressure is high. */
8894 static int
8895 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
8896 int sched_verbose ATTRIBUTE_UNUSED,
8897 rtx *ready,
8898 int *n_readyp,
8899 int clock_var ATTRIBUTE_UNUSED)
8901 if (reload_completed)
8902 return sh_issue_rate ();
8904 if (high_pressure (SFmode) || high_pressure (SImode))
8906 ready_reorder (ready, *n_readyp);
8909 return sh_issue_rate ();
8912 /* Skip cycles if the current register pressure is high. */
8913 static int
8914 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
8915 int sched_verbose ATTRIBUTE_UNUSED,
8916 rtx *ready ATTRIBUTE_UNUSED,
8917 int *n_readyp ATTRIBUTE_UNUSED,
8918 int clock_var ATTRIBUTE_UNUSED)
8920 if (reload_completed)
8921 return cached_can_issue_more;
8923 if (high_pressure (SFmode) || high_pressure (SImode))
8924 skip_cycles = 1;
8926 return cached_can_issue_more;
8929 /* Skip cycles without sorting the ready queue. This will move insns from
8930 Q->R. If this is the last cycle we are skipping, allow sorting of the ready
8931 queue by sh_reorder. */
8933 /* Generally, skipping this many cycles is sufficient for all insns to move
8934 from Q -> R. */
8935 #define MAX_SKIPS 8
8937 static int
8938 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
8939 int sched_verbose ATTRIBUTE_UNUSED,
8940 rtx insn ATTRIBUTE_UNUSED,
8941 int last_clock_var,
8942 int clock_var,
8943 int *sort_p)
8945 if (reload_completed)
8946 return 0;
8948 if (skip_cycles)
8950 if ((clock_var - last_clock_var) < MAX_SKIPS)
8952 *sort_p = 0;
8953 return 1;
8955 /* If this is the last cycle we are skipping, allow reordering of R. */
8956 if ((clock_var - last_clock_var) == MAX_SKIPS)
8958 *sort_p = 1;
8959 return 1;
8963 skip_cycles = 0;
8965 return 0;
8968 /* SHmedia requires registers for branches, so we can't generate new
8969 branches past reload. */
8970 static bool
8971 sh_cannot_modify_jumps_p (void)
8973 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
8976 static int
8977 sh_target_reg_class (void)
8979 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
8982 static bool
8983 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
8985 return (shmedia_space_reserved_for_target_registers
8986 && (! after_prologue_epilogue_gen || TARGET_SAVE_ALL_TARGET_REGS));
8989 static bool
8990 sh_ms_bitfield_layout_p (tree record_type ATTRIBUTE_UNUSED)
8992 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
8996 On the SH1..SH4, the trampoline looks like
8997 2 0002 D202 mov.l l2,r2
8998 1 0000 D301 mov.l l1,r3
8999 3 0004 422B jmp @r2
9000 4 0006 0009 nop
9001 5 0008 00000000 l1: .long area
9002 6 000c 00000000 l2: .long function
9004 SH5 (compact) uses r1 instead of r3 for the static chain. */
9007 /* Emit RTL insns to initialize the variable parts of a trampoline.
9008 FNADDR is an RTX for the address of the function's pure code.
9009 CXT is an RTX for the static chain value for the function. */
9011 void
9012 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
9014 if (TARGET_SHMEDIA64)
9016 rtx tramp_templ;
9017 int fixed_len;
9019 rtx movi1 = GEN_INT (0xcc000010);
9020 rtx shori1 = GEN_INT (0xc8000010);
9021 rtx src, dst;
9023 /* The following trampoline works within a +- 128 KB range for cxt:
9024 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
9025 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
9026 gettr tr1,r1; blink tr0,r63 */
9027 /* Address rounding makes it hard to compute the exact bounds of the
9028 offset for this trampoline, but we have a rather generous offset
9029 range, so frame_offset should do fine as an upper bound. */
9030 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
9032 /* ??? could optimize this trampoline initialization
9033 by writing DImode words with two insns each. */
9034 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
9035 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
9036 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
9037 insn = gen_rtx_AND (DImode, insn, mask);
9038 /* Or in ptb/u .,tr1 pattern */
9039 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
9040 insn = force_operand (insn, NULL_RTX);
9041 insn = gen_lowpart (SImode, insn);
9042 emit_move_insn (gen_rtx_MEM (SImode, tramp), insn);
9043 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
9044 insn = gen_rtx_AND (DImode, insn, mask);
9045 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
9046 insn = gen_lowpart (SImode, insn);
9047 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)), insn);
9048 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
9049 insn = gen_rtx_AND (DImode, insn, mask);
9050 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9051 insn = gen_lowpart (SImode, insn);
9052 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)), insn);
9053 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
9054 insn = gen_rtx_AND (DImode, insn, mask);
9055 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9056 insn = gen_lowpart (SImode, insn);
9057 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
9058 insn);
9059 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
9060 insn = gen_rtx_AND (DImode, insn, mask);
9061 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9062 insn = gen_lowpart (SImode, insn);
9063 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 16)),
9064 insn);
9065 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 20)),
9066 GEN_INT (0x6bf10600));
9067 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 24)),
9068 GEN_INT (0x4415fc10));
9069 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 28)),
9070 GEN_INT (0x4401fff0));
9071 emit_insn (gen_ic_invalidate_line (tramp));
9072 return;
9074 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
9075 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
9077 tramp_templ = gen_datalabel_ref (tramp_templ);
9078 dst = gen_rtx_MEM (BLKmode, tramp);
9079 src = gen_rtx_MEM (BLKmode, tramp_templ);
9080 set_mem_align (dst, 256);
9081 set_mem_align (src, 64);
9082 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
9084 emit_move_insn (gen_rtx_MEM (Pmode, plus_constant (tramp, fixed_len)),
9085 fnaddr);
9086 emit_move_insn (gen_rtx_MEM (Pmode,
9087 plus_constant (tramp,
9088 fixed_len
9089 + GET_MODE_SIZE (Pmode))),
9090 cxt);
9091 emit_insn (gen_ic_invalidate_line (tramp));
9092 return;
9094 else if (TARGET_SHMEDIA)
9096 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
9097 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
9098 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
9099 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
9100 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
9101 rotated 10 right, and higher 16 bit of every 32 selected. */
9102 rtx movishori
9103 = force_reg (V2HImode, (simplify_gen_subreg
9104 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
9105 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
9106 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
9108 tramp = force_reg (Pmode, tramp);
9109 fnaddr = force_reg (SImode, fnaddr);
9110 cxt = force_reg (SImode, cxt);
9111 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
9112 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
9113 movishori));
9114 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
9115 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9116 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
9117 emit_move_insn (gen_rtx_MEM (DImode, tramp), quad0);
9118 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
9119 gen_rtx_SUBREG (V2HImode, cxt, 0),
9120 movishori));
9121 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
9122 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9123 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
9124 if (TARGET_LITTLE_ENDIAN)
9126 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
9127 emit_insn (gen_mextr4 (quad2, cxtload, blink));
9129 else
9131 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
9132 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
9134 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 8)), quad1);
9135 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), quad2);
9136 emit_insn (gen_ic_invalidate_line (tramp));
9137 return;
9139 else if (TARGET_SHCOMPACT)
9141 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
9142 return;
9144 emit_move_insn (gen_rtx_MEM (SImode, tramp),
9145 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
9146 SImode));
9147 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
9148 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
9149 SImode));
9150 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
9151 cxt);
9152 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
9153 fnaddr);
9154 if (TARGET_HARVARD)
9156 if (TARGET_USERMODE)
9157 emit_library_call (function_symbol ("__ic_invalidate"),
9158 0, VOIDmode, 1, tramp, SImode);
9159 else
9160 emit_insn (gen_ic_invalidate_line (tramp));
9164 /* FIXME: This is overly conservative. A SHcompact function that
9165 receives arguments ``by reference'' will have them stored in its
9166 own stack frame, so it must not pass pointers or references to
9167 these arguments to other functions by means of sibling calls. */
9168 static bool
9169 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9171 return (decl
9172 && (! TARGET_SHCOMPACT
9173 || current_function_args_info.stack_regs == 0)
9174 && ! sh_cfun_interrupt_handler_p ());
9177 /* Machine specific built-in functions. */
9179 struct builtin_description
9181 const enum insn_code icode;
9182 const char *const name;
9183 int signature;
9186 /* Describe number and signedness of arguments; arg[0] == result
9187 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
9188 static const char signature_args[][4] =
9190 #define SH_BLTIN_V2SI2 0
9191 { 4, 4 },
9192 #define SH_BLTIN_V4HI2 1
9193 { 4, 4 },
9194 #define SH_BLTIN_V2SI3 2
9195 { 4, 4, 4 },
9196 #define SH_BLTIN_V4HI3 3
9197 { 4, 4, 4 },
9198 #define SH_BLTIN_V8QI3 4
9199 { 4, 4, 4 },
9200 #define SH_BLTIN_MAC_HISI 5
9201 { 1, 4, 4, 1 },
9202 #define SH_BLTIN_SH_HI 6
9203 { 4, 4, 1 },
9204 #define SH_BLTIN_SH_SI 7
9205 { 4, 4, 1 },
9206 #define SH_BLTIN_V4HI2V2SI 8
9207 { 4, 4, 4 },
9208 #define SH_BLTIN_V4HI2V8QI 9
9209 { 4, 4, 4 },
9210 #define SH_BLTIN_SISF 10
9211 { 4, 2 },
9212 #define SH_BLTIN_LDUA_L 11
9213 { 2, 8 },
9214 #define SH_BLTIN_LDUA_Q 12
9215 { 1, 8 },
9216 #define SH_BLTIN_STUA_L 13
9217 { 0, 8, 2 },
9218 #define SH_BLTIN_STUA_Q 14
9219 { 0, 8, 1 },
9220 #define SH_BLTIN_UDI 15
9221 { 0, 8, 1 },
9222 #define SH_BLTIN_NUM_SHARED_SIGNATURES 16
9223 #define SH_BLTIN_2 16
9224 #define SH_BLTIN_SU 16
9225 { 1, 2 },
9226 #define SH_BLTIN_3 17
9227 #define SH_BLTIN_SUS 17
9228 { 2, 2, 1 },
9229 #define SH_BLTIN_PSSV 18
9230 { 0, 8, 2, 2 },
9231 #define SH_BLTIN_XXUU 19
9232 #define SH_BLTIN_UUUU 19
9233 { 1, 1, 1, 1 },
9234 #define SH_BLTIN_PV 20
9235 { 0, 8 },
9237 /* mcmv: operands considered unsigned. */
9238 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9239 /* mperm: control value considered unsigned int. */
9240 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9241 /* mshards_q: returns signed short. */
9242 /* nsb: takes long long arg, returns unsigned char. */
9243 static const struct builtin_description bdesc[] =
9245 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
9246 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
9247 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
9248 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
9249 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
9250 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
9251 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
9252 #if 0
9253 { CODE_FOR_alloco32, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9254 { CODE_FOR_alloco64, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9255 #endif
9256 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
9257 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
9258 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
9259 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
9260 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
9261 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
9262 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
9263 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
9264 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
9265 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
9266 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_UDI },
9267 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_UDI },
9268 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_UDI },
9269 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_UDI },
9270 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_UDI },
9271 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_UDI },
9272 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_UDI },
9273 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
9274 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
9275 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
9276 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
9277 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
9278 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
9279 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
9280 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
9281 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
9282 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
9283 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
9284 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
9285 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
9286 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
9287 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
9288 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
9289 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
9290 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
9291 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
9292 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
9293 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
9294 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
9295 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
9296 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
9297 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
9298 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
9299 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
9300 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
9301 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
9302 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
9303 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
9304 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
9305 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
9306 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
9307 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
9308 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
9309 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
9310 #if 0
9311 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9312 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9313 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9314 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9315 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9316 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9317 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9318 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9319 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9320 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9321 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9322 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9323 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9324 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9325 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9326 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9327 #endif
9328 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
9329 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
9330 #if 0
9331 { CODE_FOR_prefetch32,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
9332 { CODE_FOR_prefetch64,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV }
9333 #endif
9336 static void
9337 sh_media_init_builtins (void)
9339 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
9340 const struct builtin_description *d;
9342 memset (shared, 0, sizeof shared);
9343 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
9345 tree type, arg_type;
9346 int signature = d->signature;
9347 int i;
9349 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
9350 type = shared[signature];
9351 else
9353 int has_result = signature_args[signature][0] != 0;
9355 if (signature_args[signature][1] == 8
9356 && (insn_data[d->icode].operand[has_result].mode != Pmode))
9357 continue;
9358 if (! TARGET_FPU_ANY
9359 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
9360 continue;
9361 type = void_list_node;
9362 for (i = 3; ; i--)
9364 int arg = signature_args[signature][i];
9365 int opno = i - 1 + has_result;
9367 if (arg == 8)
9368 arg_type = ptr_type_node;
9369 else if (arg)
9370 arg_type = ((*lang_hooks.types.type_for_mode)
9371 (insn_data[d->icode].operand[opno].mode,
9372 (arg & 1)));
9373 else if (i)
9374 continue;
9375 else
9376 arg_type = void_type_node;
9377 if (i == 0)
9378 break;
9379 type = tree_cons (NULL_TREE, arg_type, type);
9381 type = build_function_type (arg_type, type);
9382 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
9383 shared[signature] = type;
9385 lang_hooks.builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
9386 NULL, NULL_TREE);
9390 /* Implements target hook vector_mode_supported_p. */
9391 bool
9392 sh_vector_mode_supported_p (enum machine_mode mode)
9394 if (TARGET_FPU_ANY
9395 && ((mode == V2SFmode)
9396 || (mode == V4SFmode)
9397 || (mode == V16SFmode)))
9398 return true;
9400 else if (TARGET_SHMEDIA
9401 && ((mode == V8QImode)
9402 || (mode == V2HImode)
9403 || (mode == V4HImode)
9404 || (mode == V2SImode)))
9405 return true;
9407 return false;
9410 /* Implements target hook dwarf_calling_convention. Return an enum
9411 of dwarf_calling_convention. */
9413 sh_dwarf_calling_convention (tree func)
9415 if (sh_attr_renesas_p (func))
9416 return DW_CC_renesas_sh;
9418 return DW_CC_normal;
9421 static void
9422 sh_init_builtins (void)
9424 if (TARGET_SHMEDIA)
9425 sh_media_init_builtins ();
9428 /* Expand an expression EXP that calls a built-in function,
9429 with result going to TARGET if that's convenient
9430 (and in mode MODE if that's convenient).
9431 SUBTARGET may be used as the target for computing one of EXP's operands.
9432 IGNORE is nonzero if the value is to be ignored. */
9434 static rtx
9435 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9436 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
9438 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
9439 tree arglist = TREE_OPERAND (exp, 1);
9440 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9441 const struct builtin_description *d = &bdesc[fcode];
9442 enum insn_code icode = d->icode;
9443 int signature = d->signature;
9444 enum machine_mode tmode = VOIDmode;
9445 int nop = 0, i;
9446 rtx op[4];
9447 rtx pat;
9449 if (signature_args[signature][0])
9451 if (ignore)
9452 return 0;
9454 tmode = insn_data[icode].operand[0].mode;
9455 if (! target
9456 || GET_MODE (target) != tmode
9457 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9458 target = gen_reg_rtx (tmode);
9459 op[nop++] = target;
9461 else
9462 target = 0;
9464 for (i = 1; i <= 3; i++, nop++)
9466 tree arg;
9467 enum machine_mode opmode, argmode;
9469 if (! signature_args[signature][i])
9470 break;
9471 arg = TREE_VALUE (arglist);
9472 if (arg == error_mark_node)
9473 return const0_rtx;
9474 arglist = TREE_CHAIN (arglist);
9475 opmode = insn_data[icode].operand[nop].mode;
9476 argmode = TYPE_MODE (TREE_TYPE (arg));
9477 if (argmode != opmode)
9478 arg = build1 (NOP_EXPR,
9479 (*lang_hooks.types.type_for_mode) (opmode, 0), arg);
9480 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
9481 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
9482 op[nop] = copy_to_mode_reg (opmode, op[nop]);
9485 switch (nop)
9487 case 1:
9488 pat = (*insn_data[d->icode].genfun) (op[0]);
9489 break;
9490 case 2:
9491 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
9492 break;
9493 case 3:
9494 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
9495 break;
9496 case 4:
9497 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
9498 break;
9499 default:
9500 abort ();
9502 if (! pat)
9503 return 0;
9504 emit_insn (pat);
9505 return target;
9508 void
9509 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
9511 rtx sel0 = const0_rtx;
9512 rtx sel1 = const1_rtx;
9513 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
9514 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
9516 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
9517 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
9520 void
9521 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
9523 rtx sel0 = const0_rtx;
9524 rtx sel1 = const1_rtx;
9525 rtx (*fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
9526 = gen_binary_sf_op;
9527 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
9529 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
9530 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
9533 /* Return the class of registers for which a mode change from FROM to TO
9534 is invalid. */
9535 bool
9536 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9537 enum reg_class class)
9539 /* We want to enable the use of SUBREGs as a means to
9540 VEC_SELECT a single element of a vector. */
9541 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
9542 return (reg_classes_intersect_p (GENERAL_REGS, class));
9544 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
9546 if (TARGET_LITTLE_ENDIAN)
9548 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
9549 return reg_classes_intersect_p (DF_REGS, class);
9551 else
9553 if (GET_MODE_SIZE (from) < 8)
9554 return reg_classes_intersect_p (DF_HI_REGS, class);
9557 return 0;
9561 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
9562 that label is used. */
9564 void
9565 sh_mark_label (rtx address, int nuses)
9567 if (GOTOFF_P (address))
9569 /* Extract the label or symbol. */
9570 address = XEXP (address, 0);
9571 if (GET_CODE (address) == PLUS)
9572 address = XEXP (address, 0);
9573 address = XVECEXP (address, 0, 0);
9575 if (GET_CODE (address) == LABEL_REF
9576 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
9577 LABEL_NUSES (XEXP (address, 0)) += nuses;
9580 /* Compute extra cost of moving data between one register class
9581 and another. */
9583 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
9584 uses this information. Hence, the general register <-> floating point
9585 register information here is not used for SFmode. */
9588 sh_register_move_cost (enum machine_mode mode,
9589 enum reg_class srcclass, enum reg_class dstclass)
9591 if (dstclass == T_REGS || dstclass == PR_REGS)
9592 return 10;
9594 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
9595 return 4;
9597 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
9598 && REGCLASS_HAS_FP_REG (srcclass)
9599 && REGCLASS_HAS_FP_REG (dstclass))
9600 return 4;
9602 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
9603 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
9604 return 9;
9606 if ((REGCLASS_HAS_FP_REG (dstclass)
9607 && REGCLASS_HAS_GENERAL_REG (srcclass))
9608 || (REGCLASS_HAS_GENERAL_REG (dstclass)
9609 && REGCLASS_HAS_FP_REG (srcclass)))
9610 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
9611 * ((GET_MODE_SIZE (mode) + 7) / 8U));
9613 if ((dstclass == FPUL_REGS
9614 && REGCLASS_HAS_GENERAL_REG (srcclass))
9615 || (srcclass == FPUL_REGS
9616 && REGCLASS_HAS_GENERAL_REG (dstclass)))
9617 return 5;
9619 if ((dstclass == FPUL_REGS
9620 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
9621 || (srcclass == FPUL_REGS
9622 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
9623 return 7;
9625 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9626 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9627 return 20;
9629 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9630 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9631 return 4;
9633 if (TARGET_SHMEDIA
9634 || (TARGET_FMOVD
9635 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
9636 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
9637 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
9639 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
9642 /* Like register_operand, but take into account that SHMEDIA can use
9643 the constant zero like a general register. */
9645 sh_register_operand (rtx op, enum machine_mode mode)
9647 if (op == CONST0_RTX (mode) && TARGET_SHMEDIA)
9648 return 1;
9649 return register_operand (op, mode);
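/* Nonzero if OP is valid as an operand of an SImode comparison: the T
   register in SImode, or any arith_operand. */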
int
cmpsi_operand (rtx op, enum machine_mode mode)
{
  if (GET_CODE (op) == REG && REGNO (op) == T_REG
      && GET_MODE (op) == SImode)
    return 1;
  return arith_operand (op, mode);
}

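/* Load the ptr_mode value at address ADDR into REG, sign-extending it
   to Pmode when Pmode is wider than ptr_mode.  */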
static rtx emit_load_ptr (rtx, rtx);

static rtx
emit_load_ptr (rtx reg, rtx addr)
{
  rtx mem = gen_rtx_MEM (ptr_mode, addr);

  if (Pmode != ptr_mode)
    mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
  return emit_move_insn (reg, mem);
}

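/* Output RTL for the body of a thunk: adjust the incoming `this' pointer
   by DELTA (and, if VCALL_OFFSET is nonzero, by an offset loaded through
   the adjusted pointer), then tail-call FUNCTION.  */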
void
sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
                    HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
                    tree function)
{
  CUMULATIVE_ARGS cum;
  int structure_value_byref = 0;
  rtx this, this_value, sibcall, insns, funexp;
  tree funtype = TREE_TYPE (function);
  int simple_add = CONST_OK_FOR_ADD (delta);
  int did_load = 0;
  rtx scratch0, scratch1, scratch2;

  reload_completed = 1;
  epilogue_completed = 1;
  no_new_pseudos = 1;
  current_function_uses_only_leaf_regs = 1;
  reset_block_changes ();

  emit_note (NOTE_INSN_PROLOGUE_END);

  /* Find the "this" pointer.  We have such a wide range of ABIs for the
     SH that it's best to do this in a completely machine-independent way.
     "this" is passed as the first argument, unless a structure return
     pointer comes first, in which case "this" comes second.  */
  INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
#ifndef PCC_STATIC_STRUCT_RETURN
  if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
    structure_value_byref = 1;
#endif /* not PCC_STATIC_STRUCT_RETURN */
  if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
    {
      tree ptype = build_pointer_type (TREE_TYPE (funtype));

      FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
    }
  this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);

  /* For SHcompact, we only have r0 for a scratch register: r1 is the
     static chain pointer (even if you can't have nested virtual functions
     right now, someone might implement them sometime), and the rest of the
     registers are used for argument passing, are callee-saved, or reserved.  */
  scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
  if (! TARGET_SH5)
    {
      scratch1 = gen_rtx_REG (ptr_mode, 1);
      /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
         to the location where struct values are returned.  */
      scratch2 = gen_rtx_REG (Pmode, 3);
    }
  else if (TARGET_SHMEDIA)
    {
      scratch1 = gen_rtx_REG (ptr_mode, 21);
      scratch2 = gen_rtx_REG (Pmode, TR0_REG);
    }

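  /* When a vcall offset is to be added and (this + delta) is already a
     valid memory address, issue the load through it now so it can be
     scheduled ahead of the pointer adjustment; DID_LOAD records that.  */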
  this_value = plus_constant (this, delta);
  if (vcall_offset
      && (simple_add || scratch0 != scratch1)
      && strict_memory_address_p (ptr_mode, this_value))
    {
      emit_load_ptr (scratch0, this_value);
      did_load = 1;
    }

  if (!delta)
    ; /* Do nothing.  */
  else if (simple_add)
    emit_move_insn (this, this_value);
  else
    {
      emit_move_insn (scratch1, GEN_INT (delta));
      emit_insn (gen_add2_insn (this, scratch1));
    }

  if (vcall_offset)
    {
      rtx offset_addr;

      if (!did_load)
        emit_load_ptr (scratch0, this);

      offset_addr = plus_constant (scratch0, vcall_offset);
      if (strict_memory_address_p (ptr_mode, offset_addr))
        ; /* Do nothing.  */
      else if (! TARGET_SH5)
        {
          /* scratch0 != scratch1, and we have indexed loads.  Get a better
             schedule by loading the offset into r1 and using an indexed
             load - then the load of r1 can issue before the load from
             (this + delta) finishes.  */
          emit_move_insn (scratch1, GEN_INT (vcall_offset));
          offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
        }
      else if (CONST_OK_FOR_ADD (vcall_offset))
        {
          emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
          offset_addr = scratch0;
        }
      else if (scratch0 != scratch1)
        {
          emit_move_insn (scratch1, GEN_INT (vcall_offset));
          emit_insn (gen_add2_insn (scratch0, scratch1));
          offset_addr = scratch0;
        }
      else
        abort (); /* FIXME */
      emit_load_ptr (scratch0, offset_addr);

      if (Pmode != ptr_mode)
        scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
      emit_insn (gen_add2_insn (this, scratch0));
    }

  /* Generate a tail call to the target function.  */
  if (! TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  emit_move_insn (scratch2, funexp);
  funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
  sibcall = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
  SIBLING_CALL_P (sibcall) = 1;
  use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
  emit_barrier ();

  /* Run just enough of rest_of_compilation to do scheduling and get
     the insns emitted.  Note that use_thunk calls
     assemble_start_function and assemble_end_function.  */

  insn_locators_initialize ();
  insns = get_insns ();

  if (optimize > 0 && flag_schedule_insns_after_reload)
    {
      if (! basic_block_info)
        init_flow ();
      rtl_register_cfg_hooks ();
      find_basic_blocks (insns, max_reg_num (), dump_file);
      life_analysis (dump_file, PROP_FINAL);

      split_all_insns (1);

      schedule_insns (dump_file);
    }

  sh_reorg ();

  if (optimize > 0 && flag_delayed_branch)
    dbr_schedule (insns, dump_file);
  shorten_branches (insns);
  final_start_function (insns, file, 1);
  final (insns, file, 1, 0);
  final_end_function ();

  if (optimize > 0 && flag_schedule_insns_after_reload)
    {
      /* Release all memory allocated by flow.  */
      free_basic_block_vars ();

      /* Release all memory held by regsets now.  */
      regset_release_memory ();
    }

  reload_completed = 0;
  epilogue_completed = 0;
  no_new_pseudos = 0;
}

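/* Return a SYMBOL_REF for NAME, marked with SYMBOL_FLAG_FUNCTION.  */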
rtx
function_symbol (const char *name)
{
  rtx sym = gen_rtx_SYMBOL_REF (Pmode, name);
  SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
  return sym;
}

/* Find the number of a general purpose register in S.  */
static int
scavenge_reg (HARD_REG_SET *s)
{
  int r;
  for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
    if (TEST_HARD_REG_BIT (*s, r))
      return r;
  return -1;
}

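/* Return an rtx holding the value that the return-address register PR
   had on entry to the current function.  */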
rtx
sh_get_pr_initial_val (void)
{
  rtx val;

  /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
     PR register on SHcompact, because it might be clobbered by the prologue.
     We check first if that is known to be the case.  */
  if (TARGET_SHCOMPACT
      && ((current_function_args_info.call_cookie
           & ~ CALL_COOKIE_RET_TRAMP (1))
          || current_function_has_nonlocal_label))
    return gen_rtx_MEM (SImode, return_address_pointer_rtx);

  /* If we haven't finished rtl generation, there might be a nonlocal label
     that we haven't seen yet.
     ??? get_hard_reg_initial_val fails if it is called while no_new_pseudos
     is set, unless it has been called before for the same register.  And even
     then, we end up in trouble if we didn't use the register in the same
     basic block before.  So call get_hard_reg_initial_val now and wrap it
     in an unspec if we might need to replace it.  */
  /* ??? We also must do this for TARGET_SH1 in general, because otherwise
     combine can put the pseudo returned by get_hard_reg_initial_val into
     instructions that need a general purpose register, which will fail to
     be recognized when the pseudo becomes allocated to PR.  */
  val
    = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
  if (TARGET_SH1)
    return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
  return val;
}

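/* Try to expand an scc operation whose operands are the T register
   (sh_compare_op0) and a constant (sh_compare_op1).  CODE is the
   comparison code and TARGET the destination register.  Return 1 if RTL
   was emitted, 0 if the caller must fall back to the generic code.  */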
int
sh_expand_t_scc (enum rtx_code code, rtx target)
{
  rtx result = target;
  HOST_WIDE_INT val;

  if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
      || GET_CODE (sh_compare_op1) != CONST_INT)
    return 0;
  if (GET_CODE (result) != REG)
    result = gen_reg_rtx (SImode);
  val = INTVAL (sh_compare_op1);
  if ((code == EQ && val == 1) || (code == NE && val == 0))
    emit_insn (gen_movt (result));
  else if ((code == EQ && val == 0) || (code == NE && val == 1))
    {
      emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
      emit_insn (gen_subc (result, result, result));
      emit_insn (gen_addsi3 (result, result, const1_rtx));
    }
  else if (code == EQ || code == NE)
    emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
  else
    return 0;
  if (result != target)
    emit_move_insn (target, result);
  return 1;
}

/* INSN is an sfunc; return the rtx that describes the address used.  */
static rtx
extract_sfunc_addr (rtx insn)
{
  rtx pattern, part = NULL_RTX;
  int len, i;

  pattern = PATTERN (insn);
  len = XVECLEN (pattern, 0);
  for (i = 0; i < len; i++)
    {
      part = XVECEXP (pattern, 0, i);
      if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
          && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
        return XEXP (part, 0);
    }
  if (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE)
    return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
  abort ();
}

/* Verify that the register in use_sfunc_addr still agrees with the address
   used in the sfunc.  This prevents fill_slots_from_thread from changing
   use_sfunc_addr.
   INSN is the use_sfunc_addr instruction, and REG is the register it
   guards.  */
int
check_use_sfunc_addr (rtx insn, rtx reg)
{
  /* Search for the sfunc.  It should really come right after INSN.  */
  while ((insn = NEXT_INSN (insn)))
    {
      if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
        break;
      if (! INSN_P (insn))
        continue;

      if (GET_CODE (PATTERN (insn)) == SEQUENCE)
        insn = XVECEXP (PATTERN (insn), 0, 0);
      if (GET_CODE (PATTERN (insn)) != PARALLEL
          || get_attr_type (insn) != TYPE_SFUNC)
        continue;
      return rtx_equal_p (extract_sfunc_addr (insn), reg);
    }
  abort ();
}

/* Returns 1 if OP is a MEM that can be the source of a simple move
   operation.  */

int
unaligned_load_operand (rtx op, enum machine_mode mode)
{
  rtx inside;

  if (GET_CODE (op) != MEM || GET_MODE (op) != mode)
    return 0;

  inside = XEXP (op, 0);

  if (GET_CODE (inside) == POST_INC)
    inside = XEXP (inside, 0);

  if (GET_CODE (inside) == REG)
    return 1;

  return 0;
}

/* This function returns a constant rtx that represents 2**15 / pi in
   SFmode.  It's used to scale SFmode angles, in radians, to a
   fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
   maps to 0x10000).  */

static GTY(()) rtx sh_fsca_sf2int_rtx;

rtx
sh_fsca_sf2int (void)
{
  if (! sh_fsca_sf2int_rtx)
    {
      REAL_VALUE_TYPE rv;

      real_from_string (&rv, "10430.378350470453");
      sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
    }

  return sh_fsca_sf2int_rtx;
}

/* This function returns a constant rtx that represents 2**15 / pi in
   DFmode.  It's used to scale DFmode angles, in radians, to a
   fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
   maps to 0x10000).  */

static GTY(()) rtx sh_fsca_df2int_rtx;

rtx
sh_fsca_df2int (void)
{
  if (! sh_fsca_df2int_rtx)
    {
      REAL_VALUE_TYPE rv;

      real_from_string (&rv, "10430.378350470453");
      sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
    }

  return sh_fsca_df2int_rtx;
}

/* This function returns a constant rtx that represents pi / 2**15 in
   SFmode.  It's used to scale a fixed-point signed 16.16-bit fraction
   of a full circle back to an SFmode value (i.e., 0x10000 maps to
   2*pi).  */

static GTY(()) rtx sh_fsca_int2sf_rtx;

rtx
sh_fsca_int2sf (void)
{
  if (! sh_fsca_int2sf_rtx)
    {
      REAL_VALUE_TYPE rv;

      real_from_string (&rv, "9.587379924285257e-5");
      sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
    }

  return sh_fsca_int2sf_rtx;
}

/* Initialize the CUMULATIVE_ARGS structure.  */

void
sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
                         tree fntype,
                         rtx libname ATTRIBUTE_UNUSED,
                         tree fndecl,
                         signed int n_named_args,
                         enum machine_mode mode)
{
  pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
  pcum->free_single_fp_reg = 0;
  pcum->stack_regs = 0;
  pcum->byref_regs = 0;
  pcum->byref = 0;
  pcum->outgoing = (n_named_args == -1) ? 0 : 1;

  /* XXX - Should we check TARGET_HITACHI here ???  */
  pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;

  if (fntype)
    {
      pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
                         && aggregate_value_p (TREE_TYPE (fntype), fndecl));
      pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
      pcum->arg_count [(int) SH_ARG_INT]
        = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);

      pcum->call_cookie
        = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
                                 && pcum->arg_count [(int) SH_ARG_INT] == 0
                                 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
                                     ? int_size_in_bytes (TREE_TYPE (fntype))
                                     : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
                                 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
                                     == FIRST_RET_REG));
    }
  else
    {
      pcum->arg_count [(int) SH_ARG_INT] = 0;
      pcum->prototype_p = FALSE;
      if (mode != VOIDmode)
        {
          pcum->call_cookie =
            CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
                                   && GET_MODE_SIZE (mode) > 4
                                   && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);

          /* If the default ABI is the Renesas ABI, then all library
             calls must assume that the library will be using the
             Renesas ABI.  So if the function would return its result
             in memory, then we must force the address of this memory
             block onto the stack.  Ideally we would like to call
             targetm.calls.return_in_memory() here, but we do not have
             the TYPE or the FNDECL available, so we synthesize the
             contents of that function as best we can.  */
          pcum->force_mem =
            (TARGET_DEFAULT & HITACHI_BIT)
            && (mode == BLKmode
                || (GET_MODE_SIZE (mode) > 4
                    && !(mode == DFmode
                         && TARGET_FPU_DOUBLE)));
        }
      else
        {
          pcum->call_cookie = 0;
          pcum->force_mem = FALSE;
        }
    }
}

#include "gt-sh.h"