1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "insn-config.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "flags.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "function.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "output.h"
38 #include "insn-attr.h"
39 #include "toplev.h"
40 #include "recog.h"
41 #include "c-pragma.h"
42 #include "integrate.h"
43 #include "dwarf2.h"
44 #include "tm_p.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "real.h"
48 #include "langhooks.h"
49 #include "basic-block.h"
50 #include "cfglayout.h"
51 #include "intl.h"
52 #include "sched-int.h"
53 #include "ggc.h"
54 #include "tree-gimple.h"
57 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
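/* Index (0 or 1) of the most / least significant SImode word of a
   doubleword value: on a little-endian target the low-order word comes
   first, so LSW is 0 and MSW is 1; on big-endian it is the reverse.  */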
59 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
60 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
62 /* These are some macros to abstract register modes. */
63 #define CONST_OK_FOR_ADD(size) \
64 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
65 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
66 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
67 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
69 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
70 int current_function_interrupt;
72 /* ??? The pragma interrupt support will not work for SH3. */
73 /* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
74 output code for the next function appropriate for an interrupt handler. */
75 int pragma_interrupt;
77 /* This is set by the trap_exit attribute for functions. It specifies
78 a trap number to be used in a trapa instruction at function exit
79 (instead of an rte instruction). */
80 int trap_exit;
82 /* This is used by the sp_switch attribute for functions. It specifies
83 a variable holding the address of the stack the interrupt function
84 should switch to/from at entry/exit. */
85 rtx sp_switch;
87 /* This is set by #pragma trapa, and is similar to the above, except that
88 the compiler doesn't emit code to preserve all registers. */
89 static int pragma_trapa;
91 /* This is set by #pragma nosave_low_regs. This is useful on the SH3,
92 which has a separate set of low regs for User and Supervisor modes.
93 This should only be used for the lowest level of interrupts. Higher levels
94 of interrupts must save the registers in case they themselves are
95 interrupted. */
96 int pragma_nosave_low_regs;
98 /* This is used for communication between TARGET_SETUP_INCOMING_VARARGS and
99 sh_expand_prologue. */
100 int current_function_anonymous_args;
102 /* Global variables for machine-dependent things. */
104 /* Which cpu are we scheduling for. */
105 enum processor_type sh_cpu;
107 /* Definitions used in ready queue reordering for first scheduling pass. */
109 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
110 static short *regmode_weight[2];
112 /* Total SFmode and SImode weights of scheduled insns. */
113 static int curr_regmode_pressure[2];
115 /* If true, skip cycles for Q -> R movement. */
116 static int skip_cycles = 0;
118 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
119 and returned from sh_reorder2. */
120 static short cached_can_issue_more;
122 /* Saved operands from the last compare to use when we generate an scc
123 or bcc insn. */
125 rtx sh_compare_op0;
126 rtx sh_compare_op1;
128 /* Provides the class number of the smallest class containing
129 reg number. */
131 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
133 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
137 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
138 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
139 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
140 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
141 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
142 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
143 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
144 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
145 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
146 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
147 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
148 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
149 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
153 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
154 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
155 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
156 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
157 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
158 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
159 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
160 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
161 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
162 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
163 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
164 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
165 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
166 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
167 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
168 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
169 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
170 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
171 GENERAL_REGS,
174 char sh_register_names[FIRST_PSEUDO_REGISTER] \
175 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
177 char sh_additional_register_names[ADDREGNAMES_SIZE] \
178 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
179 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
181 /* Provide reg_class from a letter such as appears in the machine
182 description. *: target independently reserved letter.
183 reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */
185 enum reg_class reg_class_from_letter[] =
187 /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
188 /* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
189 /* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
190 /* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
191 /* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
192 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
193 /* y */ FPUL_REGS, /* z */ R0_REGS
196 int assembler_dialect;
198 static bool shmedia_space_reserved_for_target_registers;
200 static void split_branches (rtx);
201 static int branch_dest (rtx);
202 static void force_into (rtx, rtx);
203 static void print_slot (rtx);
204 static rtx add_constant (rtx, enum machine_mode, rtx);
205 static void dump_table (rtx, rtx);
206 static int hi_const (rtx);
207 static int broken_move (rtx);
208 static int mova_p (rtx);
209 static rtx find_barrier (int, rtx, rtx);
210 static int noncall_uses_reg (rtx, rtx, rtx *);
211 static rtx gen_block_redirect (rtx, int, int);
212 static void sh_reorg (void);
213 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
214 static rtx frame_insn (rtx);
215 static rtx push (int);
216 static void pop (int);
217 static void push_regs (HARD_REG_SET *, int);
218 static int calc_live_regs (HARD_REG_SET *);
219 static void mark_use (rtx, rtx *);
220 static HOST_WIDE_INT rounded_frame_size (int);
221 static rtx mark_constant_pool_use (rtx);
222 const struct attribute_spec sh_attribute_table[];
223 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
224 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
225 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
226 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
227 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
228 static void sh_insert_attributes (tree, tree *);
229 static int sh_adjust_cost (rtx, rtx, rtx, int);
230 static int sh_issue_rate (void);
231 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
232 static short find_set_regmode_weight (rtx, enum machine_mode);
233 static short find_insn_regmode_weight (rtx, enum machine_mode);
234 static void find_regmode_weight (int, enum machine_mode);
235 static void sh_md_init_global (FILE *, int, int);
236 static void sh_md_finish_global (FILE *, int);
237 static int rank_for_reorder (const void *, const void *);
238 static void swap_reorder (rtx *, int);
239 static void ready_reorder (rtx *, int);
240 static short high_pressure (enum machine_mode);
241 static int sh_reorder (FILE *, int, rtx *, int *, int);
242 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
243 static void sh_md_init (FILE *, int, int);
244 static int sh_variable_issue (FILE *, int, rtx, int);
246 static bool sh_function_ok_for_sibcall (tree, tree);
248 static bool sh_cannot_modify_jumps_p (void);
249 static int sh_target_reg_class (void);
250 static bool sh_optimize_target_register_callee_saved (bool);
251 static bool sh_ms_bitfield_layout_p (tree);
253 static void sh_init_builtins (void);
254 static void sh_media_init_builtins (void);
255 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
256 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
257 static void sh_file_start (void);
258 static int flow_dependent_p (rtx, rtx);
259 static void flow_dependent_p_1 (rtx, rtx, void *);
260 static int shiftcosts (rtx);
261 static int andcosts (rtx);
262 static int addsubcosts (rtx);
263 static int multcosts (rtx);
264 static bool unspec_caller_rtx_p (rtx);
265 static bool sh_cannot_copy_insn_p (rtx);
266 static bool sh_rtx_costs (rtx, int, int, int *);
267 static int sh_address_cost (rtx);
268 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
269 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
270 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
271 static int scavenge_reg (HARD_REG_SET *s);
272 struct save_schedule_s;
273 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
274 struct save_schedule_s *, int);
276 static rtx sh_struct_value_rtx (tree, int);
277 static bool sh_return_in_memory (tree, tree);
278 static rtx sh_builtin_saveregs (void);
279 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
280 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
281 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
282 static tree sh_build_builtin_va_list (void);
283 static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
284 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
285 tree, bool);
286 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
287 tree, bool);
288 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
289 tree, bool);
290 static int sh_dwarf_calling_convention (tree);
291 static int hard_regs_intersect_p (HARD_REG_SET *, HARD_REG_SET *);
294 /* Initialize the GCC target structure. */
295 #undef TARGET_ATTRIBUTE_TABLE
296 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
298 /* The next two are used for debug info when compiling with -gdwarf. */
299 #undef TARGET_ASM_UNALIGNED_HI_OP
300 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
301 #undef TARGET_ASM_UNALIGNED_SI_OP
302 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
304 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
305 #undef TARGET_ASM_UNALIGNED_DI_OP
306 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
307 #undef TARGET_ASM_ALIGNED_DI_OP
308 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
310 #undef TARGET_ASM_FUNCTION_EPILOGUE
311 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
313 #undef TARGET_ASM_OUTPUT_MI_THUNK
314 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
316 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
317 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
319 #undef TARGET_ASM_FILE_START
320 #define TARGET_ASM_FILE_START sh_file_start
321 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
322 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
324 #undef TARGET_INSERT_ATTRIBUTES
325 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
327 #undef TARGET_SCHED_ADJUST_COST
328 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
330 #undef TARGET_SCHED_ISSUE_RATE
331 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
333 /* The next 5 hooks have been implemented for reenabling sched1. With the
334 help of these macros we are limiting the movement of insns in sched1 to
335 reduce the register pressure. The overall idea is to keep count of SImode
336 and SFmode regs required by already scheduled insns. When these counts
 337 cross some threshold values, give priority to insns that free registers.
 338 The insn that frees registers is most likely to be the insn with lowest
 339 LUID (original insn order); but such an insn might be in the stalled
 340 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
 341 up to a max of 8 cycles so that such insns may move from Q -> R.
 343 The descriptions of the hooks are as below:
345 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
346 scheduler; it is called inside the sched_init function just after
347 find_insn_reg_weights function call. It is used to calculate the SImode
348 and SFmode weights of insns of basic blocks; much similar to what
349 find_insn_reg_weights does.
350 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
352 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
353 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
354 (Q)->(R).
356 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
 357 high, reorder the ready queue so that the insn with lowest LUID will be
358 issued next.
360 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
361 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
363 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
364 can be returned from TARGET_SCHED_REORDER2.
366 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
368 #undef TARGET_SCHED_DFA_NEW_CYCLE
369 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
371 #undef TARGET_SCHED_INIT_GLOBAL
372 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
374 #undef TARGET_SCHED_FINISH_GLOBAL
375 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
377 #undef TARGET_SCHED_VARIABLE_ISSUE
378 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
380 #undef TARGET_SCHED_REORDER
381 #define TARGET_SCHED_REORDER sh_reorder
383 #undef TARGET_SCHED_REORDER2
384 #define TARGET_SCHED_REORDER2 sh_reorder2
386 #undef TARGET_SCHED_INIT
387 #define TARGET_SCHED_INIT sh_md_init
389 #undef TARGET_CANNOT_MODIFY_JUMPS_P
390 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
391 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
392 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
393 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
394 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
395 sh_optimize_target_register_callee_saved
397 #undef TARGET_MS_BITFIELD_LAYOUT_P
398 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
400 #undef TARGET_INIT_BUILTINS
401 #define TARGET_INIT_BUILTINS sh_init_builtins
402 #undef TARGET_EXPAND_BUILTIN
403 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
405 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
406 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
408 #undef TARGET_CANNOT_COPY_INSN_P
409 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
410 #undef TARGET_RTX_COSTS
411 #define TARGET_RTX_COSTS sh_rtx_costs
412 #undef TARGET_ADDRESS_COST
413 #define TARGET_ADDRESS_COST sh_address_cost
415 #undef TARGET_MACHINE_DEPENDENT_REORG
416 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
418 #ifdef HAVE_AS_TLS
419 #undef TARGET_HAVE_TLS
420 #define TARGET_HAVE_TLS true
421 #endif
423 #undef TARGET_PROMOTE_PROTOTYPES
424 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
425 #undef TARGET_PROMOTE_FUNCTION_ARGS
426 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
427 #undef TARGET_PROMOTE_FUNCTION_RETURN
428 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
430 #undef TARGET_STRUCT_VALUE_RTX
431 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
432 #undef TARGET_RETURN_IN_MEMORY
433 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
435 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
436 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
437 #undef TARGET_SETUP_INCOMING_VARARGS
438 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
439 #undef TARGET_STRICT_ARGUMENT_NAMING
440 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
441 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
442 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
443 #undef TARGET_MUST_PASS_IN_STACK
444 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
445 #undef TARGET_PASS_BY_REFERENCE
446 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
447 #undef TARGET_CALLEE_COPIES
448 #define TARGET_CALLEE_COPIES sh_callee_copies
449 #undef TARGET_ARG_PARTIAL_BYTES
450 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
452 #undef TARGET_BUILD_BUILTIN_VA_LIST
453 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
454 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
455 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
457 #undef TARGET_VECTOR_MODE_SUPPORTED_P
458 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
460 #undef TARGET_PCH_VALID_P
461 #define TARGET_PCH_VALID_P sh_pch_valid_p
463 #undef TARGET_DWARF_CALLING_CONVENTION
464 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
466 /* Return regmode weight for insn. */
467 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
469 /* Return current register pressure for regmode. */
470 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
472 #ifdef SYMBIAN
474 #undef TARGET_ENCODE_SECTION_INFO
475 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
476 #undef TARGET_STRIP_NAME_ENCODING
477 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
478 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
479 #define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class
481 #endif /* SYMBIAN */
483 struct gcc_target targetm = TARGET_INITIALIZER;
485 /* Print the operand address in x to the stream. */
487 void
488 print_operand_address (FILE *stream, rtx x)
490 switch (GET_CODE (x))
492 case REG:
493 case SUBREG:
494 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
495 break;
497 case PLUS:
499 rtx base = XEXP (x, 0);
500 rtx index = XEXP (x, 1);
502 switch (GET_CODE (index))
504 case CONST_INT:
505 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
506 reg_names[true_regnum (base)]);
507 break;
509 case REG:
510 case SUBREG:
512 int base_num = true_regnum (base);
513 int index_num = true_regnum (index);
515 fprintf (stream, "@(r0,%s)",
516 reg_names[MAX (base_num, index_num)]);
517 break;
520 default:
521 debug_rtx (x);
522 abort ();
525 break;
527 case PRE_DEC:
528 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
529 break;
531 case POST_INC:
532 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
533 break;
535 default:
536 x = mark_constant_pool_use (x);
537 output_addr_const (stream, x);
538 break;
542 /* Print operand x (an rtx) in assembler syntax to file stream
543 according to modifier code.
545 '.' print a .s if insn needs delay slot
546 ',' print LOCAL_LABEL_PREFIX
547 '@' print trap, rte or rts depending upon pragma interruptness
548 '#' output a nop if there is nothing to put in the delay slot
549 ''' print likelihood suffix (/u for unlikely).
550 'O' print a constant without the #
551 'R' print the LSW of a dp value - changes if in little endian
552 'S' print the MSW of a dp value - changes if in little endian
553 'T' print the next word of a dp value - same as 'R' in big endian mode.
554 'M' print an `x' if `m' will print `base,index'.
555 'N' print 'r63' if the operand is (const_int 0).
556 'd' print a V2SF reg as dN instead of fpN.
557 'm' print a pair `base,offset' or `base,index', for LD and ST.
558 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
559 'o' output an operator. */
561 void
562 print_operand (FILE *stream, rtx x, int code)
564 switch (code)
566 case '.':
567 if (final_sequence
568 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
569 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
570 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
571 break;
572 case ',':
573 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
574 break;
575 case '@':
576 if (trap_exit)
577 fprintf (stream, "trapa #%d", trap_exit);
578 else if (sh_cfun_interrupt_handler_p ())
579 fprintf (stream, "rte");
580 else
581 fprintf (stream, "rts");
582 break;
583 case '#':
584 /* Output a nop if there's nothing in the delay slot. */
585 if (dbr_sequence_length () == 0)
586 fprintf (stream, "\n\tnop");
587 break;
588 case '\'':
590 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
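      /* The REG_BR_PROB note holds the probability of the branch being
	 taken, scaled by REG_BR_PROB_BASE; print the /u (unlikely)
	 suffix when that probability is below one half.  */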
592 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
593 fputs ("/u", stream);
594 break;
596 case 'O':
597 x = mark_constant_pool_use (x);
598 output_addr_const (stream, x);
599 break;
600 case 'R':
601 fputs (reg_names[REGNO (x) + LSW], (stream));
602 break;
603 case 'S':
604 fputs (reg_names[REGNO (x) + MSW], (stream));
605 break;
606 case 'T':
607 /* Next word of a double. */
608 switch (GET_CODE (x))
610 case REG:
611 fputs (reg_names[REGNO (x) + 1], (stream));
612 break;
613 case MEM:
614 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
615 && GET_CODE (XEXP (x, 0)) != POST_INC)
616 x = adjust_address (x, SImode, 4);
617 print_operand_address (stream, XEXP (x, 0));
618 break;
619 default:
620 break;
622 break;
623 case 'o':
624 switch (GET_CODE (x))
626 case PLUS: fputs ("add", stream); break;
627 case MINUS: fputs ("sub", stream); break;
628 case MULT: fputs ("mul", stream); break;
629 case DIV: fputs ("div", stream); break;
630 case EQ: fputs ("eq", stream); break;
631 case NE: fputs ("ne", stream); break;
632 case GT: case LT: fputs ("gt", stream); break;
633 case GE: case LE: fputs ("ge", stream); break;
634 case GTU: case LTU: fputs ("gtu", stream); break;
635 case GEU: case LEU: fputs ("geu", stream); break;
636 default:
637 break;
639 break;
640 case 'M':
641 if (GET_CODE (x) == MEM
642 && GET_CODE (XEXP (x, 0)) == PLUS
643 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
644 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
645 fputc ('x', stream);
646 break;
648 case 'm':
649 if (GET_CODE (x) != MEM)
650 abort ();
651 x = XEXP (x, 0);
652 switch (GET_CODE (x))
654 case REG:
655 case SUBREG:
656 print_operand (stream, x, 0);
657 fputs (", 0", stream);
658 break;
660 case PLUS:
661 print_operand (stream, XEXP (x, 0), 0);
662 fputs (", ", stream);
663 print_operand (stream, XEXP (x, 1), 0);
664 break;
666 default:
667 abort ();
669 break;
671 case 'd':
672 if (GET_CODE (x) != REG || GET_MODE (x) != V2SFmode)
673 abort ();
675 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
676 break;
678 case 'N':
679 if (x == CONST0_RTX (GET_MODE (x)))
681 fprintf ((stream), "r63");
682 break;
684 goto default_output;
685 case 'u':
686 if (GET_CODE (x) == CONST_INT)
688 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
689 break;
691 /* Fall through. */
693 default_output:
694 default:
695 switch (GET_CODE (x))
697 /* FIXME: We need this on SHmedia32 because reload generates
698 some sign-extended HI or QI loads into DImode registers
699 but, because Pmode is SImode, the address ends up with a
700 subreg:SI of the DImode register. Maybe reload should be
701 fixed so as to apply alter_subreg to such loads? */
702 case SUBREG:
703 if (SUBREG_BYTE (x) != 0
704 || GET_CODE (SUBREG_REG (x)) != REG)
705 abort ();
707 x = SUBREG_REG (x);
708 /* Fall through. */
710 case REG:
711 if (FP_REGISTER_P (REGNO (x))
712 && GET_MODE (x) == V16SFmode)
713 fprintf ((stream), "mtrx%s", reg_names[REGNO (x)] + 2);
714 else if (FP_REGISTER_P (REGNO (x))
715 && GET_MODE (x) == V4SFmode)
716 fprintf ((stream), "fv%s", reg_names[REGNO (x)] + 2);
717 else if (GET_CODE (x) == REG
718 && GET_MODE (x) == V2SFmode)
719 fprintf ((stream), "fp%s", reg_names[REGNO (x)] + 2);
720 else if (FP_REGISTER_P (REGNO (x))
721 && GET_MODE_SIZE (GET_MODE (x)) > 4)
722 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
723 else
724 fputs (reg_names[REGNO (x)], (stream));
725 break;
727 case MEM:
728 output_address (XEXP (x, 0));
729 break;
731 case CONST:
732 if (TARGET_SHMEDIA
733 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
734 && GET_MODE (XEXP (x, 0)) == DImode
735 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
736 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
738 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
740 fputc ('(', stream);
741 if (GET_CODE (val) == ASHIFTRT)
743 fputc ('(', stream);
744 if (GET_CODE (XEXP (val, 0)) == CONST)
745 fputc ('(', stream);
746 output_addr_const (stream, XEXP (val, 0));
747 if (GET_CODE (XEXP (val, 0)) == CONST)
748 fputc (')', stream);
749 fputs (" >> ", stream);
750 output_addr_const (stream, XEXP (val, 1));
751 fputc (')', stream);
753 else
755 if (GET_CODE (val) == CONST)
756 fputc ('(', stream);
757 output_addr_const (stream, val);
758 if (GET_CODE (val) == CONST)
759 fputc (')', stream);
761 fputs (" & 65535)", stream);
762 break;
765 /* Fall through. */
766 default:
767 if (TARGET_SH1)
768 fputc ('#', stream);
769 output_addr_const (stream, x);
770 break;
772 break;
776 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
777 static void
778 force_into (rtx value, rtx target)
780 value = force_operand (value, target);
781 if (! rtx_equal_p (value, target))
782 emit_insn (gen_move_insn (target, value));
785 /* Emit code to perform a block move. Choose the best method.
787 OPERANDS[0] is the destination.
788 OPERANDS[1] is the source.
789 OPERANDS[2] is the size.
790 OPERANDS[3] is the alignment safe to use. */
 792 int
 793 expand_block_move (rtx *operands)
795 int align = INTVAL (operands[3]);
796 int constp = (GET_CODE (operands[2]) == CONST_INT);
797 int bytes = (constp ? INTVAL (operands[2]) : 0);
799 if (! constp)
800 return 0;
802 /* If we could use mov.l to move words and dest is word-aligned, we
803 can use movua.l for loads and still generate a relatively short
804 and efficient sequence. */
805 if (TARGET_SH4A_ARCH && align < 4
806 && MEM_ALIGN (operands[0]) >= 32
807 && can_move_by_pieces (bytes, 32))
809 rtx dest = copy_rtx (operands[0]);
810 rtx src = copy_rtx (operands[1]);
811 /* We could use different pseudos for each copied word, but
812 since movua can only load into r0, it's kind of
813 pointless. */
814 rtx temp = gen_reg_rtx (SImode);
815 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
816 int copied = 0;
818 while (copied + 4 <= bytes)
820 rtx to = adjust_address (dest, SImode, copied);
821 rtx from = adjust_automodify_address (src, SImode, src_addr, copied);
823 emit_insn (gen_movua (temp, from));
824 emit_move_insn (src_addr, plus_constant (src_addr, 4));
825 emit_move_insn (to, temp);
826 copied += 4;
829 if (copied < bytes)
830 move_by_pieces (adjust_address (dest, BLKmode, copied),
831 adjust_automodify_address (src, BLKmode,
832 src_addr, copied),
833 bytes - copied, align, 0);
835 return 1;
838 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
839 alignment, or if it isn't a multiple of 4 bytes, then fail. */
840 if (align < 4 || (bytes % 4 != 0))
841 return 0;
843 if (TARGET_HARD_SH4)
845 if (bytes < 12)
846 return 0;
847 else if (bytes == 12)
849 tree entry_name;
850 rtx sym;
851 rtx func_addr_rtx;
852 rtx r4 = gen_rtx_REG (SImode, 4);
853 rtx r5 = gen_rtx_REG (SImode, 5);
855 entry_name = get_identifier ("__movmemSI12_i4");
857 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
858 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
859 force_into (XEXP (operands[0], 0), r4);
860 force_into (XEXP (operands[1], 0), r5);
861 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
862 return 1;
864 else if (! TARGET_SMALLCODE)
866 tree entry_name;
867 rtx sym;
868 rtx func_addr_rtx;
869 int dwords;
870 rtx r4 = gen_rtx_REG (SImode, 4);
871 rtx r5 = gen_rtx_REG (SImode, 5);
872 rtx r6 = gen_rtx_REG (SImode, 6);
874 entry_name = get_identifier (bytes & 4
875 ? "__movmem_i4_odd"
876 : "__movmem_i4_even");
877 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
878 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
879 force_into (XEXP (operands[0], 0), r4);
880 force_into (XEXP (operands[1], 0), r5);
882 dwords = bytes >> 3;
883 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
884 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
885 return 1;
887 else
888 return 0;
890 if (bytes < 64)
892 char entry[30];
893 tree entry_name;
894 rtx sym;
895 rtx func_addr_rtx;
896 rtx r4 = gen_rtx_REG (SImode, 4);
897 rtx r5 = gen_rtx_REG (SImode, 5);
899 sprintf (entry, "__movmemSI%d", bytes);
900 entry_name = get_identifier (entry);
901 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
902 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
903 force_into (XEXP (operands[0], 0), r4);
904 force_into (XEXP (operands[1], 0), r5);
905 emit_insn (gen_block_move_real (func_addr_rtx));
906 return 1;
909 /* This is the same number of bytes as a memcpy call, but to a different
910 less common function name, so this will occasionally use more space. */
911 if (! TARGET_SMALLCODE)
913 tree entry_name;
914 rtx sym;
915 rtx func_addr_rtx;
916 int final_switch, while_loop;
917 rtx r4 = gen_rtx_REG (SImode, 4);
918 rtx r5 = gen_rtx_REG (SImode, 5);
919 rtx r6 = gen_rtx_REG (SImode, 6);
921 entry_name = get_identifier ("__movmem");
922 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
923 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
924 force_into (XEXP (operands[0], 0), r4);
925 force_into (XEXP (operands[1], 0), r5);
927 /* r6 controls the size of the move. 16 is decremented from it
928 for each 64 bytes moved. Then the negative bit left over is used
929 as an index into a list of move instructions. e.g., a 72 byte move
930 would be set up with size(r6) = 14, for one iteration through the
931 big while loop, and a switch of -2 for the last part. */
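      /* Checking the 72 byte example: bytes / 4 == 18, so final_switch
	 == 16 - (18 % 16) == 14 and while_loop == (18 / 16 - 1) * 16 == 0,
	 so r6 is set to 14.  */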
933 final_switch = 16 - ((bytes / 4) % 16);
934 while_loop = ((bytes / 4) / 16 - 1) * 16;
935 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
936 emit_insn (gen_block_lump_real (func_addr_rtx));
937 return 1;
940 return 0;
943 /* Prepare operands for a move define_expand; specifically, one of the
944 operands must be in a register. */
 946 int
 947 prepare_move_operands (rtx operands[], enum machine_mode mode)
949 if ((mode == SImode || mode == DImode)
950 && flag_pic
951 && ! ((mode == Pmode || mode == ptr_mode)
952 && tls_symbolic_operand (operands[1], Pmode) != 0))
954 rtx temp;
955 if (SYMBOLIC_CONST_P (operands[1]))
957 if (GET_CODE (operands[0]) == MEM)
958 operands[1] = force_reg (Pmode, operands[1]);
959 else if (TARGET_SHMEDIA
960 && GET_CODE (operands[1]) == LABEL_REF
961 && target_reg_operand (operands[0], mode))
962 /* It's ok. */;
963 else
965 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
966 operands[1] = legitimize_pic_address (operands[1], mode, temp);
969 else if (GET_CODE (operands[1]) == CONST
970 && GET_CODE (XEXP (operands[1], 0)) == PLUS
971 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
973 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
974 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
975 mode, temp);
976 operands[1] = expand_binop (mode, add_optab, temp,
977 XEXP (XEXP (operands[1], 0), 1),
978 no_new_pseudos ? temp
979 : gen_reg_rtx (Pmode),
980 0, OPTAB_LIB_WIDEN);
984 if (! reload_in_progress && ! reload_completed)
986 /* Copy the source to a register if both operands aren't registers. */
987 if (! register_operand (operands[0], mode)
988 && ! sh_register_operand (operands[1], mode))
989 operands[1] = copy_to_mode_reg (mode, operands[1]);
991 if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
993 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
994 except that we can't use that function because it is static. */
995 rtx new = change_address (operands[0], mode, 0);
996 MEM_COPY_ATTRIBUTES (new, operands[0]);
997 operands[0] = new;
1000 /* This case can happen while generating code to move the result
1001 of a library call to the target. Reject `st r0,@(rX,rY)' because
1002 reload will fail to find a spill register for rX, since r0 is already
1003 being used for the source. */
1004 else if (refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1005 && GET_CODE (operands[0]) == MEM
1006 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1007 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
1008 operands[1] = copy_to_mode_reg (mode, operands[1]);
1011 if (mode == Pmode || mode == ptr_mode)
1013 rtx op0, op1;
1014 enum tls_model tls_kind;
1016 op0 = operands[0];
1017 op1 = operands[1];
1018 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
1020 rtx tga_op1, tga_ret, tmp, tmp2;
1022 switch (tls_kind)
1024 case TLS_MODEL_GLOBAL_DYNAMIC:
1025 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1026 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1027 op1 = tga_ret;
1028 break;
1030 case TLS_MODEL_LOCAL_DYNAMIC:
1031 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1032 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1034 tmp = gen_reg_rtx (Pmode);
1035 emit_move_insn (tmp, tga_ret);
1037 if (register_operand (op0, Pmode))
1038 tmp2 = op0;
1039 else
1040 tmp2 = gen_reg_rtx (Pmode);
1042 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1043 op1 = tmp2;
1044 break;
1046 case TLS_MODEL_INITIAL_EXEC:
1047 if (! flag_pic)
1049 /* Don't schedule insns for getting GOT address when
1050 the first scheduling is enabled, to avoid spill
1051 failures for R0. */
1052 if (flag_schedule_insns)
1053 emit_insn (gen_blockage ());
1054 emit_insn (gen_GOTaddr2picreg ());
1055 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode,
1056 PIC_REG)));
1057 if (flag_schedule_insns)
1058 emit_insn (gen_blockage ());
1060 tga_op1 = no_new_pseudos ? op0 : gen_reg_rtx (Pmode);
1061 tmp = gen_sym2GOTTPOFF (op1);
1062 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1063 op1 = tga_op1;
1064 break;
1066 case TLS_MODEL_LOCAL_EXEC:
1067 tmp2 = gen_reg_rtx (Pmode);
1068 emit_insn (gen_load_gbr (tmp2));
1069 tmp = gen_reg_rtx (Pmode);
1070 emit_insn (gen_symTPOFF2reg (tmp, op1));
1072 if (register_operand (op0, Pmode))
1073 op1 = op0;
1074 else
1075 op1 = gen_reg_rtx (Pmode);
1077 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1078 break;
1080 default:
1081 abort ();
1083 operands[1] = op1;
1087 return 0;
1090 /* Prepare the operands for an scc instruction; make sure that the
1091 compare has been done. */
1092 rtx
1093 prepare_scc_operands (enum rtx_code code)
1095 rtx t_reg = gen_rtx_REG (SImode, T_REG);
1096 enum rtx_code oldcode = code;
1097 enum machine_mode mode;
1099 /* First need a compare insn. */
1100 switch (code)
1102 case NE:
1103 /* It isn't possible to handle this case. */
1104 abort ();
1105 case LT:
1106 code = GT;
1107 break;
1108 case LE:
1109 code = GE;
1110 break;
1111 case LTU:
1112 code = GTU;
1113 break;
1114 case LEU:
1115 code = GEU;
1116 break;
1117 default:
1118 break;
1120 if (code != oldcode)
1122 rtx tmp = sh_compare_op0;
1123 sh_compare_op0 = sh_compare_op1;
1124 sh_compare_op1 = tmp;
1127 mode = GET_MODE (sh_compare_op0);
1128 if (mode == VOIDmode)
1129 mode = GET_MODE (sh_compare_op1);
1131 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1132 if ((code != EQ && code != NE
1133 && (sh_compare_op1 != const0_rtx
1134 || code == GTU || code == GEU || code == LTU || code == LEU))
1135 || (mode == DImode && sh_compare_op1 != const0_rtx)
1136 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1137 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1139 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1140 (mode == SFmode ? emit_sf_insn : emit_df_insn)
1141 (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
1142 gen_rtx_SET (VOIDmode, t_reg,
1143 gen_rtx_fmt_ee (code, SImode,
1144 sh_compare_op0, sh_compare_op1)),
1145 gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
1146 else
1147 emit_insn (gen_rtx_SET (VOIDmode, t_reg,
1148 gen_rtx_fmt_ee (code, SImode,
1149 sh_compare_op0, sh_compare_op1)));
1151 return t_reg;
1154 /* Called from the md file, set up the operands of a compare instruction. */
1156 void
1157 from_compare (rtx *operands, int code)
1159 enum machine_mode mode = GET_MODE (sh_compare_op0);
1160 rtx insn;
1161 if (mode == VOIDmode)
1162 mode = GET_MODE (sh_compare_op1);
1163 if (code != EQ
1164 || mode == DImode
1165 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1167 /* Force args into regs, since we can't use constants here. */
1168 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1169 if (sh_compare_op1 != const0_rtx
1170 || code == GTU || code == GEU
1171 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1172 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1174 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
1176 from_compare (operands, GT);
1177 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
1179 else
1180 insn = gen_rtx_SET (VOIDmode,
1181 gen_rtx_REG (SImode, T_REG),
1182 gen_rtx_fmt_ee (code, SImode,
1183 sh_compare_op0, sh_compare_op1));
1184 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1186 insn = gen_rtx_PARALLEL (VOIDmode,
1187 gen_rtvec (2, insn,
1188 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
1189 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
1191 else
1192 emit_insn (insn);
1195 /* Functions to output assembly code. */
1197 /* Return a sequence of instructions to perform DI or DF move.
1199 Since the SH cannot move a DI or DF in one instruction, we have
1200 to take care when we see overlapping source and dest registers. */
1202 const char *
1203 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
1204 enum machine_mode mode)
1206 rtx dst = operands[0];
1207 rtx src = operands[1];
1209 if (GET_CODE (dst) == MEM
1210 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
1211 return "mov.l %T1,%0\n\tmov.l %1,%0";
1213 if (register_operand (dst, mode)
1214 && register_operand (src, mode))
1216 if (REGNO (src) == MACH_REG)
1217 return "sts mach,%S0\n\tsts macl,%R0";
1219 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1220 when mov.d r1,r0 do r1->r0 then r2->r1. */
1222 if (REGNO (src) + 1 == REGNO (dst))
1223 return "mov %T1,%T0\n\tmov %1,%0";
1224 else
1225 return "mov %1,%0\n\tmov %T1,%T0";
1227 else if (GET_CODE (src) == CONST_INT)
1229 if (INTVAL (src) < 0)
1230 output_asm_insn ("mov #-1,%S0", operands);
1231 else
1232 output_asm_insn ("mov #0,%S0", operands);
1234 return "mov %1,%R0";
1236 else if (GET_CODE (src) == MEM)
1238 int ptrreg = -1;
1239 int dreg = REGNO (dst);
1240 rtx inside = XEXP (src, 0);
1242 if (GET_CODE (inside) == REG)
1243 ptrreg = REGNO (inside);
1244 else if (GET_CODE (inside) == SUBREG)
1245 ptrreg = subreg_regno (inside);
1246 else if (GET_CODE (inside) == PLUS)
1248 ptrreg = REGNO (XEXP (inside, 0));
1249 /* ??? A r0+REG address shouldn't be possible here, because it isn't
1250 an offsettable address. Unfortunately, offsettable addresses use
1251 QImode to check the offset, and a QImode offsettable address
1252 requires r0 for the other operand, which is not currently
1253 supported, so we can't use the 'o' constraint.
1254 Thus we must check for and handle r0+REG addresses here.
1255 We punt for now, since this is likely very rare. */
1256 if (GET_CODE (XEXP (inside, 1)) == REG)
1257 abort ();
1259 else if (GET_CODE (inside) == LABEL_REF)
1260 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1261 else if (GET_CODE (inside) == POST_INC)
1262 return "mov.l %1,%0\n\tmov.l %1,%T0";
1263 else
1264 abort ();
1266 /* Work out the safe way to copy. Copy into the second half first. */
1267 if (dreg == ptrreg)
1268 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1271 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1274 /* Print an instruction which would have gone into a delay slot after
1275 another instruction, but couldn't because the other instruction expanded
1276 into a sequence where putting the slot insn at the end wouldn't work. */
1278 static void
1279 print_slot (rtx insn)
1281 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1, NULL);
1283 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
1286 const char *
1287 output_far_jump (rtx insn, rtx op)
1289 struct { rtx lab, reg, op; } this;
1290 rtx braf_base_lab = NULL_RTX;
1291 const char *jump;
1292 int far;
1293 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1294 rtx prev;
1296 this.lab = gen_label_rtx ();
1298 if (TARGET_SH2
1299 && offset >= -32764
1300 && offset - get_attr_length (insn) <= 32766)
1302 far = 0;
1303 jump = "mov.w %O0,%1; braf %1";
1305 else
1307 far = 1;
1308 if (flag_pic)
1310 if (TARGET_SH2)
1311 jump = "mov.l %O0,%1; braf %1";
1312 else
1313 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1315 else
1316 jump = "mov.l %O0,%1; jmp @%1";
1318 /* If we have a scratch register available, use it. */
1319 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1320 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1322 this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1323 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1324 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1325 output_asm_insn (jump, &this.lab);
1326 if (dbr_sequence_length ())
1327 print_slot (final_sequence);
1328 else
1329 output_asm_insn ("nop", 0);
1331 else
1333 /* Output the delay slot insn first if any. */
1334 if (dbr_sequence_length ())
1335 print_slot (final_sequence);
1337 this.reg = gen_rtx_REG (SImode, 13);
1338 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1339 Fortunately, MACL is fixed and call-clobbered, and we never
1340 need its value across jumps, so save r13 in it instead of in
1341 the stack. */
1342 if (TARGET_SH5)
1343 output_asm_insn ("lds r13, macl", 0);
1344 else
1345 output_asm_insn ("mov.l r13,@-r15", 0);
1346 output_asm_insn (jump, &this.lab);
1347 if (TARGET_SH5)
1348 output_asm_insn ("sts macl, r13", 0);
1349 else
1350 output_asm_insn ("mov.l @r15+,r13", 0);
1352 if (far && flag_pic && TARGET_SH2)
1354 braf_base_lab = gen_label_rtx ();
1355 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1356 CODE_LABEL_NUMBER (braf_base_lab));
1358 if (far)
1359 output_asm_insn (".align 2", 0);
1360 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1361 this.op = op;
1362 if (far && flag_pic)
1364 if (TARGET_SH2)
1365 this.lab = braf_base_lab;
1366 output_asm_insn (".long %O2-%O0", &this.lab);
1368 else
1369 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
1370 return "";
1373 /* Local label counter, used for constants in the pool and inside
1374 pattern branches. */
1376 static int lf = 100;
1378 /* Output code for ordinary branches. */
1380 const char *
1381 output_branch (int logic, rtx insn, rtx *operands)
1383 switch (get_attr_length (insn))
1385 case 6:
1386 /* This can happen if filling the delay slot has caused a forward
1387 branch to exceed its range (we could reverse it, but only
1388 when we know we won't overextend other branches; this should
1389 best be handled by relaxation).
1390 It can also happen when other condbranches hoist delay slot insn
1391 from their destination, thus leading to code size increase.
1392 But the branch will still be in the range -4092..+4098 bytes. */
1394 if (! TARGET_RELAX)
1396 int label = lf++;
1397 /* The call to print_slot will clobber the operands. */
1398 rtx op0 = operands[0];
1400 /* If the instruction in the delay slot is annulled (true), then
1401 there is no delay slot where we can put it now. The only safe
1402 place for it is after the label. final will do that by default. */
1404 if (final_sequence
1405 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1406 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1408 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
1409 ASSEMBLER_DIALECT ? "/" : ".", label);
1410 print_slot (final_sequence);
1412 else
1413 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
1415 output_asm_insn ("bra\t%l0", &op0);
1416 fprintf (asm_out_file, "\tnop\n");
1417 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1419 return "";
1421 /* When relaxing, handle this like a short branch. The linker
1422 will fix it up if it still doesn't fit after relaxation. */
1423 case 2:
1424 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
1426 /* These are for SH2e, in which we have to account for the
1427 extra nop because of the hardware bug in annulled branches. */
1428 case 8:
1429 if (! TARGET_RELAX)
1431 int label = lf++;
1433 if (final_sequence
1434 && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
1435 abort ();
1436 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
1437 logic ? "f" : "t",
1438 ASSEMBLER_DIALECT ? "/" : ".", label);
1439 fprintf (asm_out_file, "\tnop\n");
1440 output_asm_insn ("bra\t%l0", operands);
1441 fprintf (asm_out_file, "\tnop\n");
1442 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1444 return "";
1446 /* When relaxing, fall through. */
1447 case 4:
1449 char buffer[10];
1451 sprintf (buffer, "b%s%ss\t%%l0",
1452 logic ? "t" : "f",
1453 ASSEMBLER_DIALECT ? "/" : ".");
1454 output_asm_insn (buffer, &operands[0]);
1455 return "nop";
1458 default:
1459 /* There should be no longer branches now - that would
1460 indicate that something has destroyed the branches set
1461 up in machine_dependent_reorg. */
1462 abort ();
1466 const char *
1467 output_branchy_insn (enum rtx_code code, const char *template,
1468 rtx insn, rtx *operands)
1470 rtx next_insn = NEXT_INSN (insn);
1472 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
1474 rtx src = SET_SRC (PATTERN (next_insn));
1475 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
1477 /* Following branch not taken */
1478 operands[9] = gen_label_rtx ();
1479 emit_label_after (operands[9], next_insn);
1480 INSN_ADDRESSES_NEW (operands[9],
1481 INSN_ADDRESSES (INSN_UID (next_insn))
1482 + get_attr_length (next_insn));
1483 return template;
1485 else
1487 int offset = (branch_dest (next_insn)
1488 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
1489 if (offset >= -252 && offset <= 258)
1491 if (GET_CODE (src) == IF_THEN_ELSE)
1492 /* branch_true */
1493 src = XEXP (src, 1);
1494 operands[9] = src;
1495 return template;
1499 operands[9] = gen_label_rtx ();
1500 emit_label_after (operands[9], insn);
1501 INSN_ADDRESSES_NEW (operands[9],
1502 INSN_ADDRESSES (INSN_UID (insn))
1503 + get_attr_length (insn));
1504 return template;
1507 const char *
1508 output_ieee_ccmpeq (rtx insn, rtx *operands)
1510 return output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);
1513 /* Output the start of the assembler file. */
1515 static void
1516 sh_file_start (void)
1518 default_file_start ();
1520 #ifdef SYMBIAN
1521 /* Declare the .directive section before it is used. */
1522 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
1523 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
1524 #endif
1526 if (TARGET_ELF)
1527 /* We need to show the text section with the proper
1528 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
1529 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
1530 will complain. We can teach GAS specifically about the
1531 default attributes for our choice of text section, but
1532 then we would have to change GAS again if/when we change
1533 the text section name. */
1534 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
1535 else
1536 /* Switch to the data section so that the coffsem symbol
1537 isn't in the text section. */
1538 data_section ();
1540 if (TARGET_LITTLE_ENDIAN)
1541 fputs ("\t.little\n", asm_out_file);
1543 if (!TARGET_ELF)
1545 if (TARGET_SHCOMPACT)
1546 fputs ("\t.mode\tSHcompact\n", asm_out_file);
1547 else if (TARGET_SHMEDIA)
1548 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1549 TARGET_SHMEDIA64 ? 64 : 32);
1553 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
1555 static bool
1556 unspec_caller_rtx_p (rtx pat)
1558 switch (GET_CODE (pat))
1560 case CONST:
1561 return unspec_caller_rtx_p (XEXP (pat, 0));
1562 case PLUS:
1563 case MINUS:
1564 if (unspec_caller_rtx_p (XEXP (pat, 0)))
1565 return true;
1566 return unspec_caller_rtx_p (XEXP (pat, 1));
1567 case UNSPEC:
1568 if (XINT (pat, 1) == UNSPEC_CALLER)
1569 return true;
1570 default:
1571 break;
1574 return false;
1577 /* Indicate that INSN cannot be duplicated. This is true for an insn
1578 that generates a unique label. */
1580 static bool
1581 sh_cannot_copy_insn_p (rtx insn)
1583 rtx pat;
1585 if (!reload_completed || !flag_pic)
1586 return false;
1588 if (GET_CODE (insn) != INSN)
1589 return false;
1590 if (asm_noperands (insn) >= 0)
1591 return false;
1593 pat = PATTERN (insn);
1594 if (GET_CODE (pat) != SET)
1595 return false;
1596 pat = SET_SRC (pat);
1598 if (unspec_caller_rtx_p (pat))
1599 return true;
1601 return false;
1604 /* Actual number of instructions used to make a shift by N. */
1605 static const char ashiftrt_insns[] =
1606 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
1608 /* Left shift and logical right shift are the same. */
1609 static const char shift_insns[] =
1610 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1612 /* Individual shift amounts needed to get the above length sequences.
1613 One bit right shifts clobber the T bit, so when possible, put one bit
1614 shifts in the middle of the sequence, so the ends are eligible for
1615 branch delay slots. */
1616 static const short shift_amounts[32][5] = {
1617 {0}, {1}, {2}, {2, 1},
1618 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
1619 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1620 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
1621 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1622 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1623 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1624 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
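/* For example, a shift by 5 uses shift_amounts[5] == {2, 1, 2}: two 2-bit
   shifts around a 1-bit shift, matching the count of 3 in shift_insns[5].  */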
1626 /* Likewise, but for shift amounts < 16, up to three highmost bits
1627 might be clobbered. This is typically used when combined with some
1628 kind of sign or zero extension. */
1630 static const char ext_shift_insns[] =
1631 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1633 static const short ext_shift_amounts[32][4] = {
1634 {0}, {1}, {2}, {2, 1},
1635 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
1636 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1637 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
1638 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1639 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1640 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1641 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1643 /* Assuming we have a value that has been sign-extended by at least one bit,
1644 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
1645 to shift it by N without data loss, and quicker than by other means? */
1646 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
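/* The condition ((N | 8) == 15) holds exactly for N == 7 and N == 15.  */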
1648 /* This is used in length attributes in sh.md to help compute the length
1649 of arbitrary constant shift instructions. */
1651 int
1652 shift_insns_rtx (rtx insn)
1654 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1655 int shift_count = INTVAL (XEXP (set_src, 1));
1656 enum rtx_code shift_code = GET_CODE (set_src);
1658 switch (shift_code)
1660 case ASHIFTRT:
1661 return ashiftrt_insns[shift_count];
1662 case LSHIFTRT:
1663 case ASHIFT:
1664 return shift_insns[shift_count];
1665 default:
1666 abort ();
1670 /* Return the cost of a shift. */
1672 static inline int
1673 shiftcosts (rtx x)
1675 int value;
1677 if (TARGET_SHMEDIA)
1678 return 1;
1680 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
1682 if (GET_MODE (x) == DImode
1683 && GET_CODE (XEXP (x, 1)) == CONST_INT
1684 && INTVAL (XEXP (x, 1)) == 1)
1685 return 2;
1687 /* Everything else is invalid, because there is no pattern for it. */
1688 return 10000;
1690 /* If shift by a non constant, then this will be expensive. */
1691 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1692 return SH_DYNAMIC_SHIFT_COST;
1694 value = INTVAL (XEXP (x, 1));
1696 /* Otherwise, return the true cost in instructions. */
1697 if (GET_CODE (x) == ASHIFTRT)
1699 int cost = ashiftrt_insns[value];
1700 /* If SH3, then we put the constant in a reg and use shad. */
1701 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1702 cost = 1 + SH_DYNAMIC_SHIFT_COST;
1703 return cost;
1705 else
1706 return shift_insns[value];
1709 /* Return the cost of an AND operation. */
1711 static inline int
1712 andcosts (rtx x)
1714 int i;
1716 /* Anding with a register is a single cycle and instruction. */
1717 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1718 return 1;
1720 i = INTVAL (XEXP (x, 1));
1722 if (TARGET_SHMEDIA)
1724 if ((GET_CODE (XEXP (x, 1)) == CONST_INT
1725 && CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1726 || EXTRA_CONSTRAINT_C16 (XEXP (x, 1)))
1727 return 1;
1728 else
1729 return 2;
1732 /* These constants are single cycle extu.[bw] instructions. */
1733 if (i == 0xff || i == 0xffff)
1734 return 1;
1735 /* Constants that can be used in an and immediate instruction in a single
1736 cycle, but this requires r0, so make it a little more expensive. */
1737 if (CONST_OK_FOR_K08 (i))
1738 return 2;
1739 /* Constants that can be loaded with a mov immediate and an and.
1740 This case is probably unnecessary. */
1741 if (CONST_OK_FOR_I08 (i))
1742 return 2;
1743 Any other constant requires a 2 cycle pc-relative load plus an and.
1744 This case is probably unnecessary. */
1745 return 3;
1748 /* Return the cost of an addition or a subtraction. */
1750 static inline int
1751 addsubcosts (rtx x)
1753 /* Adding a register is a single cycle insn. */
1754 if (GET_CODE (XEXP (x, 1)) == REG
1755 || GET_CODE (XEXP (x, 1)) == SUBREG)
1756 return 1;
1758 /* Likewise for small constants. */
1759 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1760 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
1761 return 1;
1763 if (TARGET_SHMEDIA)
1764 switch (GET_CODE (XEXP (x, 1)))
1766 case CONST:
1767 case LABEL_REF:
1768 case SYMBOL_REF:
1769 return TARGET_SHMEDIA64 ? 5 : 3;
1771 case CONST_INT:
1772 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1773 return 2;
1774 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
1775 return 3;
1776 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
1777 return 4;
1779 /* Fall through. */
1780 default:
1781 return 5;
1784 /* Any other constant requires a 2 cycle pc-relative load plus an
1785 addition. */
1786 return 3;
1789 /* Return the cost of a multiply. */
1790 static inline int
1791 multcosts (rtx x ATTRIBUTE_UNUSED)
1793 if (TARGET_SHMEDIA)
1794 return 3;
1796 if (TARGET_SH2)
1798 /* We have a mul insn, so we can never take more than the mul and the
1799 read of the mac reg, but count more because of the latency and extra
1800 reg usage. */
1801 if (TARGET_SMALLCODE)
1802 return 2;
1803 return 3;
1806 /* If we're aiming at small code, then just count the number of
1807 insns in a multiply call sequence. */
1808 if (TARGET_SMALLCODE)
1809 return 5;
1811 /* Otherwise, count all the insns in the routine we'd be calling, too. */
1812 return 20;
1815 /* Compute a (partial) cost for rtx X. Return true if the complete
1816 cost has been computed, and false if subexpressions should be
1817 scanned. In either case, *TOTAL contains the cost result. */
1819 static bool
1820 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
1822 switch (code)
1824 case CONST_INT:
1825 if (TARGET_SHMEDIA)
1827 if (INTVAL (x) == 0)
1828 *total = 0;
1829 else if (outer_code == AND && and_operand ((x), DImode))
1830 *total = 0;
1831 else if ((outer_code == IOR || outer_code == XOR
1832 || outer_code == PLUS)
1833 && CONST_OK_FOR_I10 (INTVAL (x)))
1834 *total = 0;
1835 else if (CONST_OK_FOR_I16 (INTVAL (x)))
1836 *total = COSTS_N_INSNS (outer_code != SET);
1837 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
1838 *total = COSTS_N_INSNS (2);
1839 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
1840 *total = COSTS_N_INSNS (3);
1841 else
1842 *total = COSTS_N_INSNS (4);
1843 return true;
1845 if (CONST_OK_FOR_I08 (INTVAL (x)))
1846 *total = 0;
1847 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
1848 && CONST_OK_FOR_K08 (INTVAL (x)))
1849 *total = 1;
1850 else
1851 *total = 8;
1852 return true;
1854 case CONST:
1855 case LABEL_REF:
1856 case SYMBOL_REF:
1857 if (TARGET_SHMEDIA64)
1858 *total = COSTS_N_INSNS (4);
1859 else if (TARGET_SHMEDIA32)
1860 *total = COSTS_N_INSNS (2);
1861 else
1862 *total = 5;
1863 return true;
1865 case CONST_DOUBLE:
1866 if (TARGET_SHMEDIA)
1867 *total = COSTS_N_INSNS (4);
1868 else
1869 *total = 10;
1870 return true;
1872 case PLUS:
1873 *total = COSTS_N_INSNS (addsubcosts (x));
1874 return true;
1876 case AND:
1877 *total = COSTS_N_INSNS (andcosts (x));
1878 return true;
1880 case MULT:
1881 *total = COSTS_N_INSNS (multcosts (x));
1882 return true;
1884 case ASHIFT:
1885 case ASHIFTRT:
1886 case LSHIFTRT:
1887 *total = COSTS_N_INSNS (shiftcosts (x));
1888 return true;
1890 case DIV:
1891 case UDIV:
1892 case MOD:
1893 case UMOD:
1894 *total = COSTS_N_INSNS (20);
1895 return true;
1897 case FLOAT:
1898 case FIX:
1899 *total = 100;
1900 return true;
1902 default:
1903 return false;
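/* For illustration of the CONST_INT case above (non-SHmedia): a constant
   like 100 costs 0 because it fits the signed 8-bit I08 range; 200 costs 1
   when it is the second operand of an AND/IOR/XOR, since it fits the
   unsigned 8-bit K08 range; anything that fits neither range costs 8,
   reflecting the pc-relative load needed to materialize it.  */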
1907 /* Compute the cost of an address. For the SH, all valid addresses are
1908 the same cost. Use a slightly higher cost for reg + reg addressing,
1909 since it increases pressure on r0. */
1911 static int
1912 sh_address_cost (rtx X)
1914 return (GET_CODE (X) == PLUS
1915 && ! CONSTANT_P (XEXP (X, 1))
1916 && ! TARGET_SHMEDIA ? 1 : 0);
1919 /* Code to expand a shift. */
1921 void
1922 gen_ashift (int type, int n, rtx reg)
1924 /* Negative values here come from the shift_amounts array. */
1925 if (n < 0)
1927 if (type == ASHIFT)
1928 type = LSHIFTRT;
1929 else
1930 type = ASHIFT;
1931 n = -n;
1934 switch (type)
1936 case ASHIFTRT:
1937 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
1938 break;
1939 case LSHIFTRT:
1940 if (n == 1)
1941 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
1942 else
1943 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
1944 break;
1945 case ASHIFT:
1946 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
1947 break;
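/* Worked example for the negative-count handling above: a call such as
   gen_ashift (ASHIFT, -2, reg) emits a logical right shift by two; a
   negative count from the shift_amounts array simply flips the shift
   direction.  */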
1951 /* Same for HImode */
1953 void
1954 gen_ashift_hi (int type, int n, rtx reg)
1956 /* Negative values here come from the shift_amounts array. */
1957 if (n < 0)
1959 if (type == ASHIFT)
1960 type = LSHIFTRT;
1961 else
1962 type = ASHIFT;
1963 n = -n;
1966 switch (type)
1968 case ASHIFTRT:
1969 case LSHIFTRT:
1970 /* We don't have HImode right shift operations because using the
1971 ordinary 32 bit shift instructions for that doesn't generate proper
1972 zero/sign extension.
1973 gen_ashift_hi is only called in contexts where we know that the
1974 sign extension works out correctly. */
1976 int offset = 0;
1977 if (GET_CODE (reg) == SUBREG)
1979 offset = SUBREG_BYTE (reg);
1980 reg = SUBREG_REG (reg);
1982 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
1983 break;
1985 case ASHIFT:
1986 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
1987 break;
1991 /* Output RTL to split a constant shift into its component SH constant
1992 shift instructions. */
1994 void
1995 gen_shifty_op (int code, rtx *operands)
1997 int value = INTVAL (operands[2]);
1998 int max, i;
2000 /* Truncate the shift count in case it is out of bounds. */
2001 value = value & 0x1f;
2003 if (value == 31)
2005 if (code == LSHIFTRT)
2007 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
2008 emit_insn (gen_movt (operands[0]));
2009 return;
2011 else if (code == ASHIFT)
2013 /* There is a two instruction sequence for 31 bit left shifts,
2014 but it requires r0. */
2015 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
2017 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
2018 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
2019 return;
2023 else if (value == 0)
2025 /* This can happen when not optimizing. We must output something here
2026 to prevent the compiler from aborting in final.c after the try_split
2027 call. */
2028 emit_insn (gen_nop ());
2029 return;
2032 max = shift_insns[value];
2033 for (i = 0; i < max; i++)
2034 gen_ashift (code, shift_amounts[value][i], operands[0]);
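/* A minimal sketch, kept out of the build, of the idea behind the
   shift_insns / shift_amounts tables consulted above: SH1 only has
   single-insn left / logical-right shifts by 1, 2, 8 and 16, so a constant
   shift is emitted as a short sequence of those.  This naive greedy
   decomposition (naive_shift_decomposition is illustrative only, not part
   of the port) shows the principle; the real tables can also over-shift
   and then shift back, which is what the negative amounts handled in
   gen_ashift encode.  */
#if 0
static int
naive_shift_decomposition (int count, int steps_out[6])
{
  static const int hw_shifts[] = { 16, 8, 2, 1 };
  unsigned int i;
  int n = 0;

  for (i = 0; i < sizeof hw_shifts / sizeof hw_shifts[0]; i++)
    while (count >= hw_shifts[i])
      {
	steps_out[n++] = hw_shifts[i];
	count -= hw_shifts[i];
      }
  return n;	/* e.g. 10 -> { 8, 2 }, 27 -> { 16, 8, 2, 1 }.  */
}
#endif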
2037 /* Same as above, but optimized for values where the topmost bits don't
2038 matter. */
2040 void
2041 gen_shifty_hi_op (int code, rtx *operands)
2043 int value = INTVAL (operands[2]);
2044 int max, i;
2045 void (*gen_fun) (int, int, rtx);
2047 /* This operation is used by and_shl for SImode values with a few
2048 high bits known to be cleared. */
2049 value &= 31;
2050 if (value == 0)
2052 emit_insn (gen_nop ());
2053 return;
2056 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2057 if (code == ASHIFT)
2059 max = ext_shift_insns[value];
2060 for (i = 0; i < max; i++)
2061 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2063 else
2064 /* When shifting right, emit the shifts in reverse order, so that
2065 solitary negative values come first. */
2066 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2067 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2070 /* Output RTL for an arithmetic right shift. */
2072 /* ??? Rewrite to use super-optimizer sequences. */
2075 expand_ashiftrt (rtx *operands)
2077 rtx sym;
2078 rtx wrk;
2079 char func[18];
2080 tree func_name;
2081 int value;
2083 if (TARGET_SH3)
2085 if (GET_CODE (operands[2]) != CONST_INT)
2087 rtx count = copy_to_mode_reg (SImode, operands[2]);
2088 emit_insn (gen_negsi2 (count, count));
2089 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2090 return 1;
2092 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2093 > 1 + SH_DYNAMIC_SHIFT_COST)
2095 rtx count
2096 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2097 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2098 return 1;
2101 if (GET_CODE (operands[2]) != CONST_INT)
2102 return 0;
2104 value = INTVAL (operands[2]) & 31;
2106 if (value == 31)
2108 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2109 return 1;
2111 else if (value >= 16 && value <= 19)
2113 wrk = gen_reg_rtx (SImode);
2114 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2115 value -= 16;
2116 while (value--)
2117 gen_ashift (ASHIFTRT, 1, wrk);
2118 emit_move_insn (operands[0], wrk);
2119 return 1;
2121 /* Expand a short sequence inline; for a longer one, call a helper routine. */
2122 else if (value <= 5)
2124 wrk = gen_reg_rtx (SImode);
2125 emit_move_insn (wrk, operands[1]);
2126 while (value--)
2127 gen_ashift (ASHIFTRT, 1, wrk);
2128 emit_move_insn (operands[0], wrk);
2129 return 1;
2132 wrk = gen_reg_rtx (Pmode);
2134 /* Load the value into an arg reg and call a helper. */
2135 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2136 sprintf (func, "__ashiftrt_r4_%d", value);
2137 func_name = get_identifier (func);
2138 sym = function_symbol (IDENTIFIER_POINTER (func_name));
2139 emit_move_insn (wrk, sym);
2140 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2141 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
2142 return 1;
2146 sh_dynamicalize_shift_p (rtx count)
2148 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
2151 /* Try to find a good way to implement the combiner pattern
2152 [(set (match_operand:SI 0 "register_operand" "r")
2153 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2154 (match_operand:SI 2 "const_int_operand" "n"))
2155 (match_operand:SI 3 "const_int_operand" "n"))) .
2156 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2157 return 0 for simple right / left or left/right shift combination.
2158 return 1 for a combination of shifts with zero_extend.
2159 return 2 for a combination of shifts with an AND that needs r0.
2160 return 3 for a combination of shifts with an AND that needs an extra
2161 scratch register, when the three highmost bits of the AND mask are clear.
2162 return 4 for a combination of shifts with an AND that needs an extra
2163 scratch register, when any of the three highmost bits of the AND mask
2164 is set.
2165 If ATTRP is set, store an initial right shift width in ATTRP[0],
2166 and the instruction length in ATTRP[1] . These values are not valid
2167 when returning 0.
2168 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2169 shift_amounts for the last shift value that is to be used before the
2170 sign extend. */
2172 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2174 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2175 int left = INTVAL (left_rtx), right;
2176 int best = 0;
2177 int cost, best_cost = 10000;
2178 int best_right = 0, best_len = 0;
2179 int i;
2180 int can_ext;
2182 if (left < 0 || left > 31)
2183 return 0;
2184 if (GET_CODE (mask_rtx) == CONST_INT)
2185 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2186 else
2187 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2188 /* Can this be expressed as a right shift / left shift pair? */
2189 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2190 right = exact_log2 (lsb);
2191 mask2 = ~(mask + lsb - 1);
2192 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2193 /* mask has no zeroes except trailing zeroes <==> ! mask2 */
2194 if (! mask2)
2195 best_cost = shift_insns[right] + shift_insns[right + left];
2196 /* mask has no trailing zeroes <==> ! right */
2197 else if (! right && mask2 == ~(lsb2 - 1))
2199 int late_right = exact_log2 (lsb2);
2200 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2202 /* Try to use zero extend. */
2203 if (mask2 == ~(lsb2 - 1))
2205 int width, first;
2207 for (width = 8; width <= 16; width += 8)
2209 /* Can we zero-extend right away? */
2210 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2212 cost
2213 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2214 if (cost < best_cost)
2216 best = 1;
2217 best_cost = cost;
2218 best_right = right;
2219 best_len = cost;
2220 if (attrp)
2221 attrp[2] = -1;
2223 continue;
2225 /* ??? Could try to put zero extend into initial right shift,
2226 or even shift a bit left before the right shift. */
2227 /* Determine value of first part of left shift, to get to the
2228 zero extend cut-off point. */
2229 first = width - exact_log2 (lsb2) + right;
2230 if (first >= 0 && right + left - first >= 0)
2232 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2233 + ext_shift_insns[right + left - first];
2234 if (cost < best_cost)
2236 best = 1;
2237 best_cost = cost;
2238 best_right = right;
2239 best_len = cost;
2240 if (attrp)
2241 attrp[2] = first;
2246 /* Try to use r0 AND pattern. */
2247 for (i = 0; i <= 2; i++)
2249 if (i > right)
2250 break;
2251 if (! CONST_OK_FOR_K08 (mask >> i))
2252 continue;
2253 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2254 if (cost < best_cost)
2256 best = 2;
2257 best_cost = cost;
2258 best_right = i;
2259 best_len = cost - 1;
2262 /* Try to use a scratch register to hold the AND operand. */
2263 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2264 for (i = 0; i <= 2; i++)
2266 if (i > right)
2267 break;
2268 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2269 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2270 if (cost < best_cost)
2272 best = 4 - can_ext;
2273 best_cost = cost;
2274 best_right = i;
2275 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2279 if (attrp)
2281 attrp[0] = best_right;
2282 attrp[1] = best_len;
2284 return best;
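/* An illustrative identity, kept out of the build, behind the kind 1
   (zero_extend) case described above: when the mask, viewed after the left
   shift, covers just the low 8 or 16 bits, the and-of-shift collapses to a
   zero extension plus a shift.  shl_and_kind1_reference is a hypothetical
   name used only for this sketch.  */
#if 0
static unsigned int
shl_and_kind1_reference (unsigned int x)
{
  /* (x << 2) & 0x3fc  ==  (x & 0xff) << 2: a zero extension (extu.b)
     plus one two-bit shift, instead of an AND with a mask that does not
     fit the 8-bit immediate form.  */
  return (x & 0xff) << 2;
}
#endif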
2287 /* This is used in length attributes of the unnamed instructions
2288 corresponding to shl_and_kind return values of 1 and 2. */
2290 shl_and_length (rtx insn)
2292 rtx set_src, left_rtx, mask_rtx;
2293 int attributes[3];
2295 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2296 left_rtx = XEXP (XEXP (set_src, 0), 1);
2297 mask_rtx = XEXP (set_src, 1);
2298 shl_and_kind (left_rtx, mask_rtx, attributes);
2299 return attributes[1];
2302 /* This is used in length attribute of the and_shl_scratch instruction. */
2305 shl_and_scr_length (rtx insn)
2307 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2308 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2309 rtx op = XEXP (set_src, 0);
2310 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2311 op = XEXP (XEXP (op, 0), 0);
2312 return len + shift_insns[INTVAL (XEXP (op, 1))];
2315 /* Generate rtl for instructions for which shl_and_kind advised a particular
2316 method of generating them, i.e. returned zero. */
2319 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2321 int attributes[3];
2322 unsigned HOST_WIDE_INT mask;
2323 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2324 int right, total_shift;
2325 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2327 right = attributes[0];
2328 total_shift = INTVAL (left_rtx) + right;
2329 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2330 switch (kind)
2332 default:
2333 return -1;
2334 case 1:
2336 int first = attributes[2];
2337 rtx operands[3];
2339 if (first < 0)
2341 emit_insn ((mask << right) <= 0xff
2342 ? gen_zero_extendqisi2 (dest,
2343 gen_lowpart (QImode, source))
2344 : gen_zero_extendhisi2 (dest,
2345 gen_lowpart (HImode, source)));
2346 source = dest;
2348 if (source != dest)
2349 emit_insn (gen_movsi (dest, source));
2350 operands[0] = dest;
2351 if (right)
2353 operands[2] = GEN_INT (right);
2354 gen_shifty_hi_op (LSHIFTRT, operands);
2356 if (first > 0)
2358 operands[2] = GEN_INT (first);
2359 gen_shifty_hi_op (ASHIFT, operands);
2360 total_shift -= first;
2361 mask <<= first;
2363 if (first >= 0)
2364 emit_insn (mask <= 0xff
2365 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
2366 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2367 if (total_shift > 0)
2369 operands[2] = GEN_INT (total_shift);
2370 gen_shifty_hi_op (ASHIFT, operands);
2372 break;
2374 case 4:
2375 shift_gen_fun = gen_shifty_op;
2376 case 3:
2377 /* If the topmost bit that matters is set, set the topmost bits
2378 that don't matter. This way, we might be able to get a shorter
2379 signed constant. */
2380 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
2381 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
2382 case 2:
2383 /* Don't expand fine-grained when combining, because that will
2384 make the pattern fail. */
2385 if (currently_expanding_to_rtl
2386 || reload_in_progress || reload_completed)
2388 rtx operands[3];
2390 /* Cases 3 and 4 should be handled by this split
2391 only while combining */
2392 if (kind > 2)
2393 abort ();
2394 if (right)
2396 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2397 source = dest;
2399 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
2400 if (total_shift)
2402 operands[0] = dest;
2403 operands[1] = dest;
2404 operands[2] = GEN_INT (total_shift);
2405 shift_gen_fun (ASHIFT, operands);
2407 break;
2409 else
2411 int neg = 0;
2412 if (kind != 4 && total_shift < 16)
2414 neg = -ext_shift_amounts[total_shift][1];
2415 if (neg > 0)
2416 neg -= ext_shift_amounts[total_shift][2];
2417 else
2418 neg = 0;
2420 emit_insn (gen_and_shl_scratch (dest, source,
2421 GEN_INT (right),
2422 GEN_INT (mask),
2423 GEN_INT (total_shift + neg),
2424 GEN_INT (neg)));
2425 emit_insn (gen_movsi (dest, dest));
2426 break;
2429 return 0;
2432 /* Try to find a good way to implement the combiner pattern
2433 [(set (match_operand:SI 0 "register_operand" "=r")
2434 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2435 (match_operand:SI 2 "const_int_operand" "n")
2436 (match_operand:SI 3 "const_int_operand" "n")
2437 (const_int 0)))
2438 (clobber (reg:SI T_REG))]
2439 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2440 return 0 for simple left / right shift combination.
2441 return 1 for left shift / 8 bit sign extend / left shift.
2442 return 2 for left shift / 16 bit sign extend / left shift.
2443 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2444 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2445 return 5 for left shift / 16 bit sign extend / right shift
2446 return 6 for < 8 bit sign extend / left shift.
2447 return 7 for < 8 bit sign extend / left shift / single right shift.
2448 If COSTP is nonzero, assign the calculated cost to *COSTP. */
2451 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
2453 int left, size, insize, ext;
2454 int cost = 0, best_cost;
2455 int kind;
2457 left = INTVAL (left_rtx);
2458 size = INTVAL (size_rtx);
2459 insize = size - left;
2460 if (insize <= 0)
2461 abort ();
2462 /* Default to left / right shift. */
2463 kind = 0;
2464 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
2465 if (size <= 16)
2467 /* 16 bit shift / sign extend / 16 bit shift */
2468 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2469 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2470 below, by alternative 3 or something even better. */
2471 if (cost < best_cost)
2473 kind = 5;
2474 best_cost = cost;
2477 /* Try a plain sign extend between two shifts. */
2478 for (ext = 16; ext >= insize; ext -= 8)
2480 if (ext <= size)
2482 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2483 if (cost < best_cost)
2485 kind = ext / (unsigned) 8;
2486 best_cost = cost;
2489 /* Check if we can do a sloppy shift with a final signed shift
2490 restoring the sign. */
2491 if (EXT_SHIFT_SIGNED (size - ext))
2492 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2493 /* If not, maybe it's still cheaper to do the second shift sloppy,
2494 and do a final sign extend? */
2495 else if (size <= 16)
2496 cost = ext_shift_insns[ext - insize] + 1
2497 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2498 else
2499 continue;
2500 if (cost < best_cost)
2502 kind = ext / (unsigned) 8 + 2;
2503 best_cost = cost;
2506 /* Check if we can sign extend in r0 */
2507 if (insize < 8)
2509 cost = 3 + shift_insns[left];
2510 if (cost < best_cost)
2512 kind = 6;
2513 best_cost = cost;
2515 /* Try the same with a final signed shift. */
2516 if (left < 31)
2518 cost = 3 + ext_shift_insns[left + 1] + 1;
2519 if (cost < best_cost)
2521 kind = 7;
2522 best_cost = cost;
2526 if (TARGET_SH3)
2528 /* Try to use a dynamic shift. */
2529 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2530 if (cost < best_cost)
2532 kind = 0;
2533 best_cost = cost;
2536 if (costp)
2537 *costp = cost;
2538 return kind;
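/* Reference semantics, kept out of the build, for the default kind 0
   above: the left shift / arithmetic right shift pair recovers the
   sign-extracted field.  The sketch assumes a 32-bit int and an
   arithmetic right shift of signed values (implementation-defined in
   ISO C, but what the SH patterns provide); shl_sext_kind0_reference is
   a hypothetical name used only here.  */
#if 0
static int
shl_sext_kind0_reference (int x, int left, int size)
{
  int insize = size - left;

  /* Corresponds to shift_insns[32 - insize] + ashiftrt_insns[32 - size]:
     move the field to the top, then shift it back arithmetically so its
     sign bit is replicated.  */
  return (int) ((unsigned int) x << (32 - insize)) >> (32 - size);
}
#endif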
2541 /* Function to be used in the length attribute of the instructions
2542 implementing this pattern. */
2545 shl_sext_length (rtx insn)
2547 rtx set_src, left_rtx, size_rtx;
2548 int cost;
2550 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2551 left_rtx = XEXP (XEXP (set_src, 0), 1);
2552 size_rtx = XEXP (set_src, 1);
2553 shl_sext_kind (left_rtx, size_rtx, &cost);
2554 return cost;
2557 /* Generate rtl for this pattern. */
2560 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
2562 int kind;
2563 int left, size, insize, cost;
2564 rtx operands[3];
2566 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2567 left = INTVAL (left_rtx);
2568 size = INTVAL (size_rtx);
2569 insize = size - left;
2570 switch (kind)
2572 case 1:
2573 case 2:
2574 case 3:
2575 case 4:
2577 int ext = kind & 1 ? 8 : 16;
2578 int shift2 = size - ext;
2580 /* Don't expand fine-grained when combining, because that will
2581 make the pattern fail. */
2582 if (! currently_expanding_to_rtl
2583 && ! reload_in_progress && ! reload_completed)
2585 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2586 emit_insn (gen_movsi (dest, source));
2587 break;
2589 if (dest != source)
2590 emit_insn (gen_movsi (dest, source));
2591 operands[0] = dest;
2592 if (ext - insize)
2594 operands[2] = GEN_INT (ext - insize);
2595 gen_shifty_hi_op (ASHIFT, operands);
2597 emit_insn (kind & 1
2598 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2599 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2600 if (kind <= 2)
2602 if (shift2)
2604 operands[2] = GEN_INT (shift2);
2605 gen_shifty_op (ASHIFT, operands);
2608 else
2610 if (shift2 > 0)
2612 if (EXT_SHIFT_SIGNED (shift2))
2614 operands[2] = GEN_INT (shift2 + 1);
2615 gen_shifty_op (ASHIFT, operands);
2616 operands[2] = const1_rtx;
2617 gen_shifty_op (ASHIFTRT, operands);
2618 break;
2620 operands[2] = GEN_INT (shift2);
2621 gen_shifty_hi_op (ASHIFT, operands);
2623 else if (shift2)
2625 operands[2] = GEN_INT (-shift2);
2626 gen_shifty_hi_op (LSHIFTRT, operands);
2628 emit_insn (size <= 8
2629 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2630 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2632 break;
2634 case 5:
2636 int i = 16 - size;
2637 if (! currently_expanding_to_rtl
2638 && ! reload_in_progress && ! reload_completed)
2639 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2640 else
2642 operands[0] = dest;
2643 operands[2] = GEN_INT (16 - insize);
2644 gen_shifty_hi_op (ASHIFT, operands);
2645 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2647 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2648 while (--i >= 0)
2649 gen_ashift (ASHIFTRT, 1, dest);
2650 break;
2652 case 6:
2653 case 7:
2654 /* Don't expand fine-grained when combining, because that will
2655 make the pattern fail. */
2656 if (! currently_expanding_to_rtl
2657 && ! reload_in_progress && ! reload_completed)
2659 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2660 emit_insn (gen_movsi (dest, source));
2661 break;
2663 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2664 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2665 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2666 operands[0] = dest;
2667 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2668 gen_shifty_op (ASHIFT, operands);
2669 if (kind == 7)
2670 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
2671 break;
2672 default:
2673 return -1;
2675 return 0;
2678 /* Prefix a symbol_ref name with "datalabel". */
2681 gen_datalabel_ref (rtx sym)
2683 if (GET_CODE (sym) == LABEL_REF)
2684 return gen_rtx_CONST (GET_MODE (sym),
2685 gen_rtx_UNSPEC (GET_MODE (sym),
2686 gen_rtvec (1, sym),
2687 UNSPEC_DATALABEL));
2689 if (GET_CODE (sym) != SYMBOL_REF)
2690 abort ();
2692 return sym;
2696 /* The SH cannot load a large constant into a register; constants have to
2697 come from a pc relative load. The reference of a pc relative load
2698 instruction must be less than 1k in front of the instruction. This
2699 means that we often have to dump a constant inside a function, and
2700 generate code to branch around it.
2702 It is important to minimize this, since the branches will slow things
2703 down and make things bigger.
2705 Worst case code looks like:
2707 mov.l L1,rn
2708 bra L2
2710 align
2711 L1: .long value
2715 mov.l L3,rn
2716 bra L4
2718 align
2719 L3: .long value
2723 We fix this by performing a scan before scheduling, which notices which
2724 instructions need to have their operands fetched from the constant table
2725 and builds the table.
2727 The algorithm is:
2729 scan, find an instruction which needs a pcrel move. Look forward, find the
2730 last barrier which is within MAX_COUNT bytes of the requirement.
2731 If there isn't one, make one. Process all the instructions between
2732 the insn we found and the barrier.
2734 In the above example, we can tell that L3 is within 1k of L1, so
2735 the first move can be shrunk from the 3 insn+constant sequence into
2736 just 1 insn, and the constant moved to L3 to make:
2738 mov.l L1,rn
2740 mov.l L3,rn
2741 bra L4
2743 align
2744 L3:.long value
2745 L4:.long value
2747 Then the second move becomes the target for the shortening process. */
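/* A condensed sketch, illustrative only and kept out of the build, of how
   the helpers below cooperate; the real driver is the machine dependent
   reorg pass, which also handles movas, register liveness and many other
   details omitted here.  constant_pool_sketch is a hypothetical name used
   only for this sketch.  */
#if 0
static void
constant_pool_sketch (rtx first)
{
  rtx insn;

  for (insn = first; insn; insn = NEXT_INSN (insn))
    if (broken_move (insn))
      {
	/* Find (or synthesize) a barrier close enough to hold the table;
	   each broken move up to it then gets its constant entered into
	   the pool via add_constant and is rewritten as a load from the
	   returned label, and the table is emitted after the barrier.  */
	rtx barrier = find_barrier (0, NULL_RTX, insn);

	dump_table (NULL_RTX, barrier);
      }
}
#endif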
2749 typedef struct
2751 rtx value; /* Value in table. */
2752 rtx label; /* Label of value. */
2753 rtx wend; /* End of window. */
2754 enum machine_mode mode; /* Mode of value. */
2756 /* True if this constant is accessed as part of a post-increment
2757 sequence. Note that HImode constants are never accessed in this way. */
2758 bool part_of_sequence_p;
2759 } pool_node;
2761 /* The maximum number of constants that can fit into one pool, since
2762 the pc relative range is 0...1020 bytes and constants are at least 4
2763 bytes long. */
2765 #define MAX_POOL_SIZE (1020/4)
2766 static pool_node pool_vector[MAX_POOL_SIZE];
2767 static int pool_size;
2768 static rtx pool_window_label;
2769 static int pool_window_last;
2771 /* ??? If we need a constant in HImode which is the truncated value of a
2772 constant we need in SImode, we could combine the two entries thus saving
2773 two bytes. Is this common enough to be worth the effort of implementing
2774 it? */
2776 /* ??? This stuff should be done at the same time that we shorten branches.
2777 As it is now, we must assume that all branches are the maximum size, and
2778 this causes us to almost always output constant pools sooner than
2779 necessary. */
2781 /* Add a constant to the pool and return its label. */
2783 static rtx
2784 add_constant (rtx x, enum machine_mode mode, rtx last_value)
2786 int i;
2787 rtx lab, new, ref, newref;
2789 /* First see if we've already got it. */
2790 for (i = 0; i < pool_size; i++)
2792 if (x->code == pool_vector[i].value->code
2793 && mode == pool_vector[i].mode)
2795 if (x->code == CODE_LABEL)
2797 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
2798 continue;
2800 if (rtx_equal_p (x, pool_vector[i].value))
2802 lab = new = 0;
2803 if (! last_value
2804 || ! i
2805 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
2807 new = gen_label_rtx ();
2808 LABEL_REFS (new) = pool_vector[i].label;
2809 pool_vector[i].label = lab = new;
2811 if (lab && pool_window_label)
2813 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2814 ref = pool_vector[pool_window_last].wend;
2815 LABEL_NEXTREF (newref) = ref;
2816 pool_vector[pool_window_last].wend = newref;
2818 if (new)
2819 pool_window_label = new;
2820 pool_window_last = i;
2821 return lab;
2826 /* Need a new one. */
2827 pool_vector[pool_size].value = x;
2828 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
2830 lab = 0;
2831 pool_vector[pool_size - 1].part_of_sequence_p = true;
2833 else
2834 lab = gen_label_rtx ();
2835 pool_vector[pool_size].mode = mode;
2836 pool_vector[pool_size].label = lab;
2837 pool_vector[pool_size].wend = NULL_RTX;
2838 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
2839 if (lab && pool_window_label)
2841 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2842 ref = pool_vector[pool_window_last].wend;
2843 LABEL_NEXTREF (newref) = ref;
2844 pool_vector[pool_window_last].wend = newref;
2846 if (lab)
2847 pool_window_label = lab;
2848 pool_window_last = pool_size;
2849 pool_size++;
2850 return lab;
2853 /* Output the literal table. START, if nonzero, is the first instruction
2854 this table is needed for, and also indicates that there is at least one
2855 casesi_worker_2 instruction; we have to emit the operand3 labels from
2856 these insns at a 4-byte aligned position. BARRIER is the barrier
2857 after which we are to place the table. */
2859 static void
2860 dump_table (rtx start, rtx barrier)
2862 rtx scan = barrier;
2863 int i;
2864 int need_align = 1;
2865 rtx lab, ref;
2866 int have_df = 0;
2868 /* Do two passes, first time dump out the HI sized constants. */
2870 for (i = 0; i < pool_size; i++)
2872 pool_node *p = &pool_vector[i];
2874 if (p->mode == HImode)
2876 if (need_align)
2878 scan = emit_insn_after (gen_align_2 (), scan);
2879 need_align = 0;
2881 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2882 scan = emit_label_after (lab, scan);
2883 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
2884 scan);
2885 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2887 lab = XEXP (ref, 0);
2888 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2891 else if (p->mode == DFmode)
2892 have_df = 1;
2895 need_align = 1;
2897 if (start)
2899 scan = emit_insn_after (gen_align_4 (), scan);
2900 need_align = 0;
2901 for (; start != barrier; start = NEXT_INSN (start))
2902 if (GET_CODE (start) == INSN
2903 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
2905 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
2906 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
2908 scan = emit_label_after (lab, scan);
2911 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
2913 rtx align_insn = NULL_RTX;
2915 scan = emit_label_after (gen_label_rtx (), scan);
2916 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2917 need_align = 0;
2919 for (i = 0; i < pool_size; i++)
2921 pool_node *p = &pool_vector[i];
2923 switch (p->mode)
2925 case HImode:
2926 break;
2927 case SImode:
2928 case SFmode:
2929 if (align_insn && !p->part_of_sequence_p)
2931 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2932 emit_label_before (lab, align_insn);
2933 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
2934 align_insn);
2935 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2937 lab = XEXP (ref, 0);
2938 emit_insn_before (gen_consttable_window_end (lab),
2939 align_insn);
2941 delete_insn (align_insn);
2942 align_insn = NULL_RTX;
2943 continue;
2945 else
2947 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2948 scan = emit_label_after (lab, scan);
2949 scan = emit_insn_after (gen_consttable_4 (p->value,
2950 const0_rtx), scan);
2951 need_align = ! need_align;
2953 break;
2954 case DFmode:
2955 if (need_align)
2957 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2958 align_insn = scan;
2959 need_align = 0;
2961 case DImode:
2962 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2963 scan = emit_label_after (lab, scan);
2964 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2965 scan);
2966 break;
2967 default:
2968 abort ();
2969 break;
2972 if (p->mode != HImode)
2974 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2976 lab = XEXP (ref, 0);
2977 scan = emit_insn_after (gen_consttable_window_end (lab),
2978 scan);
2983 pool_size = 0;
2986 for (i = 0; i < pool_size; i++)
2988 pool_node *p = &pool_vector[i];
2990 switch (p->mode)
2992 case HImode:
2993 break;
2994 case SImode:
2995 case SFmode:
2996 if (need_align)
2998 need_align = 0;
2999 scan = emit_label_after (gen_label_rtx (), scan);
3000 scan = emit_insn_after (gen_align_4 (), scan);
3002 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3003 scan = emit_label_after (lab, scan);
3004 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
3005 scan);
3006 break;
3007 case DFmode:
3008 case DImode:
3009 if (need_align)
3011 need_align = 0;
3012 scan = emit_label_after (gen_label_rtx (), scan);
3013 scan = emit_insn_after (gen_align_4 (), scan);
3015 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3016 scan = emit_label_after (lab, scan);
3017 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3018 scan);
3019 break;
3020 default:
3021 abort ();
3022 break;
3025 if (p->mode != HImode)
3027 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3029 lab = XEXP (ref, 0);
3030 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3035 scan = emit_insn_after (gen_consttable_end (), scan);
3036 scan = emit_barrier_after (scan);
3037 pool_size = 0;
3038 pool_window_label = NULL_RTX;
3039 pool_window_last = 0;
3042 /* Return nonzero if constant would be an ok source for a
3043 mov.w instead of a mov.l. */
3045 static int
3046 hi_const (rtx src)
3048 return (GET_CODE (src) == CONST_INT
3049 && INTVAL (src) >= -32768
3050 && INTVAL (src) <= 32767);
3053 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3055 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
3056 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
3057 need to fix it if the input value is CONST_OK_FOR_I08. */
3059 static int
3060 broken_move (rtx insn)
3062 if (GET_CODE (insn) == INSN)
3064 rtx pat = PATTERN (insn);
3065 if (GET_CODE (pat) == PARALLEL)
3066 pat = XVECEXP (pat, 0, 0);
3067 if (GET_CODE (pat) == SET
3068 /* We can load any 8 bit value if we don't care what the high
3069 order bits end up as. */
3070 && GET_MODE (SET_DEST (pat)) != QImode
3071 && (CONSTANT_P (SET_SRC (pat))
3072 /* Match mova_const. */
3073 || (GET_CODE (SET_SRC (pat)) == UNSPEC
3074 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3075 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
3076 && ! (TARGET_SH2E
3077 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3078 && (fp_zero_operand (SET_SRC (pat))
3079 || fp_one_operand (SET_SRC (pat)))
3080 /* ??? If this is a -m4 or -m4-single compilation, in general
3081 we don't know the current setting of fpscr, so disable fldi.
3082 There is an exception if this was a register-register move
3083 before reload - and hence it was ascertained that we have
3084 single precision setting - and in a post-reload optimization
3085 we changed this to do a constant load. In that case
3086 we don't have an r0 clobber, hence we must use fldi. */
3087 && (! TARGET_SH4 || TARGET_FMOVD
3088 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3089 == SCRATCH))
3090 && GET_CODE (SET_DEST (pat)) == REG
3091 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3092 && ! (TARGET_SH2A
3093 && GET_MODE (SET_DEST (pat)) == SImode
3094 && GET_CODE (SET_SRC (pat)) == CONST_INT
3095 && CONST_OK_FOR_I20 (INTVAL (SET_SRC (pat))))
3096 && (GET_CODE (SET_SRC (pat)) != CONST_INT
3097 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
3098 return 1;
3101 return 0;
3104 static int
3105 mova_p (rtx insn)
3107 return (GET_CODE (insn) == INSN
3108 && GET_CODE (PATTERN (insn)) == SET
3109 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3110 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3111 /* Don't match mova_const. */
3112 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
3115 /* Fix up a mova from a switch that went out of range. */
3116 static void
3117 fixup_mova (rtx mova)
3119 if (! flag_pic)
3121 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3122 INSN_CODE (mova) = -1;
3124 else
3126 rtx worker = mova;
3127 rtx lab = gen_label_rtx ();
3128 rtx wpat, wpat0, wpat1, wsrc, diff;
3132 worker = NEXT_INSN (worker);
3133 if (! worker
3134 || GET_CODE (worker) == CODE_LABEL
3135 || GET_CODE (worker) == JUMP_INSN)
3136 abort ();
3137 } while (recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3138 wpat = PATTERN (worker);
3139 wpat0 = XVECEXP (wpat, 0, 0);
3140 wpat1 = XVECEXP (wpat, 0, 1);
3141 wsrc = SET_SRC (wpat0);
3142 PATTERN (worker) = (gen_casesi_worker_2
3143 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3144 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3145 XEXP (wpat1, 0)));
3146 INSN_CODE (worker) = -1;
3147 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3148 gen_rtx_LABEL_REF (Pmode, lab));
3149 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3150 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3151 INSN_CODE (mova) = -1;
3155 /* Find the last barrier from insn FROM which is close enough to hold the
3156 constant pool. If we can't find one, then create one near the end of
3157 the range. */
3159 static rtx
3160 find_barrier (int num_mova, rtx mova, rtx from)
3162 int count_si = 0;
3163 int count_hi = 0;
3164 int found_hi = 0;
3165 int found_si = 0;
3166 int found_di = 0;
3167 int hi_align = 2;
3168 int si_align = 2;
3169 int leading_mova = num_mova;
3170 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3171 int si_limit;
3172 int hi_limit;
3174 /* For HImode: range is 510, add 4 because pc counts from address of
3175 second instruction after this one, subtract 2 for the jump instruction
3176 that we may need to emit before the table, subtract 2 for the instruction
3177 that fills the jump delay slot (in very rare cases, reorg will take an
3178 instruction from after the constant pool or will leave the delay slot
3179 empty). This gives 510.
3180 For SImode: range is 1020, add 4 because pc counts from address of
3181 second instruction after this one, subtract 2 in case pc is 2 byte
3182 aligned, subtract 2 for the jump instruction that we may need to emit
3183 before the table, subtract 2 for the instruction that fills the jump
3184 delay slot. This gives 1018. */
3186 /* The branch will always be shortened now that the reference address for
3187 forward branches is the successor address, thus we need no longer make
3188 adjustments to the [sh]i_limit for -O0. */
3190 si_limit = 1018;
3191 hi_limit = 510;
3193 while (from && count_si < si_limit && count_hi < hi_limit)
3195 int inc = get_attr_length (from);
3196 int new_align = 1;
3198 if (GET_CODE (from) == CODE_LABEL)
3200 if (optimize)
3201 new_align = 1 << label_to_alignment (from);
3202 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3203 new_align = 1 << barrier_align (from);
3204 else
3205 new_align = 1;
3206 inc = 0;
3209 if (GET_CODE (from) == BARRIER)
3212 found_barrier = from;
3214 /* If we are at the end of the function, or in front of an alignment
3215 instruction, we need not insert an extra alignment. We prefer
3216 this kind of barrier. */
3217 if (barrier_align (from) > 2)
3218 good_barrier = from;
3221 if (broken_move (from))
3223 rtx pat, src, dst;
3224 enum machine_mode mode;
3226 pat = PATTERN (from);
3227 if (GET_CODE (pat) == PARALLEL)
3228 pat = XVECEXP (pat, 0, 0);
3229 src = SET_SRC (pat);
3230 dst = SET_DEST (pat);
3231 mode = GET_MODE (dst);
3233 /* We must explicitly check the mode, because sometimes the
3234 front end will generate code to load unsigned constants into
3235 HImode targets without properly sign extending them. */
3236 if (mode == HImode
3237 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3239 found_hi += 2;
3240 /* We put the short constants before the long constants, so
3241 we must count the length of short constants in the range
3242 for the long constants. */
3243 /* ??? This isn't optimal, but is easy to do. */
3244 si_limit -= 2;
3246 else
3248 /* We dump DF/DI constants before SF/SI ones, because
3249 the limit is the same, but the alignment requirements
3250 are higher. We may waste up to 4 additional bytes
3251 for alignment, and the DF/DI constant may have
3252 another SF/SI constant placed before it. */
3253 if (TARGET_SHCOMPACT
3254 && ! found_di
3255 && (mode == DFmode || mode == DImode))
3257 found_di = 1;
3258 si_limit -= 8;
3260 while (si_align > 2 && found_si + si_align - 2 > count_si)
3261 si_align >>= 1;
3262 if (found_si > count_si)
3263 count_si = found_si;
3264 found_si += GET_MODE_SIZE (mode);
3265 if (num_mova)
3266 si_limit -= GET_MODE_SIZE (mode);
3269 /* See the code in machine_dependent_reorg, which has a similar if
3270 statement that generates a new mova insn in many cases. */
3271 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
3272 inc += 2;
3275 if (mova_p (from))
3277 if (! num_mova++)
3279 leading_mova = 0;
3280 mova = from;
3281 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
3283 if (found_si > count_si)
3284 count_si = found_si;
3286 else if (GET_CODE (from) == JUMP_INSN
3287 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3288 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3290 if (num_mova)
3291 num_mova--;
3292 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3294 /* We have just passed the barrier in front of the
3295 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3296 the ADDR_DIFF_VEC is accessed as data, just like our pool
3297 constants, this is a good opportunity to accommodate what
3298 we have gathered so far.
3299 If we waited any longer, we could end up at a barrier in
3300 front of code, which gives worse cache usage for separated
3301 instruction / data caches. */
3302 good_barrier = found_barrier;
3303 break;
3305 else
3307 rtx body = PATTERN (from);
3308 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
3311 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3312 else if (GET_CODE (from) == JUMP_INSN
3313 && ! TARGET_SH2
3314 && ! TARGET_SMALLCODE)
3315 new_align = 4;
3317 if (found_si)
3319 count_si += inc;
3320 if (new_align > si_align)
3322 si_limit -= (count_si - 1) & (new_align - si_align);
3323 si_align = new_align;
3325 count_si = (count_si + new_align - 1) & -new_align;
3327 if (found_hi)
3329 count_hi += inc;
3330 if (new_align > hi_align)
3332 hi_limit -= (count_hi - 1) & (new_align - hi_align);
3333 hi_align = new_align;
3335 count_hi = (count_hi + new_align - 1) & -new_align;
3337 from = NEXT_INSN (from);
3340 if (num_mova)
3342 if (leading_mova)
3344 /* Try as we might, the leading mova is out of range. Change
3345 it into a load (which will become a pcload) and retry. */
3346 fixup_mova (mova);
3347 return find_barrier (0, 0, mova);
3349 else
3351 /* Insert the constant pool table before the mova instruction,
3352 to prevent the mova label reference from going out of range. */
3353 from = mova;
3354 good_barrier = found_barrier = barrier_before_mova;
3358 if (found_barrier)
3360 if (good_barrier && next_real_insn (found_barrier))
3361 found_barrier = good_barrier;
3363 else
3365 /* We didn't find a barrier in time to dump our stuff,
3366 so we'll make one. */
3367 rtx label = gen_label_rtx ();
3369 /* If we exceeded the range, then we must back up over the last
3370 instruction we looked at. Otherwise, we just need to undo the
3371 NEXT_INSN at the end of the loop. */
3372 if (count_hi > hi_limit || count_si > si_limit)
3373 from = PREV_INSN (PREV_INSN (from));
3374 else
3375 from = PREV_INSN (from);
3377 /* Walk back to be just before any jump or label.
3378 Putting it before a label reduces the number of times the branch
3379 around the constant pool table will be hit. Putting it before
3380 a jump makes it more likely that the bra delay slot will be
3381 filled. */
3382 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
3383 || GET_CODE (from) == CODE_LABEL)
3384 from = PREV_INSN (from);
3386 from = emit_jump_insn_after (gen_jump (label), from);
3387 JUMP_LABEL (from) = label;
3388 LABEL_NUSES (label) = 1;
3389 found_barrier = emit_barrier_after (from);
3390 emit_label_after (label, found_barrier);
3393 return found_barrier;
3396 /* If the instruction INSN is implemented by a special function, and we can
3397 positively find the register that is used to call the sfunc, and this
3398 register is not used anywhere else in this instruction - except as the
3399 destination of a set, return this register; else, return 0. */
3401 sfunc_uses_reg (rtx insn)
3403 int i;
3404 rtx pattern, part, reg_part, reg;
3406 if (GET_CODE (insn) != INSN)
3407 return 0;
3408 pattern = PATTERN (insn);
3409 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
3410 return 0;
3412 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3414 part = XVECEXP (pattern, 0, i);
3415 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
3416 reg_part = part;
3418 if (! reg_part)
3419 return 0;
3420 reg = XEXP (reg_part, 0);
3421 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
3423 part = XVECEXP (pattern, 0, i);
3424 if (part == reg_part || GET_CODE (part) == CLOBBER)
3425 continue;
3426 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
3427 && GET_CODE (SET_DEST (part)) == REG)
3428 ? SET_SRC (part) : part)))
3429 return 0;
3431 return reg;
3434 /* See if the only way in which INSN uses REG is by calling it, or by
3435 setting it while calling it. Set *SET to a SET rtx if the register
3436 is set by INSN. */
3438 static int
3439 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
3441 rtx pattern, reg2;
3443 *set = NULL_RTX;
3445 reg2 = sfunc_uses_reg (insn);
3446 if (reg2 && REGNO (reg2) == REGNO (reg))
3448 pattern = single_set (insn);
3449 if (pattern
3450 && GET_CODE (SET_DEST (pattern)) == REG
3451 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3452 *set = pattern;
3453 return 0;
3455 if (GET_CODE (insn) != CALL_INSN)
3457 /* We don't use rtx_equal_p because we don't care if the mode is
3458 different. */
3459 pattern = single_set (insn);
3460 if (pattern
3461 && GET_CODE (SET_DEST (pattern)) == REG
3462 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3464 rtx par, part;
3465 int i;
3467 *set = pattern;
3468 par = PATTERN (insn);
3469 if (GET_CODE (par) == PARALLEL)
3470 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
3472 part = XVECEXP (par, 0, i);
3473 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
3474 return 1;
3476 return reg_mentioned_p (reg, SET_SRC (pattern));
3479 return 1;
3482 pattern = PATTERN (insn);
3484 if (GET_CODE (pattern) == PARALLEL)
3486 int i;
3488 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3489 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
3490 return 1;
3491 pattern = XVECEXP (pattern, 0, 0);
3494 if (GET_CODE (pattern) == SET)
3496 if (reg_mentioned_p (reg, SET_DEST (pattern)))
3498 /* We don't use rtx_equal_p, because we don't care if the
3499 mode is different. */
3500 if (GET_CODE (SET_DEST (pattern)) != REG
3501 || REGNO (reg) != REGNO (SET_DEST (pattern)))
3502 return 1;
3504 *set = pattern;
3507 pattern = SET_SRC (pattern);
3510 if (GET_CODE (pattern) != CALL
3511 || GET_CODE (XEXP (pattern, 0)) != MEM
3512 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
3513 return 1;
3515 return 0;
3518 /* Given X, a pattern of an insn or a part of it, return a mask of used
3519 general registers. Bits 0..15 mean that the respective registers
3520 are used as inputs in the instruction. Bits 16..31 mean that the
3521 registers 0..15, respectively, are used as outputs, or are clobbered.
3522 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
3524 regs_used (rtx x, int is_dest)
3526 enum rtx_code code;
3527 const char *fmt;
3528 int i, used = 0;
3530 if (! x)
3531 return used;
3532 code = GET_CODE (x);
3533 switch (code)
3535 case REG:
3536 if (REGNO (x) < 16)
3537 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3538 << (REGNO (x) + is_dest));
3539 return 0;
3540 case SUBREG:
3542 rtx y = SUBREG_REG (x);
3544 if (GET_CODE (y) != REG)
3545 break;
3546 if (REGNO (y) < 16)
3547 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3548 << (REGNO (y) +
3549 subreg_regno_offset (REGNO (y),
3550 GET_MODE (y),
3551 SUBREG_BYTE (x),
3552 GET_MODE (x)) + is_dest));
3553 return 0;
3555 case SET:
3556 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
3557 case RETURN:
3558 /* If there was a return value, it must have been indicated with USE. */
3559 return 0x00ffff00;
3560 case CLOBBER:
3561 is_dest = 1;
3562 break;
3563 case MEM:
3564 is_dest = 0;
3565 break;
3566 case CALL:
3567 used |= 0x00ff00f0;
3568 break;
3569 default:
3570 break;
3573 fmt = GET_RTX_FORMAT (code);
3575 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3577 if (fmt[i] == 'E')
3579 register int j;
3580 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3581 used |= regs_used (XVECEXP (x, i, j), is_dest);
3583 else if (fmt[i] == 'e')
3584 used |= regs_used (XEXP (x, i), is_dest);
3586 return used;
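/* Worked example of the convention above: for
   (set (reg:SI 1) (plus:SI (reg:SI 2) (reg:SI 3)))
   the inputs contribute (1 << 2) | (1 << 3) and the destination
   contributes 1 << (1 + 16), giving 0x2000c, assuming SImode occupies a
   single hard register as it does on SH.  */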
3589 /* Create an instruction that prevents redirection of a conditional branch
3590 to the destination of the JUMP with address ADDR.
3591 If the branch needs to be implemented as an indirect jump, try to find
3592 a scratch register for it.
3593 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3594 If any preceding insn that doesn't fit into a delay slot is good enough,
3595 pass 1. Pass 2 if a definite blocking insn is needed.
3596 -1 is used internally to avoid deep recursion.
3597 If a blocking instruction is made or recognized, return it. */
3599 static rtx
3600 gen_block_redirect (rtx jump, int addr, int need_block)
3602 int dead = 0;
3603 rtx prev = prev_nonnote_insn (jump);
3604 rtx dest;
3606 /* First, check if we already have an instruction that satisfies our need. */
3607 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3609 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3610 return prev;
3611 if (GET_CODE (PATTERN (prev)) == USE
3612 || GET_CODE (PATTERN (prev)) == CLOBBER
3613 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3614 prev = jump;
3615 else if ((need_block &= ~1) < 0)
3616 return prev;
3617 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
3618 need_block = 0;
3620 if (GET_CODE (PATTERN (jump)) == RETURN)
3622 if (! need_block)
3623 return prev;
3624 /* Reorg even does nasty things with return insns that cause branches
3625 to go out of range - see find_end_label and callers. */
3626 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
3628 /* We can't use JUMP_LABEL here because it might be undefined
3629 when not optimizing. */
3630 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
3631 /* If the branch is out of range, try to find a scratch register for it. */
3632 if (optimize
3633 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3634 > 4092 + 4098))
3636 rtx scan;
3637 /* Don't look for the stack pointer as a scratch register,
3638 it would cause trouble if an interrupt occurred. */
3639 unsigned try = 0x7fff, used;
3640 int jump_left = flag_expensive_optimizations + 1;
3642 /* It is likely that the most recent eligible instruction is wanted for
3643 the delay slot. Therefore, find out which registers it uses, and
3644 try to avoid using them. */
3646 for (scan = jump; (scan = PREV_INSN (scan)); )
3648 enum rtx_code code;
3650 if (INSN_DELETED_P (scan))
3651 continue;
3652 code = GET_CODE (scan);
3653 if (code == CODE_LABEL || code == JUMP_INSN)
3654 break;
3655 if (code == INSN
3656 && GET_CODE (PATTERN (scan)) != USE
3657 && GET_CODE (PATTERN (scan)) != CLOBBER
3658 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
3660 try &= ~regs_used (PATTERN (scan), 0);
3661 break;
3664 for (used = dead = 0, scan = JUMP_LABEL (jump);
3665 (scan = NEXT_INSN (scan)); )
3667 enum rtx_code code;
3669 if (INSN_DELETED_P (scan))
3670 continue;
3671 code = GET_CODE (scan);
3672 if (INSN_P (scan))
3674 used |= regs_used (PATTERN (scan), 0);
3675 if (code == CALL_INSN)
3676 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
3677 dead |= (used >> 16) & ~used;
3678 if (dead & try)
3680 dead &= try;
3681 break;
3683 if (code == JUMP_INSN)
3685 if (jump_left-- && simplejump_p (scan))
3686 scan = JUMP_LABEL (scan);
3687 else
3688 break;
3692 /* Mask out the stack pointer again, in case it was
3693 the only 'free' register we have found. */
3694 dead &= 0x7fff;
3696 /* If the immediate destination is still in range, check for possible
3697 threading with a jump beyond the delay slot insn.
3698 Don't check if we are called recursively; the jump has been or will be
3699 checked in a different invocation in that case. */
3701 else if (optimize && need_block >= 0)
3703 rtx next = next_active_insn (next_active_insn (dest));
3704 if (next && GET_CODE (next) == JUMP_INSN
3705 && GET_CODE (PATTERN (next)) == SET
3706 && recog_memoized (next) == CODE_FOR_jump_compact)
3708 dest = JUMP_LABEL (next);
3709 if (dest
3710 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3711 > 4092 + 4098))
3712 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
3716 if (dead)
3718 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
3720 /* It would be nice if we could convert the jump into an indirect
3721 jump / far branch right now, and thus exposing all constituent
3722 instructions to further optimization. However, reorg uses
3723 simplejump_p to determine if there is an unconditional jump where
3724 it should try to schedule instructions from the target of the
3725 branch; simplejump_p fails for indirect jumps even if they have
3726 a JUMP_LABEL. */
3727 rtx insn = emit_insn_before (gen_indirect_jump_scratch
3728 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
3729 , jump);
3730 /* ??? We would like this to have the scope of the jump, but that
3731 scope will change when a delay slot insn of an inner scope is added.
3732 Hence, after delay slot scheduling, we'll have to expect
3733 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
3734 the jump. */
3736 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
3737 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
3738 return insn;
3740 else if (need_block)
3741 /* We can't use JUMP_LABEL here because it might be undefined
3742 when not optimizing. */
3743 return emit_insn_before (gen_block_branch_redirect
3744 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
3745 , jump);
3746 return prev;
3749 #define CONDJUMP_MIN -252
3750 #define CONDJUMP_MAX 262
3751 struct far_branch
3753 /* A label (to be placed) in front of the jump
3754 that jumps to our ultimate destination. */
3755 rtx near_label;
3756 /* Where we are going to insert it if we cannot move the jump any farther,
3757 or the jump itself if we have picked up an existing jump. */
3758 rtx insert_place;
3759 /* The ultimate destination. */
3760 rtx far_label;
3761 struct far_branch *prev;
3762 /* If the branch has already been created, its address;
3763 else the address of its first prospective user. */
3764 int address;
3767 static void gen_far_branch (struct far_branch *);
3768 enum mdep_reorg_phase_e mdep_reorg_phase;
3769 static void
3770 gen_far_branch (struct far_branch *bp)
3772 rtx insn = bp->insert_place;
3773 rtx jump;
3774 rtx label = gen_label_rtx ();
3776 emit_label_after (label, insn);
3777 if (bp->far_label)
3779 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
3780 LABEL_NUSES (bp->far_label)++;
3782 else
3783 jump = emit_jump_insn_after (gen_return (), insn);
3784 /* Emit a barrier so that reorg knows that any following instructions
3785 are not reachable via a fall-through path.
3786 But don't do this when not optimizing, since we wouldn't suppress the
3787 alignment for the barrier then, and could end up with out-of-range
3788 pc-relative loads. */
3789 if (optimize)
3790 emit_barrier_after (jump);
3791 emit_label_after (bp->near_label, insn);
3792 JUMP_LABEL (jump) = bp->far_label;
3793 if (! invert_jump (insn, label, 1))
3794 abort ();
3795 /* If we are branching around a jump (rather than a return), prevent
3796 reorg from using an insn from the jump target as the delay slot insn -
3797 when reorg did this, it pessimized code (we rather hide the delay slot)
3798 and it could cause branches to go out of range. */
3799 if (bp->far_label)
3800 (emit_insn_after
3801 (gen_stuff_delay_slot
3802 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
3803 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
3804 insn));
3805 /* Prevent reorg from undoing our splits. */
3806 gen_block_redirect (jump, bp->address += 2, 2);
3809 /* Fix up ADDR_DIFF_VECs. */
3810 void
3811 fixup_addr_diff_vecs (rtx first)
3813 rtx insn;
3815 for (insn = first; insn; insn = NEXT_INSN (insn))
3817 rtx vec_lab, pat, prev, prevpat, x, braf_label;
3819 if (GET_CODE (insn) != JUMP_INSN
3820 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
3821 continue;
3822 pat = PATTERN (insn);
3823 vec_lab = XEXP (XEXP (pat, 0), 0);
3825 /* Search the matching casesi_jump_2. */
3826 for (prev = vec_lab; ; prev = PREV_INSN (prev))
3828 if (GET_CODE (prev) != JUMP_INSN)
3829 continue;
3830 prevpat = PATTERN (prev);
3831 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
3832 continue;
3833 x = XVECEXP (prevpat, 0, 1);
3834 if (GET_CODE (x) != USE)
3835 continue;
3836 x = XEXP (x, 0);
3837 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
3838 break;
3840 /* FIXME: This is a bug in the optimizer, but it seems harmless
3841 to just avoid panicking. */
3842 if (!prev)
3843 continue;
3845 /* Emit the reference label of the braf where it belongs, right after
3846 the casesi_jump_2 (i.e. braf). */
3847 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
3848 emit_label_after (braf_label, prev);
3850 /* Fix up the ADDR_DIFF_VEC to be relative
3851 to the reference address of the braf. */
3852 XEXP (XEXP (pat, 0), 0) = braf_label;
3856 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
3857 a barrier. Return the base 2 logarithm of the desired alignment. */
3858 int
3859 barrier_align (rtx barrier_or_label)
3861 rtx next = next_real_insn (barrier_or_label), pat, prev;
3862 int slot, credit, jump_to_next = 0;
3864 if (! next)
3865 return 0;
3867 pat = PATTERN (next);
3869 if (GET_CODE (pat) == ADDR_DIFF_VEC)
3870 return 2;
3872 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
3873 /* This is a barrier in front of a constant table. */
3874 return 0;
3876 prev = prev_real_insn (barrier_or_label);
3877 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
3879 pat = PATTERN (prev);
3880 /* If this is a very small table, we want to keep the alignment after
3881 the table to the minimum for proper code alignment. */
3882 return ((TARGET_SMALLCODE
3883 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
3884 <= (unsigned) 1 << (CACHE_LOG - 2)))
3885 ? 1 << TARGET_SHMEDIA : align_jumps_log);
3888 if (TARGET_SMALLCODE)
3889 return 0;
3891 if (! TARGET_SH2 || ! optimize)
3892 return align_jumps_log;
3894 /* When fixing up pcloads, a constant table might be inserted just before
3895 the basic block that ends with the barrier. Thus, we can't trust the
3896 instruction lengths before that. */
3897 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
3899 /* Check if there is an immediately preceding branch to the insn beyond
3900 the barrier. We must weigh the cost of discarding useful information
3901 from the current cache line when executing this branch and there is
3902 an alignment, against that of fetching unneeded insns in front of the
3903 branch target when there is no alignment. */
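/* (Roughly: the loop below walks back through the preceding insns within
   a small cache-related byte budget (CREDIT); if the walk ends at a branch
   whose target is right after the barrier, the alignment is suppressed.)  */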
3905 /* There are two delay_slot cases to consider. One is the simple case
3906 where the preceding branch is to the insn beyond the barrier (simple
3907 delay slot filling), and the other is where the preceding branch has
3908 a delay slot that is a duplicate of the insn after the barrier
3909 (fill_eager_delay_slots) and the branch is to the insn after the insn
3910 after the barrier. */
3912 /* PREV is presumed to be the JUMP_INSN for the barrier under
3913 investigation. Skip to the insn before it. */
3914 prev = prev_real_insn (prev);
3916 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
3917 credit >= 0 && prev && GET_CODE (prev) == INSN;
3918 prev = prev_real_insn (prev))
3920 jump_to_next = 0;
3921 if (GET_CODE (PATTERN (prev)) == USE
3922 || GET_CODE (PATTERN (prev)) == CLOBBER)
3923 continue;
3924 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
3926 prev = XVECEXP (PATTERN (prev), 0, 1);
3927 if (INSN_UID (prev) == INSN_UID (next))
3929 /* Delay slot was filled with insn at jump target. */
3930 jump_to_next = 1;
3931 continue;
3935 if (slot &&
3936 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3937 slot = 0;
3938 credit -= get_attr_length (prev);
3940 if (prev
3941 && GET_CODE (prev) == JUMP_INSN
3942 && JUMP_LABEL (prev))
3944 rtx x;
3945 if (jump_to_next
3946 || next_real_insn (JUMP_LABEL (prev)) == next
3947 /* If relax_delay_slots() decides NEXT was redundant
3948 with some previous instruction, it will have
3949 redirected PREV's jump to the following insn. */
3950 || JUMP_LABEL (prev) == next_nonnote_insn (next)
3951 /* There is no upper bound on redundant instructions
3952 that might have been skipped, but we must not put an
3953 alignment where none had been before. */
3954 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
3955 (INSN_P (x)
3956 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
3957 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
3958 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
3960 rtx pat = PATTERN (prev);
3961 if (GET_CODE (pat) == PARALLEL)
3962 pat = XVECEXP (pat, 0, 0);
3963 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
3964 return 0;
3969 return align_jumps_log;
3972 /* If we are inside a phony loop, almost any kind of label can turn up as the
3973 first one in the loop. Aligning a braf label causes incorrect switch
3974 destination addresses; we can detect braf labels because they are
3975 followed by a BARRIER.
3976 Applying loop alignment to small constant or switch tables is a waste
3977 of space, so we suppress this too. */
3978 int
3979 sh_loop_align (rtx label)
3981 rtx next = label;
3983 do
3984 next = next_nonnote_insn (next);
3985 while (next && GET_CODE (next) == CODE_LABEL);
3987 if (! next
3988 || ! INSN_P (next)
3989 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
3990 || recog_memoized (next) == CODE_FOR_consttable_2)
3991 return 0;
3993 return align_loops_log;
3996 /* Do a final pass over the function, just before delayed branch
3997 scheduling. */
3999 static void
4000 sh_reorg (void)
4002 rtx first, insn, mova = NULL_RTX;
4003 int num_mova;
4004 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
4005 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
4007 first = get_insns ();
4009 /* We must split call insns before introducing `mova's. If we're
4010 optimizing, they'll have already been split. Otherwise, make
4011 sure we don't split them too late. */
4012 if (! optimize)
4013 split_all_insns_noflow ();
4015 if (TARGET_SHMEDIA)
4016 return;
4018 /* If relaxing, generate pseudo-ops to associate function calls with
4019 the symbols they call. It does no harm to not generate these
4020 pseudo-ops. However, when we can generate them, it enables the
4021 linker to potentially relax the jsr to a bsr, and eliminate the
4022 register load and, possibly, the constant pool entry. */
4024 mdep_reorg_phase = SH_INSERT_USES_LABELS;
4025 if (TARGET_RELAX)
4027 /* Remove all REG_LABEL notes. We want to use them for our own
4028 purposes. This works because none of the remaining passes
4029 need to look at them.
4031 ??? But it may break in the future. We should use a machine
4032 dependent REG_NOTE, or some other approach entirely. */
4033 for (insn = first; insn; insn = NEXT_INSN (insn))
4035 if (INSN_P (insn))
4037 rtx note;
4039 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
4040 remove_note (insn, note);
4044 for (insn = first; insn; insn = NEXT_INSN (insn))
4046 rtx pattern, reg, link, set, scan, dies, label;
4047 int rescan = 0, foundinsn = 0;
4049 if (GET_CODE (insn) == CALL_INSN)
4051 pattern = PATTERN (insn);
4053 if (GET_CODE (pattern) == PARALLEL)
4054 pattern = XVECEXP (pattern, 0, 0);
4055 if (GET_CODE (pattern) == SET)
4056 pattern = SET_SRC (pattern);
4058 if (GET_CODE (pattern) != CALL
4059 || GET_CODE (XEXP (pattern, 0)) != MEM)
4060 continue;
4062 reg = XEXP (XEXP (pattern, 0), 0);
4064 else
4066 reg = sfunc_uses_reg (insn);
4067 if (! reg)
4068 continue;
4071 if (GET_CODE (reg) != REG)
4072 continue;
4074 /* This is a function call via REG. If the only uses of REG
4075 between the time that it is set and the time that it dies
4076 are in function calls, then we can associate all the
4077 function calls with the setting of REG. */
4079 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
4081 if (REG_NOTE_KIND (link) != 0)
4082 continue;
4083 set = single_set (XEXP (link, 0));
4084 if (set && rtx_equal_p (reg, SET_DEST (set)))
4086 link = XEXP (link, 0);
4087 break;
4091 if (! link)
4093 /* ??? Sometimes global register allocation will have
4094 deleted the insn pointed to by LOG_LINKS. Try
4095 scanning backward to find where the register is set. */
4096 for (scan = PREV_INSN (insn);
4097 scan && GET_CODE (scan) != CODE_LABEL;
4098 scan = PREV_INSN (scan))
4100 if (! INSN_P (scan))
4101 continue;
4103 if (! reg_mentioned_p (reg, scan))
4104 continue;
4106 if (noncall_uses_reg (reg, scan, &set))
4107 break;
4109 if (set)
4111 link = scan;
4112 break;
4117 if (! link)
4118 continue;
4120 /* The register is set at LINK. */
4122 /* We can only optimize the function call if the register is
4123 being set to a symbol. In theory, we could sometimes
4124 optimize calls to a constant location, but the assembler
4125 and linker do not support that at present. */
4126 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4127 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4128 continue;
4130 /* Scan forward from LINK to the place where REG dies, and
4131 make sure that the only insns which use REG are
4132 themselves function calls. */
4134 /* ??? This doesn't work for call targets that were allocated
4135 by reload, since there may not be a REG_DEAD note for the
4136 register. */
4138 dies = NULL_RTX;
4139 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4141 rtx scanset;
4143 /* Don't try to trace forward past a CODE_LABEL if we haven't
4144 seen INSN yet. Ordinarily, we will only find the setting insn
4145 in LOG_LINKS if it is in the same basic block. However,
4146 cross-jumping can insert code labels in between the load and
4147 the call, and can result in situations where a single call
4148 insn may have two targets depending on where we came from. */
4150 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4151 break;
4153 if (! INSN_P (scan))
4154 continue;
4156 /* Don't try to trace forward past a JUMP. To optimize
4157 safely, we would have to check that all the
4158 instructions at the jump destination did not use REG. */
4160 if (GET_CODE (scan) == JUMP_INSN)
4161 break;
4163 if (! reg_mentioned_p (reg, scan))
4164 continue;
4166 if (noncall_uses_reg (reg, scan, &scanset))
4167 break;
4169 if (scan == insn)
4170 foundinsn = 1;
4172 if (scan != insn
4173 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4175 /* There is a function call to this register other
4176 than the one we are checking. If we optimize
4177 this call, we need to rescan again below. */
4178 rescan = 1;
4181 /* ??? We shouldn't have to worry about SCANSET here.
4182 We should just be able to check for a REG_DEAD note
4183 on a function call. However, the REG_DEAD notes are
4184 apparently not dependable around libcalls; c-torture
4185 execute/920501-2 is a test case. If SCANSET is set,
4186 then this insn sets the register, so it must have
4187 died earlier. Unfortunately, this will only handle
4188 the cases in which the register is, in fact, set in a
4189 later insn. */
4191 /* ??? We shouldn't have to use FOUNDINSN here.
4192 However, the LOG_LINKS fields are apparently not
4193 entirely reliable around libcalls;
4194 newlib/libm/math/e_pow.c is a test case. Sometimes
4195 an insn will appear in LOG_LINKS even though it is
4196 not the most recent insn which sets the register. */
4198 if (foundinsn
4199 && (scanset
4200 || find_reg_note (scan, REG_DEAD, reg)))
4202 dies = scan;
4203 break;
4207 if (! dies)
4209 /* Either there was a branch, or some insn used REG
4210 other than as a function call address. */
4211 continue;
4214 /* Create a code label, and put it in a REG_LABEL note on
4215 the insn which sets the register, and on each call insn
4216 which uses the register. In final_prescan_insn we look
4217 for the REG_LABEL notes, and output the appropriate label
4218 or pseudo-op. */
4220 label = gen_label_rtx ();
4221 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
4222 REG_NOTES (link));
4223 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
4224 REG_NOTES (insn));
4225 if (rescan)
4227 scan = link;
4230 rtx reg2;
4232 scan = NEXT_INSN (scan);
4233 if (scan != insn
4234 && ((GET_CODE (scan) == CALL_INSN
4235 && reg_mentioned_p (reg, scan))
4236 || ((reg2 = sfunc_uses_reg (scan))
4237 && REGNO (reg2) == REGNO (reg))))
4238 REG_NOTES (scan)
4239 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
4241 while (scan != dies);
4246 if (TARGET_SH2)
4247 fixup_addr_diff_vecs (first);
4249 if (optimize)
4251 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4252 shorten_branches (first);
4254 /* Scan the function looking for move instructions which have to be
4255 changed to pc-relative loads and insert the literal tables. */
4257 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4258 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4260 if (mova_p (insn))
4262 /* ??? basic block reordering can move a switch table dispatch
4263 below the switch table. Check if that has happened.
4264 We only have the addresses available when optimizing; but then,
4265 this check shouldn't be needed when not optimizing. */
4266 rtx label_ref = XVECEXP (SET_SRC (PATTERN (insn)), 0, 0);
4267 if (optimize
4268 && (INSN_ADDRESSES (INSN_UID (insn))
4269 > INSN_ADDRESSES (INSN_UID (XEXP (label_ref, 0)))))
4271 /* Change the mova into a load.
4272 broken_move will then return true for it. */
4273 fixup_mova (insn);
4275 else if (! num_mova++)
4276 mova = insn;
4278 else if (GET_CODE (insn) == JUMP_INSN
4279 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4280 && num_mova)
4282 rtx scan;
4283 int total;
4285 num_mova--;
4287 /* Some code might have been inserted between the mova and
4288 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4289 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4290 total += get_attr_length (scan);
4292 /* The range of the mova is 1020; add 4 because the pc counts from the address of
4293 the second instruction after this one, and subtract 2 in case the pc is 2
4294 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
4295 cancels out with the alignment effects of the mova itself. */
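/* That is, 1020 + 4 - 2 = 1022 bytes is the limit checked below.  */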
4296 if (total > 1022)
4298 /* Change the mova into a load, and restart scanning
4299 there. broken_move will then return true for mova. */
4300 fixup_mova (mova);
4301 insn = mova;
4304 if (broken_move (insn)
4305 || (GET_CODE (insn) == INSN
4306 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
4308 rtx scan;
4309 /* Scan ahead looking for a barrier to stick the constant table
4310 behind. */
4311 rtx barrier = find_barrier (num_mova, mova, insn);
4312 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4313 int need_aligned_label = 0;
4315 if (num_mova && ! mova_p (mova))
4317 /* find_barrier had to change the first mova into a
4318 pcload; thus, we have to start with this new pcload. */
4319 insn = mova;
4320 num_mova = 0;
4322 /* Now find all the moves between the points and modify them. */
4323 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
4325 if (GET_CODE (scan) == CODE_LABEL)
4326 last_float = 0;
4327 if (GET_CODE (scan) == INSN
4328 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
4329 need_aligned_label = 1;
4330 if (broken_move (scan))
4332 rtx *patp = &PATTERN (scan), pat = *patp;
4333 rtx src, dst;
4334 rtx lab;
4335 rtx newsrc;
4336 enum machine_mode mode;
4338 if (GET_CODE (pat) == PARALLEL)
4339 patp = &XVECEXP (pat, 0, 0), pat = *patp;
4340 src = SET_SRC (pat);
4341 dst = SET_DEST (pat);
4342 mode = GET_MODE (dst);
4344 if (mode == SImode && hi_const (src)
4345 && REGNO (dst) != FPUL_REG)
4347 int offset = 0;
4349 mode = HImode;
4350 while (GET_CODE (dst) == SUBREG)
4352 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
4353 GET_MODE (SUBREG_REG (dst)),
4354 SUBREG_BYTE (dst),
4355 GET_MODE (dst));
4356 dst = SUBREG_REG (dst);
4358 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
4360 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
4362 /* This must be an insn that clobbers r0. */
4363 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
4364 XVECLEN (PATTERN (scan), 0)
4365 - 1);
4366 rtx clobber = *clobberp;
4368 if (GET_CODE (clobber) != CLOBBER
4369 || ! rtx_equal_p (XEXP (clobber, 0), r0_rtx))
4370 abort ();
4372 if (last_float
4373 && reg_set_between_p (r0_rtx, last_float_move, scan))
4374 last_float = 0;
4375 if (last_float
4376 && TARGET_SHCOMPACT
4377 && GET_MODE_SIZE (mode) != 4
4378 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
4379 last_float = 0;
4380 lab = add_constant (src, mode, last_float);
4381 if (lab)
4382 emit_insn_before (gen_mova (lab), scan);
4383 else
4385 /* There will be a REG_UNUSED note for r0 on
4386 LAST_FLOAT_MOVE; we have to change it to REG_INC,
4387 otherwise reorg's mark_target_live_regs will not
4388 consider r0 to be used, and we would end up with a delay
4389 slot insn in front of SCAN that clobbers r0. */
4390 rtx note
4391 = find_regno_note (last_float_move, REG_UNUSED, 0);
4393 /* If we are not optimizing, then there may not be
4394 a note. */
4395 if (note)
4396 PUT_MODE (note, REG_INC);
4398 *last_float_addr = r0_inc_rtx;
4400 last_float_move = scan;
4401 last_float = src;
4402 newsrc = gen_rtx_MEM (mode,
4403 (((TARGET_SH4 && ! TARGET_FMOVD)
4404 || REGNO (dst) == FPUL_REG)
4405 ? r0_inc_rtx
4406 : r0_rtx));
4407 last_float_addr = &XEXP (newsrc, 0);
4409 /* Remove the clobber of r0. */
4410 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
4411 gen_rtx_SCRATCH (Pmode));
4413 /* This is a mova needing a label. Create it. */
4414 else if (GET_CODE (src) == UNSPEC
4415 && XINT (src, 1) == UNSPEC_MOVA
4416 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
4418 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
4419 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4420 newsrc = gen_rtx_UNSPEC (SImode,
4421 gen_rtvec (1, newsrc),
4422 UNSPEC_MOVA);
4424 else
4426 lab = add_constant (src, mode, 0);
4427 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4428 newsrc = gen_const_mem (mode, newsrc);
4430 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
4431 INSN_CODE (scan) = -1;
4434 dump_table (need_aligned_label ? insn : 0, barrier);
4435 insn = barrier;
4439 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
4440 INSN_ADDRESSES_FREE ();
4441 split_branches (first);
4443 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4444 also has an effect on the register that holds the address of the sfunc.
4445 Insert an extra dummy insn in front of each sfunc that pretends to
4446 use this register. */
4447 if (flag_delayed_branch)
4449 for (insn = first; insn; insn = NEXT_INSN (insn))
4451 rtx reg = sfunc_uses_reg (insn);
4453 if (! reg)
4454 continue;
4455 emit_insn_before (gen_use_sfunc_addr (reg), insn);
4458 #if 0
4459 /* fpscr is not actually a user variable, but we pretend it is for the
4460 sake of the previous optimization passes, since we want it handled like
4461 one. However, we don't have any debugging information for it, so turn
4462 it into a non-user variable now. */
4463 if (TARGET_SH4)
4464 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4465 #endif
4466 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
4469 int
4470 get_dest_uid (rtx label, int max_uid)
4472 rtx dest = next_real_insn (label);
4473 int dest_uid;
4474 if (! dest)
4475 /* This can happen for an undefined label. */
4476 return 0;
4477 dest_uid = INSN_UID (dest);
4478 /* If this is a newly created branch redirection blocking instruction,
4479 we cannot index the branch_uid or insn_addresses arrays with its
4480 uid. But then, we won't need to, because the actual destination is
4481 the following branch. */
4482 while (dest_uid >= max_uid)
4484 dest = NEXT_INSN (dest);
4485 dest_uid = INSN_UID (dest);
4487 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
4488 return 0;
4489 return dest_uid;
4492 /* Split condbranches that are out of range. Also add clobbers for
4493 scratch registers that are needed in far jumps.
4494 We do this before delay slot scheduling, so that it can take our
4495 newly created instructions into account. It also allows us to
4496 find branches with common targets more easily. */
4498 static void
4499 split_branches (rtx first)
4501 rtx insn;
4502 struct far_branch **uid_branch, *far_branch_list = 0;
4503 int max_uid = get_max_uid ();
4505 /* Find out which branches are out of range. */
4506 shorten_branches (first);
4508 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
4509 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
4511 for (insn = first; insn; insn = NEXT_INSN (insn))
4512 if (! INSN_P (insn))
4513 continue;
4514 else if (INSN_DELETED_P (insn))
4516 /* Shorten_branches would split this instruction again,
4517 so transform it into a note. */
4518 PUT_CODE (insn, NOTE);
4519 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4520 NOTE_SOURCE_FILE (insn) = 0;
4522 else if (GET_CODE (insn) == JUMP_INSN
4523 /* Don't mess with ADDR_DIFF_VEC */
4524 && (GET_CODE (PATTERN (insn)) == SET
4525 || GET_CODE (PATTERN (insn)) == RETURN))
4527 enum attr_type type = get_attr_type (insn);
4528 if (type == TYPE_CBRANCH)
4530 rtx next, beyond;
4532 if (get_attr_length (insn) > 4)
4534 rtx src = SET_SRC (PATTERN (insn));
4535 rtx olabel = XEXP (XEXP (src, 1), 0);
4536 int addr = INSN_ADDRESSES (INSN_UID (insn));
4537 rtx label = 0;
4538 int dest_uid = get_dest_uid (olabel, max_uid);
4539 struct far_branch *bp = uid_branch[dest_uid];
4541 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4542 the label if the LABEL_NUSES count drops to zero. There is
4543 always a jump_optimize pass that sets these values, but it
4544 proceeds to delete unreferenced code, and then if not
4545 optimizing, to un-delete the deleted instructions, thus
4546 leaving labels with use counts that are too low. */
4547 if (! optimize)
4549 JUMP_LABEL (insn) = olabel;
4550 LABEL_NUSES (olabel)++;
4552 if (! bp)
4554 bp = (struct far_branch *) alloca (sizeof *bp);
4555 uid_branch[dest_uid] = bp;
4556 bp->prev = far_branch_list;
4557 far_branch_list = bp;
4558 bp->far_label
4559 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
4560 LABEL_NUSES (bp->far_label)++;
4562 else
4564 label = bp->near_label;
4565 if (! label && bp->address - addr >= CONDJUMP_MIN)
4567 rtx block = bp->insert_place;
4569 if (GET_CODE (PATTERN (block)) == RETURN)
4570 block = PREV_INSN (block);
4571 else
4572 block = gen_block_redirect (block,
4573 bp->address, 2);
4574 label = emit_label_after (gen_label_rtx (),
4575 PREV_INSN (block));
4576 bp->near_label = label;
4578 else if (label && ! NEXT_INSN (label))
4580 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4581 bp->insert_place = insn;
4582 else
4583 gen_far_branch (bp);
4586 if (! label
4587 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4589 bp->near_label = label = gen_label_rtx ();
4590 bp->insert_place = insn;
4591 bp->address = addr;
4593 if (! redirect_jump (insn, label, 1))
4594 abort ();
4596 else
4598 /* get_attr_length (insn) == 2 */
4599 /* Check if we have a pattern where reorg wants to redirect
4600 the branch to a label from an unconditional branch that
4601 is too far away. */
4602 /* We can't use JUMP_LABEL here because it might be undefined
4603 when not optimizing. */
4604 /* A syntax error might cause beyond to be NULL_RTX. */
4605 beyond
4606 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4607 0));
4609 if (beyond
4610 && (GET_CODE (beyond) == JUMP_INSN
4611 || ((beyond = next_active_insn (beyond))
4612 && GET_CODE (beyond) == JUMP_INSN))
4613 && GET_CODE (PATTERN (beyond)) == SET
4614 && recog_memoized (beyond) == CODE_FOR_jump_compact
4615 && ((INSN_ADDRESSES
4616 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4617 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4618 > 252 + 258 + 2))
4619 gen_block_redirect (beyond,
4620 INSN_ADDRESSES (INSN_UID (beyond)), 1);
4623 next = next_active_insn (insn);
4625 if ((GET_CODE (next) == JUMP_INSN
4626 || ((next = next_active_insn (next))
4627 && GET_CODE (next) == JUMP_INSN))
4628 && GET_CODE (PATTERN (next)) == SET
4629 && recog_memoized (next) == CODE_FOR_jump_compact
4630 && ((INSN_ADDRESSES
4631 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
4632 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4633 > 252 + 258 + 2))
4634 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
4636 else if (type == TYPE_JUMP || type == TYPE_RETURN)
4638 int addr = INSN_ADDRESSES (INSN_UID (insn));
4639 rtx far_label = 0;
4640 int dest_uid = 0;
4641 struct far_branch *bp;
4643 if (type == TYPE_JUMP)
4645 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
4646 dest_uid = get_dest_uid (far_label, max_uid);
4647 if (! dest_uid)
4649 /* Parse errors can lead to labels outside
4650 the insn stream. */
4651 if (! NEXT_INSN (far_label))
4652 continue;
4654 if (! optimize)
4656 JUMP_LABEL (insn) = far_label;
4657 LABEL_NUSES (far_label)++;
4659 redirect_jump (insn, NULL_RTX, 1);
4660 far_label = 0;
4663 bp = uid_branch[dest_uid];
4664 if (! bp)
4666 bp = (struct far_branch *) alloca (sizeof *bp);
4667 uid_branch[dest_uid] = bp;
4668 bp->prev = far_branch_list;
4669 far_branch_list = bp;
4670 bp->near_label = 0;
4671 bp->far_label = far_label;
4672 if (far_label)
4673 LABEL_NUSES (far_label)++;
4675 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
4676 if (addr - bp->address <= CONDJUMP_MAX)
4677 emit_label_after (bp->near_label, PREV_INSN (insn));
4678 else
4680 gen_far_branch (bp);
4681 bp->near_label = 0;
4683 else
4684 bp->near_label = 0;
4685 bp->address = addr;
4686 bp->insert_place = insn;
4687 if (! far_label)
4688 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
4689 else
4690 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
4693 /* Generate all pending far branches,
4694 and free our references to the far labels. */
4695 while (far_branch_list)
4697 if (far_branch_list->near_label
4698 && ! NEXT_INSN (far_branch_list->near_label))
4699 gen_far_branch (far_branch_list);
4700 if (optimize
4701 && far_branch_list->far_label
4702 && ! --LABEL_NUSES (far_branch_list->far_label))
4703 delete_insn (far_branch_list->far_label);
4704 far_branch_list = far_branch_list->prev;
4707 /* Instruction length information is no longer valid due to the new
4708 instructions that have been generated. */
4709 init_insn_lengths ();
4712 /* Dump out instruction addresses, which is useful for debugging the
4713 constant pool table stuff.
4715 If relaxing, output the label and pseudo-ops used to link together
4716 calls and the instruction which set the registers. */
4718 /* ??? The addresses printed by this routine for insns are nonsense for
4719 insns which are inside of a sequence where none of the inner insns have
4720 variable length. This is because the second pass of shorten_branches
4721 does not bother to update them. */
4723 void
4724 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
4725 int noperands ATTRIBUTE_UNUSED)
4727 if (TARGET_DUMPISIZE)
4728 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
4730 if (TARGET_RELAX)
4732 rtx note;
4734 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
4735 if (note)
4737 rtx pattern;
4739 pattern = PATTERN (insn);
4740 if (GET_CODE (pattern) == PARALLEL)
4741 pattern = XVECEXP (pattern, 0, 0);
4742 if (GET_CODE (pattern) == CALL
4743 || (GET_CODE (pattern) == SET
4744 && (GET_CODE (SET_SRC (pattern)) == CALL
4745 || get_attr_type (insn) == TYPE_SFUNC)))
4746 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
4747 CODE_LABEL_NUMBER (XEXP (note, 0)));
4748 else if (GET_CODE (pattern) == SET)
4749 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4750 CODE_LABEL_NUMBER (XEXP (note, 0)));
4751 else
4752 abort ();
4757 /* Dump out any constants accumulated in the final pass. These will
4758 only be labels. */
4760 const char *
4761 output_jump_label_table (void)
4763 int i;
4765 if (pool_size)
4767 fprintf (asm_out_file, "\t.align 2\n");
4768 for (i = 0; i < pool_size; i++)
4770 pool_node *p = &pool_vector[i];
4772 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4773 CODE_LABEL_NUMBER (p->label));
4774 output_asm_insn (".long %O0", &p->value);
4776 pool_size = 0;
4779 return "";
4782 /* A full frame looks like:
4784 arg-5
4785 arg-4
4786 [ if current_function_anonymous_args
4787 arg-3
4788 arg-2
4789 arg-1
4790 arg-0 ]
4791 saved-fp
4792 saved-r10
4793 saved-r11
4794 saved-r12
4795 saved-pr
4796 local-n
4798 local-1
4799 local-0 <- fp points here. */
4801 /* Number of bytes pushed for anonymous args, used to pass information
4802 between expand_prologue and expand_epilogue. */
4804 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
4805 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
4806 for an epilogue and a negative value means that it's for a sibcall
4807 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
4808 all the registers that are about to be restored, and hence dead. */
4810 static void
4811 output_stack_adjust (int size, rtx reg, int epilogue_p,
4812 HARD_REG_SET *live_regs_mask)
4814 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
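/* Prologue adjustments go through frame_insn so they are marked
   RTX_FRAME_RELATED_P and described in the unwind info; epilogue and
   sibcall adjustments use a plain emit_insn.  */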
4815 if (size)
4817 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
4819 /* This test is bogus, as output_stack_adjust is used to re-align the
4820 stack. */
4821 #if 0
4822 if (size % align)
4823 abort ();
4824 #endif
4826 if (CONST_OK_FOR_ADD (size))
4827 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
4828 /* Try to do it with two partial adjustments; however, we must make
4829 sure that the stack is properly aligned at all times, in case
4830 an interrupt occurs between the two partial adjustments. */
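/* For example, size == 200 with a 4-byte STACK_BOUNDARY splits into
   100 + 100; each part is small enough for CONST_OK_FOR_ADD, and the
   intermediate stack pointer stays aligned because the first part,
   size / 2 & -align, is itself a multiple of the alignment.  */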
4831 else if (CONST_OK_FOR_ADD (size / 2 & -align)
4832 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
4834 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
4835 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
4837 else
4839 rtx const_reg;
4840 rtx insn;
4841 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
4842 int i;
4844 /* If TEMP is invalid, we could temporarily save a general
4845 register to MACL. However, there is currently no need
4846 to handle this case, so just abort when we see it. */
4847 if (epilogue_p < 0
4848 || current_function_interrupt
4849 || ! call_really_used_regs[temp] || fixed_regs[temp])
4850 temp = -1;
4851 if (temp < 0 && ! current_function_interrupt
4852 && (TARGET_SHMEDIA || epilogue_p >= 0))
4854 HARD_REG_SET temps;
4855 COPY_HARD_REG_SET (temps, call_used_reg_set);
4856 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
4857 if (epilogue_p > 0)
4859 int nreg = 0;
4860 if (current_function_return_rtx)
4862 enum machine_mode mode;
4863 mode = GET_MODE (current_function_return_rtx);
4864 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
4865 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
4867 for (i = 0; i < nreg; i++)
4868 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
4869 if (current_function_calls_eh_return)
4871 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
4872 for (i = 0; i <= 3; i++)
4873 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
4876 if (TARGET_SHMEDIA && epilogue_p < 0)
4877 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
4878 CLEAR_HARD_REG_BIT (temps, i);
4879 if (epilogue_p <= 0)
4881 for (i = FIRST_PARM_REG;
4882 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
4883 CLEAR_HARD_REG_BIT (temps, i);
4884 if (cfun->static_chain_decl != NULL)
4885 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
4887 temp = scavenge_reg (&temps);
4889 if (temp < 0 && live_regs_mask)
4890 temp = scavenge_reg (live_regs_mask);
4891 if (temp < 0)
4893 /* If we reached here, the most likely case is the (sibcall)
4894 epilogue for non-SHmedia. Put a special push/pop sequence
4895 for such a case as the last resort. This looks lengthy but
4896 would not be a problem because it seems to be very rare. */
4897 if (! TARGET_SHMEDIA && epilogue_p)
4899 rtx adj_reg, tmp_reg, mem;
4901 /* ??? There is still the slight possibility that r4 or r5
4902 have been reserved as fixed registers or assigned as
4903 global registers, and they change during an interrupt.
4904 There are possible ways to handle this:
4905 - If we are adjusting the frame pointer (r14), we can do
4906 with a single temp register and an ordinary push / pop
4907 on the stack.
4908 - Grab any call-used or call-saved registers (i.e. not
4909 fixed or globals) for the temps we need. We might
4910 also grab r14 if we are adjusting the stack pointer.
4911 If we can't find enough available registers, issue
4912 a diagnostic and abort - the user must have reserved
4913 way too many registers.
4914 But since all this is rather unlikely to happen and
4915 would require extra testing, we just abort if r4 / r5
4916 are not available. */
4917 if (fixed_regs[4] || fixed_regs[5]
4918 || global_regs[4] || global_regs[5])
4919 abort ();
4921 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
4922 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
4923 emit_move_insn (gen_rtx_MEM (Pmode, reg), adj_reg);
4924 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
4925 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
4926 mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
4927 emit_move_insn (mem, tmp_reg);
4928 emit_move_insn (tmp_reg, gen_rtx_MEM (Pmode, reg));
4929 mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
4930 emit_move_insn (mem, tmp_reg);
4931 emit_move_insn (reg, adj_reg);
4932 mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
4933 emit_move_insn (adj_reg, mem);
4934 mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
4935 emit_move_insn (tmp_reg, mem);
4936 return;
4938 else
4939 abort ();
4941 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
4943 /* If SIZE is negative, subtract the positive value.
4944 This sometimes allows a constant pool entry to be shared
4945 between prologue and epilogue code. */
4946 if (size < 0)
4948 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
4949 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
4951 else
4953 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
4954 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
4956 if (! epilogue_p)
4957 REG_NOTES (insn)
4958 = (gen_rtx_EXPR_LIST
4959 (REG_FRAME_RELATED_EXPR,
4960 gen_rtx_SET (VOIDmode, reg,
4961 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
4962 REG_NOTES (insn)));
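/* Emit X as an insn and mark it frame-related, so that the dwarf2
   machinery describes its effect in the unwind information.  */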
4967 static rtx
4968 frame_insn (rtx x)
4970 x = emit_insn (x);
4971 RTX_FRAME_RELATED_P (x) = 1;
4972 return x;
4975 /* Output RTL to push register RN onto the stack. */
4977 static rtx
4978 push (int rn)
4980 rtx x;
4981 if (rn == FPUL_REG)
4982 x = gen_push_fpul ();
4983 else if (rn == FPSCR_REG)
4984 x = gen_push_fpscr ();
4985 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4986 && FP_OR_XD_REGISTER_P (rn))
4988 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4989 return NULL_RTX;
4990 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
4992 else if (TARGET_SH2E && FP_REGISTER_P (rn))
4993 x = gen_push_e (gen_rtx_REG (SFmode, rn));
4994 else
4995 x = gen_push (gen_rtx_REG (SImode, rn));
4997 x = frame_insn (x);
4998 REG_NOTES (x)
4999 = gen_rtx_EXPR_LIST (REG_INC,
5000 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5001 return x;
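/* (The REG_INC note records that the push auto-modifies the stack
   pointer, so passes that track auto-increment side effects see it.)  */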
5004 /* Output RTL to pop register RN from the stack. */
5006 static void
5007 pop (int rn)
5009 rtx x;
5010 if (rn == FPUL_REG)
5011 x = gen_pop_fpul ();
5012 else if (rn == FPSCR_REG)
5013 x = gen_pop_fpscr ();
5014 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5015 && FP_OR_XD_REGISTER_P (rn))
5017 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5018 return;
5019 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
5021 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5022 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
5023 else
5024 x = gen_pop (gen_rtx_REG (SImode, rn));
5026 x = emit_insn (x);
5027 REG_NOTES (x)
5028 = gen_rtx_EXPR_LIST (REG_INC,
5029 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5032 /* Generate code to push the regs specified in the mask. */
5034 static void
5035 push_regs (HARD_REG_SET *mask, int interrupt_handler)
5037 int i;
5038 int skip_fpscr = 0;
5040 /* Push PR last; this gives better latencies after the prologue, and
5041 provides candidates for the return delay slot when there are no general
5042 registers pushed. */
5043 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5045 /* If this is an interrupt handler, and the SZ bit varies,
5046 and we have to push any floating point register, we need
5047 to switch to the correct precision first. */
5048 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
5049 && hard_regs_intersect_p (mask, &reg_class_contents[DF_REGS]))
5051 HARD_REG_SET unsaved;
5053 push (FPSCR_REG);
5054 COMPL_HARD_REG_SET (unsaved, *mask);
5055 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
5056 skip_fpscr = 1;
5058 if (i != PR_REG
5059 && (i != FPSCR_REG || ! skip_fpscr)
5060 && TEST_HARD_REG_BIT (*mask, i))
5061 push (i);
5063 if (TEST_HARD_REG_BIT (*mask, PR_REG))
5064 push (PR_REG);
5067 /* Calculate how much extra space is needed to save all callee-saved
5068 target registers.
5069 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5071 static int
5072 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
5074 int reg;
5075 int stack_space = 0;
5076 int interrupt_handler = sh_cfun_interrupt_handler_p ();
5078 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5079 if ((! call_really_used_regs[reg] || interrupt_handler)
5080 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5081 /* Leave space to save this target register on the stack,
5082 in case target register allocation wants to use it. */
5083 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5084 return stack_space;
5087 /* Decide whether we should reserve space for callee-save target registers,
5088 in case target register allocation wants to use them. REGS_SAVED is
5089 the space, in bytes, that is already required for register saves.
5090 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5092 static int
5093 shmedia_reserve_space_for_target_registers_p (int regs_saved,
5094 HARD_REG_SET *live_regs_mask)
5096 if (optimize_size)
5097 return 0;
5098 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
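/* (I.e. the speculative reservation seems to be capped so that it never
   exceeds what the register saves already cost, bounding the worst-case
   waste of stack space.)  */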
5101 /* Decide how much space to reserve for callee-save target registers
5102 in case target register allocation wants to use them.
5103 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5105 static int
5106 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5108 if (shmedia_space_reserved_for_target_registers)
5109 return shmedia_target_regs_stack_space (live_regs_mask);
5110 else
5111 return 0;
5114 /* Work out the registers which need to be saved, both as a mask and a
5115 count of saved words. Return the count.
5117 If doing a pragma interrupt function, then push all regs used by the
5118 function, and if we call another function (we can tell by looking at PR),
5119 make sure that all the regs it clobbers are safe too. */
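/* (Note: despite the "saved words" wording above, the count accumulated
   below is in bytes, since it sums GET_MODE_SIZE of each saved
   register.)  */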
5121 static int
5122 calc_live_regs (HARD_REG_SET *live_regs_mask)
5124 unsigned int reg;
5125 int count;
5126 int interrupt_handler;
5127 int pr_live, has_call;
5129 interrupt_handler = sh_cfun_interrupt_handler_p ();
5131 CLEAR_HARD_REG_SET (*live_regs_mask);
5132 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
5133 && regs_ever_live[FPSCR_REG])
5134 target_flags &= ~FPU_SINGLE_BIT;
5135 /* If we can save a lot of saves by switching to double mode, do that. */
5136 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
5137 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5138 if (regs_ever_live[reg] && regs_ever_live[reg+1]
5139 && (! call_really_used_regs[reg]
5140 || (interrupt_handler && ! pragma_trapa))
5141 && ++count > 2)
5143 target_flags &= ~FPU_SINGLE_BIT;
5144 break;
5146 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5147 knows how to use it. That means the pseudo originally allocated for
5148 the initial value can become the PR_MEDIA_REG hard register, as seen for
5149 execute/20010122-1.c:test9. */
5150 if (TARGET_SHMEDIA)
5151 /* ??? this function is called from initial_elimination_offset, hence we
5152 can't use the result of sh_media_register_for_return here. */
5153 pr_live = sh_pr_n_sets ();
5154 else
5156 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5157 pr_live = (pr_initial
5158 ? (GET_CODE (pr_initial) != REG
5159 || REGNO (pr_initial) != (PR_REG))
5160 : regs_ever_live[PR_REG]);
5161 /* For SHcompact, if not optimizing, we end up with a memory reference
5162 using the return address pointer for __builtin_return_address even
5163 though there is no actual need to put the PR register on the stack. */
5164 pr_live |= regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM];
5166 /* Force PR to be live if the prologue has to call the SHmedia
5167 argument decoder or register saver. */
5168 if (TARGET_SHCOMPACT
5169 && ((current_function_args_info.call_cookie
5170 & ~ CALL_COOKIE_RET_TRAMP (1))
5171 || current_function_has_nonlocal_label))
5172 pr_live = 1;
5173 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
5174 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
5176 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5177 ? pr_live
5178 : (interrupt_handler && ! pragma_trapa)
5179 ? (/* Need to save all the regs ever live. */
5180 (regs_ever_live[reg]
5181 || (call_really_used_regs[reg]
5182 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
5183 || reg == PIC_OFFSET_TABLE_REGNUM)
5184 && has_call)
5185 || (has_call && REGISTER_NATURAL_MODE (reg) == SImode
5186 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5187 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5188 && reg != RETURN_ADDRESS_POINTER_REGNUM
5189 && reg != T_REG && reg != GBR_REG
5190 /* Push fpscr only on targets which have FPU */
5191 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5192 : (/* Only push those regs which are used and need to be saved. */
5193 (TARGET_SHCOMPACT
5194 && flag_pic
5195 && current_function_args_info.call_cookie
5196 && reg == PIC_OFFSET_TABLE_REGNUM)
5197 || (regs_ever_live[reg] && ! call_really_used_regs[reg])
5198 || (current_function_calls_eh_return
5199 && (reg == EH_RETURN_DATA_REGNO (0)
5200 || reg == EH_RETURN_DATA_REGNO (1)
5201 || reg == EH_RETURN_DATA_REGNO (2)
5202 || reg == EH_RETURN_DATA_REGNO (3)))
5203 || ((reg == MACL_REG || reg == MACH_REG)
5204 && regs_ever_live[reg]
5205 && sh_cfun_attr_renesas_p ())
5208 SET_HARD_REG_BIT (*live_regs_mask, reg);
5209 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5211 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
5212 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
5214 if (FP_REGISTER_P (reg))
5216 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
5218 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5219 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
5222 else if (XD_REGISTER_P (reg))
5224 /* Must switch to double mode to access these registers. */
5225 target_flags &= ~FPU_SINGLE_BIT;
5230 /* If we have a target register optimization pass after prologue / epilogue
5231 threading, we need to assume all target registers will be live even if
5232 they aren't now. */
5233 if (flag_branch_target_load_optimize2
5234 && TARGET_SAVE_ALL_TARGET_REGS
5235 && shmedia_space_reserved_for_target_registers)
5236 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5237 if ((! call_really_used_regs[reg] || interrupt_handler)
5238 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5240 SET_HARD_REG_BIT (*live_regs_mask, reg);
5241 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5243 /* If this is an interrupt handler, we don't have any call-clobbered
5244 registers we can conveniently use for target register save/restore.
5245 Make sure we save at least one general purpose register when we need
5246 to save target registers. */
5247 if (interrupt_handler
5248 && hard_regs_intersect_p (live_regs_mask,
5249 &reg_class_contents[TARGET_REGS])
5250 && ! hard_regs_intersect_p (live_regs_mask,
5251 &reg_class_contents[GENERAL_REGS]))
5253 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
5254 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
5257 return count;
5260 /* Code to generate prologue and epilogue sequences */
5262 /* PUSHED is the number of bytes that are being pushed on the
5263 stack for register saves. Return the frame size, padded
5264 appropriately so that the stack stays properly aligned. */
5265 static HOST_WIDE_INT
5266 rounded_frame_size (int pushed)
5268 HOST_WIDE_INT size = get_frame_size ();
5269 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5271 return ((size + pushed + align - 1) & -align) - pushed;
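/* For example, with size == 22, pushed == 12 and an 8-byte STACK_BOUNDARY:
   ((22 + 12 + 7) & -8) - 12 = 40 - 12 = 28, i.e. the frame is padded from
   22 to 28 bytes so that locals plus saves total a multiple of 8.  */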
5274 /* Choose a call-clobbered target-branch register that remains
5275 unchanged along the whole function. We set it up as the return
5276 value in the prologue. */
5277 int
5278 sh_media_register_for_return (void)
5280 int regno;
5281 int tr0_used;
5283 if (! current_function_is_leaf)
5284 return -1;
5285 if (lookup_attribute ("interrupt_handler",
5286 DECL_ATTRIBUTES (current_function_decl)))
5287 return -1;
5289 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
5291 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
5292 if (call_really_used_regs[regno] && ! regs_ever_live[regno])
5293 return regno;
5295 return -1;
5298 /* The maximum registers we need to save are:
5299 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
5300 - 32 floating point registers (for each pair, we save none,
5301 one single precision value, or a double precision value).
5302 - 8 target registers
5303 - add 1 entry for a delimiter. */
5304 #define MAX_SAVED_REGS (62+32+8)
5306 typedef struct save_entry_s
5308 unsigned char reg;
5309 unsigned char mode;
5310 short offset;
5311 } save_entry;
5313 #define MAX_TEMPS 4
5315 /* There will be a delimiter entry with VOIDmode both at the start and the
5316 end of a filled in schedule. The end delimiter has the offset of the
5317 save with the smallest (i.e. most negative) offset. */
5318 typedef struct save_schedule_s
5320 save_entry entries[MAX_SAVED_REGS + 2];
5321 int temps[MAX_TEMPS+1];
5322 } save_schedule;
5324 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
5325 use reverse order. Returns the last entry written to (not counting
5326 the delimiter). OFFSET_BASE is a number to be added to all offset
5327 entries. */
5329 static save_entry *
5330 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
5331 int offset_base)
5333 int align, i;
5334 save_entry *entry = schedule->entries;
5335 int tmpx = 0;
5336 int offset;
5338 if (! current_function_interrupt)
5339 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
5340 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
5341 && ! FUNCTION_ARG_REGNO_P (i)
5342 && i != FIRST_RET_REG
5343 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
5344 && ! (current_function_calls_eh_return
5345 && (i == EH_RETURN_STACKADJ_REGNO
5346 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
5347 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
5348 schedule->temps[tmpx++] = i;
5349 entry->reg = -1;
5350 entry->mode = VOIDmode;
5351 entry->offset = offset_base;
5352 entry++;
5353 /* We loop twice: first, we save 8-byte aligned registers in the
5354 higher addresses, which are known to be aligned. Then, we
5355 proceed to saving 32-bit registers that don't need 8-byte
5356 alignment.
5357 If this is an interrupt function, all registers that need saving
5358 need to be saved in full. Moreover, we need to postpone saving
5359 target registers till we have saved some general purpose registers
5360 we can then use as scratch registers. */
5361 offset = offset_base;
5362 for (align = 1; align >= 0; align--)
5364 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
5365 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5367 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5368 int reg = i;
5370 if (current_function_interrupt)
5372 if (TARGET_REGISTER_P (i))
5373 continue;
5374 if (GENERAL_REGISTER_P (i))
5375 mode = DImode;
5377 if (mode == SFmode && (i % 2) == 1
5378 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5379 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
5381 mode = DFmode;
5382 i--;
5383 reg--;
5386 /* If we're doing the aligned pass and this is not aligned,
5387 or we're doing the unaligned pass and this is aligned,
5388 skip it. */
5389 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
5390 != align)
5391 continue;
5393 if (current_function_interrupt
5394 && GENERAL_REGISTER_P (i)
5395 && tmpx < MAX_TEMPS)
5396 schedule->temps[tmpx++] = i;
5398 offset -= GET_MODE_SIZE (mode);
5399 entry->reg = i;
5400 entry->mode = mode;
5401 entry->offset = offset;
5402 entry++;
5404 if (align && current_function_interrupt)
5405 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
5406 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5408 offset -= GET_MODE_SIZE (DImode);
5409 entry->reg = i;
5410 entry->mode = DImode;
5411 entry->offset = offset;
5412 entry++;
5415 entry->reg = -1;
5416 entry->mode = VOIDmode;
5417 entry->offset = offset;
5418 schedule->temps[tmpx] = -1;
5419 return entry - 1;
5422 void
5423 sh_expand_prologue (void)
5425 HARD_REG_SET live_regs_mask;
5426 int d, i;
5427 int d_rounding = 0;
5428 int save_flags = target_flags;
5429 int pretend_args;
5431 current_function_interrupt = sh_cfun_interrupt_handler_p ();
5433 /* We have pretend args if we had an object sent partially in registers
5434 and partially on the stack, e.g. a large structure. */
5435 pretend_args = current_function_pretend_args_size;
5436 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
5437 && (NPARM_REGS(SImode)
5438 > current_function_args_info.arg_count[(int) SH_ARG_INT]))
5439 pretend_args = 0;
5440 output_stack_adjust (-pretend_args
5441 - current_function_args_info.stack_regs * 8,
5442 stack_pointer_rtx, 0, NULL);
5444 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
5445 /* We're going to use the PIC register to load the address of the
5446 incoming-argument decoder and/or of the return trampoline from
5447 the GOT, so make sure the PIC register is preserved and
5448 initialized. */
5449 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5451 if (TARGET_SHCOMPACT
5452 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5454 int reg;
5456 /* First, make all registers with incoming arguments that will
5457 be pushed onto the stack live, so that register renaming
5458 doesn't overwrite them. */
5459 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
5460 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
5461 >= NPARM_REGS (SImode) - reg)
5462 for (; reg < NPARM_REGS (SImode); reg++)
5463 emit_insn (gen_shcompact_preserve_incoming_args
5464 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5465 else if (CALL_COOKIE_INT_REG_GET
5466 (current_function_args_info.call_cookie, reg) == 1)
5467 emit_insn (gen_shcompact_preserve_incoming_args
5468 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5470 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
5471 stack_pointer_rtx);
5472 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
5473 GEN_INT (current_function_args_info.call_cookie));
5474 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
5475 gen_rtx_REG (SImode, R0_REG));
5477 else if (TARGET_SHMEDIA)
5479 int tr = sh_media_register_for_return ();
5481 if (tr >= 0)
5483 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
5484 gen_rtx_REG (DImode, PR_MEDIA_REG));
5486 /* ??? We should suppress saving pr when we don't need it, but this
5487 is tricky because of builtin_return_address. */
5489 /* If this function only exits with sibcalls, this copy
5490 will be flagged as dead. */
5491 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5492 const0_rtx,
5493 REG_NOTES (insn));
5497 /* Emit the code for SETUP_VARARGS. */
5498 if (current_function_stdarg)
5500 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
5502 /* Push arg regs as if they'd been provided by caller in stack. */
5503 for (i = 0; i < NPARM_REGS(SImode); i++)
5505 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
5506 rtx insn;
5508 if (i >= (NPARM_REGS(SImode)
5509 - current_function_args_info.arg_count[(int) SH_ARG_INT]
5511 break;
5512 insn = push (rn);
5513 RTX_FRAME_RELATED_P (insn) = 0;
5518 /* If we're supposed to switch stacks at function entry, do so now. */
5519 if (sp_switch)
5520 emit_insn (gen_sp_switch_1 ());
5522 d = calc_live_regs (&live_regs_mask);
5523 /* ??? Maybe we could save some switching if we can move a mode switch
5524 that already happens to be at the function start into the prologue. */
5525 if (target_flags != save_flags && ! current_function_interrupt)
5526 emit_insn (gen_toggle_sz ());
5528 if (TARGET_SH5)
5530 int offset_base, offset;
5531 rtx r0 = NULL_RTX;
5532 int offset_in_r0 = -1;
5533 int sp_in_r0 = 0;
5534 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5535 int total_size, save_size;
5536 save_schedule schedule;
5537 save_entry *entry;
5538 int *tmp_pnt;
5540 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
5541 && ! current_function_interrupt)
5542 r0 = gen_rtx_REG (Pmode, R0_REG);
5544 /* D is the actual number of bytes that we need for saving registers;
5545 however, in initial_elimination_offset we have committed to using
5546 an additional TREGS_SPACE bytes - in order to keep both
5547 addresses to arguments supplied by the caller and local variables
5548 valid, we must keep this gap. Place it between the incoming
5549 arguments and the actually saved registers in a bid to optimize
5550 locality of reference. */
5551 total_size = d + tregs_space;
5552 total_size += rounded_frame_size (total_size);
5553 save_size = total_size - rounded_frame_size (d);
5554 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
5555 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5556 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
5558 /* If adjusting the stack in a single step costs nothing extra, do so.
5559 I.e. either if a single addi is enough, or we need a movi anyway,
5560 and we don't exceed the maximum offset range (the test for the
5561 latter is conservative for simplicity). */
5562 if (TARGET_SHMEDIA
5563 && (CONST_OK_FOR_I10 (-total_size)
5564 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
5565 && total_size <= 2044)))
5566 d_rounding = total_size - save_size;
5568 offset_base = d + d_rounding;
5570 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
5571 0, NULL);
5573 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
5574 tmp_pnt = schedule.temps;
5575 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
5577 enum machine_mode mode = entry->mode;
5578 unsigned int reg = entry->reg;
5579 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
5580 rtx orig_reg_rtx;
5582 offset = entry->offset;
5584 reg_rtx = gen_rtx_REG (mode, reg);
5586 mem_rtx = gen_rtx_MEM (mode,
5587 gen_rtx_PLUS (Pmode,
5588 stack_pointer_rtx,
5589 GEN_INT (offset)));
5591 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
5593 if (! r0)
5594 abort ();
5595 mem_rtx = NULL_RTX;
5597 try_pre_dec:
5599 if (HAVE_PRE_DECREMENT
5600 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
5601 || mem_rtx == NULL_RTX
5602 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5604 pre_dec = gen_rtx_MEM (mode,
5605 gen_rtx_PRE_DEC (Pmode, r0));
5607 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
5608 pre_dec_ok);
5610 pre_dec = NULL_RTX;
5612 break;
5614 pre_dec_ok:
5615 mem_rtx = NULL_RTX;
5616 offset += GET_MODE_SIZE (mode);
5618 while (0);
5620 if (mem_rtx != NULL_RTX)
5621 goto addr_ok;
5623 if (offset_in_r0 == -1)
5625 emit_move_insn (r0, GEN_INT (offset));
5626 offset_in_r0 = offset;
5628 else if (offset != offset_in_r0)
5630 emit_move_insn (r0,
5631 gen_rtx_PLUS
5632 (Pmode, r0,
5633 GEN_INT (offset - offset_in_r0)));
5634 offset_in_r0 += offset - offset_in_r0;
5637 if (pre_dec != NULL_RTX)
5639 if (! sp_in_r0)
5641 emit_move_insn (r0,
5642 gen_rtx_PLUS
5643 (Pmode, r0, stack_pointer_rtx));
5644 sp_in_r0 = 1;
5647 offset -= GET_MODE_SIZE (mode);
5648 offset_in_r0 -= GET_MODE_SIZE (mode);
5650 mem_rtx = pre_dec;
5652 else if (sp_in_r0)
5653 mem_rtx = gen_rtx_MEM (mode, r0);
5654 else
5655 mem_rtx = gen_rtx_MEM (mode,
5656 gen_rtx_PLUS (Pmode,
5657 stack_pointer_rtx,
5658 r0));
5660 /* We must not use an r0-based address for target-branch
5661 registers or for special registers without pre-dec
5662 memory addresses, since we store their values in r0
5663 first. */
5664 if (TARGET_REGISTER_P (reg)
5665 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5666 && mem_rtx != pre_dec))
5667 abort ();
5669 addr_ok:
5670 orig_reg_rtx = reg_rtx;
5671 if (TARGET_REGISTER_P (reg)
5672 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5673 && mem_rtx != pre_dec))
5675 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
5677 emit_move_insn (tmp_reg, reg_rtx);
5679 if (REGNO (tmp_reg) == R0_REG)
5681 offset_in_r0 = -1;
5682 sp_in_r0 = 0;
5683 if (refers_to_regno_p (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0))
5684 abort ();
5687 if (*++tmp_pnt <= 0)
5688 tmp_pnt = schedule.temps;
5690 reg_rtx = tmp_reg;
5693 rtx insn;
5695 /* Mark as interesting for dwarf cfi generator */
5696 insn = emit_move_insn (mem_rtx, reg_rtx);
5697 RTX_FRAME_RELATED_P (insn) = 1;
5698 /* If we use an intermediate register for the save, we can't
5699 describe this exactly in cfi as a copy of the to-be-saved
5700 register into the temporary register and then a store of the
5701 temporary register to the stack, because the temporary register can
5702 have a different natural size than the to-be-saved register.
5703 Thus, we gloss over the intermediate copy and pretend we do
5704 a direct save from the to-be-saved register. */
5705 if (REGNO (reg_rtx) != reg)
5707 rtx set, note_rtx;
5709 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
5710 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
5711 REG_NOTES (insn));
5712 REG_NOTES (insn) = note_rtx;
5715 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
5717 rtx reg_rtx = gen_rtx_REG (mode, reg);
5718 rtx set, note_rtx;
5719 rtx mem_rtx = gen_rtx_MEM (mode,
5720 gen_rtx_PLUS (Pmode,
5721 stack_pointer_rtx,
5722 GEN_INT (offset)));
5724 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
5725 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
5726 REG_NOTES (insn));
5727 REG_NOTES (insn) = note_rtx;
5732 if (entry->offset != d_rounding)
5733 abort ();
5735 else
5736 push_regs (&live_regs_mask, current_function_interrupt);
5738 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
5740 rtx insn = get_last_insn ();
5741 rtx last = emit_insn (gen_GOTaddr2picreg ());
5743 /* Mark these insns as possibly dead. Sometimes, flow2 may
5744 delete all uses of the PIC register. In this case, let it
5745 delete the initialization too. */
5748 insn = NEXT_INSN (insn);
5750 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5751 const0_rtx,
5752 REG_NOTES (insn));
5754 while (insn != last);
5757 if (SHMEDIA_REGS_STACK_ADJUST ())
5759 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5760 function_symbol (TARGET_FPU_ANY
5761 ? "__GCC_push_shmedia_regs"
5762 : "__GCC_push_shmedia_regs_nofpu"));
5763 /* This must NOT go through the PLT, otherwise mach and macl
5764 may be clobbered. */
5765 emit_insn (gen_shmedia_save_restore_regs_compact
5766 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
5769 if (target_flags != save_flags && ! current_function_interrupt)
5771 rtx insn = emit_insn (gen_toggle_sz ());
5773 /* If we're lucky, a mode switch in the function body will
5774 overwrite fpscr, turning this insn dead. Tell flow this
5775 insn is ok to delete. */
5776 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5777 const0_rtx,
5778 REG_NOTES (insn));
5781 target_flags = save_flags;
5783 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
5784 stack_pointer_rtx, 0, NULL);
5786 if (frame_pointer_needed)
5787 frame_insn (GEN_MOV (frame_pointer_rtx, stack_pointer_rtx));
5789 if (TARGET_SHCOMPACT
5790 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5792 /* This must NOT go through the PLT, otherwise mach and macl
5793 may be clobbered. */
5794 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5795 function_symbol ("__GCC_shcompact_incoming_args"));
5796 emit_insn (gen_shcompact_incoming_args ());
5800 void
5801 sh_expand_epilogue (bool sibcall_p)
5803 HARD_REG_SET live_regs_mask;
5804 int d, i;
5805 int d_rounding = 0;
5807 int save_flags = target_flags;
5808 int frame_size, save_size;
5809 int fpscr_deferred = 0;
5810 int e = sibcall_p ? -1 : 1;
5812 d = calc_live_regs (&live_regs_mask);
5814 save_size = d;
5815 frame_size = rounded_frame_size (d);
5817 if (TARGET_SH5)
5819 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5820 int total_size;
5821 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
5822 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5823 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
5825 total_size = d + tregs_space;
5826 total_size += rounded_frame_size (total_size);
5827 save_size = total_size - frame_size;
5829 /* If adjusting the stack in a single step costs nothing extra, do so.
5830 I.e. either if a single addi is enough, or we need a movi anyway,
5831 and we don't exceed the maximum offset range (the test for the
5832 latter is conservative for simplicity). */
5833 if (TARGET_SHMEDIA
5834 && ! frame_pointer_needed
5835 && (CONST_OK_FOR_I10 (total_size)
5836 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
5837 && total_size <= 2044)))
5838 d_rounding = frame_size;
5840 frame_size -= d_rounding;
5843 if (frame_pointer_needed)
5845 /* We must avoid scheduling the epilogue with previous basic blocks
5846 when exception handling is enabled. See PR/18032. */
5847 if (flag_exceptions)
5848 emit_insn (gen_blockage ());
5849 output_stack_adjust (frame_size, frame_pointer_rtx, e, &live_regs_mask);
5851 /* We must avoid moving the stack pointer adjustment past code
5852 which reads from the local frame, else an interrupt could
5853 occur after the SP adjustment and clobber data in the local
5854 frame. */
5855 emit_insn (gen_blockage ());
5856 emit_insn (GEN_MOV (stack_pointer_rtx, frame_pointer_rtx));
5858 else if (frame_size)
5860 /* We must avoid moving the stack pointer adjustment past code
5861 which reads from the local frame, else an interrupt could
5862 occur after the SP adjustment and clobber data in the local
5863 frame. */
5864 emit_insn (gen_blockage ());
5865 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
5868 if (SHMEDIA_REGS_STACK_ADJUST ())
5870 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5871 function_symbol (TARGET_FPU_ANY
5872 ? "__GCC_pop_shmedia_regs"
5873 : "__GCC_pop_shmedia_regs_nofpu"));
5874 /* This must NOT go through the PLT, otherwise mach and macl
5875 may be clobbered. */
5876 emit_insn (gen_shmedia_save_restore_regs_compact
5877 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
5880 /* Pop all the registers. */
5882 if (target_flags != save_flags && ! current_function_interrupt)
5883 emit_insn (gen_toggle_sz ());
5884 if (TARGET_SH5)
5886 int offset_base, offset;
5887 int offset_in_r0 = -1;
5888 int sp_in_r0 = 0;
5889 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
5890 save_schedule schedule;
5891 save_entry *entry;
5892 int *tmp_pnt;
5894 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
5895 offset_base = -entry[1].offset + d_rounding;
5896 tmp_pnt = schedule.temps;
5897 for (; entry->mode != VOIDmode; entry--)
5899 enum machine_mode mode = entry->mode;
5900 int reg = entry->reg;
5901 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
5903 offset = offset_base + entry->offset;
5904 reg_rtx = gen_rtx_REG (mode, reg);
5906 mem_rtx = gen_rtx_MEM (mode,
5907 gen_rtx_PLUS (Pmode,
5908 stack_pointer_rtx,
5909 GEN_INT (offset)));
5911 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
5913 mem_rtx = NULL_RTX;
5915 try_post_inc:
5917 if (HAVE_POST_INCREMENT
5918 && (offset == offset_in_r0
5919 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
5920 && mem_rtx == NULL_RTX)
5921 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5923 post_inc = gen_rtx_MEM (mode,
5924 gen_rtx_POST_INC (Pmode, r0));
5926 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
5927 post_inc_ok);
5929 post_inc = NULL_RTX;
5931 break;
5933 post_inc_ok:
5934 mem_rtx = NULL_RTX;
5936 while (0);
5938 if (mem_rtx != NULL_RTX)
5939 goto addr_ok;
5941 if (offset_in_r0 == -1)
5943 emit_move_insn (r0, GEN_INT (offset));
5944 offset_in_r0 = offset;
5946 else if (offset != offset_in_r0)
5948 emit_move_insn (r0,
5949 gen_rtx_PLUS
5950 (Pmode, r0,
5951 GEN_INT (offset - offset_in_r0)));
5952 offset_in_r0 += offset - offset_in_r0;
5955 if (post_inc != NULL_RTX)
5957 if (! sp_in_r0)
5959 emit_move_insn (r0,
5960 gen_rtx_PLUS
5961 (Pmode, r0, stack_pointer_rtx));
5962 sp_in_r0 = 1;
5965 mem_rtx = post_inc;
5967 offset_in_r0 += GET_MODE_SIZE (mode);
5969 else if (sp_in_r0)
5970 mem_rtx = gen_rtx_MEM (mode, r0);
5971 else
5972 mem_rtx = gen_rtx_MEM (mode,
5973 gen_rtx_PLUS (Pmode,
5974 stack_pointer_rtx,
5975 r0));
5977 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5978 && mem_rtx != post_inc)
5979 abort ();
5981 addr_ok:
5982 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5983 && mem_rtx != post_inc)
5985 insn = emit_move_insn (r0, mem_rtx);
5986 mem_rtx = r0;
5988 else if (TARGET_REGISTER_P (reg))
5990 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
5992 /* Give the scheduler a bit of freedom by using up to
5993 MAX_TEMPS registers in a round-robin fashion. */
5994 insn = emit_move_insn (tmp_reg, mem_rtx);
5995 mem_rtx = tmp_reg;
5996 if (*++tmp_pnt < 0)
5997 tmp_pnt = schedule.temps;
6000 insn = emit_move_insn (reg_rtx, mem_rtx);
6001 if (reg == PR_MEDIA_REG && sh_media_register_for_return () >= 0)
6002 /* This is dead, unless we return with a sibcall. */
6003 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6004 const0_rtx,
6005 REG_NOTES (insn));
6008 if (entry->offset + offset_base != d + d_rounding)
6009 abort ();
6011 else /* ! TARGET_SH5 */
6013 save_size = 0;
6014 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6015 pop (PR_REG);
6016 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
6018 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
6020 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
6021 && hard_regs_intersect_p (&live_regs_mask,
6022 &reg_class_contents[DF_REGS]))
6023 fpscr_deferred = 1;
6024 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
6025 pop (j);
6026 if (j == FIRST_FP_REG && fpscr_deferred)
6027 pop (FPSCR_REG);
6031 if (target_flags != save_flags && ! current_function_interrupt)
6032 emit_insn (gen_toggle_sz ());
6033 target_flags = save_flags;
6035 output_stack_adjust (current_function_pretend_args_size
6036 + save_size + d_rounding
6037 + current_function_args_info.stack_regs * 8,
6038 stack_pointer_rtx, e, NULL);
6040 if (current_function_calls_eh_return)
6041 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
6042 EH_RETURN_STACKADJ_RTX));
6044 /* Switch back to the normal stack if necessary. */
6045 if (sp_switch)
6046 emit_insn (gen_sp_switch_2 ());
6048 /* Tell flow the insn that pops PR isn't dead. */
6049 /* PR_REG will never be live in SHmedia mode, and we don't need to
6050 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6051 by the return pattern. */
6052 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6053 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
6056 static int sh_need_epilogue_known = 0;
6059 sh_need_epilogue (void)
6061 if (! sh_need_epilogue_known)
6063 rtx epilogue;
6065 start_sequence ();
6066 sh_expand_epilogue (0);
6067 epilogue = get_insns ();
6068 end_sequence ();
6069 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
6071 return sh_need_epilogue_known > 0;
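/* sh_need_epilogue_known acts as a tri-state cache for the test above:
   0 means the answer has not been computed yet, 1 means the expanded
   epilogue contains insns, and -1 means it would be empty.  It is reset
   in sh_output_function_epilogue below so that each function is
   evaluated afresh.  */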
6074 /* Emit code to change the current function's return address to RA.
6075 TEMP is available as a scratch register, if needed. */
6077 void
6078 sh_set_return_address (rtx ra, rtx tmp)
6080 HARD_REG_SET live_regs_mask;
6081 int d;
6082 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6083 int pr_offset;
6085 d = calc_live_regs (&live_regs_mask);
6087 /* If pr_reg isn't live, we can set it (or the register given in
6088 sh_media_register_for_return) directly. */
6089 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6091 rtx rr;
6093 if (TARGET_SHMEDIA)
6095 int rr_regno = sh_media_register_for_return ();
6097 if (rr_regno < 0)
6098 rr_regno = pr_reg;
6100 rr = gen_rtx_REG (DImode, rr_regno);
6102 else
6103 rr = gen_rtx_REG (SImode, pr_reg);
6105 emit_insn (GEN_MOV (rr, ra));
6106 /* Tell flow the register for return isn't dead. */
6107 emit_insn (gen_rtx_USE (VOIDmode, rr));
6108 return;
6111 if (TARGET_SH5)
6113 int offset;
6114 save_schedule schedule;
6115 save_entry *entry;
6117 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
6118 offset = entry[1].offset;
6119 for (; entry->mode != VOIDmode; entry--)
6120 if (entry->reg == pr_reg)
6121 goto found;
6123 /* We can't find the PR register. */
6124 abort ();
6126 found:
6127 offset = entry->offset - offset;
6128 pr_offset = (rounded_frame_size (d) + offset
6129 + SHMEDIA_REGS_STACK_ADJUST ());
6131 else
6132 pr_offset = rounded_frame_size (d);
6134 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6135 emit_insn (GEN_ADD3 (tmp, tmp, frame_pointer_rtx));
6137 tmp = gen_rtx_MEM (Pmode, tmp);
6138 emit_insn (GEN_MOV (tmp, ra));
6141 /* Clear variables at function end. */
6143 static void
6144 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6145 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6147 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
6148 sh_need_epilogue_known = 0;
6149 sp_switch = NULL_RTX;
6152 static rtx
6153 sh_builtin_saveregs (void)
6155 /* First unnamed integer register. */
6156 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
6157 /* Number of integer registers we need to save. */
6158 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6159 /* First unnamed SFmode float reg */
6160 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6161 /* Number of SFmode float regs to save. */
6162 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6163 rtx regbuf, fpregs;
6164 int bufsize, regno;
6165 HOST_WIDE_INT alias_set;
6167 if (TARGET_SH5)
6169 if (n_intregs)
6171 int pushregs = n_intregs;
6173 while (pushregs < NPARM_REGS (SImode) - 1
6174 && (CALL_COOKIE_INT_REG_GET
6175 (current_function_args_info.call_cookie,
6176 NPARM_REGS (SImode) - pushregs)
6177 == 1))
6179 current_function_args_info.call_cookie
6180 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6181 - pushregs, 1);
6182 pushregs++;
6185 if (pushregs == NPARM_REGS (SImode))
6186 current_function_args_info.call_cookie
6187 |= (CALL_COOKIE_INT_REG (0, 1)
6188 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6189 else
6190 current_function_args_info.call_cookie
6191 |= CALL_COOKIE_STACKSEQ (pushregs);
6193 current_function_pretend_args_size += 8 * n_intregs;
6195 if (TARGET_SHCOMPACT)
6196 return const0_rtx;
6199 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
6201 error ("__builtin_saveregs not supported by this subtarget");
6202 return const0_rtx;
6205 if (TARGET_SHMEDIA)
6206 n_floatregs = 0;
6208 /* Allocate block of memory for the regs. */
6209 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6210 Or can assign_stack_local accept a 0 SIZE argument? */
6211 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
6213 if (TARGET_SHMEDIA)
6214 regbuf = gen_rtx_MEM (BLKmode,
6215 gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
6216 else if (n_floatregs & 1)
6218 rtx addr;
6220 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6221 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
6222 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6223 regbuf = change_address (regbuf, BLKmode, addr);
6225 else
6226 regbuf = assign_stack_local (BLKmode, bufsize, 0);
6227 alias_set = get_varargs_alias_set ();
6228 set_mem_alias_set (regbuf, alias_set);
6230 /* Save int args.
6231 This is optimized to only save the regs that are necessary. Explicitly
6232 named args need not be saved. */
6233 if (n_intregs > 0)
6234 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
6235 adjust_address (regbuf, BLKmode,
6236 n_floatregs * UNITS_PER_WORD),
6237 n_intregs);
6239 if (TARGET_SHMEDIA)
6240 /* Return the address of the regbuf. */
6241 return XEXP (regbuf, 0);
6243 /* Save float args.
6244 This is optimized to only save the regs that are necessary. Explicitly
6245 named args need not be saved.
6246 We explicitly build a pointer to the buffer because it halves the insn
6247 count when not optimizing (otherwise the pointer is built for each reg
6248 saved).
6249 We emit the moves in reverse order so that we can use predecrement. */
6251 fpregs = gen_reg_rtx (Pmode);
6252 emit_move_insn (fpregs, XEXP (regbuf, 0));
6253 emit_insn (gen_addsi3 (fpregs, fpregs,
6254 GEN_INT (n_floatregs * UNITS_PER_WORD)));
6255 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
6257 rtx mem;
6258 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
6260 emit_insn (gen_addsi3 (fpregs, fpregs,
6261 GEN_INT (-2 * UNITS_PER_WORD)));
6262 mem = gen_rtx_MEM (DFmode, fpregs);
6263 set_mem_alias_set (mem, alias_set);
6264 emit_move_insn (mem,
6265 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
6267 regno = first_floatreg;
6268 if (regno & 1)
6270 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6271 mem = gen_rtx_MEM (SFmode, fpregs);
6272 set_mem_alias_set (mem, alias_set);
6273 emit_move_insn (mem,
6274 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
6275 - (TARGET_LITTLE_ENDIAN != 0)));
6278 else
6279 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
6281 rtx mem;
6283 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6284 mem = gen_rtx_MEM (SFmode, fpregs);
6285 set_mem_alias_set (mem, alias_set);
6286 emit_move_insn (mem,
6287 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
6290 /* Return the address of the regbuf. */
6291 return XEXP (regbuf, 0);
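/* To recap the layout built above: regbuf starts with the unnamed
   SFmode arguments in its first n_floatregs words (stored in reverse so
   that the predecrementing fpregs pointer can be used), followed by the
   unnamed integer arguments in the next n_intregs words.  sh_va_start
   below relies on exactly this ordering when it initializes
   __va_next_fp and __va_next_o.  */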
6294 /* Define the `__builtin_va_list' type for the ABI. */
6296 static tree
6297 sh_build_builtin_va_list (void)
6299 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6300 tree record;
6302 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
6303 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6304 return ptr_type_node;
6306 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6308 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
6309 ptr_type_node);
6310 f_next_o_limit = build_decl (FIELD_DECL,
6311 get_identifier ("__va_next_o_limit"),
6312 ptr_type_node);
6313 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
6314 ptr_type_node);
6315 f_next_fp_limit = build_decl (FIELD_DECL,
6316 get_identifier ("__va_next_fp_limit"),
6317 ptr_type_node);
6318 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
6319 ptr_type_node);
6321 DECL_FIELD_CONTEXT (f_next_o) = record;
6322 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
6323 DECL_FIELD_CONTEXT (f_next_fp) = record;
6324 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
6325 DECL_FIELD_CONTEXT (f_next_stack) = record;
6327 TYPE_FIELDS (record) = f_next_o;
6328 TREE_CHAIN (f_next_o) = f_next_o_limit;
6329 TREE_CHAIN (f_next_o_limit) = f_next_fp;
6330 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
6331 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
6333 layout_type (record);
6335 return record;
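/* Informally, the record built above corresponds to a C structure of
   roughly this shape (a sketch for illustration only; the exact layout
   is whatever layout_type computes for the record):

     struct
     {
       void *__va_next_o;          -- next overflow (integer) argument
       void *__va_next_o_limit;    -- end of the integer register save area
       void *__va_next_fp;         -- next floating-point argument
       void *__va_next_fp_limit;   -- end of the FP register save area
       void *__va_next_stack;      -- next argument passed on the stack
     };  */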
6338 /* Implement `va_start' for varargs and stdarg. */
6340 void
6341 sh_va_start (tree valist, rtx nextarg)
6343 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6344 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6345 tree t, u;
6346 int nfp, nint;
6348 if (TARGET_SH5)
6350 expand_builtin_saveregs ();
6351 std_expand_builtin_va_start (valist, nextarg);
6352 return;
6355 if ((! TARGET_SH2E && ! TARGET_SH4)
6356 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6358 std_expand_builtin_va_start (valist, nextarg);
6359 return;
6362 f_next_o = TYPE_FIELDS (va_list_type_node);
6363 f_next_o_limit = TREE_CHAIN (f_next_o);
6364 f_next_fp = TREE_CHAIN (f_next_o_limit);
6365 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6366 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6368 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6369 NULL_TREE);
6370 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6371 valist, f_next_o_limit, NULL_TREE);
6372 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
6373 NULL_TREE);
6374 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6375 valist, f_next_fp_limit, NULL_TREE);
6376 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6377 valist, f_next_stack, NULL_TREE);
6379 /* Call __builtin_saveregs. */
6380 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
6381 t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
6382 TREE_SIDE_EFFECTS (t) = 1;
6383 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6385 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
6386 if (nfp < 8)
6387 nfp = 8 - nfp;
6388 else
6389 nfp = 0;
6390 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6391 build_int_cst (NULL_TREE, UNITS_PER_WORD * nfp)));
6392 t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
6393 TREE_SIDE_EFFECTS (t) = 1;
6394 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6396 t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
6397 TREE_SIDE_EFFECTS (t) = 1;
6398 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6400 nint = current_function_args_info.arg_count[SH_ARG_INT];
6401 if (nint < 4)
6402 nint = 4 - nint;
6403 else
6404 nint = 0;
6405 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6406 build_int_cst (NULL_TREE, UNITS_PER_WORD * nint)));
6407 t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
6408 TREE_SIDE_EFFECTS (t) = 1;
6409 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6411 u = make_tree (ptr_type_node, nextarg);
6412 t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
6413 TREE_SIDE_EFFECTS (t) = 1;
6414 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
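/* At this point the va_list fields describe the register save buffer
   created by __builtin_saveregs: __va_next_fp points at the start of
   the buffer, __va_next_fp_limit (which also seeds __va_next_o) lies
   MAX (0, 8 - <named FP args>) words further on, __va_next_o_limit lies
   another MAX (0, 4 - <named int args>) words beyond that, and
   __va_next_stack points at the first stack-passed argument (NEXTARG).  */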
6417 /* Implement `va_arg'. */
6419 static tree
6420 sh_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p,
6421 tree *post_p ATTRIBUTE_UNUSED)
6423 HOST_WIDE_INT size, rsize;
6424 tree tmp, pptr_type_node;
6425 tree addr, lab_over = NULL, result = NULL;
6426 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
6428 if (pass_by_ref)
6429 type = build_pointer_type (type);
6431 size = int_size_in_bytes (type);
6432 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6433 pptr_type_node = build_pointer_type (ptr_type_node);
6435 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
6436 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
6438 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6439 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6440 int pass_as_float;
6441 tree lab_false;
6443 f_next_o = TYPE_FIELDS (va_list_type_node);
6444 f_next_o_limit = TREE_CHAIN (f_next_o);
6445 f_next_fp = TREE_CHAIN (f_next_o_limit);
6446 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6447 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6449 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6450 NULL_TREE);
6451 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6452 valist, f_next_o_limit, NULL_TREE);
6453 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
6454 valist, f_next_fp, NULL_TREE);
6455 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6456 valist, f_next_fp_limit, NULL_TREE);
6457 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6458 valist, f_next_stack, NULL_TREE);
6460 /* Structures with a single member with a distinct mode are passed
6461 like their member. This is relevant if the latter has a REAL_TYPE
6462 or COMPLEX_TYPE type. */
6463 if (TREE_CODE (type) == RECORD_TYPE
6464 && TYPE_FIELDS (type)
6465 && TREE_CODE (TYPE_FIELDS (type)) == FIELD_DECL
6466 && (TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == REAL_TYPE
6467 || TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == COMPLEX_TYPE)
6468 && TREE_CHAIN (TYPE_FIELDS (type)) == NULL_TREE)
6469 type = TREE_TYPE (TYPE_FIELDS (type));
6471 if (TARGET_SH4)
6473 pass_as_float = ((TREE_CODE (type) == REAL_TYPE && size <= 8)
6474 || (TREE_CODE (type) == COMPLEX_TYPE
6475 && TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
6476 && size <= 16));
6478 else
6480 pass_as_float = (TREE_CODE (type) == REAL_TYPE && size == 4);
6483 addr = create_tmp_var (pptr_type_node, NULL);
6484 lab_false = create_artificial_label ();
6485 lab_over = create_artificial_label ();
6487 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
6489 if (pass_as_float)
6491 int first_floatreg
6492 = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6493 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6495 tmp = build (GE_EXPR, boolean_type_node, next_fp, next_fp_limit);
6496 tmp = build (COND_EXPR, void_type_node, tmp,
6497 build (GOTO_EXPR, void_type_node, lab_false),
6498 NULL);
6499 gimplify_and_add (tmp, pre_p);
6501 if (TYPE_ALIGN (type) > BITS_PER_WORD
6502 || (((TREE_CODE (type) == REAL_TYPE && size == 8) || size == 16)
6503 && (n_floatregs & 1)))
6505 tmp = fold_convert (ptr_type_node, size_int (UNITS_PER_WORD));
6506 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp, tmp);
6507 tmp = build (PLUS_EXPR, ptr_type_node, next_fp, tmp);
6508 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp, tmp);
6509 gimplify_and_add (tmp, pre_p);
6512 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
6513 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6514 gimplify_and_add (tmp, pre_p);
6516 #ifdef FUNCTION_ARG_SCmode_WART
6517 if (TYPE_MODE (type) == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
6519 tree subtype = TREE_TYPE (type);
6520 tree real, imag;
6522 imag = std_gimplify_va_arg_expr (valist, subtype, pre_p, NULL);
6523 imag = get_initialized_tmp_var (imag, pre_p, NULL);
6525 real = std_gimplify_va_arg_expr (valist, subtype, pre_p, NULL);
6526 real = get_initialized_tmp_var (real, pre_p, NULL);
6528 result = build (COMPLEX_EXPR, type, real, imag);
6529 result = get_initialized_tmp_var (result, pre_p, NULL);
6531 #endif /* FUNCTION_ARG_SCmode_WART */
6533 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6534 gimplify_and_add (tmp, pre_p);
6536 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6537 gimplify_and_add (tmp, pre_p);
6539 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6540 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6541 gimplify_and_add (tmp, pre_p);
6543 else
6545 tmp = fold_convert (ptr_type_node, size_int (rsize));
6546 tmp = build (PLUS_EXPR, ptr_type_node, next_o, tmp);
6547 tmp = build (GT_EXPR, boolean_type_node, tmp, next_o_limit);
6548 tmp = build (COND_EXPR, void_type_node, tmp,
6549 build (GOTO_EXPR, void_type_node, lab_false),
6550 NULL);
6551 gimplify_and_add (tmp, pre_p);
6553 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
6554 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6555 gimplify_and_add (tmp, pre_p);
6557 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6558 gimplify_and_add (tmp, pre_p);
6560 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6561 gimplify_and_add (tmp, pre_p);
6563 if (size > 4 && ! TARGET_SH4)
6565 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
6566 gimplify_and_add (tmp, pre_p);
6569 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6570 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6571 gimplify_and_add (tmp, pre_p);
6574 if (!result)
6576 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6577 gimplify_and_add (tmp, pre_p);
6581 /* ??? In va-sh.h, there had been code to make values larger than
6582 size 8 indirect. This does not match the FUNCTION_ARG macros. */
6584 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
6585 if (result)
6587 tmp = build (MODIFY_EXPR, void_type_node, result, tmp);
6588 gimplify_and_add (tmp, pre_p);
6590 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6591 gimplify_and_add (tmp, pre_p);
6593 else
6594 result = tmp;
6596 if (pass_by_ref)
6597 result = build_fold_indirect_ref (result);
6599 return result;
6602 bool
6603 sh_promote_prototypes (tree type)
6605 if (TARGET_HITACHI)
6606 return 0;
6607 if (! type)
6608 return 1;
6609 return ! sh_attr_renesas_p (type);
6612 /* Whether an argument must be passed by reference. On SHcompact, we
6613 pretend arguments wider than 32 bits that would have been passed in
6614 registers are passed by reference, so that an SHmedia trampoline
6615 loads them into the full 64-bit registers. */
6617 static int
6618 shcompact_byref (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6619 tree type, bool named)
6621 unsigned HOST_WIDE_INT size;
6623 if (type)
6624 size = int_size_in_bytes (type);
6625 else
6626 size = GET_MODE_SIZE (mode);
6628 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
6629 && (!named
6630 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
6631 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
6632 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
6633 && size > 4
6634 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
6635 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
6636 return size;
6637 else
6638 return 0;
6641 static bool
6642 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6643 tree type, bool named)
6645 if (targetm.calls.must_pass_in_stack (mode, type))
6646 return true;
6648 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
6649 wants to know about pass-by-reference semantics for incoming
6650 arguments. */
6651 if (! cum)
6652 return false;
6654 if (TARGET_SHCOMPACT)
6656 cum->byref = shcompact_byref (cum, mode, type, named);
6657 return cum->byref != 0;
6660 return false;
6663 static bool
6664 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6665 tree type, bool named ATTRIBUTE_UNUSED)
6667 /* ??? How can it possibly be correct to return true only on the
6668 caller side of the equation? Is there someplace else in the
6669 sh backend that's magically producing the copies? */
6670 return (cum->outgoing
6671 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
6672 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
6675 static int
6676 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6677 tree type, bool named ATTRIBUTE_UNUSED)
6679 int words = 0;
6681 if (!TARGET_SH5
6682 && PASS_IN_REG_P (*cum, mode, type)
6683 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
6684 && (ROUND_REG (*cum, mode)
6685 + (mode != BLKmode
6686 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
6687 : ROUND_ADVANCE (int_size_in_bytes (type)))
6688 > NPARM_REGS (mode)))
6689 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
6691 else if (!TARGET_SHCOMPACT
6692 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
6693 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
6695 return words * UNITS_PER_WORD;
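/* As a worked example (illustrative numbers only): on a non-SH5,
   non-SH4 target with four integer argument registers, an argument that
   needs three words when only two argument registers are still free
   satisfies the first test above, so 2 * UNITS_PER_WORD bytes of it are
   reported as passed in registers and the rest goes on the stack.  */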
6699 /* Define where to put the arguments to a function.
6700 Value is zero to push the argument on the stack,
6701 or a hard register in which to store the argument.
6703 MODE is the argument's machine mode.
6704 TYPE is the data type of the argument (as a tree).
6705 This is null for libcalls where that information may
6706 not be available.
6707 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6708 the preceding args and about the function being called.
6709 NAMED is nonzero if this argument is a named parameter
6710 (otherwise it is an extra parameter matching an ellipsis).
6712 On SH the first args are normally in registers
6713 and the rest are pushed. Any arg that starts within the first
6714 NPARM_REGS words is at least partially passed in a register unless
6715 its data type forbids. */
6719 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
6720 tree type, int named)
6722 if (! TARGET_SH5 && mode == VOIDmode)
6723 return GEN_INT (ca->renesas_abi ? 1 : 0);
6725 if (! TARGET_SH5
6726 && PASS_IN_REG_P (*ca, mode, type)
6727 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
6729 int regno;
6731 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
6732 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
6734 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
6735 gen_rtx_REG (SFmode,
6736 BASE_ARG_REG (mode)
6737 + (ROUND_REG (*ca, mode) ^ 1)),
6738 const0_rtx);
6739 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
6740 gen_rtx_REG (SFmode,
6741 BASE_ARG_REG (mode)
6742 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
6743 GEN_INT (4));
6744 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
6747 /* If the alignment of a DF value causes an SF register to be
6748 skipped, we will use that skipped register for the next SF
6749 value. */
6750 if ((TARGET_HITACHI || ca->renesas_abi)
6751 && ca->free_single_fp_reg
6752 && mode == SFmode)
6753 return gen_rtx_REG (mode, ca->free_single_fp_reg);
6755 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
6756 ^ (mode == SFmode && TARGET_SH4
6757 && TARGET_LITTLE_ENDIAN != 0
6758 && ! TARGET_HITACHI && ! ca->renesas_abi);
6759 return gen_rtx_REG (mode, regno);
6763 if (TARGET_SH5)
6765 if (mode == VOIDmode && TARGET_SHCOMPACT)
6766 return GEN_INT (ca->call_cookie);
6768 /* The following test assumes unnamed arguments are promoted to
6769 DFmode. */
6770 if (mode == SFmode && ca->free_single_fp_reg)
6771 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
6773 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
6774 && (named || ! ca->prototype_p)
6775 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
6777 if (! ca->prototype_p && TARGET_SHMEDIA)
6778 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
6780 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
6781 FIRST_FP_PARM_REG
6782 + ca->arg_count[(int) SH_ARG_FLOAT]);
6785 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
6786 && (! TARGET_SHCOMPACT
6787 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
6788 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
6789 type, named))))
6791 return gen_rtx_REG (mode, (FIRST_PARM_REG
6792 + ca->arg_count[(int) SH_ARG_INT]));
6795 return 0;
6798 return 0;
6801 /* Update the data in CUM to advance over an argument
6802 of mode MODE and data type TYPE.
6803 (TYPE is null for libcalls where that information may not be
6804 available.) */
6806 void
6807 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
6808 tree type, int named)
6810 if (ca->force_mem)
6811 ca->force_mem = 0;
6812 else if (TARGET_SH5)
6814 tree type2 = (ca->byref && type
6815 ? TREE_TYPE (type)
6816 : type);
6817 enum machine_mode mode2 = (ca->byref && type
6818 ? TYPE_MODE (type2)
6819 : mode);
6820 int dwords = ((ca->byref
6821 ? ca->byref
6822 : mode2 == BLKmode
6823 ? int_size_in_bytes (type2)
6824 : GET_MODE_SIZE (mode2)) + 7) / 8;
6825 int numregs = MIN (dwords, NPARM_REGS (SImode)
6826 - ca->arg_count[(int) SH_ARG_INT]);
6828 if (numregs)
6830 ca->arg_count[(int) SH_ARG_INT] += numregs;
6831 if (TARGET_SHCOMPACT
6832 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
6834 ca->call_cookie
6835 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6836 - numregs, 1);
6837 /* N.B. We want this also for outgoing. */
6838 ca->stack_regs += numregs;
6840 else if (ca->byref)
6842 if (! ca->outgoing)
6843 ca->stack_regs += numregs;
6844 ca->byref_regs += numregs;
6845 ca->byref = 0;
6847 ca->call_cookie
6848 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6849 - numregs, 2);
6850 while (--numregs);
6851 ca->call_cookie
6852 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6853 - 1, 1);
6855 else if (dwords > numregs)
6857 int pushregs = numregs;
6859 if (TARGET_SHCOMPACT)
6860 ca->stack_regs += numregs;
6861 while (pushregs < NPARM_REGS (SImode) - 1
6862 && (CALL_COOKIE_INT_REG_GET
6863 (ca->call_cookie,
6864 NPARM_REGS (SImode) - pushregs)
6865 == 1))
6867 ca->call_cookie
6868 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6869 - pushregs, 1);
6870 pushregs++;
6872 if (numregs == NPARM_REGS (SImode))
6873 ca->call_cookie
6874 |= CALL_COOKIE_INT_REG (0, 1)
6875 | CALL_COOKIE_STACKSEQ (numregs - 1);
6876 else
6877 ca->call_cookie
6878 |= CALL_COOKIE_STACKSEQ (numregs);
6881 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
6882 && (named || ! ca->prototype_p))
6884 if (mode2 == SFmode && ca->free_single_fp_reg)
6885 ca->free_single_fp_reg = 0;
6886 else if (ca->arg_count[(int) SH_ARG_FLOAT]
6887 < NPARM_REGS (SFmode))
6889 int numfpregs
6890 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
6891 NPARM_REGS (SFmode)
6892 - ca->arg_count[(int) SH_ARG_FLOAT]);
6894 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
6896 if (TARGET_SHCOMPACT && ! ca->prototype_p)
6898 if (ca->outgoing && numregs > 0)
6901 ca->call_cookie
6902 |= (CALL_COOKIE_INT_REG
6903 (ca->arg_count[(int) SH_ARG_INT]
6904 - numregs + ((numfpregs - 2) / 2),
6905 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
6906 - numfpregs) / 2));
6908 while (numfpregs -= 2);
6910 else if (mode2 == SFmode && (named)
6911 && (ca->arg_count[(int) SH_ARG_FLOAT]
6912 < NPARM_REGS (SFmode)))
6913 ca->free_single_fp_reg
6914 = FIRST_FP_PARM_REG - numfpregs
6915 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
6918 return;
6921 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
6923 /* Note that we've used the skipped register. */
6924 if (mode == SFmode && ca->free_single_fp_reg)
6926 ca->free_single_fp_reg = 0;
6927 return;
6929 /* When we have a DF after an SF, there's an SF register that gets
6930 skipped in order to align the DF value. We note this skipped
6931 register, because the next SF value will use it, and not the
6932 SF that follows the DF. */
6933 if (mode == DFmode
6934 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
6936 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
6937 + BASE_ARG_REG (mode));
6941 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
6942 || PASS_IN_REG_P (*ca, mode, type))
6943 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
6944 = (ROUND_REG (*ca, mode)
6945 + (mode == BLKmode
6946 ? ROUND_ADVANCE (int_size_in_bytes (type))
6947 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
6950 /* The Renesas calling convention doesn't quite fit into this scheme since
6951 the structure return address is passed like an invisible argument, but one that is always
6952 passed in memory. */
6953 static rtx
6954 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
6956 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
6957 return 0;
6958 return gen_rtx_REG (Pmode, 2);
6961 /* Worker function for TARGET_RETURN_IN_MEMORY. */
6963 static bool
6964 sh_return_in_memory (tree type, tree fndecl)
6966 if (TARGET_SH5)
6968 if (TYPE_MODE (type) == BLKmode)
6969 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
6970 else
6971 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
6973 else
6975 return (TYPE_MODE (type) == BLKmode
6976 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
6977 && TREE_CODE (type) == RECORD_TYPE));
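/* Concretely: on SH5 an aggregate larger than 8 bytes (e.g. a 16-byte
   struct) is returned in memory while an 8-byte value still comes back
   in registers; on the other targets any BLKmode value is returned in
   memory, as is any RECORD_TYPE under TARGET_HITACHI or the renesas
   attribute.  */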
6981 /* We actually emit the code in sh_expand_prologue. We used to use
6982 a static variable to flag that we need to emit this code, but that
6983 doesn't work when inlining, when functions are deferred and then emitted
6984 later. Fortunately, we already have two flags that are part of struct
6985 function that tell if a function uses varargs or stdarg. */
6986 static void
6987 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
6988 enum machine_mode mode,
6989 tree type,
6990 int *pretend_arg_size,
6991 int second_time ATTRIBUTE_UNUSED)
6993 if (! current_function_stdarg)
6994 abort ();
6995 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6997 int named_parm_regs, anon_parm_regs;
6999 named_parm_regs = (ROUND_REG (*ca, mode)
7000 + (mode == BLKmode
7001 ? ROUND_ADVANCE (int_size_in_bytes (type))
7002 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
7003 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
7004 if (anon_parm_regs > 0)
7005 *pretend_arg_size = anon_parm_regs * 4;
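/* A small worked example (assuming the usual four integer argument
   registers on non-SH5 targets): for a stdarg function whose named
   arguments occupy one register, anon_parm_regs is 3, so
   *pretend_arg_size becomes 12 and three words of register arguments
   are spilled as if the caller had passed them on the stack.  */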
7009 static bool
7010 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
7012 return TARGET_SH5;
7015 static bool
7016 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
7018 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
7022 /* Define the offset between two registers, one to be eliminated, and
7023 the other its replacement, at the start of a routine. */
7026 initial_elimination_offset (int from, int to)
7028 int regs_saved;
7029 int regs_saved_rounding = 0;
7030 int total_saved_regs_space;
7031 int total_auto_space;
7032 int save_flags = target_flags;
7033 int copy_flags;
7034 HARD_REG_SET live_regs_mask;
7036 shmedia_space_reserved_for_target_registers = false;
7037 regs_saved = calc_live_regs (&live_regs_mask);
7038 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
7040 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
7042 shmedia_space_reserved_for_target_registers = true;
7043 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
7046 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
7047 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7048 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
7050 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
7051 copy_flags = target_flags;
7052 target_flags = save_flags;
7054 total_saved_regs_space = regs_saved + regs_saved_rounding;
7056 if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
7057 return total_saved_regs_space + total_auto_space
7058 + current_function_args_info.byref_regs * 8;
7060 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7061 return total_saved_regs_space + total_auto_space
7062 + current_function_args_info.byref_regs * 8;
7064 /* Initial gap between fp and sp is 0. */
7065 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7066 return 0;
7068 if (from == RETURN_ADDRESS_POINTER_REGNUM
7069 && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM))
7071 if (TARGET_SH5)
7073 int n = total_saved_regs_space;
7074 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7075 save_schedule schedule;
7076 save_entry *entry;
7078 n += total_auto_space;
7080 /* If it wasn't saved, there's not much we can do. */
7081 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7082 return n;
7084 target_flags = copy_flags;
7086 sh5_schedule_saves (&live_regs_mask, &schedule, n);
7087 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7088 if (entry->reg == pr_reg)
7090 target_flags = save_flags;
7091 return entry->offset;
7093 abort ();
7095 else
7096 return total_auto_space;
7099 abort ();
7102 /* Handle machine-specific pragmas to be semi-compatible with the Renesas
7103 compiler. */
7105 void
7106 sh_pr_interrupt (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7108 pragma_interrupt = 1;
7111 void
7112 sh_pr_trapa (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7114 pragma_interrupt = pragma_trapa = 1;
7117 void
7118 sh_pr_nosave_low_regs (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7120 pragma_nosave_low_regs = 1;
7123 /* Generate an 'interrupt_handler' attribute for decls. */
7125 static void
7126 sh_insert_attributes (tree node, tree *attributes)
7128 if (! pragma_interrupt
7129 || TREE_CODE (node) != FUNCTION_DECL)
7130 return;
7132 /* We are only interested in declarations. */
7133 if (!DECL_P (node))
7134 return;
7136 /* Add an 'interrupt_handler' attribute. */
7137 * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
7139 return;
7142 /* Supported attributes:
7144 interrupt_handler -- specifies this function is an interrupt handler.
7146 sp_switch -- specifies an alternate stack for an interrupt handler
7147 to run on.
7149 trap_exit -- use a trapa to exit an interrupt function instead of
7150 an rte instruction.
7152 renesas -- use Renesas calling/layout conventions (functions and
7153 structures).
7157 const struct attribute_spec sh_attribute_table[] =
7159 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
7160 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7161 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
7162 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
7163 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
7164 #ifdef SYMBIAN
7165 /* Symbian support adds three new attributes:
7166 dllexport - for exporting a function/variable that will live in a dll
7167 dllimport - for importing a function/variable from a dll
7169 Microsoft allows multiple declspecs in one __declspec, separating
7170 them with spaces. We do NOT support this. Instead, use __declspec
7171 multiple times. */
7172 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7173 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7174 #endif
7175 { NULL, 0, 0, false, false, false, NULL }
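/* For reference, a use of these attributes in user code might look like
   this (the names and the trap number below are purely illustrative):

     void my_handler (void)
       __attribute__ ((interrupt_handler,
                       sp_switch ("alt_stack"),
                       trap_exit (11)));

   interrupt_handler takes no arguments, sp_switch expects a string
   constant naming the alternate stack, and trap_exit expects an integer
   constant giving the trapa number, as checked by the handlers below.  */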
7178 /* Handle an "interrupt_handler" attribute; arguments as in
7179 struct attribute_spec.handler. */
7180 static tree
7181 sh_handle_interrupt_handler_attribute (tree *node, tree name,
7182 tree args ATTRIBUTE_UNUSED,
7183 int flags ATTRIBUTE_UNUSED,
7184 bool *no_add_attrs)
7186 if (TREE_CODE (*node) != FUNCTION_DECL)
7188 warning ("%qs attribute only applies to functions",
7189 IDENTIFIER_POINTER (name));
7190 *no_add_attrs = true;
7192 else if (TARGET_SHCOMPACT)
7194 error ("attribute interrupt_handler is not compatible with -m5-compact");
7195 *no_add_attrs = true;
7198 return NULL_TREE;
7201 /* Handle an "sp_switch" attribute; arguments as in
7202 struct attribute_spec.handler. */
7203 static tree
7204 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
7205 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7207 if (TREE_CODE (*node) != FUNCTION_DECL)
7209 warning ("%qs attribute only applies to functions",
7210 IDENTIFIER_POINTER (name));
7211 *no_add_attrs = true;
7213 else if (!pragma_interrupt)
7215 /* The sp_switch attribute only has meaning for interrupt functions. */
7216 warning ("%qs attribute only applies to interrupt functions",
7217 IDENTIFIER_POINTER (name));
7218 *no_add_attrs = true;
7220 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
7222 /* The argument must be a constant string. */
7223 warning ("%qs attribute argument not a string constant",
7224 IDENTIFIER_POINTER (name));
7225 *no_add_attrs = true;
7227 else
7229 const char *s = ggc_strdup (TREE_STRING_POINTER (TREE_VALUE (args)));
7230 sp_switch = gen_rtx_SYMBOL_REF (VOIDmode, s);
7233 return NULL_TREE;
7236 /* Handle a "trap_exit" attribute; arguments as in
7237 struct attribute_spec.handler. */
7238 static tree
7239 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
7240 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7242 if (TREE_CODE (*node) != FUNCTION_DECL)
7244 warning ("%qs attribute only applies to functions",
7245 IDENTIFIER_POINTER (name));
7246 *no_add_attrs = true;
7248 else if (!pragma_interrupt)
7250 /* The trap_exit attribute only has meaning for interrupt functions. */
7251 warning ("%qs attribute only applies to interrupt functions",
7252 IDENTIFIER_POINTER (name));
7253 *no_add_attrs = true;
7255 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
7257 /* The argument must be a constant integer. */
7258 warning ("%qs attribute argument not an integer constant",
7259 IDENTIFIER_POINTER (name));
7260 *no_add_attrs = true;
7262 else
7264 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
7267 return NULL_TREE;
7270 static tree
7271 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
7272 tree name ATTRIBUTE_UNUSED,
7273 tree args ATTRIBUTE_UNUSED,
7274 int flags ATTRIBUTE_UNUSED,
7275 bool *no_add_attrs ATTRIBUTE_UNUSED)
7277 return NULL_TREE;
7280 /* True if __attribute__((renesas)) or -mrenesas. */
7282 sh_attr_renesas_p (tree td)
7284 if (TARGET_HITACHI)
7285 return 1;
7286 if (td == 0)
7287 return 0;
7288 if (DECL_P (td))
7289 td = TREE_TYPE (td);
7290 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
7291 != NULL_TREE);
7294 /* True if __attribute__((renesas)) or -mrenesas, for the current
7295 function. */
7297 sh_cfun_attr_renesas_p (void)
7299 return sh_attr_renesas_p (current_function_decl);
7303 sh_cfun_interrupt_handler_p (void)
7305 return (lookup_attribute ("interrupt_handler",
7306 DECL_ATTRIBUTES (current_function_decl))
7307 != NULL_TREE);
7310 /* ??? target_switches in toplev.c is static, hence we have to duplicate it. */
7311 static const struct
7313 const char *const name;
7314 const int value;
7315 const char *const description;
7317 sh_target_switches[] = TARGET_SWITCHES;
7318 #define target_switches sh_target_switches
7320 /* Like default_pch_valid_p, but take flag_mask into account. */
7321 const char *
7322 sh_pch_valid_p (const void *data_p, size_t len)
7324 const char *data = (const char *)data_p;
7325 const char *flag_that_differs = NULL;
7326 size_t i;
7327 int old_flags;
7328 int flag_mask
7329 = (SH1_BIT | SH2_BIT | SH3_BIT | SH_E_BIT | HARD_SH4_BIT | FPU_SINGLE_BIT
7330 | SH4_BIT | HITACHI_BIT | LITTLE_ENDIAN_BIT);
7332 /* -fpic and -fpie also usually make a PCH invalid. */
7333 if (data[0] != flag_pic)
7334 return _("created and used with different settings of -fpic");
7335 if (data[1] != flag_pie)
7336 return _("created and used with different settings of -fpie");
7337 data += 2;
7339 /* Check target_flags. */
7340 memcpy (&old_flags, data, sizeof (target_flags));
7341 if (((old_flags ^ target_flags) & flag_mask) != 0)
7343 for (i = 0; i < ARRAY_SIZE (target_switches); i++)
7345 int bits;
7347 bits = target_switches[i].value;
7348 if (bits < 0)
7349 bits = -bits;
7350 bits &= flag_mask;
7351 if ((target_flags & bits) != (old_flags & bits))
7353 flag_that_differs = target_switches[i].name;
7354 goto make_message;
7357 abort ();
7359 data += sizeof (target_flags);
7360 len -= sizeof (target_flags);
7362 /* Check string options. */
7363 #ifdef TARGET_OPTIONS
7364 for (i = 0; i < ARRAY_SIZE (target_options); i++)
7366 const char *str = *target_options[i].variable;
7367 size_t l;
7368 if (! str)
7369 str = "";
7370 l = strlen (str) + 1;
7371 if (len < l || memcmp (data, str, l) != 0)
7373 flag_that_differs = target_options[i].prefix;
7374 goto make_message;
7376 data += l;
7377 len -= l;
7379 #endif
7381 return NULL;
7383 make_message:
7385 char *r;
7386 asprintf (&r, _("created and used with differing settings of '-m%s'"),
7387 flag_that_differs);
7388 if (r == NULL)
7389 return _("out of memory");
7390 return r;
7394 /* Predicates used by the templates. */
7396 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
7397 Used only in general_movsrc_operand. */
7400 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7402 switch (REGNO (op))
7404 case PR_REG:
7405 case MACL_REG:
7406 case MACH_REG:
7407 return 1;
7409 return 0;
7412 /* Returns 1 if OP can be the source of a simple move operation.
7413 Same as general_operand, but a LABEL_REF is valid, PRE_DEC is
7414 invalid, as are subregs of system registers. */
7417 general_movsrc_operand (rtx op, enum machine_mode mode)
7419 if (GET_CODE (op) == MEM)
7421 rtx inside = XEXP (op, 0);
7422 if (GET_CODE (inside) == CONST)
7423 inside = XEXP (inside, 0);
7425 if (GET_CODE (inside) == LABEL_REF)
7426 return 1;
7428 if (GET_CODE (inside) == PLUS
7429 && GET_CODE (XEXP (inside, 0)) == LABEL_REF
7430 && GET_CODE (XEXP (inside, 1)) == CONST_INT)
7431 return 1;
7433 /* Only post inc allowed. */
7434 if (GET_CODE (inside) == PRE_DEC)
7435 return 0;
7438 if ((mode == QImode || mode == HImode)
7439 && (GET_CODE (op) == SUBREG
7440 && GET_CODE (XEXP (op, 0)) == REG
7441 && system_reg_operand (XEXP (op, 0), mode)))
7442 return 0;
7444 return general_operand (op, mode);
7447 /* Returns 1 if OP can be a destination of a move.
7448 Same as general_operand, but no post-increment allowed. */
7451 general_movdst_operand (rtx op, enum machine_mode mode)
7453 /* Only pre dec allowed. */
7454 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC)
7455 return 0;
7457 return general_operand (op, mode);
7460 /* Returns 1 if OP is a normal arithmetic register. */
7463 arith_reg_operand (rtx op, enum machine_mode mode)
7465 if (register_operand (op, mode))
7467 int regno;
7469 if (GET_CODE (op) == REG)
7470 regno = REGNO (op);
7471 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7472 regno = REGNO (SUBREG_REG (op));
7473 else
7474 return 1;
7476 return (regno != T_REG && regno != PR_REG
7477 && ! TARGET_REGISTER_P (regno)
7478 && (regno != FPUL_REG || TARGET_SH4)
7479 && regno != MACH_REG && regno != MACL_REG);
7481 return 0;
7484 /* Like above, but for DImode destinations: forbid paradoxical DImode subregs,
7485 because this would lead to missing sign extensions when truncating from
7486 DImode to SImode. */
7488 arith_reg_dest (rtx op, enum machine_mode mode)
7490 if (mode == DImode && GET_CODE (op) == SUBREG
7491 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8)
7492 return 0;
7493 return arith_reg_operand (op, mode);
7497 int_gpr_dest (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7499 enum machine_mode op_mode = GET_MODE (op);
7501 if (GET_MODE_CLASS (op_mode) != MODE_INT
7502 || GET_MODE_SIZE (op_mode) >= UNITS_PER_WORD)
7503 return 0;
7504 if (! reload_completed)
7505 return 0;
7506 return true_regnum (op) <= LAST_GENERAL_REG;
7510 fp_arith_reg_operand (rtx op, enum machine_mode mode)
7512 if (register_operand (op, mode))
7514 int regno;
7516 if (GET_CODE (op) == REG)
7517 regno = REGNO (op);
7518 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7519 regno = REGNO (SUBREG_REG (op));
7520 else
7521 return 1;
7523 return (regno >= FIRST_PSEUDO_REGISTER
7524 || FP_REGISTER_P (regno));
7526 return 0;
7529 /* Returns 1 if OP is a valid source operand for an arithmetic insn. */
7532 arith_operand (rtx op, enum machine_mode mode)
7534 if (arith_reg_operand (op, mode))
7535 return 1;
7537 if (TARGET_SHMEDIA)
7539 /* FIXME: We should be checking whether the CONST_INT fits in a
7540 CONST_OK_FOR_I16 here, but this causes reload_cse to crash when
7541 attempting to transform a sequence of two 64-bit sets of the
7542 same register from literal constants into a set and an add,
7543 when the difference is too wide for an add. */
7544 if (GET_CODE (op) == CONST_INT
7545 || EXTRA_CONSTRAINT_C16 (op))
7546 return 1;
7547 else
7548 return 0;
7550 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I08 (INTVAL (op)))
7551 return 1;
7553 return 0;
7556 /* Returns 1 if OP is a valid source operand for a compare insn. */
7559 arith_reg_or_0_operand (rtx op, enum machine_mode mode)
7561 if (arith_reg_operand (op, mode))
7562 return 1;
7564 if (EXTRA_CONSTRAINT_Z (op))
7565 return 1;
7567 return 0;
7570 /* Return 1 if OP is a valid source operand for an SHmedia operation
7571 that takes either a register or a 6-bit immediate. */
7574 shmedia_6bit_operand (rtx op, enum machine_mode mode)
7576 return (arith_reg_operand (op, mode)
7577 || (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I06 (INTVAL (op))));
7580 /* Returns 1 if OP is a valid source operand for a logical operation. */
7583 logical_operand (rtx op, enum machine_mode mode)
7585 if (arith_reg_operand (op, mode))
7586 return 1;
7588 if (TARGET_SHMEDIA)
7590 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I10 (INTVAL (op)))
7591 return 1;
7592 else
7593 return 0;
7595 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_K08 (INTVAL (op)))
7596 return 1;
7598 return 0;
7602 and_operand (rtx op, enum machine_mode mode)
7604 if (logical_operand (op, mode))
7605 return 1;
7607 /* Check mshflo.l / mshflhi.l opportunities. */
7608 if (TARGET_SHMEDIA
7609 && mode == DImode
7610 && GET_CODE (op) == CONST_INT
7611 && CONST_OK_FOR_J16 (INTVAL (op)))
7612 return 1;
7614 return 0;
7617 /* Nonzero if OP is a floating point value with value 0.0. */
7620 fp_zero_operand (rtx op)
7622 REAL_VALUE_TYPE r;
7624 if (GET_MODE (op) != SFmode)
7625 return 0;
7627 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7628 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
7631 /* Nonzero if OP is a floating point value with value 1.0. */
7634 fp_one_operand (rtx op)
7636 REAL_VALUE_TYPE r;
7638 if (GET_MODE (op) != SFmode)
7639 return 0;
7641 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7642 return REAL_VALUES_EQUAL (r, dconst1);
7645 /* For -m4 and -m4-single-only, mode switching is used. If we are
7646 compiling without -mfmovd, movsf_ie isn't taken into account for
7647 mode switching. We could check in machine_dependent_reorg for
7648 cases where we know we are in single precision mode, but there is
7649 no interface to find that out during reload, so we must avoid
7650 choosing an fldi alternative during reload and thus failing to
7651 allocate a scratch register for the constant loading. */
7653 fldi_ok (void)
7655 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
7659 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7661 enum rtx_code code = GET_CODE (op);
7662 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
7666 fpscr_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7668 return (GET_CODE (op) == REG
7669 && (REGNO (op) == FPSCR_REG
7670 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
7671 && !(reload_in_progress || reload_completed)))
7672 && GET_MODE (op) == PSImode);
7676 fpul_operand (rtx op, enum machine_mode mode)
7678 if (TARGET_SHMEDIA)
7679 return fp_arith_reg_operand (op, mode);
7681 return (GET_CODE (op) == REG
7682 && (REGNO (op) == FPUL_REG || REGNO (op) >= FIRST_PSEUDO_REGISTER)
7683 && GET_MODE (op) == mode);
7687 symbol_ref_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7689 return (GET_CODE (op) == SYMBOL_REF);
7692 /* Return the TLS type for TLS symbols, 0 otherwise. */
7694 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7696 if (GET_CODE (op) != SYMBOL_REF)
7697 return 0;
7698 return SYMBOL_REF_TLS_MODEL (op);
7702 commutative_float_operator (rtx op, enum machine_mode mode)
7704 if (GET_MODE (op) != mode)
7705 return 0;
7706 switch (GET_CODE (op))
7708 case PLUS:
7709 case MULT:
7710 return 1;
7711 default:
7712 break;
7714 return 0;
7718 noncommutative_float_operator (rtx op, enum machine_mode mode)
7720 if (GET_MODE (op) != mode)
7721 return 0;
7722 switch (GET_CODE (op))
7724 case MINUS:
7725 case DIV:
7726 return 1;
7727 default:
7728 break;
7730 return 0;
7734 unary_float_operator (rtx op, enum machine_mode mode)
7736 if (GET_MODE (op) != mode)
7737 return 0;
7738 switch (GET_CODE (op))
7740 case ABS:
7741 case NEG:
7742 case SQRT:
7743 return 1;
7744 default:
7745 break;
7747 return 0;
7751 binary_float_operator (rtx op, enum machine_mode mode)
7753 if (GET_MODE (op) != mode)
7754 return 0;
7755 switch (GET_CODE (op))
7757 case PLUS:
7758 case MINUS:
7759 case MULT:
7760 case DIV:
7761 return 1;
7762 default:
7763 break;
7765 return 0;
7769 binary_logical_operator (rtx op, enum machine_mode mode)
7771 if (GET_MODE (op) != mode)
7772 return 0;
7773 switch (GET_CODE (op))
7775 case IOR:
7776 case AND:
7777 case XOR:
7778 return 1;
7779 default:
7780 break;
7782 return 0;
7786 equality_comparison_operator (rtx op, enum machine_mode mode)
7788 return ((mode == VOIDmode || GET_MODE (op) == mode)
7789 && (GET_CODE (op) == EQ || GET_CODE (op) == NE));
7793 greater_comparison_operator (rtx op, enum machine_mode mode)
7795 if (mode != VOIDmode && GET_MODE (op) != mode)
7796 return 0;
7797 switch (GET_CODE (op))
7799 case GT:
7800 case GE:
7801 case GTU:
7802 case GEU:
7803 return 1;
7804 default:
7805 return 0;
7810 less_comparison_operator (rtx op, enum machine_mode mode)
7812 if (mode != VOIDmode && GET_MODE (op) != mode)
7813 return 0;
7814 switch (GET_CODE (op))
7816 case LT:
7817 case LE:
7818 case LTU:
7819 case LEU:
7820 return 1;
7821 default:
7822 return 0;
7826 /* Accept pseudos and branch target registers. */
7828 target_reg_operand (rtx op, enum machine_mode mode)
7830 if (mode != DImode
7831 || GET_MODE (op) != DImode)
7832 return 0;
7834 if (GET_CODE (op) == SUBREG)
7835 op = XEXP (op, 0);
7837 if (GET_CODE (op) != REG)
7838 return 0;
7840 /* We must protect ourselves from matching pseudos that are virtual
7841 registers, because they will eventually be replaced with hardware
7842 registers that aren't branch-target registers. */
7843 if (REGNO (op) > LAST_VIRTUAL_REGISTER
7844 || TARGET_REGISTER_P (REGNO (op)))
7845 return 1;
7847 return 0;
7850 /* Same as target_reg_operand, except that label_refs and symbol_refs
7851 are accepted before reload. */
7853 target_operand (rtx op, enum machine_mode mode)
7855 if (mode != DImode)
7856 return 0;
7858 if ((GET_MODE (op) == DImode || GET_MODE (op) == VOIDmode)
7859 && EXTRA_CONSTRAINT_Csy (op))
7860 return ! reload_completed;
7862 return target_reg_operand (op, mode);
7866 mextr_bit_offset (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7868 HOST_WIDE_INT i;
7870 if (GET_CODE (op) != CONST_INT)
7871 return 0;
7872 i = INTVAL (op);
7873 return i >= 1 * 8 && i <= 7 * 8 && (i & 7) == 0;
7877 extend_reg_operand (rtx op, enum machine_mode mode)
7879 return (GET_CODE (op) == TRUNCATE
7880 ? arith_operand
7881 : arith_reg_operand) (op, mode);
7885 trunc_hi_operand (rtx op, enum machine_mode mode)
7887 enum machine_mode op_mode = GET_MODE (op);
7889 if (op_mode != SImode && op_mode != DImode
7890 && op_mode != V4HImode && op_mode != V2SImode)
7891 return 0;
7892 return extend_reg_operand (op, mode);
7896 extend_reg_or_0_operand (rtx op, enum machine_mode mode)
7898 return (GET_CODE (op) == TRUNCATE
7899 ? arith_operand
7900 : arith_reg_or_0_operand) (op, mode);
7904 general_extend_operand (rtx op, enum machine_mode mode)
7906 return (GET_CODE (op) == TRUNCATE
7907 ? arith_operand
7908 : nonimmediate_operand) (op, mode);
7912 inqhi_operand (rtx op, enum machine_mode mode)
7914 if (GET_CODE (op) != TRUNCATE || mode != GET_MODE (op))
7915 return 0;
7916 op = XEXP (op, 0);
7917 /* Can't use true_regnum here because copy_cost wants to know about
7918 SECONDARY_INPUT_RELOAD_CLASS. */
7919 return GET_CODE (op) == REG && FP_REGISTER_P (REGNO (op));
7923 sh_rep_vec (rtx v, enum machine_mode mode)
7925 int i;
7926 rtx x, y;
7928 if ((GET_CODE (v) != CONST_VECTOR && GET_CODE (v) != PARALLEL)
7929 || (GET_MODE (v) != mode && mode != VOIDmode))
7930 return 0;
7931 i = XVECLEN (v, 0) - 2;
7932 x = XVECEXP (v, 0, i + 1);
7933 if (GET_MODE_UNIT_SIZE (mode) == 1)
7935 y = XVECEXP (v, 0, i);
7936 for (i -= 2; i >= 0; i -= 2)
7937 if (! rtx_equal_p (XVECEXP (v, 0, i + 1), x)
7938 || ! rtx_equal_p (XVECEXP (v, 0, i), y))
7939 return 0;
7941 else
7942 for (; i >= 0; i--)
7943 if (XVECEXP (v, 0, i) != x)
7944 return 0;
7945 return 1;
7948 /* Determine if V is a constant vector matching MODE with only one element
7949 that is not a sign extension. Two byte-sized elements count as one. */
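/* As an illustrative sketch (not part of the original comments): on a
   little-endian target with V4HImode, a vector such as
     (const_vector:V4HI [(const_int 17) (const_int 0) (const_int 0) (const_int 0)])
   is accepted, because every element other than the least significant one
   matches the sign (here zero) of the element at SIGN_IX, whereas
     (const_vector:V4HI [(const_int 17) (const_int 0) (const_int 0) (const_int 1)])
   is rejected.  */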
7951 sh_1el_vec (rtx v, enum machine_mode mode)
7953 int unit_size;
7954 int i, last, least, sign_ix;
7955 rtx sign;
7957 if (GET_CODE (v) != CONST_VECTOR
7958 || (GET_MODE (v) != mode && mode != VOIDmode))
7959 return 0;
7960 /* Determine the indices of the last and the least significant elements. */
7961 last = XVECLEN (v, 0) - 1;
7962 least = TARGET_LITTLE_ENDIAN ? 0 : last;
7963 if (GET_CODE (XVECEXP (v, 0, least)) != CONST_INT)
7964 return 0;
7965 sign_ix = least;
7966 if (GET_MODE_UNIT_SIZE (mode) == 1)
7967 sign_ix = TARGET_LITTLE_ENDIAN ? 1 : last - 1;
7968 if (GET_CODE (XVECEXP (v, 0, sign_ix)) != CONST_INT)
7969 return 0;
7970 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (v));
7971 sign = (INTVAL (XVECEXP (v, 0, sign_ix)) >> (unit_size * BITS_PER_UNIT - 1)
7972 ? constm1_rtx : const0_rtx);
7973 i = XVECLEN (v, 0) - 1;
7975 if (i != least && i != sign_ix && XVECEXP (v, 0, i) != sign)
7976 return 0;
7977 while (--i);
7978 return 1;
7982 sh_const_vec (rtx v, enum machine_mode mode)
7984 int i;
7986 if (GET_CODE (v) != CONST_VECTOR
7987 || (GET_MODE (v) != mode && mode != VOIDmode))
7988 return 0;
7989 i = XVECLEN (v, 0) - 1;
7990 for (; i >= 0; i--)
7991 if (GET_CODE (XVECEXP (v, 0, i)) != CONST_INT)
7992 return 0;
7993 return 1;
7996 /* Return the destination address of a branch. */
7998 static int
7999 branch_dest (rtx branch)
8001 rtx dest = SET_SRC (PATTERN (branch));
8002 int dest_uid;
8004 if (GET_CODE (dest) == IF_THEN_ELSE)
8005 dest = XEXP (dest, 1);
8006 dest = XEXP (dest, 0);
8007 dest_uid = INSN_UID (dest);
8008 return INSN_ADDRESSES (dest_uid);
8011 /* Return nonzero if REG is not used after INSN.
8012 We assume REG is a reload reg, and therefore does
8013 not live past labels. It may live past calls or jumps though. */
8015 reg_unused_after (rtx reg, rtx insn)
8017 enum rtx_code code;
8018 rtx set;
8020 /* If the reg is set by this instruction, then it is safe for our
8021 case. Disregard the case where this is a store to memory, since
8022 we are checking a register used in the store address. */
8023 set = single_set (insn);
8024 if (set && GET_CODE (SET_DEST (set)) != MEM
8025 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8026 return 1;
8028 while ((insn = NEXT_INSN (insn)))
8030 rtx set;
8031 if (!INSN_P (insn))
8032 continue;
8034 code = GET_CODE (insn);
8036 #if 0
8037 /* If this is a label that existed before reload, then the register
8038 is dead here. However, if this is a label added by reorg, then
8039 the register may still be live here. We can't tell the difference,
8040 so we just ignore labels completely. */
8041 if (code == CODE_LABEL)
8042 return 1;
8043 /* else */
8044 #endif
8046 if (code == JUMP_INSN)
8047 return 0;
8049 /* If this is a sequence, we must handle them all at once.
8050 We could have for instance a call that sets the target register,
8051 and an insn in a delay slot that uses the register. In this case,
8052 we must return 0. */
8053 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
8055 int i;
8056 int retval = 0;
8058 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
8060 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
8061 rtx set = single_set (this_insn);
8063 if (GET_CODE (this_insn) == CALL_INSN)
8064 code = CALL_INSN;
8065 else if (GET_CODE (this_insn) == JUMP_INSN)
8067 if (INSN_ANNULLED_BRANCH_P (this_insn))
8068 return 0;
8069 code = JUMP_INSN;
8072 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8073 return 0;
8074 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8076 if (GET_CODE (SET_DEST (set)) != MEM)
8077 retval = 1;
8078 else
8079 return 0;
8081 if (set == 0
8082 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
8083 return 0;
8085 if (retval == 1)
8086 return 1;
8087 else if (code == JUMP_INSN)
8088 return 0;
8091 set = single_set (insn);
8092 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8093 return 0;
8094 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8095 return GET_CODE (SET_DEST (set)) != MEM;
8096 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
8097 return 0;
8099 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
8100 return 1;
8102 return 1;
8105 #include "ggc.h"
8107 static GTY(()) rtx fpscr_rtx;
8109 get_fpscr_rtx (void)
8111 if (! fpscr_rtx)
8113 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
8114 REG_USERVAR_P (fpscr_rtx) = 1;
8115 mark_user_reg (fpscr_rtx);
8117 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
8118 mark_user_reg (fpscr_rtx);
8119 return fpscr_rtx;
8122 void
8123 emit_sf_insn (rtx pat)
8125 emit_insn (pat);
8128 void
8129 emit_df_insn (rtx pat)
8131 emit_insn (pat);
8134 void
8135 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8137 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8140 void
8141 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8143 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
8144 get_fpscr_rtx ()));
8147 void
8148 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8150 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8153 void
8154 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8156 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
8157 get_fpscr_rtx ()));
8160 /* ??? gcc does flow analysis strictly after common subexpression
8161 elimination. As a result, common subexpression elimination fails
8162 when there are some intervening statements setting the same register.
8163 If we did nothing about this, this would hurt the precision switching
8164 for SH4 badly. There is some cse after reload, but it is unable to
8165 undo the extra register pressure from the unused instructions, and
8166 it cannot remove auto-increment loads.
8168 A C code example that shows this flow/cse weakness for (at least) SH
8169 and sparc (as of gcc ss-970706) is this:
8171 double
8172 f(double a)
8174 double d;
8175 d = 0.1;
8176 a += d;
8177 d = 1.1;
8178 d = 0.1;
8179 a *= d;
8180 return a;
8183 So we add another pass before common subexpression elimination, to
8184 remove assignments that are dead due to a following assignment in the
8185 same basic block. */
8187 static void
8188 mark_use (rtx x, rtx *reg_set_block)
8190 enum rtx_code code;
8192 if (! x)
8193 return;
8194 code = GET_CODE (x);
8195 switch (code)
8197 case REG:
8199 int regno = REGNO (x);
8200 int nregs = (regno < FIRST_PSEUDO_REGISTER
8201 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
8202 : 1);
8205 reg_set_block[regno + nregs - 1] = 0;
8207 while (--nregs);
8208 break;
8210 case SET:
8212 rtx dest = SET_DEST (x);
8214 if (GET_CODE (dest) == SUBREG)
8215 dest = SUBREG_REG (dest);
8216 if (GET_CODE (dest) != REG)
8217 mark_use (dest, reg_set_block);
8218 mark_use (SET_SRC (x), reg_set_block);
8219 break;
8221 case CLOBBER:
8222 break;
8223 default:
8225 const char *fmt = GET_RTX_FORMAT (code);
8226 int i, j;
8227 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8229 if (fmt[i] == 'e')
8230 mark_use (XEXP (x, i), reg_set_block);
8231 else if (fmt[i] == 'E')
8232 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8233 mark_use (XVECEXP (x, i, j), reg_set_block);
8235 break;
8240 static rtx get_free_reg (HARD_REG_SET);
8242 /* This function returns a register to use to load the address from which
8243 to load the fpscr. Currently it always returns r1 or r7, but when we are
8244 able to use pseudo registers after combine, or have a better mechanism
8245 for choosing a register, it should be done here. */
8246 /* REGS_LIVE is the liveness information for the point for which we
8247 need this allocation. In some bare-bones exit blocks, r1 is live at the
8248 start. We can even have all of r0..r3 being live:
8249 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8250 The INSN before which new insns are placed will clobber the register
8251 we return. If a basic block consists only of setting the return value
8252 register to a pseudo and using that register, the return value is not
8253 live before or after this block, yet we'll insert our insns right in
8254 the middle. */
8256 static rtx
8257 get_free_reg (HARD_REG_SET regs_live)
8259 if (! TEST_HARD_REG_BIT (regs_live, 1))
8260 return gen_rtx_REG (Pmode, 1);
8262 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8263 there shouldn't be anything but a jump before the function end. */
8264 if (! TEST_HARD_REG_BIT (regs_live, 7))
8265 return gen_rtx_REG (Pmode, 7);
8267 abort ();
8270 /* This function will set the fpscr from memory.
8271 MODE is the mode we are setting it to. */
8272 void
8273 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8275 enum attr_fp_mode fp_mode = mode;
8276 rtx addr_reg = get_free_reg (regs_live);
8278 if (fp_mode == (enum attr_fp_mode) ACTUAL_NORMAL_MODE (FP_MODE))
8279 emit_insn (gen_fpu_switch1 (addr_reg));
8280 else
8281 emit_insn (gen_fpu_switch0 (addr_reg));
8284 /* Is the given character a logical line separator for the assembler? */
8285 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8286 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
8287 #endif
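/* As a rough illustration (an assumed example, not taken from this file):
   an inline asm whose template contains the single sh-dsp line
   "padd x0,y0,a0" gets a 2-byte adjustment from the scan below,
   "pref @r1" gets none because of the "re" check, and a "repeat"
   pseudo-insn gets 4.  */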
8290 sh_insn_length_adjustment (rtx insn)
8292 /* Instructions with unfilled delay slots take up an extra two bytes for
8293 the nop in the delay slot. */
8294 if (((GET_CODE (insn) == INSN
8295 && GET_CODE (PATTERN (insn)) != USE
8296 && GET_CODE (PATTERN (insn)) != CLOBBER)
8297 || GET_CODE (insn) == CALL_INSN
8298 || (GET_CODE (insn) == JUMP_INSN
8299 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8300 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8301 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8302 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8303 return 2;
8305 /* SH2e has a bug that prevents the use of annulled branches, so if
8306 the delay slot is not filled, we'll have to put a NOP in it. */
8307 if (sh_cpu == CPU_SH2E
8308 && GET_CODE (insn) == JUMP_INSN
8309 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8310 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8311 && get_attr_type (insn) == TYPE_CBRANCH
8312 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8313 return 2;
8315 /* sh-dsp parallel processing insns take four bytes instead of two. */
8317 if (GET_CODE (insn) == INSN)
8319 int sum = 0;
8320 rtx body = PATTERN (insn);
8321 const char *template;
8322 char c;
8323 int maybe_label = 1;
8325 if (GET_CODE (body) == ASM_INPUT)
8326 template = XSTR (body, 0);
8327 else if (asm_noperands (body) >= 0)
8328 template
8329 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
8330 else
8331 return 0;
8334 int ppi_adjust = 0;
8337 c = *template++;
8338 while (c == ' ' || c == '\t');
8339 /* all sh-dsp parallel-processing insns start with p.
8340 The only non-ppi sh insn starting with p is pref.
8341 The only ppi starting with pr is prnd. */
8342 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
8343 ppi_adjust = 2;
8344 /* The repeat pseudo-insn expands to three insns, a total of
8345 six bytes in size. */
8346 else if ((c == 'r' || c == 'R')
8347 && ! strncasecmp ("epeat", template, 5))
8348 ppi_adjust = 4;
8349 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
8351 /* If this is a label, it is obviously not a ppi insn. */
8352 if (c == ':' && maybe_label)
8354 ppi_adjust = 0;
8355 break;
8357 else if (c == '\'' || c == '"')
8358 maybe_label = 0;
8359 c = *template++;
8361 sum += ppi_adjust;
8362 maybe_label = c != ':';
8364 while (c);
8365 return sum;
8367 return 0;
8370 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8371 isn't protected by a PIC unspec. */
8373 nonpic_symbol_mentioned_p (rtx x)
8375 register const char *fmt;
8376 register int i;
8378 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8379 || GET_CODE (x) == PC)
8380 return 1;
8382 /* We don't want to look into the possible MEM location of a
8383 CONST_DOUBLE, since we're not going to use it, in general. */
8384 if (GET_CODE (x) == CONST_DOUBLE)
8385 return 0;
8387 if (GET_CODE (x) == UNSPEC
8388 && (XINT (x, 1) == UNSPEC_PIC
8389 || XINT (x, 1) == UNSPEC_GOT
8390 || XINT (x, 1) == UNSPEC_GOTOFF
8391 || XINT (x, 1) == UNSPEC_GOTPLT
8392 || XINT (x, 1) == UNSPEC_GOTTPOFF
8393 || XINT (x, 1) == UNSPEC_DTPOFF
8394 || XINT (x, 1) == UNSPEC_PLT))
8395 return 0;
8397 fmt = GET_RTX_FORMAT (GET_CODE (x));
8398 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8400 if (fmt[i] == 'E')
8402 register int j;
8404 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8405 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8406 return 1;
8408 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8409 return 1;
8412 return 0;
8415 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8416 @GOTOFF in `reg'. */
8418 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8419 rtx reg)
8421 if (tls_symbolic_operand (orig, Pmode))
8422 return orig;
8424 if (GET_CODE (orig) == LABEL_REF
8425 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8427 if (reg == 0)
8428 reg = gen_reg_rtx (Pmode);
8430 emit_insn (gen_symGOTOFF2reg (reg, orig));
8431 return reg;
8433 else if (GET_CODE (orig) == SYMBOL_REF)
8435 if (reg == 0)
8436 reg = gen_reg_rtx (Pmode);
8438 emit_insn (gen_symGOT2reg (reg, orig));
8439 return reg;
8441 return orig;
8444 /* Mark the use of a constant in the literal table. If the constant
8445 has multiple labels, make it unique. */
8446 static rtx
8447 mark_constant_pool_use (rtx x)
8449 rtx insn, lab, pattern;
8451 if (x == NULL)
8452 return x;
8454 switch (GET_CODE (x))
8456 case LABEL_REF:
8457 x = XEXP (x, 0);
8458 case CODE_LABEL:
8459 break;
8460 default:
8461 return x;
8464 /* Get the first label in the list of labels for the same constant
8465 and delete the other labels in the list. */
8466 lab = x;
8467 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8469 if (GET_CODE (insn) != CODE_LABEL
8470 || LABEL_REFS (insn) != NEXT_INSN (insn))
8471 break;
8472 lab = insn;
8475 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8476 INSN_DELETED_P (insn) = 1;
8478 /* Mark constants in a window. */
8479 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8481 if (GET_CODE (insn) != INSN)
8482 continue;
8484 pattern = PATTERN (insn);
8485 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8486 continue;
8488 switch (XINT (pattern, 1))
8490 case UNSPECV_CONST2:
8491 case UNSPECV_CONST4:
8492 case UNSPECV_CONST8:
8493 XVECEXP (pattern, 0, 1) = const1_rtx;
8494 break;
8495 case UNSPECV_WINDOW_END:
8496 if (XVECEXP (pattern, 0, 0) == x)
8497 return lab;
8498 break;
8499 case UNSPECV_CONST_END:
8500 return lab;
8501 default:
8502 break;
8506 return lab;
8509 /* Return true if it's possible to redirect BRANCH1 to the destination
8510 of an unconditional jump BRANCH2. We only want to do this if the
8511 resulting branch will have a short displacement. */
8513 sh_can_redirect_branch (rtx branch1, rtx branch2)
8515 if (flag_expensive_optimizations && simplejump_p (branch2))
8517 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8518 rtx insn;
8519 int distance;
8521 for (distance = 0, insn = NEXT_INSN (branch1);
8522 insn && distance < 256;
8523 insn = PREV_INSN (insn))
8525 if (insn == dest)
8526 return 1;
8527 else
8528 distance += get_attr_length (insn);
8530 for (distance = 0, insn = NEXT_INSN (branch1);
8531 insn && distance < 256;
8532 insn = NEXT_INSN (insn))
8534 if (insn == dest)
8535 return 1;
8536 else
8537 distance += get_attr_length (insn);
8540 return 0;
8543 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8545 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8546 unsigned int new_reg)
8548 /* Interrupt functions can only use registers that have already been
8549 saved by the prologue, even if they would normally be
8550 call-clobbered. */
8552 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
8553 return 0;
8555 return 1;
8558 /* Function to update the integer COST
8559 based on the relationship between INSN that is dependent on
8560 DEP_INSN through the dependence LINK. The default is to make no
8561 adjustment to COST. This can be used for example to specify to
8562 the scheduler that an output- or anti-dependence does not incur
8563 the same cost as a data-dependence. The return value should be
8564 the new value for COST. */
8565 static int
8566 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8568 rtx reg, use_pat;
8570 if (TARGET_SHMEDIA)
8572 /* On SHmedia, if the dependence is an anti-dependence or
8573 output-dependence, there is no cost. */
8574 if (REG_NOTE_KIND (link) != 0)
8575 cost = 0;
8577 if (get_attr_is_mac_media (insn)
8578 && get_attr_is_mac_media (dep_insn))
8579 cost = 1;
8581 else if (REG_NOTE_KIND (link) == 0)
8583 enum attr_type dep_type, type;
8585 if (recog_memoized (insn) < 0
8586 || recog_memoized (dep_insn) < 0)
8587 return cost;
8589 dep_type = get_attr_type (dep_insn);
8590 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
8591 cost--;
8592 if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
8593 && (type = get_attr_type (insn)) != TYPE_CALL
8594 && type != TYPE_SFUNC)
8595 cost--;
8597 /* The only input for a call that is timing-critical is the
8598 function's address. */
8599 if (GET_CODE(insn) == CALL_INSN)
8601 rtx call = PATTERN (insn);
8603 if (GET_CODE (call) == PARALLEL)
8604 call = XVECEXP (call, 0 ,0);
8605 if (GET_CODE (call) == SET)
8606 call = SET_SRC (call);
8607 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
8608 && ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn))
8609 cost = 0;
8611 /* Likewise, the most timing critical input for an sfunc call
8612 is the function address. However, sfuncs typically start
8613 using their arguments pretty quickly.
8614 Assume a four cycle delay before they are needed. */
8615 /* All sfunc calls are parallels with at least four components.
8616 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8617 else if (GET_CODE (PATTERN (insn)) == PARALLEL
8618 && XVECLEN (PATTERN (insn), 0) >= 4
8619 && (reg = sfunc_uses_reg (insn)))
8621 if (! reg_set_p (reg, dep_insn))
8622 cost -= 4;
8624 /* When the preceding instruction loads the shift amount of
8625 the following SHAD/SHLD, the latency of the load is increased
8626 by 1 cycle. */
8627 else if (TARGET_SH4
8628 && get_attr_type (insn) == TYPE_DYN_SHIFT
8629 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
8630 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
8631 XEXP (SET_SRC (single_set (insn)),
8632 1)))
8633 cost++;
8634 /* When an LS group instruction with a latency of less than
8635 3 cycles is followed by a double-precision floating-point
8636 instruction, FIPR, or FTRV, the latency of the first
8637 instruction is increased to 3 cycles. */
8638 else if (cost < 3
8639 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
8640 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
8641 cost = 3;
8642 /* The lsw register of a double-precision computation is ready one
8643 cycle earlier. */
8644 else if (reload_completed
8645 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
8646 && (use_pat = single_set (insn))
8647 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
8648 SET_SRC (use_pat)))
8649 cost -= 1;
8651 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
8652 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
8653 cost -= 1;
8655 /* An anti-dependence penalty of two applies if the first insn is a double
8656 precision fadd / fsub / fmul. */
8657 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8658 && recog_memoized (dep_insn) >= 0
8659 && get_attr_type (dep_insn) == TYPE_DFP_ARITH
8660 /* A lot of alleged anti-flow dependences are fake,
8661 so check this one is real. */
8662 && flow_dependent_p (dep_insn, insn))
8663 cost = 2;
8666 return cost;
8669 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
8670 if DEP_INSN is anti-flow dependent on INSN. */
8671 static int
8672 flow_dependent_p (rtx insn, rtx dep_insn)
8674 rtx tmp = PATTERN (insn);
8676 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
8677 return tmp == NULL_RTX;
8680 /* A helper function for flow_dependent_p called through note_stores. */
8681 static void
8682 flow_dependent_p_1 (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
8684 rtx * pinsn = (rtx *) data;
8686 if (*pinsn && reg_referenced_p (x, *pinsn))
8687 *pinsn = NULL_RTX;
8690 /* For use by ALLOCATE_INITIAL_VALUE. Note that sh.md contains some
8691 'special function' patterns (type sfunc) that clobber pr, but that
8692 do not look like function calls to leaf_function_p. Hence we must
8693 do this extra check. */
8695 sh_pr_n_sets (void)
8697 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
8700 /* Return 2 to indicate dual issue for superscalar (SH4) processors and
8701 1 otherwise. To be used by the DFA pipeline description. */
8702 static int
8703 sh_issue_rate (void)
8705 if (TARGET_SUPERSCALAR)
8706 return 2;
8707 else
8708 return 1;
8711 /* Functions for ready queue reordering for sched1. */
8713 /* Get weight for mode for a set x. */
8714 static short
8715 find_set_regmode_weight (rtx x, enum machine_mode mode)
8717 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
8718 return 1;
8719 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
8721 if (GET_CODE (SET_DEST (x)) == REG)
8723 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
8724 return 1;
8725 else
8726 return 0;
8728 return 1;
8730 return 0;
8733 /* Get regmode weight for insn. */
8734 static short
8735 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
8737 short reg_weight = 0;
8738 rtx x;
8740 /* Increment weight for each register born here. */
8741 x = PATTERN (insn);
8742 reg_weight += find_set_regmode_weight (x, mode);
8743 if (GET_CODE (x) == PARALLEL)
8745 int j;
8746 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
8748 x = XVECEXP (PATTERN (insn), 0, j);
8749 reg_weight += find_set_regmode_weight (x, mode);
8752 /* Decrement weight for each register that dies here. */
8753 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
8755 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
8757 rtx note = XEXP (x, 0);
8758 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
8759 reg_weight--;
8762 return reg_weight;
8765 /* Calculate regmode weights for all insns of a basic block. */
8766 static void
8767 find_regmode_weight (int b, enum machine_mode mode)
8769 rtx insn, next_tail, head, tail;
8771 get_block_head_tail (b, &head, &tail);
8772 next_tail = NEXT_INSN (tail);
8774 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
8776 /* Handle register life information. */
8777 if (!INSN_P (insn))
8778 continue;
8780 if (mode == SFmode)
8781 INSN_REGMODE_WEIGHT (insn, mode) =
8782 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
8783 else if (mode == SImode)
8784 INSN_REGMODE_WEIGHT (insn, mode) =
8785 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
8789 /* Comparison function for ready queue sorting. */
8790 static int
8791 rank_for_reorder (const void *x, const void *y)
8793 rtx tmp = *(const rtx *) y;
8794 rtx tmp2 = *(const rtx *) x;
8796 /* The insn in a schedule group should be issued first. */
8797 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
8798 return SCHED_GROUP_P (tmp2) ? 1 : -1;
8800 /* If insns are equally good, sort by INSN_LUID (original insn order); this
8801 minimizes instruction movement, thus minimizing sched's effect on
8802 register pressure. */
8803 return INSN_LUID (tmp) - INSN_LUID (tmp2);
8806 /* Resort the array A, in which only the element at index N may be out of order. */
8807 static void
8808 swap_reorder (rtx *a, int n)
8810 rtx insn = a[n - 1];
8811 int i = n - 2;
8813 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
8815 a[i + 1] = a[i];
8816 i -= 1;
8818 a[i + 1] = insn;
8821 #define SCHED_REORDER(READY, N_READY) \
8822 do \
8824 if ((N_READY) == 2) \
8825 swap_reorder (READY, N_READY); \
8826 else if ((N_READY) > 2) \
8827 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
8829 while (0)
8831 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
8832 macro. */
8833 static void
8834 ready_reorder (rtx *ready, int nready)
8836 SCHED_REORDER (ready, nready);
8839 /* Calculate regmode weights for all insns of all basic blocks. */
8840 static void
8841 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
8842 int verbose ATTRIBUTE_UNUSED,
8843 int old_max_uid)
8845 basic_block b;
8847 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
8848 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
8850 FOR_EACH_BB_REVERSE (b)
8852 find_regmode_weight (b->index, SImode);
8853 find_regmode_weight (b->index, SFmode);
8856 CURR_REGMODE_PRESSURE (SImode) = 0;
8857 CURR_REGMODE_PRESSURE (SFmode) = 0;
8861 /* Cleanup. */
8862 static void
8863 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
8864 int verbose ATTRIBUTE_UNUSED)
8866 if (regmode_weight[0])
8868 free (regmode_weight[0]);
8869 regmode_weight[0] = NULL;
8871 if (regmode_weight[1])
8873 free (regmode_weight[1]);
8874 regmode_weight[1] = NULL;
8878 /* Cache the can_issue_more so that we can return it from reorder2. Also,
8879 keep count of register pressures on SImode and SFmode. */
8880 static int
8881 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
8882 int sched_verbose ATTRIBUTE_UNUSED,
8883 rtx insn,
8884 int can_issue_more)
8886 if (GET_CODE (PATTERN (insn)) != USE
8887 && GET_CODE (PATTERN (insn)) != CLOBBER)
8888 cached_can_issue_more = can_issue_more - 1;
8889 else
8890 cached_can_issue_more = can_issue_more;
8892 if (reload_completed)
8893 return cached_can_issue_more;
8895 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
8896 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
8898 return cached_can_issue_more;
8901 static void
8902 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
8903 int verbose ATTRIBUTE_UNUSED,
8904 int veclen ATTRIBUTE_UNUSED)
8906 CURR_REGMODE_PRESSURE (SImode) = 0;
8907 CURR_REGMODE_PRESSURE (SFmode) = 0;
8910 /* Some magic numbers. */
8911 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8912 functions that already have high pressure on r0. */
8913 #define R0_MAX_LIFE_REGIONS 2
8914 #define R0_MAX_LIVE_LENGTH 12
8915 /* Register Pressure thresholds for SImode and SFmode registers. */
8916 #define SIMODE_MAX_WEIGHT 5
8917 #define SFMODE_MAX_WEIGHT 10
8919 /* Return true if the pressure is high for MODE. */
8920 static short
8921 high_pressure (enum machine_mode mode)
8923 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8924 functions that already have high pressure on r0. */
8925 if ((REG_N_SETS (0) - REG_N_DEATHS (0)) >= R0_MAX_LIFE_REGIONS
8926 && REG_LIVE_LENGTH (0) >= R0_MAX_LIVE_LENGTH)
8927 return 1;
8929 if (mode == SFmode)
8930 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
8931 else
8932 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
8935 /* Reorder ready queue if register pressure is high. */
8936 static int
8937 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
8938 int sched_verbose ATTRIBUTE_UNUSED,
8939 rtx *ready,
8940 int *n_readyp,
8941 int clock_var ATTRIBUTE_UNUSED)
8943 if (reload_completed)
8944 return sh_issue_rate ();
8946 if (high_pressure (SFmode) || high_pressure (SImode))
8948 ready_reorder (ready, *n_readyp);
8951 return sh_issue_rate ();
8954 /* Skip cycles if the current register pressure is high. */
8955 static int
8956 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
8957 int sched_verbose ATTRIBUTE_UNUSED,
8958 rtx *ready ATTRIBUTE_UNUSED,
8959 int *n_readyp ATTRIBUTE_UNUSED,
8960 int clock_var ATTRIBUTE_UNUSED)
8962 if (reload_completed)
8963 return cached_can_issue_more;
8965 if (high_pressure(SFmode) || high_pressure (SImode))
8966 skip_cycles = 1;
8968 return cached_can_issue_more;
8971 /* Skip cycles without sorting the ready queue. This will move insns from
8972 Q->R. If this is the last cycle we are skipping, allow sorting of the ready
8973 queue by sh_reorder. */
8975 /* Generally, skipping this many cycles is sufficient for all insns to move
8976 from Q -> R. */
8977 #define MAX_SKIPS 8
8979 static int
8980 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
8981 int sched_verbose ATTRIBUTE_UNUSED,
8982 rtx insn ATTRIBUTE_UNUSED,
8983 int last_clock_var,
8984 int clock_var,
8985 int *sort_p)
8987 if (reload_completed)
8988 return 0;
8990 if (skip_cycles)
8992 if ((clock_var - last_clock_var) < MAX_SKIPS)
8994 *sort_p = 0;
8995 return 1;
8997 /* If this is the last cycle we are skipping, allow reordering of R. */
8998 if ((clock_var - last_clock_var) == MAX_SKIPS)
9000 *sort_p = 1;
9001 return 1;
9005 skip_cycles = 0;
9007 return 0;
9010 /* SHmedia requires registers for branches, so we can't generate new
9011 branches past reload. */
9012 static bool
9013 sh_cannot_modify_jumps_p (void)
9015 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
9018 static int
9019 sh_target_reg_class (void)
9021 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
9024 static bool
9025 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
9027 return (shmedia_space_reserved_for_target_registers
9028 && (! after_prologue_epilogue_gen || TARGET_SAVE_ALL_TARGET_REGS));
9031 static bool
9032 sh_ms_bitfield_layout_p (tree record_type ATTRIBUTE_UNUSED)
9034 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
9038 On the SH1..SH4, the trampoline looks like
9039 2 0002 D202 mov.l l2,r2
9040 1 0000 D301 mov.l l1,r3
9041 3 0004 422B jmp @r2
9042 4 0006 0009 nop
9043 5 0008 00000000 l1: .long area
9044 6 000c 00000000 l2: .long function
9046 SH5 (compact) uses r1 instead of r3 for the static chain. */
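/* A sketch (derived from the moves at the end of sh_initialize_trampoline
   below) of how the generic SH1..SH4 path fills in the 16-byte template
   above:
     bytes  0.. 3: the two "mov.l" opcodes (0xd202 / 0xd301, ordered to
                   suit the target endianness)
     bytes  4.. 7: "jmp @r2" / "nop"       (0x422b / 0x0009)
     bytes  8..11: l1 = CXT, the static chain value
     bytes 12..15: l2 = FNADDR, the function address  */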
9049 /* Emit RTL insns to initialize the variable parts of a trampoline.
9050 FNADDR is an RTX for the address of the function's pure code.
9051 CXT is an RTX for the static chain value for the function. */
9053 void
9054 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
9056 if (TARGET_SHMEDIA64)
9058 rtx tramp_templ;
9059 int fixed_len;
9061 rtx movi1 = GEN_INT (0xcc000010);
9062 rtx shori1 = GEN_INT (0xc8000010);
9063 rtx src, dst;
9065 /* The following trampoline works within a +- 128 KB range for cxt:
9066 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
9067 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
9068 gettr tr1,r1; blink tr0,r63 */
9069 /* Address rounding makes it hard to compute the exact bounds of the
9070 offset for this trampoline, but we have a rather generous offset
9071 range, so frame_offset should do fine as an upper bound. */
9072 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
9074 /* ??? could optimize this trampoline initialization
9075 by writing DImode words with two insns each. */
9076 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
9077 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
9078 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
9079 insn = gen_rtx_AND (DImode, insn, mask);
9080 /* Or in ptb/u .,tr1 pattern */
9081 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
9082 insn = force_operand (insn, NULL_RTX);
9083 insn = gen_lowpart (SImode, insn);
9084 emit_move_insn (gen_rtx_MEM (SImode, tramp), insn);
9085 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
9086 insn = gen_rtx_AND (DImode, insn, mask);
9087 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
9088 insn = gen_lowpart (SImode, insn);
9089 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)), insn);
9090 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
9091 insn = gen_rtx_AND (DImode, insn, mask);
9092 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9093 insn = gen_lowpart (SImode, insn);
9094 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)), insn);
9095 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
9096 insn = gen_rtx_AND (DImode, insn, mask);
9097 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9098 insn = gen_lowpart (SImode, insn);
9099 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
9100 insn);
9101 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
9102 insn = gen_rtx_AND (DImode, insn, mask);
9103 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9104 insn = gen_lowpart (SImode, insn);
9105 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 16)),
9106 insn);
9107 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 20)),
9108 GEN_INT (0x6bf10600));
9109 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 24)),
9110 GEN_INT (0x4415fc10));
9111 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 28)),
9112 GEN_INT (0x4401fff0));
9113 emit_insn (gen_ic_invalidate_line (tramp));
9114 return;
9116 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
9117 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
9119 tramp_templ = gen_datalabel_ref (tramp_templ);
9120 dst = gen_rtx_MEM (BLKmode, tramp);
9121 src = gen_rtx_MEM (BLKmode, tramp_templ);
9122 set_mem_align (dst, 256);
9123 set_mem_align (src, 64);
9124 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
9126 emit_move_insn (gen_rtx_MEM (Pmode, plus_constant (tramp, fixed_len)),
9127 fnaddr);
9128 emit_move_insn (gen_rtx_MEM (Pmode,
9129 plus_constant (tramp,
9130 fixed_len
9131 + GET_MODE_SIZE (Pmode))),
9132 cxt);
9133 emit_insn (gen_ic_invalidate_line (tramp));
9134 return;
9136 else if (TARGET_SHMEDIA)
9138 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
9139 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
9140 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
9141 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
9142 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
9143 rotated 10 right, and the higher 16 bits of every 32 selected. */
9144 rtx movishori
9145 = force_reg (V2HImode, (simplify_gen_subreg
9146 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
9147 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
9148 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
9150 tramp = force_reg (Pmode, tramp);
9151 fnaddr = force_reg (SImode, fnaddr);
9152 cxt = force_reg (SImode, cxt);
9153 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
9154 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
9155 movishori));
9156 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
9157 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9158 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
9159 emit_move_insn (gen_rtx_MEM (DImode, tramp), quad0);
9160 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
9161 gen_rtx_SUBREG (V2HImode, cxt, 0),
9162 movishori));
9163 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
9164 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9165 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
9166 if (TARGET_LITTLE_ENDIAN)
9168 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
9169 emit_insn (gen_mextr4 (quad2, cxtload, blink));
9171 else
9173 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
9174 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
9176 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 8)), quad1);
9177 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), quad2);
9178 emit_insn (gen_ic_invalidate_line (tramp));
9179 return;
9181 else if (TARGET_SHCOMPACT)
9183 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
9184 return;
9186 emit_move_insn (gen_rtx_MEM (SImode, tramp),
9187 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
9188 SImode));
9189 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
9190 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
9191 SImode));
9192 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
9193 cxt);
9194 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
9195 fnaddr);
9196 if (TARGET_HARVARD)
9198 if (TARGET_USERMODE)
9199 emit_library_call (function_symbol ("__ic_invalidate"),
9200 0, VOIDmode, 1, tramp, SImode);
9201 else
9202 emit_insn (gen_ic_invalidate_line (tramp));
9206 /* FIXME: This is overly conservative. A SHcompact function that
9207 receives arguments ``by reference'' will have them stored in its
9208 own stack frame, so it must not pass pointers or references to
9209 these arguments to other functions by means of sibling calls. */
9210 static bool
9211 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9213 return (decl
9214 && (! TARGET_SHCOMPACT
9215 || current_function_args_info.stack_regs == 0)
9216 && ! sh_cfun_interrupt_handler_p ());
9219 /* Machine specific built-in functions. */
9221 struct builtin_description
9223 const enum insn_code icode;
9224 const char *const name;
9225 int signature;
9228 /* describe number and signedness of arguments; arg[0] == result
9229 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
9230 static const char signature_args[][4] =
9232 #define SH_BLTIN_V2SI2 0
9233 { 4, 4 },
9234 #define SH_BLTIN_V4HI2 1
9235 { 4, 4 },
9236 #define SH_BLTIN_V2SI3 2
9237 { 4, 4, 4 },
9238 #define SH_BLTIN_V4HI3 3
9239 { 4, 4, 4 },
9240 #define SH_BLTIN_V8QI3 4
9241 { 4, 4, 4 },
9242 #define SH_BLTIN_MAC_HISI 5
9243 { 1, 4, 4, 1 },
9244 #define SH_BLTIN_SH_HI 6
9245 { 4, 4, 1 },
9246 #define SH_BLTIN_SH_SI 7
9247 { 4, 4, 1 },
9248 #define SH_BLTIN_V4HI2V2SI 8
9249 { 4, 4, 4 },
9250 #define SH_BLTIN_V4HI2V8QI 9
9251 { 4, 4, 4 },
9252 #define SH_BLTIN_SISF 10
9253 { 4, 2 },
9254 #define SH_BLTIN_LDUA_L 11
9255 { 2, 8 },
9256 #define SH_BLTIN_LDUA_Q 12
9257 { 1, 8 },
9258 #define SH_BLTIN_STUA_L 13
9259 { 0, 8, 2 },
9260 #define SH_BLTIN_STUA_Q 14
9261 { 0, 8, 1 },
9262 #define SH_BLTIN_UDI 15
9263 { 0, 8, 1 },
9264 #define SH_BLTIN_NUM_SHARED_SIGNATURES 16
9265 #define SH_BLTIN_2 16
9266 #define SH_BLTIN_SU 16
9267 { 1, 2 },
9268 #define SH_BLTIN_3 17
9269 #define SH_BLTIN_SUS 17
9270 { 2, 2, 1 },
9271 #define SH_BLTIN_PSSV 18
9272 { 0, 8, 2, 2 },
9273 #define SH_BLTIN_XXUU 19
9274 #define SH_BLTIN_UUUU 19
9275 { 1, 1, 1, 1 },
9276 #define SH_BLTIN_PV 20
9277 { 0, 8 },
9279 /* mcmv: operands considered unsigned. */
9280 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9281 /* mperm: control value considered unsigned int. */
9282 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9283 /* mshards_q: returns signed short. */
9284 /* nsb: takes long long arg, returns unsigned char. */
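/* As an illustrative reading of the encoding above (a sketch, not a
   definitive statement of the generated prototypes): SH_BLTIN_SU is
   { 1, 2 }, i.e. an unsigned result and one signed argument, so together
   with the note on nsb this corresponds roughly to
     unsigned char __builtin_sh_media_NSB (long long);
   and SH_BLTIN_SH_HI is { 4, 4, 1 }, i.e. result and first argument in
   the insn's own vector mode with an unsigned int shift/control argument,
   as used by MPERM_W and the V4HI shift builtins.  */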
9285 static const struct builtin_description bdesc[] =
9287 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
9288 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
9289 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
9290 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
9291 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
9292 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
9293 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
9294 #if 0
9295 { CODE_FOR_alloco32, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9296 { CODE_FOR_alloco64, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9297 #endif
9298 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
9299 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
9300 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
9301 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
9302 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
9303 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
9304 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
9305 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
9306 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
9307 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
9308 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_UDI },
9309 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_UDI },
9310 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_UDI },
9311 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_UDI },
9312 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_UDI },
9313 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_UDI },
9314 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_UDI },
9315 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
9316 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
9317 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
9318 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
9319 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
9320 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
9321 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
9322 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
9323 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
9324 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
9325 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
9326 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
9327 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
9328 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
9329 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
9330 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
9331 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
9332 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
9333 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
9334 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
9335 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
9336 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
9337 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
9338 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
9339 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
9340 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
9341 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
9342 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
9343 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
9344 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
9345 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
9346 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
9347 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
9348 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
9349 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
9350 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
9351 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
9352 #if 0
9353 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9354 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9355 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9356 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9357 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9358 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9359 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9360 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9361 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9362 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9363 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9364 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9365 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9366 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9367 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9368 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9369 #endif
9370 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
9371 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
9372 #if 0
9373 { CODE_FOR_prefetch32,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
9374 { CODE_FOR_prefetch64,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV }
9375 #endif
9378 static void
9379 sh_media_init_builtins (void)
9381 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
9382 const struct builtin_description *d;
9384 memset (shared, 0, sizeof shared);
9385 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
9387 tree type, arg_type;
9388 int signature = d->signature;
9389 int i;
9391 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
9392 type = shared[signature];
9393 else
9395 int has_result = signature_args[signature][0] != 0;
9397 if (signature_args[signature][1] == 8
9398 && (insn_data[d->icode].operand[has_result].mode != Pmode))
9399 continue;
9400 if (! TARGET_FPU_ANY
9401 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
9402 continue;
9403 type = void_list_node;
9404 for (i = 3; ; i--)
9406 int arg = signature_args[signature][i];
9407 int opno = i - 1 + has_result;
9409 if (arg == 8)
9410 arg_type = ptr_type_node;
9411 else if (arg)
9412 arg_type = ((*lang_hooks.types.type_for_mode)
9413 (insn_data[d->icode].operand[opno].mode,
9414 (arg & 1)));
9415 else if (i)
9416 continue;
9417 else
9418 arg_type = void_type_node;
9419 if (i == 0)
9420 break;
9421 type = tree_cons (NULL_TREE, arg_type, type);
9423 type = build_function_type (arg_type, type);
9424 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
9425 shared[signature] = type;
9427 lang_hooks.builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
9428 NULL, NULL_TREE);
9432 /* Implements target hook vector_mode_supported_p. */
9433 bool
9434 sh_vector_mode_supported_p (enum machine_mode mode)
9436 if (TARGET_FPU_ANY
9437 && ((mode == V2SFmode)
9438 || (mode == V4SFmode)
9439 || (mode == V16SFmode)))
9440 return true;
9442 else if (TARGET_SHMEDIA
9443 && ((mode == V8QImode)
9444 || (mode == V2HImode)
9445 || (mode == V4HImode)
9446 || (mode == V2SImode)))
9447 return true;
9449 return false;
9452 /* Implements target hook dwarf_calling_convention. Return an enum
9453 of dwarf_calling_convention. */
9455 sh_dwarf_calling_convention (tree func)
9457 if (sh_attr_renesas_p (func))
9458 return DW_CC_GNU_renesas_sh;
9460 return DW_CC_normal;
9463 static void
9464 sh_init_builtins (void)
9466 if (TARGET_SHMEDIA)
9467 sh_media_init_builtins ();
9470 /* Expand an expression EXP that calls a built-in function,
9471 with result going to TARGET if that's convenient
9472 (and in mode MODE if that's convenient).
9473 SUBTARGET may be used as the target for computing one of EXP's operands.
9474 IGNORE is nonzero if the value is to be ignored. */
9476 static rtx
9477 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9478 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
9480 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
9481 tree arglist = TREE_OPERAND (exp, 1);
9482 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9483 const struct builtin_description *d = &bdesc[fcode];
9484 enum insn_code icode = d->icode;
9485 int signature = d->signature;
9486 enum machine_mode tmode = VOIDmode;
9487 int nop = 0, i;
9488 rtx op[4];
9489 rtx pat;
9491 if (signature_args[signature][0])
9493 if (ignore)
9494 return 0;
9496 tmode = insn_data[icode].operand[0].mode;
9497 if (! target
9498 || GET_MODE (target) != tmode
9499 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9500 target = gen_reg_rtx (tmode);
9501 op[nop++] = target;
9503 else
9504 target = 0;
9506 for (i = 1; i <= 3; i++, nop++)
9508 tree arg;
9509 enum machine_mode opmode, argmode;
9511 if (! signature_args[signature][i])
9512 break;
9513 arg = TREE_VALUE (arglist);
9514 if (arg == error_mark_node)
9515 return const0_rtx;
9516 arglist = TREE_CHAIN (arglist);
9517 opmode = insn_data[icode].operand[nop].mode;
9518 argmode = TYPE_MODE (TREE_TYPE (arg));
9519 if (argmode != opmode)
9520 arg = build1 (NOP_EXPR,
9521 (*lang_hooks.types.type_for_mode) (opmode, 0), arg);
9522 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
9523 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
9524 op[nop] = copy_to_mode_reg (opmode, op[nop]);
9527 switch (nop)
9529 case 1:
9530 pat = (*insn_data[d->icode].genfun) (op[0]);
9531 break;
9532 case 2:
9533 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
9534 break;
9535 case 3:
9536 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
9537 break;
9538 case 4:
9539 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
9540 break;
9541 default:
9542 abort ();
9544 if (! pat)
9545 return 0;
9546 emit_insn (pat);
9547 return target;
9550 void
9551 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
9553 rtx sel0 = const0_rtx;
9554 rtx sel1 = const1_rtx;
9555 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
9556 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
9558 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
9559 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
9562 void
9563 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
9565 rtx sel0 = const0_rtx;
9566 rtx sel1 = const1_rtx;
9567 rtx (*fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
9568 = gen_binary_sf_op;
9569 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
9571 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
9572 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
9575 /* Return true if a mode change from FROM to TO is invalid for the
9576 registers in CLASS. */
9577 bool
9578 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9579 enum reg_class class)
9581 /* We want to enable the use of SUBREGs as a means to
9582 VEC_SELECT a single element of a vector. */
9583 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
9584 return (reg_classes_intersect_p (GENERAL_REGS, class));
9586 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
9588 if (TARGET_LITTLE_ENDIAN)
9590 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
9591 return reg_classes_intersect_p (DF_REGS, class);
9593 else
9595 if (GET_MODE_SIZE (from) < 8)
9596 return reg_classes_intersect_p (DF_HI_REGS, class);
9599 return 0;
9603 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
9604 that label is used. */
9606 void
9607 sh_mark_label (rtx address, int nuses)
9609 if (GOTOFF_P (address))
9611 /* Extract the label or symbol. */
9612 address = XEXP (address, 0);
9613 if (GET_CODE (address) == PLUS)
9614 address = XEXP (address, 0);
9615 address = XVECEXP (address, 0, 0);
9617 if (GET_CODE (address) == LABEL_REF
9618 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
9619 LABEL_NUSES (XEXP (address, 0)) += nuses;
9622 /* Compute extra cost of moving data between one register class
9623 and another. */
9625 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
9626 uses this information. Hence, the general register <-> floating point
9627 register information here is not used for SFmode. */
9630 sh_register_move_cost (enum machine_mode mode,
9631 enum reg_class srcclass, enum reg_class dstclass)
9633 if (dstclass == T_REGS || dstclass == PR_REGS)
9634 return 10;
9636 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
9637 return 4;
9639 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
9640 && REGCLASS_HAS_FP_REG (srcclass)
9641 && REGCLASS_HAS_FP_REG (dstclass))
9642 return 4;
9644 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
9645 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
9646 return 9;
9648 if ((REGCLASS_HAS_FP_REG (dstclass)
9649 && REGCLASS_HAS_GENERAL_REG (srcclass))
9650 || (REGCLASS_HAS_GENERAL_REG (dstclass)
9651 && REGCLASS_HAS_FP_REG (srcclass)))
9652 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
9653 * ((GET_MODE_SIZE (mode) + 7) / 8U));
9655 if ((dstclass == FPUL_REGS
9656 && REGCLASS_HAS_GENERAL_REG (srcclass))
9657 || (srcclass == FPUL_REGS
9658 && REGCLASS_HAS_GENERAL_REG (dstclass)))
9659 return 5;
9661 if ((dstclass == FPUL_REGS
9662 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
9663 || (srcclass == FPUL_REGS
9664 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
9665 return 7;
9667 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9668 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9669 return 20;
9671 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9672 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9673 return 4;
9675 if (TARGET_SHMEDIA
9676 || (TARGET_FMOVD
9677 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
9678 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
9679 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
9681 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
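/* Worked example for the cost table above: a DFmode move (8 bytes)
   between a general register and a floating-point register without
   TARGET_FMOVD costs 12 * ((8 + 7) / 8) = 12, while the default
   fall-through cost for an 8-byte move is 2 * ((8 + 3) / 4) = 4.  */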
9684 /* Like register_operand, but takes into account that SHMEDIA can use
9685 the constant zero like a general register. */
9686 int
9687 sh_register_operand (rtx op, enum machine_mode mode)
9689 if (op == CONST0_RTX (mode) && TARGET_SHMEDIA)
9690 return 1;
9691 return register_operand (op, mode);
9694 int
9695 cmpsi_operand (rtx op, enum machine_mode mode)
9697 if (GET_CODE (op) == REG && REGNO (op) == T_REG
9698 && GET_MODE (op) == SImode)
9699 return 1;
9700 return arith_operand (op, mode);
9703 static rtx emit_load_ptr (rtx, rtx);
9705 static rtx
9706 emit_load_ptr (rtx reg, rtx addr)
9708 rtx mem = gen_rtx_MEM (ptr_mode, addr);
9710 if (Pmode != ptr_mode)
9711 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
9712 return emit_move_insn (reg, mem);
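/* When Pmode is wider than ptr_mode (as on SHmedia configurations with
   32-bit pointers), the helper above wraps the ptr_mode memory
   reference in a SIGN_EXTEND so that the loaded pointer is widened to
   Pmode; otherwise the load is emitted as-is.  */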
9715 void
9716 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
9717 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
9718 tree function)
9720 CUMULATIVE_ARGS cum;
9721 int structure_value_byref = 0;
9722 rtx this, this_value, sibcall, insns, funexp;
9723 tree funtype = TREE_TYPE (function);
9724 int simple_add = CONST_OK_FOR_ADD (delta);
9725 int did_load = 0;
9726 rtx scratch0, scratch1, scratch2;
9728 reload_completed = 1;
9729 epilogue_completed = 1;
9730 no_new_pseudos = 1;
9731 current_function_uses_only_leaf_regs = 1;
9732 reset_block_changes ();
9734 emit_note (NOTE_INSN_PROLOGUE_END);
9736 /* Find the "this" pointer. We have such a wide range of ABIs for the
9737 SH that it's best to do this completely machine independently.
9738 "this" is passed as first argument, unless a structure return pointer
9739 comes first, in which case "this" comes second. */
9740 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
9741 #ifndef PCC_STATIC_STRUCT_RETURN
9742 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
9743 structure_value_byref = 1;
9744 #endif /* not PCC_STATIC_STRUCT_RETURN */
9745 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
9747 tree ptype = build_pointer_type (TREE_TYPE (funtype));
9749 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
9751 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
9753 /* For SHcompact, we only have r0 for a scratch register: r1 is the
9754 static chain pointer (even if you can't have nested virtual functions
9755 right now, someone might implement them sometime), and the rest of the
9756 registers are used for argument passing, are callee-saved, or are reserved. */
9757 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
9758 if (! TARGET_SH5)
9760 scratch1 = gen_rtx_REG (ptr_mode, 1);
9761 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
9762 to the location where struct values are to be returned. */
9763 scratch2 = gen_rtx_REG (Pmode, 3);
9765 else if (TARGET_SHMEDIA)
9767 scratch1 = gen_rtx_REG (ptr_mode, 21);
9768 scratch2 = gen_rtx_REG (Pmode, TR0_REG);
9771 this_value = plus_constant (this, delta);
9772 if (vcall_offset
9773 && (simple_add || scratch0 != scratch1)
9774 && strict_memory_address_p (ptr_mode, this_value))
9776 emit_load_ptr (scratch0, this_value);
9777 did_load = 1;
9780 if (!delta)
9781 ; /* Do nothing. */
9782 else if (simple_add)
9783 emit_move_insn (this, this_value);
9784 else
9786 emit_move_insn (scratch1, GEN_INT (delta));
9787 emit_insn (gen_add2_insn (this, scratch1));
9790 if (vcall_offset)
9792 rtx offset_addr;
9794 if (!did_load)
9795 emit_load_ptr (scratch0, this);
9797 offset_addr = plus_constant (scratch0, vcall_offset);
9798 if (strict_memory_address_p (ptr_mode, offset_addr))
9799 ; /* Do nothing. */
9800 else if (! TARGET_SH5)
9802 /* scratch0 != scratch1, and we have indexed loads. Get a better
9803 schedule by loading the offset into r1 and using an indexed
9804 load - then the load of r1 can issue before the load from
9805 (this + delta) finishes. */
9806 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9807 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
9809 else if (CONST_OK_FOR_ADD (vcall_offset))
9811 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
9812 offset_addr = scratch0;
9814 else if (scratch0 != scratch1)
9816 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9817 emit_insn (gen_add2_insn (scratch0, scratch1));
9818 offset_addr = scratch0;
9820 else
9821 abort (); /* FIXME */
9822 emit_load_ptr (scratch0, offset_addr);
9824 if (Pmode != ptr_mode)
9825 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
9826 emit_insn (gen_add2_insn (this, scratch0));
9829 /* Generate a tail call to the target function. */
9830 if (! TREE_USED (function))
9832 assemble_external (function);
9833 TREE_USED (function) = 1;
9835 funexp = XEXP (DECL_RTL (function), 0);
9836 emit_move_insn (scratch2, funexp);
9837 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
9838 sibcall = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
9839 SIBLING_CALL_P (sibcall) = 1;
9840 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
9841 emit_barrier ();
9843 /* Run just enough of rest_of_compilation to do scheduling and get
9844 the insns emitted. Note that use_thunk calls
9845 assemble_start_function and assemble_end_function. */
9847 insn_locators_initialize ();
9848 insns = get_insns ();
9850 if (optimize > 0 && flag_schedule_insns_after_reload)
9852 /* Initialize the bitmap obstacks. */
9853 bitmap_obstack_initialize (NULL);
9854 bitmap_obstack_initialize (&reg_obstack);
9855 if (! basic_block_info)
9856 init_flow ();
9857 rtl_register_cfg_hooks ();
9858 find_basic_blocks (insns);
9859 life_analysis (dump_file, PROP_FINAL);
9861 split_all_insns (1);
9863 schedule_insns (dump_file);
9866 sh_reorg ();
9868 if (optimize > 0 && flag_delayed_branch)
9869 dbr_schedule (insns, dump_file);
9870 shorten_branches (insns);
9871 final_start_function (insns, file, 1);
9872 final (insns, file, 1, 0);
9873 final_end_function ();
9875 if (optimize > 0 && flag_schedule_insns_after_reload)
9877 /* Release all memory allocated by flow. */
9878 free_basic_block_vars ();
9880 /* Release the bitmap obstacks. */
9881 bitmap_obstack_release (&reg_obstack);
9882 bitmap_obstack_release (NULL);
9885 reload_completed = 0;
9886 epilogue_completed = 0;
9887 no_new_pseudos = 0;
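/* The thunk built above therefore amounts to roughly the following
   sketch (the exact scratch registers depend on the ABI selected):

     this += delta;                        (directly, or via scratch1)
     if (vcall_offset)
       this += *(*this + vcall_offset);    (loads through scratch0/1)
     goto *function;                       (sibcall through scratch2)
*/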
9890 rtx
9891 function_symbol (const char *name)
9893 rtx sym = gen_rtx_SYMBOL_REF (Pmode, name);
9894 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
9895 return sym;
9898 /* Find the number of a general purpose register in S; return -1 if none is set. */
9899 static int
9900 scavenge_reg (HARD_REG_SET *s)
9902 int r;
9903 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
9904 if (TEST_HARD_REG_BIT (*s, r))
9905 return r;
9906 return -1;
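/* E.g., if *S has only the bit for FIRST_GENERAL_REG + 2 set,
   scavenge_reg returns FIRST_GENERAL_REG + 2; an empty set yields
   -1.  */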
9909 rtx
9910 sh_get_pr_initial_val (void)
9912 rtx val;
9914 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
9915 PR register on SHcompact, because it might be clobbered by the prologue.
9916 We check first if that is known to be the case. */
9917 if (TARGET_SHCOMPACT
9918 && ((current_function_args_info.call_cookie
9919 & ~ CALL_COOKIE_RET_TRAMP (1))
9920 || current_function_has_nonlocal_label))
9921 return gen_rtx_MEM (SImode, return_address_pointer_rtx);
9923 /* If we haven't finished rtl generation, there might be a nonlocal label
9924 that we haven't seen yet.
9925 ??? get_hard_reg_initial_val fails if it is called while no_new_pseudos
9926 is set, unless it has been called before for the same register. And even
9927 then, we end up in trouble if we didn't use the register in the same
9928 basic block before. So call get_hard_reg_initial_val now and wrap it
9929 in an unspec if we might need to replace it. */
9930 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
9931 combine can put the pseudo returned by get_hard_reg_initial_val into
9932 instructions that need a general purpose register, which will fail to
9933 be recognized when the pseudo becomes allocated to PR. */
9934 val
9935 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
9936 if (TARGET_SH1)
9937 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
9938 return val;
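/* Callers thus receive one of: a MEM through the return-address
   pointer when the SHcompact prologue may clobber PR, the raw value
   from get_hard_reg_initial_val on SHmedia, or that value wrapped in
   (unspec [val] UNSPEC_RA) when TARGET_SH1, so that it can still be
   replaced later as described in the comments above.  */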
9941 int
9942 sh_expand_t_scc (enum rtx_code code, rtx target)
9944 rtx result = target;
9945 HOST_WIDE_INT val;
9947 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
9948 || GET_CODE (sh_compare_op1) != CONST_INT)
9949 return 0;
9950 if (GET_CODE (result) != REG)
9951 result = gen_reg_rtx (SImode);
9952 val = INTVAL (sh_compare_op1);
9953 if ((code == EQ && val == 1) || (code == NE && val == 0))
9954 emit_insn (gen_movt (result));
9955 else if ((code == EQ && val == 0) || (code == NE && val == 1))
9957 emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
9958 emit_insn (gen_subc (result, result, result));
9959 emit_insn (gen_addsi3 (result, result, const1_rtx));
9961 else if (code == EQ || code == NE)
9962 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
9963 else
9964 return 0;
9965 if (result != target)
9966 emit_move_insn (target, result);
9967 return 1;
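/* Worked example for the T-bit trick above: for (T == 0), i.e.
   code == EQ with val == 0, "subc" computes result - result - T = -T,
   and the following addition of 1 yields 1 - T, which is 1 exactly
   when T is clear.  The initial CLOBBER tells the rtl passes that the
   previous contents of RESULT are dead.  */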
9970 /* INSN is an sfunc; return the rtx that describes the address used. */
9971 static rtx
9972 extract_sfunc_addr (rtx insn)
9974 rtx pattern, part = NULL_RTX;
9975 int len, i;
9977 pattern = PATTERN (insn);
9978 len = XVECLEN (pattern, 0);
9979 for (i = 0; i < len; i++)
9981 part = XVECEXP (pattern, 0, i);
9982 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
9983 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
9984 return XEXP (part, 0);
9986 if (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE)
9987 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
9988 abort ();
9991 /* Verify that the register in use_sfunc_addr still agrees with the address
9992 used in the sfunc. This prevents fill_slots_from_thread from changing
9993 use_sfunc_addr.
9994 INSN is the use_sfunc_addr instruction, and REG is the register it
9995 guards. */
9996 int
9997 check_use_sfunc_addr (rtx insn, rtx reg)
9999 /* Search for the sfunc. It should really come right after INSN. */
10000 while ((insn = NEXT_INSN (insn)))
10002 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
10003 break;
10004 if (! INSN_P (insn))
10005 continue;
10007 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
10008 insn = XVECEXP (PATTERN (insn), 0, 0);
10009 if (GET_CODE (PATTERN (insn)) != PARALLEL
10010 || get_attr_type (insn) != TYPE_SFUNC)
10011 continue;
10012 return rtx_equal_p (extract_sfunc_addr (insn), reg);
10014 abort ();
10017 /* Returns 1 if OP is a MEM that can be the source of a simple move operation. */
10019 int
10020 unaligned_load_operand (rtx op, enum machine_mode mode)
10022 rtx inside;
10024 if (GET_CODE (op) != MEM || GET_MODE (op) != mode)
10025 return 0;
10027 inside = XEXP (op, 0);
10029 if (GET_CODE (inside) == POST_INC)
10030 inside = XEXP (inside, 0);
10032 if (GET_CODE (inside) == REG)
10033 return 1;
10035 return 0;
10038 /* This function returns a constant rtx that represents 2**15 / pi in
10039 SFmode. It's used to scale SFmode angles, in radians, to a
10040 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
10041 maps to 0x10000). */
10043 static GTY(()) rtx sh_fsca_sf2int_rtx;
10045 rtx
10046 sh_fsca_sf2int (void)
10048 if (! sh_fsca_sf2int_rtx)
10050 REAL_VALUE_TYPE rv;
10052 real_from_string (&rv, "10430.378350470453");
10053 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
10056 return sh_fsca_sf2int_rtx;
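/* Worked example: the constant above is 2**15 / pi ~= 10430.378, so an
   angle of pi/2 radians scales to about 16384 = 0x4000, i.e. a quarter
   of the 0x10000 units that make up a full circle in this fixed-point
   angle format.  */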
10059 /* This function returns a constant rtx that represents 2**15 / pi in
10060 DFmode. It's used to scale DFmode angles, in radians, to a
10061 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
10062 maps to 0x10000). */
10064 static GTY(()) rtx sh_fsca_df2int_rtx;
10066 rtx
10067 sh_fsca_df2int (void)
10069 if (! sh_fsca_df2int_rtx)
10071 REAL_VALUE_TYPE rv;
10073 real_from_string (&rv, "10430.378350470453");
10074 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
10077 return sh_fsca_df2int_rtx;
10080 /* This function returns a constant rtx that represents pi / 2**15 in
10081 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
10082 of a full circle back to an SFmode value (i.e., 0x10000 maps to
10083 2*pi). */
10085 static GTY(()) rtx sh_fsca_int2sf_rtx;
10087 rtx
10088 sh_fsca_int2sf (void)
10090 if (! sh_fsca_int2sf_rtx)
10092 REAL_VALUE_TYPE rv;
10094 real_from_string (&rv, "9.587379924285257e-5");
10095 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
10098 return sh_fsca_int2sf_rtx;
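/* Worked example of the inverse scaling: 0x4000 * 9.5874e-5 ~= 1.5708,
   i.e. the quarter-circle value from the example above maps back to
   roughly pi/2 radians.  */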
10101 /* Initialize the CUMULATIVE_ARGS structure. */
10103 void
10104 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
10105 tree fntype,
10106 rtx libname ATTRIBUTE_UNUSED,
10107 tree fndecl,
10108 signed int n_named_args,
10109 enum machine_mode mode)
10111 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
10112 pcum->free_single_fp_reg = 0;
10113 pcum->stack_regs = 0;
10114 pcum->byref_regs = 0;
10115 pcum->byref = 0;
10116 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
10118 /* XXX - Should we check TARGET_HITACHI here ??? */
10119 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
10121 if (fntype)
10123 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
10124 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
10125 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
10126 pcum->arg_count [(int) SH_ARG_INT]
10127 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
10129 pcum->call_cookie
10130 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10131 && pcum->arg_count [(int) SH_ARG_INT] == 0
10132 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
10133 ? int_size_in_bytes (TREE_TYPE (fntype))
10134 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
10135 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
10136 == FIRST_RET_REG));
10138 else
10140 pcum->arg_count [(int) SH_ARG_INT] = 0;
10141 pcum->prototype_p = FALSE;
10142 if (mode != VOIDmode)
10144 pcum->call_cookie =
10145 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10146 && GET_MODE_SIZE (mode) > 4
10147 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
10149 /* If the default ABI is the Renesas ABI then all library
10150 calls must assume that the library will be using the
10151 Renesas ABI. So if the function would return its result
10152 in memory then we must force the address of this memory
10153 block onto the stack. Ideally we would like to call
10154 targetm.calls.return_in_memory() here but we do not have
10155 the TYPE or the FNDECL available so we synthesize the
10156 contents of that function as best we can. */
10157 pcum->force_mem =
10158 (TARGET_DEFAULT & HITACHI_BIT)
10159 && (mode == BLKmode
10160 || (GET_MODE_SIZE (mode) > 4
10161 && !(mode == DFmode
10162 && TARGET_FPU_DOUBLE)));
10164 else
10166 pcum->call_cookie = 0;
10167 pcum->force_mem = FALSE;
10172 /* Determine if two hard register sets intersect.
10173 Return 1 if they do. */
10175 static int
10176 hard_regs_intersect_p (HARD_REG_SET *a, HARD_REG_SET *b)
10178 HARD_REG_SET c;
10179 COPY_HARD_REG_SET (c, *a);
10180 AND_HARD_REG_SET (c, *b);
10181 GO_IF_HARD_REG_SUBSET (c, reg_class_contents[(int) NO_REGS], lose);
10182 return 1;
10183 lose:
10184 return 0;
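/* E.g., with A = {r0, r1} and B = {r1, r2}, the AND leaves {r1}, which
   is not a subset of NO_REGS (the empty set), so the function returns
   1; for disjoint sets the GO_IF_HARD_REG_SUBSET branch is taken and 0
   is returned.  */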
10187 #include "gt-sh.h"