* config/sh/sh.c (sh_va_arg): Initialize lab_over.
1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "insn-config.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "flags.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "function.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "output.h"
38 #include "insn-attr.h"
39 #include "toplev.h"
40 #include "recog.h"
41 #include "c-pragma.h"
42 #include "integrate.h"
43 #include "tm_p.h"
44 #include "target.h"
45 #include "target-def.h"
46 #include "real.h"
47 #include "langhooks.h"
48 #include "basic-block.h"
49 #include "ra.h"
50 #include "cfglayout.h"
51 #include "intl.h"
52 #include "sched-int.h"
53 #include "ggc.h"
55 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
57 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
58 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
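/* For illustration: when TARGET_LITTLE_ENDIAN, LSW is 0 and MSW is 1, so for
   a DImode value held in the register pair r4/r5 the least significant word
   lives in r4 and the most significant word in r5; on big-endian targets the
   two indices, and hence the roles, are swapped.  */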
60 /* These are some macros to abstract register modes. */
61 #define CONST_OK_FOR_ADD(size) \
62 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
63 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
64 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
65 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
67 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
68 int current_function_interrupt;
70 /* ??? The pragma interrupt support will not work for SH3. */
71 /* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
72 output code for the next function appropriate for an interrupt handler. */
73 int pragma_interrupt;
75 /* This is set by the trap_exit attribute for functions. It specifies
76 a trap number to be used in a trapa instruction at function exit
77 (instead of an rte instruction). */
78 int trap_exit;
80 /* This is used by the sp_switch attribute for functions. It specifies
81 a variable holding the address of the stack the interrupt function
82 should switch to/from at entry/exit. */
83 rtx sp_switch;
85 /* This is set by #pragma trapa, and is similar to the above, except that
86 the compiler doesn't emit code to preserve all registers. */
87 static int pragma_trapa;
89 /* This is set by #pragma nosave_low_regs. This is useful on the SH3,
90 which has a separate set of low regs for User and Supervisor modes.
91 This should only be used for the lowest level of interrupts. Higher levels
92 of interrupts must save the registers in case they themselves are
93 interrupted. */
94 int pragma_nosave_low_regs;
96 /* This is used for communication between TARGET_SETUP_INCOMING_VARARGS and
97 sh_expand_prologue. */
98 int current_function_anonymous_args;
100 /* Global variables for machine-dependent things. */
102 /* Which CPU we are scheduling for. */
103 enum processor_type sh_cpu;
105 /* Definitions used in ready queue reordering for first scheduling pass. */
107 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
108 static short *regmode_weight[2];
110 /* Total SFmode and SImode weights of scheduled insns. */
111 static int curr_regmode_pressure[2];
113 /* If true, skip cycles for Q -> R movement. */
114 static int skip_cycles = 0;
116 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
117 and returned from sh_reorder2. */
118 static short cached_can_issue_more;
120 /* Saved operands from the last compare to use when we generate an scc
121 or bcc insn. */
123 rtx sh_compare_op0;
124 rtx sh_compare_op1;
126 /* Provides the class number of the smallest class containing
127 each register number. */
129 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
131 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
132 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
133 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
137 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
138 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
139 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
140 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
141 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
142 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
143 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
144 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
145 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
146 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
147 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
148 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
149 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
153 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
154 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
155 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
156 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
157 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
158 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
159 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
160 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
161 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
162 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
163 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
164 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
165 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
166 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
167 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
168 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
169 GENERAL_REGS,
172 char sh_register_names[FIRST_PSEUDO_REGISTER] \
173 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
175 char sh_additional_register_names[ADDREGNAMES_SIZE] \
176 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
177 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
179 /* Provide reg_class from a letter such as appears in the machine
180 description. A '*' marks a letter that is reserved target-independently.
181 reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */
183 enum reg_class reg_class_from_letter[] =
185 /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
186 /* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
187 /* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
188 /* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
189 /* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
190 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
191 /* y */ FPUL_REGS, /* z */ R0_REGS
194 int assembler_dialect;
196 static bool shmedia_space_reserved_for_target_registers;
198 static void split_branches (rtx);
199 static int branch_dest (rtx);
200 static void force_into (rtx, rtx);
201 static void print_slot (rtx);
202 static rtx add_constant (rtx, enum machine_mode, rtx);
203 static void dump_table (rtx, rtx);
204 static int hi_const (rtx);
205 static int broken_move (rtx);
206 static int mova_p (rtx);
207 static rtx find_barrier (int, rtx, rtx);
208 static int noncall_uses_reg (rtx, rtx, rtx *);
209 static rtx gen_block_redirect (rtx, int, int);
210 static void sh_reorg (void);
211 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
212 static rtx frame_insn (rtx);
213 static rtx push (int);
214 static void pop (int);
215 static void push_regs (HARD_REG_SET *, int);
216 static int calc_live_regs (HARD_REG_SET *);
217 static void mark_use (rtx, rtx *);
218 static HOST_WIDE_INT rounded_frame_size (int);
219 static rtx mark_constant_pool_use (rtx);
220 const struct attribute_spec sh_attribute_table[];
221 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
222 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
223 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
224 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
225 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
226 static void sh_insert_attributes (tree, tree *);
227 static int sh_adjust_cost (rtx, rtx, rtx, int);
228 static int sh_use_dfa_interface (void);
229 static int sh_issue_rate (void);
230 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
231 static short find_set_regmode_weight (rtx, enum machine_mode);
232 static short find_insn_regmode_weight (rtx, enum machine_mode);
233 static void find_regmode_weight (int, enum machine_mode);
234 static void sh_md_init_global (FILE *, int, int);
235 static void sh_md_finish_global (FILE *, int);
236 static int rank_for_reorder (const void *, const void *);
237 static void swap_reorder (rtx *, int);
238 static void ready_reorder (rtx *, int);
239 static short high_pressure (enum machine_mode);
240 static int sh_reorder (FILE *, int, rtx *, int *, int);
241 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
242 static void sh_md_init (FILE *, int, int);
243 static int sh_variable_issue (FILE *, int, rtx, int);
245 static bool sh_function_ok_for_sibcall (tree, tree);
247 static bool sh_cannot_modify_jumps_p (void);
248 static int sh_target_reg_class (void);
249 static bool sh_optimize_target_register_callee_saved (bool);
250 static bool sh_ms_bitfield_layout_p (tree);
252 static void sh_init_builtins (void);
253 static void sh_media_init_builtins (void);
254 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
255 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
256 static void sh_file_start (void);
257 static int flow_dependent_p (rtx, rtx);
258 static void flow_dependent_p_1 (rtx, rtx, void *);
259 static int shiftcosts (rtx);
260 static int andcosts (rtx);
261 static int addsubcosts (rtx);
262 static int multcosts (rtx);
263 static bool unspec_caller_rtx_p (rtx);
264 static bool sh_cannot_copy_insn_p (rtx);
265 static bool sh_rtx_costs (rtx, int, int, int *);
266 static int sh_address_cost (rtx);
267 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
268 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
269 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
270 static int scavenge_reg (HARD_REG_SET *s);
271 struct save_schedule_s;
272 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
273 struct save_schedule_s *, int);
275 static rtx sh_struct_value_rtx (tree, int);
276 static bool sh_return_in_memory (tree, tree);
277 static rtx sh_builtin_saveregs (void);
278 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
279 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
280 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
281 static tree sh_build_builtin_va_list (void);
284 /* Initialize the GCC target structure. */
285 #undef TARGET_ATTRIBUTE_TABLE
286 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
288 /* The next two are used for debug info when compiling with -gdwarf. */
289 #undef TARGET_ASM_UNALIGNED_HI_OP
290 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
291 #undef TARGET_ASM_UNALIGNED_SI_OP
292 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
294 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
295 #undef TARGET_ASM_UNALIGNED_DI_OP
296 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
297 #undef TARGET_ASM_ALIGNED_DI_OP
298 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
300 #undef TARGET_ASM_FUNCTION_EPILOGUE
301 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
303 #undef TARGET_ASM_OUTPUT_MI_THUNK
304 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
306 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
307 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
309 #undef TARGET_ASM_FILE_START
310 #define TARGET_ASM_FILE_START sh_file_start
311 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
312 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
314 #undef TARGET_INSERT_ATTRIBUTES
315 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
317 #undef TARGET_SCHED_ADJUST_COST
318 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
320 #undef TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE
321 #define TARGET_SCHED_USE_DFA_PIPELINE_INTERFACE \
322 sh_use_dfa_interface
323 #undef TARGET_SCHED_ISSUE_RATE
324 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
326 /* The next 5 hooks have been implemented for reenabling sched1. With the
327 help of these macros we are limiting the movement of insns in sched1 to
328 reduce the register pressure. The overall idea is to keep count of SImode
329 and SFmode regs required by already scheduled insns. When these counts
330 cross some threshold values, give priority to insns that free registers.
331 The insn that frees registers is most likely to be the insn with lowest
332 LUID (original insn order); but such an insn might be there in the stalled
333 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
334 up to a max of 8 cycles so that such insns may move from Q -> R.
336 The descriptions of the hooks are as below:
338 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
339 scheduler; it is called inside the sched_init function just after the
340 find_insn_reg_weights function call. It is used to calculate the SImode
341 and SFmode weights of insns of basic blocks, much like what
342 find_insn_reg_weights does.
343 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
345 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
346 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
347 (Q)->(R).
349 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
350 high, reorder the ready queue so that the insn with lowest LUID will be
351 issued next.
353 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
354 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
356 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
357 can be returned from TARGET_SCHED_REORDER2.
359 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
361 #undef TARGET_SCHED_DFA_NEW_CYCLE
362 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
364 #undef TARGET_SCHED_INIT_GLOBAL
365 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
367 #undef TARGET_SCHED_FINISH_GLOBAL
368 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
370 #undef TARGET_SCHED_VARIABLE_ISSUE
371 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
373 #undef TARGET_SCHED_REORDER
374 #define TARGET_SCHED_REORDER sh_reorder
376 #undef TARGET_SCHED_REORDER2
377 #define TARGET_SCHED_REORDER2 sh_reorder2
379 #undef TARGET_SCHED_INIT
380 #define TARGET_SCHED_INIT sh_md_init
382 #undef TARGET_CANNOT_MODIFY_JUMPS_P
383 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
384 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
385 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
386 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
387 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
388 sh_optimize_target_register_callee_saved
390 #undef TARGET_MS_BITFIELD_LAYOUT_P
391 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
393 #undef TARGET_INIT_BUILTINS
394 #define TARGET_INIT_BUILTINS sh_init_builtins
395 #undef TARGET_EXPAND_BUILTIN
396 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
398 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
399 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
401 #undef TARGET_CANNOT_COPY_INSN_P
402 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
403 #undef TARGET_RTX_COSTS
404 #define TARGET_RTX_COSTS sh_rtx_costs
405 #undef TARGET_ADDRESS_COST
406 #define TARGET_ADDRESS_COST sh_address_cost
408 #undef TARGET_MACHINE_DEPENDENT_REORG
409 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
411 #ifdef HAVE_AS_TLS
412 #undef TARGET_HAVE_TLS
413 #define TARGET_HAVE_TLS true
414 #endif
416 #undef TARGET_PROMOTE_PROTOTYPES
417 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
418 #undef TARGET_PROMOTE_FUNCTION_ARGS
419 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
420 #undef TARGET_PROMOTE_FUNCTION_RETURN
421 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
423 #undef TARGET_STRUCT_VALUE_RTX
424 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
425 #undef TARGET_RETURN_IN_MEMORY
426 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
428 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
429 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
430 #undef TARGET_SETUP_INCOMING_VARARGS
431 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
432 #undef TARGET_STRICT_ARGUMENT_NAMING
433 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
434 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
435 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
437 #undef TARGET_BUILD_BUILTIN_VA_LIST
438 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
440 #undef TARGET_PCH_VALID_P
441 #define TARGET_PCH_VALID_P sh_pch_valid_p
443 /* Return regmode weight for insn. */
444 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
446 /* Return current register pressure for regmode. */
447 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
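/* For example, INSN_REGMODE_WEIGHT (insn, SImode) expands to
   regmode_weight[0][INSN_UID (insn)], and CURR_REGMODE_PRESSURE (SFmode)
   expands to curr_regmode_pressure[1].  */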
449 struct gcc_target targetm = TARGET_INITIALIZER;
451 /* Print the operand address in x to the stream. */
453 void
454 print_operand_address (FILE *stream, rtx x)
456 switch (GET_CODE (x))
458 case REG:
459 case SUBREG:
460 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
461 break;
463 case PLUS:
465 rtx base = XEXP (x, 0);
466 rtx index = XEXP (x, 1);
468 switch (GET_CODE (index))
470 case CONST_INT:
471 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
472 reg_names[true_regnum (base)]);
473 break;
475 case REG:
476 case SUBREG:
478 int base_num = true_regnum (base);
479 int index_num = true_regnum (index);
481 fprintf (stream, "@(r0,%s)",
482 reg_names[MAX (base_num, index_num)]);
483 break;
486 default:
487 debug_rtx (x);
488 abort ();
491 break;
493 case PRE_DEC:
494 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
495 break;
497 case POST_INC:
498 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
499 break;
501 default:
502 x = mark_constant_pool_use (x);
503 output_addr_const (stream, x);
504 break;
508 /* Print operand x (an rtx) in assembler syntax to file stream
509 according to modifier code.
511 '.' print a .s if insn needs delay slot
512 ',' print LOCAL_LABEL_PREFIX
513 '@' print trap, rte or rts depending upon pragma interruptness
514 '#' output a nop if there is nothing to put in the delay slot
515 ''' print likelihood suffix (/u for unlikely).
516 'O' print a constant without the #
517 'R' print the LSW of a dp value - changes if in little endian
518 'S' print the MSW of a dp value - changes if in little endian
519 'T' print the next word of a dp value - same as 'R' in big endian mode.
520 'M' print an `x' if `m' will print `base,index'.
521 'N' print 'r63' if the operand is (const_int 0).
522 'm' print a pair `base,offset' or `base,index', for LD and ST.
523 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
524 'o' output an operator. */
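/* As an illustration of the modifiers above: for a DImode operand 0 held in
   the register pair r4/r5 on a little-endian target, "%R0" prints "r4" (the
   least significant word), "%S0" prints "r5", and "%T0" also prints "r5",
   the word after the one "%0" names.  */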
526 void
527 print_operand (FILE *stream, rtx x, int code)
529 switch (code)
531 case '.':
532 if (final_sequence
533 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
534 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
535 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
536 break;
537 case ',':
538 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
539 break;
540 case '@':
541 if (trap_exit)
542 fprintf (stream, "trapa #%d", trap_exit);
543 else if (sh_cfun_interrupt_handler_p ())
544 fprintf (stream, "rte");
545 else
546 fprintf (stream, "rts");
547 break;
548 case '#':
549 /* Output a nop if there's nothing in the delay slot. */
550 if (dbr_sequence_length () == 0)
551 fprintf (stream, "\n\tnop");
552 break;
553 case '\'':
555 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
557 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
558 fputs ("/u", stream);
559 break;
561 case 'O':
562 x = mark_constant_pool_use (x);
563 output_addr_const (stream, x);
564 break;
565 case 'R':
566 fputs (reg_names[REGNO (x) + LSW], (stream));
567 break;
568 case 'S':
569 fputs (reg_names[REGNO (x) + MSW], (stream));
570 break;
571 case 'T':
572 /* Next word of a double. */
573 switch (GET_CODE (x))
575 case REG:
576 fputs (reg_names[REGNO (x) + 1], (stream));
577 break;
578 case MEM:
579 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
580 && GET_CODE (XEXP (x, 0)) != POST_INC)
581 x = adjust_address (x, SImode, 4);
582 print_operand_address (stream, XEXP (x, 0));
583 break;
584 default:
585 break;
587 break;
588 case 'o':
589 switch (GET_CODE (x))
591 case PLUS: fputs ("add", stream); break;
592 case MINUS: fputs ("sub", stream); break;
593 case MULT: fputs ("mul", stream); break;
594 case DIV: fputs ("div", stream); break;
595 case EQ: fputs ("eq", stream); break;
596 case NE: fputs ("ne", stream); break;
597 case GT: case LT: fputs ("gt", stream); break;
598 case GE: case LE: fputs ("ge", stream); break;
599 case GTU: case LTU: fputs ("gtu", stream); break;
600 case GEU: case LEU: fputs ("geu", stream); break;
601 default:
602 break;
604 break;
605 case 'M':
606 if (GET_CODE (x) == MEM
607 && GET_CODE (XEXP (x, 0)) == PLUS
608 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
609 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
610 fputc ('x', stream);
611 break;
613 case 'm':
614 if (GET_CODE (x) != MEM)
615 abort ();
616 x = XEXP (x, 0);
617 switch (GET_CODE (x))
619 case REG:
620 case SUBREG:
621 print_operand (stream, x, 0);
622 fputs (", 0", stream);
623 break;
625 case PLUS:
626 print_operand (stream, XEXP (x, 0), 0);
627 fputs (", ", stream);
628 print_operand (stream, XEXP (x, 1), 0);
629 break;
631 default:
632 abort ();
634 break;
636 case 'N':
637 if (x == CONST0_RTX (GET_MODE (x)))
639 fprintf ((stream), "r63");
640 break;
642 goto default_output;
643 case 'u':
644 if (GET_CODE (x) == CONST_INT)
646 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
647 break;
649 /* Fall through. */
651 default_output:
652 default:
653 switch (GET_CODE (x))
655 /* FIXME: We need this on SHmedia32 because reload generates
656 some sign-extended HI or QI loads into DImode registers
657 but, because Pmode is SImode, the address ends up with a
658 subreg:SI of the DImode register. Maybe reload should be
659 fixed so as to apply alter_subreg to such loads? */
660 case SUBREG:
661 if (SUBREG_BYTE (x) != 0
662 || GET_CODE (SUBREG_REG (x)) != REG)
663 abort ();
665 x = SUBREG_REG (x);
666 /* Fall through. */
668 case REG:
669 if (FP_REGISTER_P (REGNO (x))
670 && GET_MODE (x) == V16SFmode)
671 fprintf ((stream), "mtrx%s", reg_names[REGNO (x)] + 2);
672 else if (FP_REGISTER_P (REGNO (x))
673 && GET_MODE (x) == V4SFmode)
674 fprintf ((stream), "fv%s", reg_names[REGNO (x)] + 2);
675 else if (GET_CODE (x) == REG
676 && GET_MODE (x) == V2SFmode)
677 fprintf ((stream), "fp%s", reg_names[REGNO (x)] + 2);
678 else if (FP_REGISTER_P (REGNO (x))
679 && GET_MODE_SIZE (GET_MODE (x)) > 4)
680 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
681 else
682 fputs (reg_names[REGNO (x)], (stream));
683 break;
685 case MEM:
686 output_address (XEXP (x, 0));
687 break;
689 case CONST:
690 if (TARGET_SHMEDIA
691 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
692 && GET_MODE (XEXP (x, 0)) == DImode
693 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
694 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
696 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
698 fputc ('(', stream);
699 if (GET_CODE (val) == ASHIFTRT)
701 fputc ('(', stream);
702 if (GET_CODE (XEXP (val, 0)) == CONST)
703 fputc ('(', stream);
704 output_addr_const (stream, XEXP (val, 0));
705 if (GET_CODE (XEXP (val, 0)) == CONST)
706 fputc (')', stream);
707 fputs (" >> ", stream);
708 output_addr_const (stream, XEXP (val, 1));
709 fputc (')', stream);
711 else
713 if (GET_CODE (val) == CONST)
714 fputc ('(', stream);
715 output_addr_const (stream, val);
716 if (GET_CODE (val) == CONST)
717 fputc (')', stream);
719 fputs (" & 65535)", stream);
720 break;
723 /* Fall through. */
724 default:
725 if (TARGET_SH1)
726 fputc ('#', stream);
727 output_addr_const (stream, x);
728 break;
730 break;
734 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
735 static void
736 force_into (rtx value, rtx target)
738 value = force_operand (value, target);
739 if (! rtx_equal_p (value, target))
740 emit_insn (gen_move_insn (target, value));
743 /* Emit code to perform a block move. Choose the best method.
745 OPERANDS[0] is the destination.
746 OPERANDS[1] is the source.
747 OPERANDS[2] is the size.
748 OPERANDS[3] is the alignment safe to use. */
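/* For instance (illustrative), a constant 20-byte copy with 4-byte alignment
   expands below into a call to __movmemSI20 on SH1/SH2, while on SH4
   (TARGET_HARD_SH4, without TARGET_SMALLCODE) the same copy calls
   __movmem_i4_odd with the dword count minus one (here 1) loaded into r6.  */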
751 expand_block_move (rtx *operands)
753 int align = INTVAL (operands[3]);
754 int constp = (GET_CODE (operands[2]) == CONST_INT);
755 int bytes = (constp ? INTVAL (operands[2]) : 0);
757 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
758 alignment, or if it isn't a multiple of 4 bytes, then fail. */
759 if (! constp || align < 4 || (bytes % 4 != 0))
760 return 0;
762 if (TARGET_HARD_SH4)
764 if (bytes < 12)
765 return 0;
766 else if (bytes == 12)
768 tree entry_name;
769 rtx sym;
770 rtx func_addr_rtx;
771 rtx r4 = gen_rtx_REG (SImode, 4);
772 rtx r5 = gen_rtx_REG (SImode, 5);
774 entry_name = get_identifier ("__movmemSI12_i4");
776 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
777 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
778 force_into (XEXP (operands[0], 0), r4);
779 force_into (XEXP (operands[1], 0), r5);
780 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
781 return 1;
783 else if (! TARGET_SMALLCODE)
785 tree entry_name;
786 rtx sym;
787 rtx func_addr_rtx;
788 int dwords;
789 rtx r4 = gen_rtx_REG (SImode, 4);
790 rtx r5 = gen_rtx_REG (SImode, 5);
791 rtx r6 = gen_rtx_REG (SImode, 6);
793 entry_name = get_identifier (bytes & 4
794 ? "__movmem_i4_odd"
795 : "__movmem_i4_even");
796 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
797 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
798 force_into (XEXP (operands[0], 0), r4);
799 force_into (XEXP (operands[1], 0), r5);
801 dwords = bytes >> 3;
802 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
803 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
804 return 1;
806 else
807 return 0;
809 if (bytes < 64)
811 char entry[30];
812 tree entry_name;
813 rtx sym;
814 rtx func_addr_rtx;
815 rtx r4 = gen_rtx_REG (SImode, 4);
816 rtx r5 = gen_rtx_REG (SImode, 5);
818 sprintf (entry, "__movmemSI%d", bytes);
819 entry_name = get_identifier (entry);
820 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
821 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
822 force_into (XEXP (operands[0], 0), r4);
823 force_into (XEXP (operands[1], 0), r5);
824 emit_insn (gen_block_move_real (func_addr_rtx));
825 return 1;
828 /* This is the same number of bytes as a memcpy call, but to a different
829 less common function name, so this will occasionally use more space. */
830 if (! TARGET_SMALLCODE)
832 tree entry_name;
833 rtx sym;
834 rtx func_addr_rtx;
835 int final_switch, while_loop;
836 rtx r4 = gen_rtx_REG (SImode, 4);
837 rtx r5 = gen_rtx_REG (SImode, 5);
838 rtx r6 = gen_rtx_REG (SImode, 6);
840 entry_name = get_identifier ("__movmem");
841 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
842 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
843 force_into (XEXP (operands[0], 0), r4);
844 force_into (XEXP (operands[1], 0), r5);
846 /* r6 controls the size of the move. 16 is decremented from it
847 for each 64 bytes moved. Then the negative bit left over is used
848 as an index into a list of move instructions. e.g., a 72 byte move
849 would be set up with size(r6) = 14, for one iteration through the
850 big while loop, and a switch of -2 for the last part. */
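/* A second worked example: a 144 byte move has bytes / 4 == 36, so
   final_switch = 16 - (36 % 16) = 12 and while_loop = (36 / 16 - 1) * 16
   = 16, giving r6 = 28; two trips through the loop (28 -> 12 -> -4) move
   128 bytes and the switch entry for -4 copies the remaining 16 bytes.  */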
852 final_switch = 16 - ((bytes / 4) % 16);
853 while_loop = ((bytes / 4) / 16 - 1) * 16;
854 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
855 emit_insn (gen_block_lump_real (func_addr_rtx));
856 return 1;
859 return 0;
862 /* Prepare operands for a move define_expand; specifically, one of the
863 operands must be in a register. */
866 prepare_move_operands (rtx operands[], enum machine_mode mode)
868 if ((mode == SImode || mode == DImode)
869 && flag_pic
870 && ! ((mode == Pmode || mode == ptr_mode)
871 && tls_symbolic_operand (operands[1], Pmode) != 0))
873 rtx temp;
874 if (SYMBOLIC_CONST_P (operands[1]))
876 if (GET_CODE (operands[0]) == MEM)
877 operands[1] = force_reg (Pmode, operands[1]);
878 else if (TARGET_SHMEDIA
879 && GET_CODE (operands[1]) == LABEL_REF
880 && target_reg_operand (operands[0], mode))
881 /* It's ok. */;
882 else
884 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
885 operands[1] = legitimize_pic_address (operands[1], mode, temp);
888 else if (GET_CODE (operands[1]) == CONST
889 && GET_CODE (XEXP (operands[1], 0)) == PLUS
890 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
892 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
893 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
894 mode, temp);
895 operands[1] = expand_binop (mode, add_optab, temp,
896 XEXP (XEXP (operands[1], 0), 1),
897 no_new_pseudos ? temp
898 : gen_reg_rtx (Pmode),
899 0, OPTAB_LIB_WIDEN);
903 if (! reload_in_progress && ! reload_completed)
905 /* Copy the source to a register if neither operand is a register. */
906 if (! register_operand (operands[0], mode)
907 && ! sh_register_operand (operands[1], mode))
908 operands[1] = copy_to_mode_reg (mode, operands[1]);
910 if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
912 /* This is like change_address_1 (operands[0], mode, 0, 1),
913 except that we can't use that function because it is static. */
914 rtx new = change_address (operands[0], mode, 0);
915 MEM_COPY_ATTRIBUTES (new, operands[0]);
916 operands[0] = new;
919 /* This case can happen while generating code to move the result
920 of a library call to the target. Reject `st r0,@(rX,rY)' because
921 reload will fail to find a spill register for rX, since r0 is already
922 being used for the source. */
923 else if (refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
924 && GET_CODE (operands[0]) == MEM
925 && GET_CODE (XEXP (operands[0], 0)) == PLUS
926 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
927 operands[1] = copy_to_mode_reg (mode, operands[1]);
930 if (mode == Pmode || mode == ptr_mode)
932 rtx op0, op1;
933 enum tls_model tls_kind;
935 op0 = operands[0];
936 op1 = operands[1];
937 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
939 rtx tga_op1, tga_ret, tmp, tmp2;
942 switch (tls_kind)
944 case TLS_MODEL_GLOBAL_DYNAMIC:
945 tga_ret = gen_rtx_REG (Pmode, R0_REG);
946 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
947 op1 = tga_ret;
948 break;
950 case TLS_MODEL_LOCAL_DYNAMIC:
951 tga_ret = gen_rtx_REG (Pmode, R0_REG);
952 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
954 tmp = gen_reg_rtx (Pmode);
955 emit_move_insn (tmp, tga_ret);
957 if (register_operand (op0, Pmode))
958 tmp2 = op0;
959 else
960 tmp2 = gen_reg_rtx (Pmode);
962 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
963 op1 = tmp2;
964 break;
966 case TLS_MODEL_INITIAL_EXEC:
967 if (! flag_pic)
968 emit_insn (gen_GOTaddr2picreg ());
969 tga_op1 = gen_reg_rtx (Pmode);
970 tmp = gen_sym2GOTTPOFF (op1);
971 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
972 op1 = tga_op1;
973 break;
975 case TLS_MODEL_LOCAL_EXEC:
976 tmp2 = gen_reg_rtx (Pmode);
977 emit_insn (gen_load_gbr (tmp2));
978 tmp = gen_reg_rtx (Pmode);
979 emit_insn (gen_symTPOFF2reg (tmp, op1));
980 RTX_UNCHANGING_P (tmp) = 1;
982 if (register_operand (op0, Pmode))
983 op1 = op0;
984 else
985 op1 = gen_reg_rtx (Pmode);
987 emit_insn (gen_addsi3 (op1, tmp, tmp2));
988 break;
990 default:
991 abort ();
993 operands[1] = op1;
997 return 0;
1000 /* Prepare the operands for an scc instruction; make sure that the
1001 compare has been done. */
1003 prepare_scc_operands (enum rtx_code code)
1005 rtx t_reg = gen_rtx_REG (SImode, T_REG);
1006 enum rtx_code oldcode = code;
1007 enum machine_mode mode;
1009 /* First need a compare insn. */
1010 switch (code)
1012 case NE:
1013 /* It isn't possible to handle this case. */
1014 abort ();
1015 case LT:
1016 code = GT;
1017 break;
1018 case LE:
1019 code = GE;
1020 break;
1021 case LTU:
1022 code = GTU;
1023 break;
1024 case LEU:
1025 code = GEU;
1026 break;
1027 default:
1028 break;
1030 if (code != oldcode)
1032 rtx tmp = sh_compare_op0;
1033 sh_compare_op0 = sh_compare_op1;
1034 sh_compare_op1 = tmp;
1037 mode = GET_MODE (sh_compare_op0);
1038 if (mode == VOIDmode)
1039 mode = GET_MODE (sh_compare_op1);
1041 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1042 if ((code != EQ && code != NE
1043 && (sh_compare_op1 != const0_rtx
1044 || code == GTU || code == GEU || code == LTU || code == LEU))
1045 || (mode == DImode && sh_compare_op1 != const0_rtx)
1046 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1047 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1049 if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
1050 (mode == SFmode ? emit_sf_insn : emit_df_insn)
1051 (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
1052 gen_rtx_SET (VOIDmode, t_reg,
1053 gen_rtx_fmt_ee (code, SImode,
1054 sh_compare_op0, sh_compare_op1)),
1055 gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
1056 else
1057 emit_insn (gen_rtx_SET (VOIDmode, t_reg,
1058 gen_rtx_fmt_ee (code, SImode,
1059 sh_compare_op0, sh_compare_op1)));
1061 return t_reg;
1064 /* Called from the md file, set up the operands of a compare instruction. */
1066 void
1067 from_compare (rtx *operands, int code)
1069 enum machine_mode mode = GET_MODE (sh_compare_op0);
1070 rtx insn;
1071 if (mode == VOIDmode)
1072 mode = GET_MODE (sh_compare_op1);
1073 if (code != EQ
1074 || mode == DImode
1075 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1077 /* Force args into regs, since we can't use constants here. */
1078 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1079 if (sh_compare_op1 != const0_rtx
1080 || code == GTU || code == GEU
1081 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1082 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1084 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
1086 from_compare (operands, GT);
1087 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
1089 else
1090 insn = gen_rtx_SET (VOIDmode,
1091 gen_rtx_REG (SImode, T_REG),
1092 gen_rtx_fmt_ee (code, SImode,
1093 sh_compare_op0, sh_compare_op1));
1094 if (TARGET_SH4 && GET_MODE_CLASS (mode) == MODE_FLOAT)
1096 insn = gen_rtx_PARALLEL (VOIDmode,
1097 gen_rtvec (2, insn,
1098 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
1099 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
1101 else
1102 emit_insn (insn);
1105 /* Functions to output assembly code. */
1107 /* Return a sequence of instructions to perform DI or DF move.
1109 Since the SH cannot move a DI or DF in one instruction, we have
1110 to take care when we see overlapping source and dest registers. */
1112 const char *
1113 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
1114 enum machine_mode mode)
1116 rtx dst = operands[0];
1117 rtx src = operands[1];
1119 if (GET_CODE (dst) == MEM
1120 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
1121 return "mov.l %T1,%0\n\tmov.l %1,%0";
1123 if (register_operand (dst, mode)
1124 && register_operand (src, mode))
1126 if (REGNO (src) == MACH_REG)
1127 return "sts mach,%S0\n\tsts macl,%R0";
1129 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1130 when mov.d r1,r0 do r1->r0 then r2->r1. */
1132 if (REGNO (src) + 1 == REGNO (dst))
1133 return "mov %T1,%T0\n\tmov %1,%0";
1134 else
1135 return "mov %1,%0\n\tmov %T1,%T0";
1137 else if (GET_CODE (src) == CONST_INT)
1139 if (INTVAL (src) < 0)
1140 output_asm_insn ("mov #-1,%S0", operands);
1141 else
1142 output_asm_insn ("mov #0,%S0", operands);
1144 return "mov %1,%R0";
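/* Illustrative example: for the DImode constant -5 (assuming it is valid
   here, i.e. it fits the 8-bit mov immediate), this path emits
   "mov #-1,%S0" for the sign-extended high word followed by
   "mov #-5,%R0" for the low word.  */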
1146 else if (GET_CODE (src) == MEM)
1148 int ptrreg = -1;
1149 int dreg = REGNO (dst);
1150 rtx inside = XEXP (src, 0);
1152 if (GET_CODE (inside) == REG)
1153 ptrreg = REGNO (inside);
1154 else if (GET_CODE (inside) == SUBREG)
1155 ptrreg = subreg_regno (inside);
1156 else if (GET_CODE (inside) == PLUS)
1158 ptrreg = REGNO (XEXP (inside, 0));
1159 /* ??? A r0+REG address shouldn't be possible here, because it isn't
1160 an offsettable address. Unfortunately, offsettable addresses use
1161 QImode to check the offset, and a QImode offsettable address
1162 requires r0 for the other operand, which is not currently
1163 supported, so we can't use the 'o' constraint.
1164 Thus we must check for and handle r0+REG addresses here.
1165 We punt for now, since this is likely very rare. */
1166 if (GET_CODE (XEXP (inside, 1)) == REG)
1167 abort ();
1169 else if (GET_CODE (inside) == LABEL_REF)
1170 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1171 else if (GET_CODE (inside) == POST_INC)
1172 return "mov.l %1,%0\n\tmov.l %1,%T0";
1173 else
1174 abort ();
1176 /* Work out the safe way to copy. Copy into the second half first. */
1177 if (dreg == ptrreg)
1178 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1181 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1184 /* Print an instruction which would have gone into a delay slot after
1185 another instruction, but couldn't because the other instruction expanded
1186 into a sequence where putting the slot insn at the end wouldn't work. */
1188 static void
1189 print_slot (rtx insn)
1191 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1, NULL);
1193 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
1196 const char *
1197 output_far_jump (rtx insn, rtx op)
1199 struct { rtx lab, reg, op; } this;
1200 rtx braf_base_lab = NULL_RTX;
1201 const char *jump;
1202 int far;
1203 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1204 rtx prev;
1206 this.lab = gen_label_rtx ();
1208 if (TARGET_SH2
1209 && offset >= -32764
1210 && offset - get_attr_length (insn) <= 32766)
1212 far = 0;
1213 jump = "mov.w %O0,%1; braf %1";
1215 else
1217 far = 1;
1218 if (flag_pic)
1220 if (TARGET_SH2)
1221 jump = "mov.l %O0,%1; braf %1";
1222 else
1223 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1225 else
1226 jump = "mov.l %O0,%1; jmp @%1";
1228 /* If we have a scratch register available, use it. */
1229 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1230 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1232 this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1233 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1234 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1235 output_asm_insn (jump, &this.lab);
1236 if (dbr_sequence_length ())
1237 print_slot (final_sequence);
1238 else
1239 output_asm_insn ("nop", 0);
1241 else
1243 /* Output the delay slot insn first if any. */
1244 if (dbr_sequence_length ())
1245 print_slot (final_sequence);
1247 this.reg = gen_rtx_REG (SImode, 13);
1248 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1249 Fortunately, MACL is fixed and call-clobbered, and we never
1250 need its value across jumps, so save r13 in it instead of in
1251 the stack. */
1252 if (TARGET_SH5)
1253 output_asm_insn ("lds r13, macl", 0);
1254 else
1255 output_asm_insn ("mov.l r13,@-r15", 0);
1256 output_asm_insn (jump, &this.lab);
1257 if (TARGET_SH5)
1258 output_asm_insn ("sts macl, r13", 0);
1259 else
1260 output_asm_insn ("mov.l @r15+,r13", 0);
1262 if (far && flag_pic && TARGET_SH2)
1264 braf_base_lab = gen_label_rtx ();
1265 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1266 CODE_LABEL_NUMBER (braf_base_lab));
1268 if (far)
1269 output_asm_insn (".align 2", 0);
1270 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1271 this.op = op;
1272 if (far && flag_pic)
1274 if (TARGET_SH2)
1275 this.lab = braf_base_lab;
1276 output_asm_insn (".long %O2-%O0", &this.lab);
1278 else
1279 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
1280 return "";
1283 /* Local label counter, used for constants in the pool and inside
1284 pattern branches. */
1286 static int lf = 100;
1288 /* Output code for ordinary branches. */
1290 const char *
1291 output_branch (int logic, rtx insn, rtx *operands)
1293 switch (get_attr_length (insn))
1295 case 6:
1296 /* This can happen if filling the delay slot has caused a forward
1297 branch to exceed its range (we could reverse it, but only
1298 when we know we won't overextend other branches; this should
1299 best be handled by relaxation).
1300 It can also happen when other condbranches hoist delay slot insn
1301 from their destination, thus leading to code size increase.
1302 But the branch will still be in the range -4092..+4098 bytes. */
1304 if (! TARGET_RELAX)
1306 int label = lf++;
1307 /* The call to print_slot will clobber the operands. */
1308 rtx op0 = operands[0];
1310 /* If the instruction in the delay slot is annulled (true), then
1311 there is no delay slot where we can put it now. The only safe
1312 place for it is after the label. final will do that by default. */
1314 if (final_sequence
1315 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
1317 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
1318 ASSEMBLER_DIALECT ? "/" : ".", label);
1319 print_slot (final_sequence);
1321 else
1322 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
1324 output_asm_insn ("bra\t%l0", &op0);
1325 fprintf (asm_out_file, "\tnop\n");
1326 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1328 return "";
1330 /* When relaxing, handle this like a short branch. The linker
1331 will fix it up if it still doesn't fit after relaxation. */
1332 case 2:
1333 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
1335 /* These are for SH2e, in which we have to account for the
1336 extra nop because of the hardware bug in annulled branches. */
1337 case 8:
1338 if (! TARGET_RELAX)
1340 int label = lf++;
1342 if (final_sequence
1343 && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
1344 abort ();
1345 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
1346 logic ? "f" : "t",
1347 ASSEMBLER_DIALECT ? "/" : ".", label);
1348 fprintf (asm_out_file, "\tnop\n");
1349 output_asm_insn ("bra\t%l0", operands);
1350 fprintf (asm_out_file, "\tnop\n");
1351 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1353 return "";
1355 /* When relaxing, fall through. */
1356 case 4:
1358 char buffer[10];
1360 sprintf (buffer, "b%s%ss\t%%l0",
1361 logic ? "t" : "f",
1362 ASSEMBLER_DIALECT ? "/" : ".");
1363 output_asm_insn (buffer, &operands[0]);
1364 return "nop";
1367 default:
1368 /* There should be no longer branches now - that would
1369 indicate that something has destroyed the branches set
1370 up in machine_dependent_reorg. */
1371 abort ();
1375 const char *
1376 output_branchy_insn (enum rtx_code code, const char *template,
1377 rtx insn, rtx *operands)
1379 rtx next_insn = NEXT_INSN (insn);
1381 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
1383 rtx src = SET_SRC (PATTERN (next_insn));
1384 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
1386 /* Following branch not taken */
1387 operands[9] = gen_label_rtx ();
1388 emit_label_after (operands[9], next_insn);
1389 INSN_ADDRESSES_NEW (operands[9],
1390 INSN_ADDRESSES (INSN_UID (next_insn))
1391 + get_attr_length (next_insn));
1392 return template;
1394 else
1396 int offset = (branch_dest (next_insn)
1397 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
1398 if (offset >= -252 && offset <= 258)
1400 if (GET_CODE (src) == IF_THEN_ELSE)
1401 /* branch_true */
1402 src = XEXP (src, 1);
1403 operands[9] = src;
1404 return template;
1408 operands[9] = gen_label_rtx ();
1409 emit_label_after (operands[9], insn);
1410 INSN_ADDRESSES_NEW (operands[9],
1411 INSN_ADDRESSES (INSN_UID (insn))
1412 + get_attr_length (insn));
1413 return template;
1416 const char *
1417 output_ieee_ccmpeq (rtx insn, rtx *operands)
1419 return output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);
1422 /* Output the start of the assembler file. */
1424 static void
1425 sh_file_start (void)
1427 default_file_start ();
1429 if (TARGET_ELF)
1430 /* We need to show the text section with the proper
1431 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
1432 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
1433 will complain. We can teach GAS specifically about the
1434 default attributes for our choice of text section, but
1435 then we would have to change GAS again if/when we change
1436 the text section name. */
1437 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
1438 else
1439 /* Switch to the data section so that the coffsem symbol
1440 isn't in the text section. */
1441 data_section ();
1443 if (TARGET_LITTLE_ENDIAN)
1444 fputs ("\t.little\n", asm_out_file);
1446 if (!TARGET_ELF)
1448 if (TARGET_SHCOMPACT)
1449 fputs ("\t.mode\tSHcompact\n", asm_out_file);
1450 else if (TARGET_SHMEDIA)
1451 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1452 TARGET_SHMEDIA64 ? 64 : 32);
1456 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
1458 static bool
1459 unspec_caller_rtx_p (rtx pat)
1461 switch (GET_CODE (pat))
1463 case CONST:
1464 return unspec_caller_rtx_p (XEXP (pat, 0));
1465 case PLUS:
1466 case MINUS:
1467 if (unspec_caller_rtx_p (XEXP (pat, 0)))
1468 return true;
1469 return unspec_caller_rtx_p (XEXP (pat, 1));
1470 case UNSPEC:
1471 if (XINT (pat, 1) == UNSPEC_CALLER)
1472 return true;
1473 default:
1474 break;
1477 return false;
1480 /* Indicate that INSN cannot be duplicated. This is true for an insn
1481 that generates a unique label. */
1483 static bool
1484 sh_cannot_copy_insn_p (rtx insn)
1486 rtx pat;
1488 if (!reload_completed || !flag_pic)
1489 return false;
1491 if (GET_CODE (insn) != INSN)
1492 return false;
1493 if (asm_noperands (insn) >= 0)
1494 return false;
1496 pat = PATTERN (insn);
1497 if (GET_CODE (pat) != SET)
1498 return false;
1499 pat = SET_SRC (pat);
1501 if (unspec_caller_rtx_p (pat))
1502 return true;
1504 return false;
1507 /* Actual number of instructions used to make a shift by N. */
1508 static const char ashiftrt_insns[] =
1509 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
1511 /* Left shift and logical right shift are the same. */
1512 static const char shift_insns[] =
1513 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1515 /* Individual shift amounts needed to get the above length sequences.
1516 One bit right shifts clobber the T bit, so when possible, put one bit
1517 shifts in the middle of the sequence, so the ends are eligible for
1518 branch delay slots. */
1519 static const short shift_amounts[32][5] = {
1520 {0}, {1}, {2}, {2, 1},
1521 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
1522 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1523 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
1524 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1525 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1526 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1527 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
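/* Worked examples: a shift by 13 uses {8, 2, 1, 2} (8 + 2 + 1 + 2 == 13,
   four insns, matching shift_insns[13]), while a shift by 14 uses
   {8, -2, 8}, where the negative entry is emitted as a shift in the
   opposite direction (see gen_ashift), for three insns in total.  */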
1529 /* Likewise, but for shift amounts < 16, up to three highmost bits
1530 might be clobbered. This is typically used when combined with some
1531 kind of sign or zero extension. */
1533 static const char ext_shift_insns[] =
1534 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1536 static const short ext_shift_amounts[32][4] = {
1537 {0}, {1}, {2}, {2, 1},
1538 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
1539 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1540 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
1541 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1542 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1543 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1544 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1546 /* Assuming we have a value that has been sign-extended by at least one bit,
1547 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
1548 to shift it by N without data loss, and quicker than by other means? */
1549 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
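/* For illustration, ((n) | 8) == 15 holds exactly for n == 7 and n == 15,
   the two ext_shift_amounts entries that end with a one-bit right shift
   ({8, -1} and {16, -1}); that final shift can be done arithmetically when
   the value is already sign-extended by at least one bit.  */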
1551 /* This is used in length attributes in sh.md to help compute the length
1552 of arbitrary constant shift instructions. */
1555 shift_insns_rtx (rtx insn)
1557 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1558 int shift_count = INTVAL (XEXP (set_src, 1));
1559 enum rtx_code shift_code = GET_CODE (set_src);
1561 switch (shift_code)
1563 case ASHIFTRT:
1564 return ashiftrt_insns[shift_count];
1565 case LSHIFTRT:
1566 case ASHIFT:
1567 return shift_insns[shift_count];
1568 default:
1569 abort ();
1573 /* Return the cost of a shift. */
1575 static inline int
1576 shiftcosts (rtx x)
1578 int value;
1580 if (TARGET_SHMEDIA)
1581 return 1;
1583 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
1585 if (GET_MODE (x) == DImode
1586 && GET_CODE (XEXP (x, 1)) == CONST_INT
1587 && INTVAL (XEXP (x, 1)) == 1)
1588 return 2;
1590 /* Everything else is invalid, because there is no pattern for it. */
1591 return 10000;
1593 /* If shift by a non constant, then this will be expensive. */
1594 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1595 return SH_DYNAMIC_SHIFT_COST;
1597 value = INTVAL (XEXP (x, 1));
1599 /* Otherwise, return the true cost in instructions. */
1600 if (GET_CODE (x) == ASHIFTRT)
1602 int cost = ashiftrt_insns[value];
1603 /* If SH3, then we put the constant in a reg and use shad. */
1604 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1605 cost = 1 + SH_DYNAMIC_SHIFT_COST;
1606 return cost;
1608 else
1609 return shift_insns[value];
1612 /* Return the cost of an AND operation. */
1614 static inline int
1615 andcosts (rtx x)
1617 int i;
1619 /* Anding with a register is a single-cycle and instruction. */
1620 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1621 return 1;
1623 i = INTVAL (XEXP (x, 1));
1625 if (TARGET_SHMEDIA)
1627 if ((GET_CODE (XEXP (x, 1)) == CONST_INT
1628 && CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1629 || EXTRA_CONSTRAINT_C16 (XEXP (x, 1)))
1630 return 1;
1631 else
1632 return 2;
1635 /* These constants are single cycle extu.[bw] instructions. */
1636 if (i == 0xff || i == 0xffff)
1637 return 1;
1638 /* Constants that can be used in an and immediate instruction in a single
1639 cycle, but this requires r0, so make it a little more expensive. */
1640 if (CONST_OK_FOR_K08 (i))
1641 return 2;
1642 /* Constants that can be loaded with a mov immediate and an and.
1643 This case is probably unnecessary. */
1644 if (CONST_OK_FOR_I08 (i))
1645 return 2;
1646 /* Any other constants requires a 2 cycle pc-relative load plus an and.
1647 This case is probably unnecessary. */
1648 return 3;
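/* For example, (and reg 0xff) costs 1 (a single extu.b), (and reg 0x7f)
   costs 2 (the immediate form needs r0), and (and reg 0xfff) costs 3
   (pc-relative constant load plus the and).  */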
1651 /* Return the cost of an addition or a subtraction. */
1653 static inline int
1654 addsubcosts (rtx x)
1656 /* Adding a register is a single cycle insn. */
1657 if (GET_CODE (XEXP (x, 1)) == REG
1658 || GET_CODE (XEXP (x, 1)) == SUBREG)
1659 return 1;
1661 /* Likewise for small constants. */
1662 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1663 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
1664 return 1;
1666 if (TARGET_SHMEDIA)
1667 switch (GET_CODE (XEXP (x, 1)))
1669 case CONST:
1670 case LABEL_REF:
1671 case SYMBOL_REF:
1672 return TARGET_SHMEDIA64 ? 5 : 3;
1674 case CONST_INT:
1675 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1676 return 2;
1677 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
1678 return 3;
1679 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
1680 return 4;
1682 /* Fall through. */
1683 default:
1684 return 5;
1687 /* Any other constant requires a 2 cycle pc-relative load plus an
1688 addition. */
1689 return 3;
1692 /* Return the cost of a multiply. */
1693 static inline int
1694 multcosts (rtx x ATTRIBUTE_UNUSED)
1696 if (TARGET_SHMEDIA)
1697 return 3;
1699 if (TARGET_SH2)
1701 /* We have a mul insn, so we can never take more than the mul and the
1702 read of the mac reg, but count more because of the latency and extra
1703 reg usage. */
1704 if (TARGET_SMALLCODE)
1705 return 2;
1706 return 3;
1709 /* If we're aiming at small code, then just count the number of
1710 insns in a multiply call sequence. */
1711 if (TARGET_SMALLCODE)
1712 return 5;
1714 /* Otherwise count all the insns in the routine we'd be calling too. */
1715 return 20;
1718 /* Compute a (partial) cost for rtx X. Return true if the complete
1719 cost has been computed, and false if subexpressions should be
1720 scanned. In either case, *TOTAL contains the cost result. */
1722 static bool
1723 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
1725 switch (code)
1727 case CONST_INT:
1728 if (TARGET_SHMEDIA)
1730 if (INTVAL (x) == 0)
1731 *total = 0;
1732 else if (outer_code == AND && and_operand ((x), DImode))
1733 *total = 0;
1734 else if ((outer_code == IOR || outer_code == XOR
1735 || outer_code == PLUS)
1736 && CONST_OK_FOR_I10 (INTVAL (x)))
1737 *total = 0;
1738 else if (CONST_OK_FOR_I16 (INTVAL (x)))
1739 *total = COSTS_N_INSNS (outer_code != SET);
1740 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
1741 *total = COSTS_N_INSNS (2);
1742 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
1743 *total = COSTS_N_INSNS (3);
1744 else
1745 *total = COSTS_N_INSNS (4);
1746 return true;
1748 if (CONST_OK_FOR_I08 (INTVAL (x)))
1749 *total = 0;
1750 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
1751 && CONST_OK_FOR_K08 (INTVAL (x)))
1752 *total = 1;
1753 else
1754 *total = 8;
1755 return true;
1757 case CONST:
1758 case LABEL_REF:
1759 case SYMBOL_REF:
1760 if (TARGET_SHMEDIA64)
1761 *total = COSTS_N_INSNS (4);
1762 else if (TARGET_SHMEDIA32)
1763 *total = COSTS_N_INSNS (2);
1764 else
1765 *total = 5;
1766 return true;
1768 case CONST_DOUBLE:
1769 if (TARGET_SHMEDIA)
1770 *total = COSTS_N_INSNS (4);
1771 else
1772 *total = 10;
1773 return true;
1775 case PLUS:
1776 *total = COSTS_N_INSNS (addsubcosts (x));
1777 return true;
1779 case AND:
1780 *total = COSTS_N_INSNS (andcosts (x));
1781 return true;
1783 case MULT:
1784 *total = COSTS_N_INSNS (multcosts (x));
1785 return true;
1787 case ASHIFT:
1788 case ASHIFTRT:
1789 case LSHIFTRT:
1790 *total = COSTS_N_INSNS (shiftcosts (x));
1791 return true;
1793 case DIV:
1794 case UDIV:
1795 case MOD:
1796 case UMOD:
1797 *total = COSTS_N_INSNS (20);
1798 return true;
1800 case FLOAT:
1801 case FIX:
1802 *total = 100;
1803 return true;
1805 default:
1806 return false;
1810 /* Compute the cost of an address. For the SH, all valid addresses are
1811 the same cost. Use a slightly higher cost for reg + reg addressing,
1812 since it increases pressure on r0. */
1814 static int
1815 sh_address_cost (rtx X)
1817 return (GET_CODE (X) == PLUS
1818 && ! CONSTANT_P (XEXP (X, 1))
1819 && ! TARGET_SHMEDIA ? 1 : 0);
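/* E.g. on non-SHmedia targets a reg+reg address such as @(r0,r4) costs 1,
   while @r4 or @(8,r4) costs 0.  */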
1822 /* Code to expand a shift. */
1824 void
1825 gen_ashift (int type, int n, rtx reg)
1827 /* Negative values here come from the shift_amounts array. */
1828 if (n < 0)
1830 if (type == ASHIFT)
1831 type = LSHIFTRT;
1832 else
1833 type = ASHIFT;
1834 n = -n;
1837 switch (type)
1839 case ASHIFTRT:
1840 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
1841 break;
1842 case LSHIFTRT:
1843 if (n == 1)
1844 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
1845 else
1846 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
1847 break;
1848 case ASHIFT:
1849 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
1850 break;
1854 /* Same for HImode */
1856 void
1857 gen_ashift_hi (int type, int n, rtx reg)
1859 /* Negative values here come from the shift_amounts array. */
1860 if (n < 0)
1862 if (type == ASHIFT)
1863 type = LSHIFTRT;
1864 else
1865 type = ASHIFT;
1866 n = -n;
1869 switch (type)
1871 case ASHIFTRT:
1872 case LSHIFTRT:
1873 /* We don't have HImode right shift operations because using the
1874 ordinary 32 bit shift instructions for that doesn't generate proper
1875 zero/sign extension.
1876 gen_ashift_hi is only called in contexts where we know that the
1877 sign extension works out correctly. */
1879 int offset = 0;
1880 if (GET_CODE (reg) == SUBREG)
1882 offset = SUBREG_BYTE (reg);
1883 reg = SUBREG_REG (reg);
1885 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
1886 break;
1888 case ASHIFT:
1889 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
1890 break;
1894 /* Output RTL to split a constant shift into its component SH constant
1895 shift instructions. */
1897 void
1898 gen_shifty_op (int code, rtx *operands)
1900 int value = INTVAL (operands[2]);
1901 int max, i;
1903 /* Truncate the shift count in case it is out of bounds. */
1904 value = value & 0x1f;
1906 if (value == 31)
1908 if (code == LSHIFTRT)
1910 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
1911 emit_insn (gen_movt (operands[0]));
1912 return;
1914 else if (code == ASHIFT)
1916 /* There is a two instruction sequence for 31 bit left shifts,
1917 but it requires r0. */
1918 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
1920 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
1921 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
1922 return;
1926 else if (value == 0)
1928 /* This can happen when not optimizing. We must output something here
1929 to prevent the compiler from aborting in final.c after the try_split
1930 call. */
1931 emit_insn (gen_nop ());
1932 return;
1935 max = shift_insns[value];
1936 for (i = 0; i < max; i++)
1937 gen_ashift (code, shift_amounts[value][i], operands[0]);
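/* Hypothetical illustration: if shift_insns[20] were 2 and
   shift_amounts[20] were { 16, 4 }, a constant left shift by 20 would be
   emitted as two gen_ashift calls, one by 16 and one by 4.  The real
   tables, defined earlier in this file, are meant to hold the cheapest
   known decomposition for each shift count.  */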
1940 /* Same as above, but optimized for values where the topmost bits don't
1941 matter. */
1943 void
1944 gen_shifty_hi_op (int code, rtx *operands)
1946 int value = INTVAL (operands[2]);
1947 int max, i;
1948 void (*gen_fun) (int, int, rtx);
1950 /* This operation is used by and_shl for SImode values with a few
1951 high bits known to be cleared. */
1952 value &= 31;
1953 if (value == 0)
1955 emit_insn (gen_nop ());
1956 return;
1959 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
1960 if (code == ASHIFT)
1962 max = ext_shift_insns[value];
1963 for (i = 0; i < max; i++)
1964 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
1966 else
1967 /* When shifting right, emit the shifts in reverse order, so that
1968 solitary negative values come first. */
1969 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
1970 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
1973 /* Output RTL for an arithmetic right shift. */
1975 /* ??? Rewrite to use super-optimizer sequences. */
1978 expand_ashiftrt (rtx *operands)
1980 rtx sym;
1981 rtx wrk;
1982 char func[18];
1983 tree func_name;
1984 int value;
1986 if (TARGET_SH3)
1988 if (GET_CODE (operands[2]) != CONST_INT)
1990 rtx count = copy_to_mode_reg (SImode, operands[2]);
1991 emit_insn (gen_negsi2 (count, count));
1992 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
1993 return 1;
1995 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
1996 > 1 + SH_DYNAMIC_SHIFT_COST)
1998 rtx count
1999 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2000 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2001 return 1;
2004 if (GET_CODE (operands[2]) != CONST_INT)
2005 return 0;
2007 value = INTVAL (operands[2]) & 31;
2009 if (value == 31)
2011 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2012 return 1;
2014 else if (value >= 16 && value <= 19)
2016 wrk = gen_reg_rtx (SImode);
2017 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2018 value -= 16;
2019 while (value--)
2020 gen_ashift (ASHIFTRT, 1, wrk);
2021 emit_move_insn (operands[0], wrk);
2022 return 1;
2024   /* Expand a short sequence inline; for longer ones, call a magic routine.  */
2025 else if (value <= 5)
2027 wrk = gen_reg_rtx (SImode);
2028 emit_move_insn (wrk, operands[1]);
2029 while (value--)
2030 gen_ashift (ASHIFTRT, 1, wrk);
2031 emit_move_insn (operands[0], wrk);
2032 return 1;
2035 wrk = gen_reg_rtx (Pmode);
2037 /* Load the value into an arg reg and call a helper. */
2038 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2039 sprintf (func, "__ashiftrt_r4_%d", value);
2040 func_name = get_identifier (func);
2041 sym = function_symbol (IDENTIFIER_POINTER (func_name));
2042 emit_move_insn (wrk, sym);
2043 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2044 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
2045 return 1;
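/* Example of the helper-call path above: for a constant arithmetic right
   shift by 25 on a target without cheap dynamic shifts, the operand is
   loaded into r4, the address of the library routine __ashiftrt_r4_25 is
   loaded into a scratch register, the ashrsi3_n pattern performs the
   call, and the result is copied back out of r4.  */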
2049 sh_dynamicalize_shift_p (rtx count)
2051 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
2054 /* Try to find a good way to implement the combiner pattern
2055 [(set (match_operand:SI 0 "register_operand" "r")
2056 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2057 (match_operand:SI 2 "const_int_operand" "n"))
2058 (match_operand:SI 3 "const_int_operand" "n"))) .
2059 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2060 return 0 for simple right / left or left/right shift combination.
2061 return 1 for a combination of shifts with zero_extend.
2062 return 2 for a combination of shifts with an AND that needs r0.
2063 return 3 for a combination of shifts with an AND that needs an extra
2064 scratch register, when the three highmost bits of the AND mask are clear.
2065 return 4 for a combination of shifts with an AND that needs an extra
2066 scratch register, when any of the three highmost bits of the AND mask
2067 is set.
2068 If ATTRP is set, store an initial right shift width in ATTRP[0],
2069    and the instruction length in ATTRP[1].  These values are not valid
2070 when returning 0.
2071 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2072 shift_amounts for the last shift value that is to be used before the
2073 sign extend. */
2075 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2077 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2078 int left = INTVAL (left_rtx), right;
2079 int best = 0;
2080 int cost, best_cost = 10000;
2081 int best_right = 0, best_len = 0;
2082 int i;
2083 int can_ext;
2085 if (left < 0 || left > 31)
2086 return 0;
2087 if (GET_CODE (mask_rtx) == CONST_INT)
2088 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2089 else
2090 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2091 /* Can this be expressed as a right shift / left shift pair? */
2092 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2093 right = exact_log2 (lsb);
2094 mask2 = ~(mask + lsb - 1);
2095 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2096 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
2097 if (! mask2)
2098 best_cost = shift_insns[right] + shift_insns[right + left];
2099 /* mask has no trailing zeroes <==> ! right */
2100 else if (! right && mask2 == ~(lsb2 - 1))
2102 int late_right = exact_log2 (lsb2);
2103 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2105 /* Try to use zero extend. */
2106 if (mask2 == ~(lsb2 - 1))
2108 int width, first;
2110 for (width = 8; width <= 16; width += 8)
2112 /* Can we zero-extend right away? */
2113 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2115 cost
2116 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2117 if (cost < best_cost)
2119 best = 1;
2120 best_cost = cost;
2121 best_right = right;
2122 best_len = cost;
2123 if (attrp)
2124 attrp[2] = -1;
2126 continue;
2128 /* ??? Could try to put zero extend into initial right shift,
2129 or even shift a bit left before the right shift. */
2130 /* Determine value of first part of left shift, to get to the
2131 zero extend cut-off point. */
2132 first = width - exact_log2 (lsb2) + right;
2133 if (first >= 0 && right + left - first >= 0)
2135 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2136 + ext_shift_insns[right + left - first];
2137 if (cost < best_cost)
2139 best = 1;
2140 best_cost = cost;
2141 best_right = right;
2142 best_len = cost;
2143 if (attrp)
2144 attrp[2] = first;
2149 /* Try to use r0 AND pattern */
2150 for (i = 0; i <= 2; i++)
2152 if (i > right)
2153 break;
2154 if (! CONST_OK_FOR_K08 (mask >> i))
2155 continue;
2156 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2157 if (cost < best_cost)
2159 best = 2;
2160 best_cost = cost;
2161 best_right = i;
2162 best_len = cost - 1;
2165 /* Try to use a scratch register to hold the AND operand. */
2166 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2167 for (i = 0; i <= 2; i++)
2169 if (i > right)
2170 break;
2171 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2172 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2173 if (cost < best_cost)
2175 best = 4 - can_ext;
2176 best_cost = cost;
2177 best_right = i;
2178 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2182 if (attrp)
2184 attrp[0] = best_right;
2185 attrp[1] = best_len;
2187 return best;
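/* A concrete instance of what this function classifies: (x << 2) & 0x3fc
   equals zero-extending the low 8 bits of x and shifting left by 2,
   because 0x3fc == 0xff << 2.  Assuming the usual single-insn shift
   counts, that makes the zero-extend variant (kind 1) cheaper than
   materializing the AND mask, which is why the code above tries 8- and
   16-bit zero extends before the r0 AND pattern or a scratch register.  */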
2190 /* This is used in length attributes of the unnamed instructions
2191 corresponding to shl_and_kind return values of 1 and 2. */
2193 shl_and_length (rtx insn)
2195 rtx set_src, left_rtx, mask_rtx;
2196 int attributes[3];
2198 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2199 left_rtx = XEXP (XEXP (set_src, 0), 1);
2200 mask_rtx = XEXP (set_src, 1);
2201 shl_and_kind (left_rtx, mask_rtx, attributes);
2202 return attributes[1];
2205 /* This is used in length attribute of the and_shl_scratch instruction. */
2208 shl_and_scr_length (rtx insn)
2210 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2211 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2212 rtx op = XEXP (set_src, 0);
2213 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2214 op = XEXP (XEXP (op, 0), 0);
2215 return len + shift_insns[INTVAL (XEXP (op, 1))];
2218 /* Generating rtl? */
2219 extern int rtx_equal_function_value_matters;
2221 /* Generate rtl for instructions for which shl_and_kind advised a particular
2222    method of generating them, i.e. returned nonzero.  */
2225 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2227 int attributes[3];
2228 unsigned HOST_WIDE_INT mask;
2229 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2230 int right, total_shift;
2231 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2233 right = attributes[0];
2234 total_shift = INTVAL (left_rtx) + right;
2235 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2236 switch (kind)
2238 default:
2239 return -1;
2240 case 1:
2242 int first = attributes[2];
2243 rtx operands[3];
2245 if (first < 0)
2247 emit_insn ((mask << right) <= 0xff
2248 ? gen_zero_extendqisi2 (dest,
2249 gen_lowpart (QImode, source))
2250 : gen_zero_extendhisi2 (dest,
2251 gen_lowpart (HImode, source)));
2252 source = dest;
2254 if (source != dest)
2255 emit_insn (gen_movsi (dest, source));
2256 operands[0] = dest;
2257 if (right)
2259 operands[2] = GEN_INT (right);
2260 gen_shifty_hi_op (LSHIFTRT, operands);
2262 if (first > 0)
2264 operands[2] = GEN_INT (first);
2265 gen_shifty_hi_op (ASHIFT, operands);
2266 total_shift -= first;
2267 mask <<= first;
2269 if (first >= 0)
2270 emit_insn (mask <= 0xff
2271 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
2272 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2273 if (total_shift > 0)
2275 operands[2] = GEN_INT (total_shift);
2276 gen_shifty_hi_op (ASHIFT, operands);
2278 break;
2280 case 4:
2281 shift_gen_fun = gen_shifty_op;
2282 case 3:
2283 /* If the topmost bit that matters is set, set the topmost bits
2284 that don't matter. This way, we might be able to get a shorter
2285 signed constant. */
2286 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
2287 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
2288 case 2:
2289 /* Don't expand fine-grained when combining, because that will
2290 make the pattern fail. */
2291 if (rtx_equal_function_value_matters
2292 || reload_in_progress || reload_completed)
2294 rtx operands[3];
2296 /* Cases 3 and 4 should be handled by this split
2297 only while combining */
2298 if (kind > 2)
2299 abort ();
2300 if (right)
2302 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2303 source = dest;
2305 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
2306 if (total_shift)
2308 operands[0] = dest;
2309 operands[1] = dest;
2310 operands[2] = GEN_INT (total_shift);
2311 shift_gen_fun (ASHIFT, operands);
2313 break;
2315 else
2317 int neg = 0;
2318 if (kind != 4 && total_shift < 16)
2320 neg = -ext_shift_amounts[total_shift][1];
2321 if (neg > 0)
2322 neg -= ext_shift_amounts[total_shift][2];
2323 else
2324 neg = 0;
2326 emit_insn (gen_and_shl_scratch (dest, source,
2327 GEN_INT (right),
2328 GEN_INT (mask),
2329 GEN_INT (total_shift + neg),
2330 GEN_INT (neg)));
2331 emit_insn (gen_movsi (dest, dest));
2332 break;
2335 return 0;
2338 /* Try to find a good way to implement the combiner pattern
2339 [(set (match_operand:SI 0 "register_operand" "=r")
2340 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2341 (match_operand:SI 2 "const_int_operand" "n")
2342 (match_operand:SI 3 "const_int_operand" "n")
2343 (const_int 0)))
2344 (clobber (reg:SI T_REG))]
2345 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2346 return 0 for simple left / right shift combination.
2347 return 1 for left shift / 8 bit sign extend / left shift.
2348 return 2 for left shift / 16 bit sign extend / left shift.
2349 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2350 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2351 return 5 for left shift / 16 bit sign extend / right shift
2352 return 6 for < 8 bit sign extend / left shift.
2353 return 7 for < 8 bit sign extend / left shift / single right shift.
2354 If COSTP is nonzero, assign the calculated cost to *COSTP. */
2357 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
2359 int left, size, insize, ext;
2360 int cost = 0, best_cost;
2361 int kind;
2363 left = INTVAL (left_rtx);
2364 size = INTVAL (size_rtx);
2365 insize = size - left;
2366 if (insize <= 0)
2367 abort ();
2368 /* Default to left / right shift. */
2369 kind = 0;
2370 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
2371 if (size <= 16)
2373 /* 16 bit shift / sign extend / 16 bit shift */
2374 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2375 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2376 below, by alternative 3 or something even better. */
2377 if (cost < best_cost)
2379 kind = 5;
2380 best_cost = cost;
2383 /* Try a plain sign extend between two shifts. */
2384 for (ext = 16; ext >= insize; ext -= 8)
2386 if (ext <= size)
2388 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2389 if (cost < best_cost)
2391 kind = ext / (unsigned) 8;
2392 best_cost = cost;
2395 /* Check if we can do a sloppy shift with a final signed shift
2396 restoring the sign. */
2397 if (EXT_SHIFT_SIGNED (size - ext))
2398 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2399 /* If not, maybe it's still cheaper to do the second shift sloppy,
2400 and do a final sign extend? */
2401 else if (size <= 16)
2402 cost = ext_shift_insns[ext - insize] + 1
2403 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2404 else
2405 continue;
2406 if (cost < best_cost)
2408 kind = ext / (unsigned) 8 + 2;
2409 best_cost = cost;
2412 /* Check if we can sign extend in r0 */
2413 if (insize < 8)
2415 cost = 3 + shift_insns[left];
2416 if (cost < best_cost)
2418 kind = 6;
2419 best_cost = cost;
2421 /* Try the same with a final signed shift. */
2422 if (left < 31)
2424 cost = 3 + ext_shift_insns[left + 1] + 1;
2425 if (cost < best_cost)
2427 kind = 7;
2428 best_cost = cost;
2432 if (TARGET_SH3)
2434 /* Try to use a dynamic shift. */
2435 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2436 if (cost < best_cost)
2438 kind = 0;
2439 best_cost = cost;
2442 if (costp)
2443 *costp = cost;
2444 return kind;
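/* The kind 0 default above is the classic bitfield-extract sequence:
   with insize = size - left, shift the operand left by 32 - insize so
   the field's sign bit lands in bit 31, then arithmetic-shift right by
   32 - size to sign extend and reposition the field; hence the cost
   shift_insns[32 - insize] + ashiftrt_insns[32 - size].  */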
2447 /* Function to be used in the length attribute of the instructions
2448 implementing this pattern. */
2451 shl_sext_length (rtx insn)
2453 rtx set_src, left_rtx, size_rtx;
2454 int cost;
2456 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2457 left_rtx = XEXP (XEXP (set_src, 0), 1);
2458 size_rtx = XEXP (set_src, 1);
2459 shl_sext_kind (left_rtx, size_rtx, &cost);
2460 return cost;
2463 /* Generate rtl for this pattern */
2466 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
2468 int kind;
2469 int left, size, insize, cost;
2470 rtx operands[3];
2472 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2473 left = INTVAL (left_rtx);
2474 size = INTVAL (size_rtx);
2475 insize = size - left;
2476 switch (kind)
2478 case 1:
2479 case 2:
2480 case 3:
2481 case 4:
2483 int ext = kind & 1 ? 8 : 16;
2484 int shift2 = size - ext;
2486 /* Don't expand fine-grained when combining, because that will
2487 make the pattern fail. */
2488 if (! rtx_equal_function_value_matters
2489 && ! reload_in_progress && ! reload_completed)
2491 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2492 emit_insn (gen_movsi (dest, source));
2493 break;
2495 if (dest != source)
2496 emit_insn (gen_movsi (dest, source));
2497 operands[0] = dest;
2498 if (ext - insize)
2500 operands[2] = GEN_INT (ext - insize);
2501 gen_shifty_hi_op (ASHIFT, operands);
2503 emit_insn (kind & 1
2504 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2505 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2506 if (kind <= 2)
2508 if (shift2)
2510 operands[2] = GEN_INT (shift2);
2511 gen_shifty_op (ASHIFT, operands);
2514 else
2516 if (shift2 > 0)
2518 if (EXT_SHIFT_SIGNED (shift2))
2520 operands[2] = GEN_INT (shift2 + 1);
2521 gen_shifty_op (ASHIFT, operands);
2522 operands[2] = const1_rtx;
2523 gen_shifty_op (ASHIFTRT, operands);
2524 break;
2526 operands[2] = GEN_INT (shift2);
2527 gen_shifty_hi_op (ASHIFT, operands);
2529 else if (shift2)
2531 operands[2] = GEN_INT (-shift2);
2532 gen_shifty_hi_op (LSHIFTRT, operands);
2534 emit_insn (size <= 8
2535 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2536 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2538 break;
2540 case 5:
2542 int i = 16 - size;
2543 if (! rtx_equal_function_value_matters
2544 && ! reload_in_progress && ! reload_completed)
2545 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2546 else
2548 operands[0] = dest;
2549 operands[2] = GEN_INT (16 - insize);
2550 gen_shifty_hi_op (ASHIFT, operands);
2551 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2553 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2554 while (--i >= 0)
2555 gen_ashift (ASHIFTRT, 1, dest);
2556 break;
2558 case 6:
2559 case 7:
2560 /* Don't expand fine-grained when combining, because that will
2561 make the pattern fail. */
2562 if (! rtx_equal_function_value_matters
2563 && ! reload_in_progress && ! reload_completed)
2565 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2566 emit_insn (gen_movsi (dest, source));
2567 break;
2569 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2570 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2571 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2572 operands[0] = dest;
2573 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2574 gen_shifty_op (ASHIFT, operands);
2575 if (kind == 7)
2576 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
2577 break;
2578 default:
2579 return -1;
2581 return 0;
2584 /* Prefix a symbol_ref name with "datalabel". */
2587 gen_datalabel_ref (rtx sym)
2589 if (GET_CODE (sym) == LABEL_REF)
2590 return gen_rtx_CONST (GET_MODE (sym),
2591 gen_rtx_UNSPEC (GET_MODE (sym),
2592 gen_rtvec (1, sym),
2593 UNSPEC_DATALABEL));
2595 if (GET_CODE (sym) != SYMBOL_REF)
2596 abort ();
2598 return sym;
2602 /* The SH cannot load a large constant into a register; constants have to
2603    come from a pc relative load.  The reference of a pc relative load
2604    instruction must be less than 1k in front of the instruction.  This
2605 means that we often have to dump a constant inside a function, and
2606 generate code to branch around it.
2608 It is important to minimize this, since the branches will slow things
2609 down and make things bigger.
2611 Worst case code looks like:
2613 mov.l L1,rn
2614 bra L2
2616 align
2617 L1: .long value
2621 mov.l L3,rn
2622 bra L4
2624 align
2625 L3: .long value
2629 We fix this by performing a scan before scheduling, which notices which
2630 instructions need to have their operands fetched from the constant table
2631 and builds the table.
2633 The algorithm is:
2635    Scan to find an instruction which needs a pcrel move.  Look forward, find the
2636    last barrier which is within MAX_COUNT bytes of the requirement.
2637    If there isn't one, make one.  Process all the instructions between
2638    the found instruction and the barrier.
2640 In the above example, we can tell that L3 is within 1k of L1, so
2641 the first move can be shrunk from the 3 insn+constant sequence into
2642 just 1 insn, and the constant moved to L3 to make:
2644 mov.l L1,rn
2646 mov.l L3,rn
2647 bra L4
2649 align
2650 L3:.long value
2651 L4:.long value
2653 Then the second move becomes the target for the shortening process. */
2655 typedef struct
2657 rtx value; /* Value in table. */
2658 rtx label; /* Label of value. */
2659 rtx wend; /* End of window. */
2660 enum machine_mode mode; /* Mode of value. */
2662 /* True if this constant is accessed as part of a post-increment
2663 sequence. Note that HImode constants are never accessed in this way. */
2664 bool part_of_sequence_p;
2665 } pool_node;
2667 /* The maximum number of constants that can fit into one pool, since
2668 the pc relative range is 0...1020 bytes and constants are at least 4
2669 bytes long. */
2671 #define MAX_POOL_SIZE (1020/4)
2672 static pool_node pool_vector[MAX_POOL_SIZE];
2673 static int pool_size;
2674 static rtx pool_window_label;
2675 static int pool_window_last;
2677 /* ??? If we need a constant in HImode which is the truncated value of a
2678 constant we need in SImode, we could combine the two entries thus saving
2679 two bytes. Is this common enough to be worth the effort of implementing
2680 it? */
2682 /* ??? This stuff should be done at the same time that we shorten branches.
2683 As it is now, we must assume that all branches are the maximum size, and
2684 this causes us to almost always output constant pools sooner than
2685 necessary. */
2687 /* Add a constant to the pool and return its label. */
2689 static rtx
2690 add_constant (rtx x, enum machine_mode mode, rtx last_value)
2692 int i;
2693 rtx lab, new, ref, newref;
2695 /* First see if we've already got it. */
2696 for (i = 0; i < pool_size; i++)
2698 if (x->code == pool_vector[i].value->code
2699 && mode == pool_vector[i].mode)
2701 if (x->code == CODE_LABEL)
2703 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
2704 continue;
2706 if (rtx_equal_p (x, pool_vector[i].value))
2708 lab = new = 0;
2709 if (! last_value
2710 || ! i
2711 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
2713 new = gen_label_rtx ();
2714 LABEL_REFS (new) = pool_vector[i].label;
2715 pool_vector[i].label = lab = new;
2717 if (lab && pool_window_label)
2719 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2720 ref = pool_vector[pool_window_last].wend;
2721 LABEL_NEXTREF (newref) = ref;
2722 pool_vector[pool_window_last].wend = newref;
2724 if (new)
2725 pool_window_label = new;
2726 pool_window_last = i;
2727 return lab;
2732 /* Need a new one. */
2733 pool_vector[pool_size].value = x;
2734 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
2736 lab = 0;
2737 pool_vector[pool_size - 1].part_of_sequence_p = true;
2739 else
2740 lab = gen_label_rtx ();
2741 pool_vector[pool_size].mode = mode;
2742 pool_vector[pool_size].label = lab;
2743 pool_vector[pool_size].wend = NULL_RTX;
2744 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
2745 if (lab && pool_window_label)
2747 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2748 ref = pool_vector[pool_window_last].wend;
2749 LABEL_NEXTREF (newref) = ref;
2750 pool_vector[pool_window_last].wend = newref;
2752 if (lab)
2753 pool_window_label = lab;
2754 pool_window_last = pool_size;
2755 pool_size++;
2756 return lab;
2759 /* Output the literal table. START, if nonzero, is the first instruction
2760 this table is needed for, and also indicates that there is at least one
2761    casesi_worker_2 instruction; we have to emit the operand3 labels from
2762 these insns at a 4-byte aligned position. BARRIER is the barrier
2763 after which we are to place the table. */
2765 static void
2766 dump_table (rtx start, rtx barrier)
2768 rtx scan = barrier;
2769 int i;
2770 int need_align = 1;
2771 rtx lab, ref;
2772 int have_df = 0;
2774   /* Do two passes; the first time, dump out the HI sized constants.  */
2776 for (i = 0; i < pool_size; i++)
2778 pool_node *p = &pool_vector[i];
2780 if (p->mode == HImode)
2782 if (need_align)
2784 scan = emit_insn_after (gen_align_2 (), scan);
2785 need_align = 0;
2787 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2788 scan = emit_label_after (lab, scan);
2789 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
2790 scan);
2791 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2793 lab = XEXP (ref, 0);
2794 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2797 else if (p->mode == DFmode)
2798 have_df = 1;
2801 need_align = 1;
2803 if (start)
2805 scan = emit_insn_after (gen_align_4 (), scan);
2806 need_align = 0;
2807 for (; start != barrier; start = NEXT_INSN (start))
2808 if (GET_CODE (start) == INSN
2809 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
2811 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
2812 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
2814 scan = emit_label_after (lab, scan);
2817 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
2819 rtx align_insn = NULL_RTX;
2821 scan = emit_label_after (gen_label_rtx (), scan);
2822 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2823 need_align = 0;
2825 for (i = 0; i < pool_size; i++)
2827 pool_node *p = &pool_vector[i];
2829 switch (p->mode)
2831 case HImode:
2832 break;
2833 case SImode:
2834 case SFmode:
2835 if (align_insn && !p->part_of_sequence_p)
2837 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2838 emit_label_before (lab, align_insn);
2839 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
2840 align_insn);
2841 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2843 lab = XEXP (ref, 0);
2844 emit_insn_before (gen_consttable_window_end (lab),
2845 align_insn);
2847 delete_insn (align_insn);
2848 align_insn = NULL_RTX;
2849 continue;
2851 else
2853 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2854 scan = emit_label_after (lab, scan);
2855 scan = emit_insn_after (gen_consttable_4 (p->value,
2856 const0_rtx), scan);
2857 need_align = ! need_align;
2859 break;
2860 case DFmode:
2861 if (need_align)
2863 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2864 align_insn = scan;
2865 need_align = 0;
2867 case DImode:
2868 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2869 scan = emit_label_after (lab, scan);
2870 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2871 scan);
2872 break;
2873 default:
2874 abort ();
2875 break;
2878 if (p->mode != HImode)
2880 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2882 lab = XEXP (ref, 0);
2883 scan = emit_insn_after (gen_consttable_window_end (lab),
2884 scan);
2889 pool_size = 0;
2892 for (i = 0; i < pool_size; i++)
2894 pool_node *p = &pool_vector[i];
2896 switch (p->mode)
2898 case HImode:
2899 break;
2900 case SImode:
2901 case SFmode:
2902 if (need_align)
2904 need_align = 0;
2905 scan = emit_label_after (gen_label_rtx (), scan);
2906 scan = emit_insn_after (gen_align_4 (), scan);
2908 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2909 scan = emit_label_after (lab, scan);
2910 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
2911 scan);
2912 break;
2913 case DFmode:
2914 case DImode:
2915 if (need_align)
2917 need_align = 0;
2918 scan = emit_label_after (gen_label_rtx (), scan);
2919 scan = emit_insn_after (gen_align_4 (), scan);
2921 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2922 scan = emit_label_after (lab, scan);
2923 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2924 scan);
2925 break;
2926 default:
2927 abort ();
2928 break;
2931 if (p->mode != HImode)
2933 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2935 lab = XEXP (ref, 0);
2936 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2941 scan = emit_insn_after (gen_consttable_end (), scan);
2942 scan = emit_barrier_after (scan);
2943 pool_size = 0;
2944 pool_window_label = NULL_RTX;
2945 pool_window_last = 0;
2948 /* Return nonzero if constant would be an ok source for a
2949 mov.w instead of a mov.l. */
2951 static int
2952 hi_const (rtx src)
2954 return (GET_CODE (src) == CONST_INT
2955 && INTVAL (src) >= -32768
2956 && INTVAL (src) <= 32767);
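/* E.g. 1000 or -20000 fit in 16 bits and can be emitted as a 2-byte pool
   entry loaded with mov.w, whereas 0x12345 is outside the signed 16-bit
   range and needs a 4-byte entry and mov.l.  */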
2959 /* Nonzero if the insn is a move instruction which needs to be fixed. */
2961 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
2962    CONST_DOUBLE input value is CONST_OK_FOR_I08.  For an SFmode move, we don't
2963    need to fix it if the input value is CONST_OK_FOR_I08.  */
2965 static int
2966 broken_move (rtx insn)
2968 if (GET_CODE (insn) == INSN)
2970 rtx pat = PATTERN (insn);
2971 if (GET_CODE (pat) == PARALLEL)
2972 pat = XVECEXP (pat, 0, 0);
2973 if (GET_CODE (pat) == SET
2974 /* We can load any 8 bit value if we don't care what the high
2975 order bits end up as. */
2976 && GET_MODE (SET_DEST (pat)) != QImode
2977 && (CONSTANT_P (SET_SRC (pat))
2978 /* Match mova_const. */
2979 || (GET_CODE (SET_SRC (pat)) == UNSPEC
2980 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
2981 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
2982 && ! (TARGET_SH2E
2983 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
2984 && (fp_zero_operand (SET_SRC (pat))
2985 || fp_one_operand (SET_SRC (pat)))
2986 /* ??? If this is a -m4 or -m4-single compilation, in general
2987 we don't know the current setting of fpscr, so disable fldi.
2988 There is an exception if this was a register-register move
2989 before reload - and hence it was ascertained that we have
2990 single precision setting - and in a post-reload optimization
2991 we changed this to do a constant load. In that case
2992 we don't have an r0 clobber, hence we must use fldi. */
2993 && (! TARGET_SH4 || TARGET_FMOVD
2994 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
2995 == SCRATCH))
2996 && GET_CODE (SET_DEST (pat)) == REG
2997 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
2998 && (GET_CODE (SET_SRC (pat)) != CONST_INT
2999 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
3000 return 1;
3003 return 0;
3006 static int
3007 mova_p (rtx insn)
3009 return (GET_CODE (insn) == INSN
3010 && GET_CODE (PATTERN (insn)) == SET
3011 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3012 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3013 /* Don't match mova_const. */
3014 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
3017 /* Fix up a mova from a switch that went out of range. */
3018 static void
3019 fixup_mova (rtx mova)
3021 if (! flag_pic)
3023 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3024 INSN_CODE (mova) = -1;
3026 else
3028 rtx worker = mova;
3029 rtx lab = gen_label_rtx ();
3030 rtx wpat, wpat0, wpat1, wsrc, diff;
3034 worker = NEXT_INSN (worker);
3035 if (! worker
3036 || GET_CODE (worker) == CODE_LABEL
3037 || GET_CODE (worker) == JUMP_INSN)
3038 abort ();
3039 } while (recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3040 wpat = PATTERN (worker);
3041 wpat0 = XVECEXP (wpat, 0, 0);
3042 wpat1 = XVECEXP (wpat, 0, 1);
3043 wsrc = SET_SRC (wpat0);
3044 PATTERN (worker) = (gen_casesi_worker_2
3045 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3046 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3047 XEXP (wpat1, 0)));
3048 INSN_CODE (worker) = -1;
3049 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3050 gen_rtx_LABEL_REF (Pmode, lab));
3051 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3052 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3053 INSN_CODE (mova) = -1;
3057 /* Find the last barrier from insn FROM which is close enough to hold the
3058 constant pool. If we can't find one, then create one near the end of
3059 the range. */
3061 static rtx
3062 find_barrier (int num_mova, rtx mova, rtx from)
3064 int count_si = 0;
3065 int count_hi = 0;
3066 int found_hi = 0;
3067 int found_si = 0;
3068 int found_di = 0;
3069 int hi_align = 2;
3070 int si_align = 2;
3071 int leading_mova = num_mova;
3072 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3073 int si_limit;
3074 int hi_limit;
3076 /* For HImode: range is 510, add 4 because pc counts from address of
3077 second instruction after this one, subtract 2 for the jump instruction
3078 that we may need to emit before the table, subtract 2 for the instruction
3079 that fills the jump delay slot (in very rare cases, reorg will take an
3080 instruction from after the constant pool or will leave the delay slot
3081 empty). This gives 510.
3082 For SImode: range is 1020, add 4 because pc counts from address of
3083 second instruction after this one, subtract 2 in case pc is 2 byte
3084 aligned, subtract 2 for the jump instruction that we may need to emit
3085 before the table, subtract 2 for the instruction that fills the jump
3086 delay slot. This gives 1018. */
3088 /* The branch will always be shortened now that the reference address for
3089    forward branches is the successor address, thus we no longer need to make
3090 adjustments to the [sh]i_limit for -O0. */
3092 si_limit = 1018;
3093 hi_limit = 510;
3095 while (from && count_si < si_limit && count_hi < hi_limit)
3097 int inc = get_attr_length (from);
3098 int new_align = 1;
3100 if (GET_CODE (from) == CODE_LABEL)
3102 if (optimize)
3103 new_align = 1 << label_to_alignment (from);
3104 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3105 new_align = 1 << barrier_align (from);
3106 else
3107 new_align = 1;
3108 inc = 0;
3111 if (GET_CODE (from) == BARRIER)
3114 found_barrier = from;
3116 /* If we are at the end of the function, or in front of an alignment
3117 instruction, we need not insert an extra alignment. We prefer
3118 this kind of barrier. */
3119 if (barrier_align (from) > 2)
3120 good_barrier = from;
3123 if (broken_move (from))
3125 rtx pat, src, dst;
3126 enum machine_mode mode;
3128 pat = PATTERN (from);
3129 if (GET_CODE (pat) == PARALLEL)
3130 pat = XVECEXP (pat, 0, 0);
3131 src = SET_SRC (pat);
3132 dst = SET_DEST (pat);
3133 mode = GET_MODE (dst);
3135 /* We must explicitly check the mode, because sometimes the
3136 front end will generate code to load unsigned constants into
3137 HImode targets without properly sign extending them. */
3138 if (mode == HImode
3139 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3141 found_hi += 2;
3142 /* We put the short constants before the long constants, so
3143 we must count the length of short constants in the range
3144 for the long constants. */
3145 /* ??? This isn't optimal, but is easy to do. */
3146 si_limit -= 2;
3148 else
3150 /* We dump DF/DI constants before SF/SI ones, because
3151 the limit is the same, but the alignment requirements
3152 are higher. We may waste up to 4 additional bytes
3153 for alignment, and the DF/DI constant may have
3154 another SF/SI constant placed before it. */
3155 if (TARGET_SHCOMPACT
3156 && ! found_di
3157 && (mode == DFmode || mode == DImode))
3159 found_di = 1;
3160 si_limit -= 8;
3162 while (si_align > 2 && found_si + si_align - 2 > count_si)
3163 si_align >>= 1;
3164 if (found_si > count_si)
3165 count_si = found_si;
3166 found_si += GET_MODE_SIZE (mode);
3167 if (num_mova)
3168 si_limit -= GET_MODE_SIZE (mode);
3171	  /* See the code in sh_reorg, which has a similar if
3172 statement that generates a new mova insn in many cases. */
3173 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
3174 inc += 2;
3177 if (mova_p (from))
3179 if (! num_mova++)
3181 leading_mova = 0;
3182 mova = from;
3183 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
3185 if (found_si > count_si)
3186 count_si = found_si;
3188 else if (GET_CODE (from) == JUMP_INSN
3189 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3190 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3192 if (num_mova)
3193 num_mova--;
3194 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3196 /* We have just passed the barrier in front of the
3197 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3198 the ADDR_DIFF_VEC is accessed as data, just like our pool
3199 constants, this is a good opportunity to accommodate what
3200 we have gathered so far.
3201 If we waited any longer, we could end up at a barrier in
3202 front of code, which gives worse cache usage for separated
3203 instruction / data caches. */
3204 good_barrier = found_barrier;
3205 break;
3207 else
3209 rtx body = PATTERN (from);
3210 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
3213 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3214 else if (GET_CODE (from) == JUMP_INSN
3215 && ! TARGET_SH2
3216 && ! TARGET_SMALLCODE)
3217 new_align = 4;
3219 if (found_si)
3221 count_si += inc;
3222 if (new_align > si_align)
3224 si_limit -= (count_si - 1) & (new_align - si_align);
3225 si_align = new_align;
3227 count_si = (count_si + new_align - 1) & -new_align;
3229 if (found_hi)
3231 count_hi += inc;
3232 if (new_align > hi_align)
3234 hi_limit -= (count_hi - 1) & (new_align - hi_align);
3235 hi_align = new_align;
3237 count_hi = (count_hi + new_align - 1) & -new_align;
3239 from = NEXT_INSN (from);
3242 if (num_mova)
3244 if (leading_mova)
3246 /* Try as we might, the leading mova is out of range. Change
3247 it into a load (which will become a pcload) and retry. */
3248 fixup_mova (mova);
3249 return find_barrier (0, 0, mova);
3251 else
3253 /* Insert the constant pool table before the mova instruction,
3254 to prevent the mova label reference from going out of range. */
3255 from = mova;
3256 good_barrier = found_barrier = barrier_before_mova;
3260 if (found_barrier)
3262 if (good_barrier && next_real_insn (found_barrier))
3263 found_barrier = good_barrier;
3265 else
3267 /* We didn't find a barrier in time to dump our stuff,
3268 so we'll make one. */
3269 rtx label = gen_label_rtx ();
3271 /* If we exceeded the range, then we must back up over the last
3272 instruction we looked at. Otherwise, we just need to undo the
3273 NEXT_INSN at the end of the loop. */
3274 if (count_hi > hi_limit || count_si > si_limit)
3275 from = PREV_INSN (PREV_INSN (from));
3276 else
3277 from = PREV_INSN (from);
3279 /* Walk back to be just before any jump or label.
3280 Putting it before a label reduces the number of times the branch
3281 around the constant pool table will be hit. Putting it before
3282 a jump makes it more likely that the bra delay slot will be
3283 filled. */
3284 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
3285 || GET_CODE (from) == CODE_LABEL)
3286 from = PREV_INSN (from);
3288 from = emit_jump_insn_after (gen_jump (label), from);
3289 JUMP_LABEL (from) = label;
3290 LABEL_NUSES (label) = 1;
3291 found_barrier = emit_barrier_after (from);
3292 emit_label_after (label, found_barrier);
3295 return found_barrier;
3298 /* If the instruction INSN is implemented by a special function, and we can
3299 positively find the register that is used to call the sfunc, and this
3300    register is not used anywhere else in this instruction (except as the
3301    destination of a set), return this register; else, return 0.  */
3303 sfunc_uses_reg (rtx insn)
3305 int i;
3306 rtx pattern, part, reg_part, reg;
3308 if (GET_CODE (insn) != INSN)
3309 return 0;
3310 pattern = PATTERN (insn);
3311 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
3312 return 0;
3314 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3316 part = XVECEXP (pattern, 0, i);
3317 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
3318 reg_part = part;
3320 if (! reg_part)
3321 return 0;
3322 reg = XEXP (reg_part, 0);
3323 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
3325 part = XVECEXP (pattern, 0, i);
3326 if (part == reg_part || GET_CODE (part) == CLOBBER)
3327 continue;
3328 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
3329 && GET_CODE (SET_DEST (part)) == REG)
3330 ? SET_SRC (part) : part)))
3331 return 0;
3333 return reg;
3336 /* See if the only way in which INSN uses REG is by calling it, or by
3337 setting it while calling it. Set *SET to a SET rtx if the register
3338 is set by INSN. */
3340 static int
3341 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
3343 rtx pattern, reg2;
3345 *set = NULL_RTX;
3347 reg2 = sfunc_uses_reg (insn);
3348 if (reg2 && REGNO (reg2) == REGNO (reg))
3350 pattern = single_set (insn);
3351 if (pattern
3352 && GET_CODE (SET_DEST (pattern)) == REG
3353 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3354 *set = pattern;
3355 return 0;
3357 if (GET_CODE (insn) != CALL_INSN)
3359 /* We don't use rtx_equal_p because we don't care if the mode is
3360 different. */
3361 pattern = single_set (insn);
3362 if (pattern
3363 && GET_CODE (SET_DEST (pattern)) == REG
3364 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3366 rtx par, part;
3367 int i;
3369 *set = pattern;
3370 par = PATTERN (insn);
3371 if (GET_CODE (par) == PARALLEL)
3372 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
3374 part = XVECEXP (par, 0, i);
3375 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
3376 return 1;
3378 return reg_mentioned_p (reg, SET_SRC (pattern));
3381 return 1;
3384 pattern = PATTERN (insn);
3386 if (GET_CODE (pattern) == PARALLEL)
3388 int i;
3390 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3391 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
3392 return 1;
3393 pattern = XVECEXP (pattern, 0, 0);
3396 if (GET_CODE (pattern) == SET)
3398 if (reg_mentioned_p (reg, SET_DEST (pattern)))
3400 /* We don't use rtx_equal_p, because we don't care if the
3401 mode is different. */
3402 if (GET_CODE (SET_DEST (pattern)) != REG
3403 || REGNO (reg) != REGNO (SET_DEST (pattern)))
3404 return 1;
3406 *set = pattern;
3409 pattern = SET_SRC (pattern);
3412 if (GET_CODE (pattern) != CALL
3413 || GET_CODE (XEXP (pattern, 0)) != MEM
3414 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
3415 return 1;
3417 return 0;
3420 /* Given X, a pattern of an insn or a part of it, return a mask of used
3421 general registers. Bits 0..15 mean that the respective registers
3422 are used as inputs in the instruction. Bits 16..31 mean that the
3423 registers 0..15, respectively, are used as outputs, or are clobbered.
3424 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
3426 regs_used (rtx x, int is_dest)
3428 enum rtx_code code;
3429 const char *fmt;
3430 int i, used = 0;
3432 if (! x)
3433 return used;
3434 code = GET_CODE (x);
3435 switch (code)
3437 case REG:
3438 if (REGNO (x) < 16)
3439 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3440 << (REGNO (x) + is_dest));
3441 return 0;
3442 case SUBREG:
3444 rtx y = SUBREG_REG (x);
3446 if (GET_CODE (y) != REG)
3447 break;
3448 if (REGNO (y) < 16)
3449 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3450 << (REGNO (y) +
3451 subreg_regno_offset (REGNO (y),
3452 GET_MODE (y),
3453 SUBREG_BYTE (x),
3454 GET_MODE (x)) + is_dest));
3455 return 0;
3457 case SET:
3458 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
3459 case RETURN:
3460 /* If there was a return value, it must have been indicated with USE. */
3461 return 0x00ffff00;
3462 case CLOBBER:
3463 is_dest = 1;
3464 break;
3465 case MEM:
3466 is_dest = 0;
3467 break;
3468 case CALL:
3469 used |= 0x00ff00f0;
3470 break;
3471 default:
3472 break;
3475 fmt = GET_RTX_FORMAT (code);
3477 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3479 if (fmt[i] == 'E')
3481 register int j;
3482 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3483 used |= regs_used (XVECEXP (x, i, j), is_dest);
3485 else if (fmt[i] == 'e')
3486 used |= regs_used (XEXP (x, i), is_dest);
3488 return used;
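/* Worked example (assuming SImode occupies a single general register):
   for (set (reg:SI 3) (plus:SI (reg:SI 1) (reg:SI 2))) the SET case
   recurses with is_dest 16 for the destination and 0 for the source,
   yielding (1 << 19) | (1 << 1) | (1 << 2) == 0x00080006: r1 and r2 are
   read, r3 is written.  */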
3491 /* Create an instruction that prevents redirection of a conditional branch
3492 to the destination of the JUMP with address ADDR.
3493 If the branch needs to be implemented as an indirect jump, try to find
3494 a scratch register for it.
3495 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3496 If any preceding insn that doesn't fit into a delay slot is good enough,
3497 pass 1. Pass 2 if a definite blocking insn is needed.
3498 -1 is used internally to avoid deep recursion.
3499 If a blocking instruction is made or recognized, return it. */
3501 static rtx
3502 gen_block_redirect (rtx jump, int addr, int need_block)
3504 int dead = 0;
3505 rtx prev = prev_nonnote_insn (jump);
3506 rtx dest;
3508 /* First, check if we already have an instruction that satisfies our need. */
3509 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3511 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3512 return prev;
3513 if (GET_CODE (PATTERN (prev)) == USE
3514 || GET_CODE (PATTERN (prev)) == CLOBBER
3515 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3516 prev = jump;
3517 else if ((need_block &= ~1) < 0)
3518 return prev;
3519 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
3520 need_block = 0;
3522 if (GET_CODE (PATTERN (jump)) == RETURN)
3524 if (! need_block)
3525 return prev;
3526 /* Reorg even does nasty things with return insns that cause branches
3527 to go out of range - see find_end_label and callers. */
3528 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
3530 /* We can't use JUMP_LABEL here because it might be undefined
3531 when not optimizing. */
3532 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
3533 /* If the branch is out of range, try to find a scratch register for it. */
3534 if (optimize
3535 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3536 > 4092 + 4098))
3538 rtx scan;
3539 /* Don't look for the stack pointer as a scratch register,
3540 it would cause trouble if an interrupt occurred. */
3541 unsigned try = 0x7fff, used;
3542 int jump_left = flag_expensive_optimizations + 1;
3544 /* It is likely that the most recent eligible instruction is wanted for
3545 the delay slot. Therefore, find out which registers it uses, and
3546 try to avoid using them. */
3548 for (scan = jump; (scan = PREV_INSN (scan)); )
3550 enum rtx_code code;
3552 if (INSN_DELETED_P (scan))
3553 continue;
3554 code = GET_CODE (scan);
3555 if (code == CODE_LABEL || code == JUMP_INSN)
3556 break;
3557 if (code == INSN
3558 && GET_CODE (PATTERN (scan)) != USE
3559 && GET_CODE (PATTERN (scan)) != CLOBBER
3560 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
3562 try &= ~regs_used (PATTERN (scan), 0);
3563 break;
3566 for (used = dead = 0, scan = JUMP_LABEL (jump);
3567 (scan = NEXT_INSN (scan)); )
3569 enum rtx_code code;
3571 if (INSN_DELETED_P (scan))
3572 continue;
3573 code = GET_CODE (scan);
3574 if (INSN_P (scan))
3576 used |= regs_used (PATTERN (scan), 0);
3577 if (code == CALL_INSN)
3578 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
3579 dead |= (used >> 16) & ~used;
3580 if (dead & try)
3582 dead &= try;
3583 break;
3585 if (code == JUMP_INSN)
3587 if (jump_left-- && simplejump_p (scan))
3588 scan = JUMP_LABEL (scan);
3589 else
3590 break;
3594 /* Mask out the stack pointer again, in case it was
3595 the only 'free' register we have found. */
3596 dead &= 0x7fff;
3598 /* If the immediate destination is still in range, check for possible
3599 threading with a jump beyond the delay slot insn.
3600      Don't check if we are called recursively; in that case, the jump has been
3601      or will be checked in a different invocation.  */
3603 else if (optimize && need_block >= 0)
3605 rtx next = next_active_insn (next_active_insn (dest));
3606 if (next && GET_CODE (next) == JUMP_INSN
3607 && GET_CODE (PATTERN (next)) == SET
3608 && recog_memoized (next) == CODE_FOR_jump_compact)
3610 dest = JUMP_LABEL (next);
3611 if (dest
3612 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3613 > 4092 + 4098))
3614 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
3618 if (dead)
3620 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
3622 /* It would be nice if we could convert the jump into an indirect
3623	 jump / far branch right now, thus exposing all constituent
3624 instructions to further optimization. However, reorg uses
3625 simplejump_p to determine if there is an unconditional jump where
3626 it should try to schedule instructions from the target of the
3627 branch; simplejump_p fails for indirect jumps even if they have
3628 a JUMP_LABEL. */
3629 rtx insn = emit_insn_before (gen_indirect_jump_scratch
3630 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
3631 , jump);
3632 /* ??? We would like this to have the scope of the jump, but that
3633 scope will change when a delay slot insn of an inner scope is added.
3634 Hence, after delay slot scheduling, we'll have to expect
3635 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
3636 the jump. */
3638 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
3639 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
3640 return insn;
3642 else if (need_block)
3643 /* We can't use JUMP_LABEL here because it might be undefined
3644 when not optimizing. */
3645 return emit_insn_before (gen_block_branch_redirect
3646 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
3647 , jump);
3648 return prev;
3651 #define CONDJUMP_MIN -252
3652 #define CONDJUMP_MAX 262
3653 struct far_branch
3655 /* A label (to be placed) in front of the jump
3656 that jumps to our ultimate destination. */
3657 rtx near_label;
3658 /* Where we are going to insert it if we cannot move the jump any farther,
3659 or the jump itself if we have picked up an existing jump. */
3660 rtx insert_place;
3661 /* The ultimate destination. */
3662 rtx far_label;
3663 struct far_branch *prev;
3664 /* If the branch has already been created, its address;
3665 else the address of its first prospective user. */
3666 int address;
3669 static void gen_far_branch (struct far_branch *);
3670 enum mdep_reorg_phase_e mdep_reorg_phase;
3671 static void
3672 gen_far_branch (struct far_branch *bp)
3674 rtx insn = bp->insert_place;
3675 rtx jump;
3676 rtx label = gen_label_rtx ();
3678 emit_label_after (label, insn);
3679 if (bp->far_label)
3681 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
3682 LABEL_NUSES (bp->far_label)++;
3684 else
3685 jump = emit_jump_insn_after (gen_return (), insn);
3686 /* Emit a barrier so that reorg knows that any following instructions
3687 are not reachable via a fall-through path.
3688 But don't do this when not optimizing, since we wouldn't suppress the
3689 alignment for the barrier then, and could end up with out-of-range
3690 pc-relative loads. */
3691 if (optimize)
3692 emit_barrier_after (jump);
3693 emit_label_after (bp->near_label, insn);
3694 JUMP_LABEL (jump) = bp->far_label;
3695 if (! invert_jump (insn, label, 1))
3696 abort ();
3697 /* If we are branching around a jump (rather than a return), prevent
3698      reorg from using an insn from the jump target as the delay slot insn;
3699      when reorg did this, it pessimized code (we'd rather hide the delay slot)
3700 and it could cause branches to go out of range. */
3701 if (bp->far_label)
3702 (emit_insn_after
3703 (gen_stuff_delay_slot
3704 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
3705 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
3706 insn));
3707 /* Prevent reorg from undoing our splits. */
3708 gen_block_redirect (jump, bp->address += 2, 2);
3711 /* Fix up ADDR_DIFF_VECs. */
3712 void
3713 fixup_addr_diff_vecs (rtx first)
3715 rtx insn;
3717 for (insn = first; insn; insn = NEXT_INSN (insn))
3719 rtx vec_lab, pat, prev, prevpat, x, braf_label;
3721 if (GET_CODE (insn) != JUMP_INSN
3722 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
3723 continue;
3724 pat = PATTERN (insn);
3725 vec_lab = XEXP (XEXP (pat, 0), 0);
3727       /* Search for the matching casesi_jump_2.  */
3728 for (prev = vec_lab; ; prev = PREV_INSN (prev))
3730 if (GET_CODE (prev) != JUMP_INSN)
3731 continue;
3732 prevpat = PATTERN (prev);
3733 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
3734 continue;
3735 x = XVECEXP (prevpat, 0, 1);
3736 if (GET_CODE (x) != USE)
3737 continue;
3738 x = XEXP (x, 0);
3739 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
3740 break;
3743 /* Emit the reference label of the braf where it belongs, right after
3744 the casesi_jump_2 (i.e. braf). */
3745 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
3746 emit_label_after (braf_label, prev);
3748       /* Fix up the ADDR_DIFF_VEC to be relative
3749 to the reference address of the braf. */
3750 XEXP (XEXP (pat, 0), 0) = braf_label;
3754 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
3755 a barrier. Return the base 2 logarithm of the desired alignment. */
3757 barrier_align (rtx barrier_or_label)
3759 rtx next = next_real_insn (barrier_or_label), pat, prev;
3760 int slot, credit, jump_to_next = 0;
3762 if (! next)
3763 return 0;
3765 pat = PATTERN (next);
3767 if (GET_CODE (pat) == ADDR_DIFF_VEC)
3768 return 2;
3770 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
3771 /* This is a barrier in front of a constant table. */
3772 return 0;
3774 prev = prev_real_insn (barrier_or_label);
3775 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
3777 pat = PATTERN (prev);
3778 /* If this is a very small table, we want to keep the alignment after
3779 the table to the minimum for proper code alignment. */
3780 return ((TARGET_SMALLCODE
3781 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
3782 <= (unsigned) 1 << (CACHE_LOG - 2)))
3783 ? 1 << TARGET_SHMEDIA : align_jumps_log);
3786 if (TARGET_SMALLCODE)
3787 return 0;
3789 if (! TARGET_SH2 || ! optimize)
3790 return align_jumps_log;
3792 /* When fixing up pcloads, a constant table might be inserted just before
3793 the basic block that ends with the barrier. Thus, we can't trust the
3794 instruction lengths before that. */
3795 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
3797 /* Check if there is an immediately preceding branch to the insn beyond
3798 	 the barrier.  We must weigh the cost of discarding useful information
3799 	 from the current cache line when executing this branch and there is
3800 	 an alignment, against that of fetching unneeded insns in front of the
3801 branch target when there is no alignment. */
3803 /* There are two delay_slot cases to consider. One is the simple case
3804 where the preceding branch is to the insn beyond the barrier (simple
3805 delay slot filling), and the other is where the preceding branch has
3806 a delay slot that is a duplicate of the insn after the barrier
3807 (fill_eager_delay_slots) and the branch is to the insn after the insn
3808 after the barrier. */
3810 /* PREV is presumed to be the JUMP_INSN for the barrier under
3811 investigation. Skip to the insn before it. */
3812 prev = prev_real_insn (prev);
3814 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
3815 credit >= 0 && prev && GET_CODE (prev) == INSN;
3816 prev = prev_real_insn (prev))
3818 jump_to_next = 0;
3819 if (GET_CODE (PATTERN (prev)) == USE
3820 || GET_CODE (PATTERN (prev)) == CLOBBER)
3821 continue;
3822 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
3824 prev = XVECEXP (PATTERN (prev), 0, 1);
3825 if (INSN_UID (prev) == INSN_UID (next))
3827 /* Delay slot was filled with insn at jump target. */
3828 jump_to_next = 1;
3829 continue;
3833 if (slot &&
3834 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3835 slot = 0;
3836 credit -= get_attr_length (prev);
3838 if (prev
3839 && GET_CODE (prev) == JUMP_INSN
3840 && JUMP_LABEL (prev))
3842 rtx x;
3843 if (jump_to_next
3844 || next_real_insn (JUMP_LABEL (prev)) == next
3845 /* If relax_delay_slots() decides NEXT was redundant
3846 with some previous instruction, it will have
3847 redirected PREV's jump to the following insn. */
3848 || JUMP_LABEL (prev) == next_nonnote_insn (next)
3849 /* There is no upper bound on redundant instructions
3850 that might have been skipped, but we must not put an
3851 alignment where none had been before. */
3852 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
3853 (INSN_P (x)
3854 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
3855 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
3856 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
3858 rtx pat = PATTERN (prev);
3859 if (GET_CODE (pat) == PARALLEL)
3860 pat = XVECEXP (pat, 0, 0);
3861 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
3862 return 0;
3867 return align_jumps_log;
3870 /* If we are inside a phony loop, almost any kind of label can turn up as the
3871 first one in the loop. Aligning a braf label causes incorrect switch
3872 destination addresses; we can detect braf labels because they are
3873 followed by a BARRIER.
3874 Applying loop alignment to small constant or switch tables is a waste
3875 of space, so we suppress this too. */
3877 sh_loop_align (rtx label)
3879 rtx next = label;
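/* Advance past notes and any consecutive code labels following LABEL.  */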
3882 next = next_nonnote_insn (next);
3883 while (next && GET_CODE (next) == CODE_LABEL);
3885 if (! next
3886 || ! INSN_P (next)
3887 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
3888 || recog_memoized (next) == CODE_FOR_consttable_2)
3889 return 0;
3891 return align_loops_log;
3894 /* Do a final pass over the function, just before delayed branch
3895 scheduling. */
3897 static void
3898 sh_reorg (void)
3900 rtx first, insn, mova = NULL_RTX;
3901 int num_mova;
3902 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
3903 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
3905 first = get_insns ();
3907 /* We must split call insns before introducing `mova's. If we're
3908 optimizing, they'll have already been split. Otherwise, make
3909 sure we don't split them too late. */
3910 if (! optimize)
3911 split_all_insns_noflow ();
3913 if (TARGET_SHMEDIA)
3914 return;
3916 /* If relaxing, generate pseudo-ops to associate function calls with
3917 the symbols they call. It does no harm to not generate these
3918 pseudo-ops. However, when we can generate them, it enables the
3919 linker to potentially relax the jsr to a bsr, and eliminate the
3920 register load and, possibly, the constant pool entry. */
3922 mdep_reorg_phase = SH_INSERT_USES_LABELS;
3923 if (TARGET_RELAX)
3925 /* Remove all REG_LABEL notes. We want to use them for our own
3926 purposes. This works because none of the remaining passes
3927 need to look at them.
3929 ??? But it may break in the future. We should use a machine
3930 dependent REG_NOTE, or some other approach entirely. */
3931 for (insn = first; insn; insn = NEXT_INSN (insn))
3933 if (INSN_P (insn))
3935 rtx note;
3937 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
3938 remove_note (insn, note);
3942 for (insn = first; insn; insn = NEXT_INSN (insn))
3944 rtx pattern, reg, link, set, scan, dies, label;
3945 int rescan = 0, foundinsn = 0;
3947 if (GET_CODE (insn) == CALL_INSN)
3949 pattern = PATTERN (insn);
3951 if (GET_CODE (pattern) == PARALLEL)
3952 pattern = XVECEXP (pattern, 0, 0);
3953 if (GET_CODE (pattern) == SET)
3954 pattern = SET_SRC (pattern);
3956 if (GET_CODE (pattern) != CALL
3957 || GET_CODE (XEXP (pattern, 0)) != MEM)
3958 continue;
3960 reg = XEXP (XEXP (pattern, 0), 0);
3962 else
3964 reg = sfunc_uses_reg (insn);
3965 if (! reg)
3966 continue;
3969 if (GET_CODE (reg) != REG)
3970 continue;
3972 /* This is a function call via REG. If the only uses of REG
3973 between the time that it is set and the time that it dies
3974 are in function calls, then we can associate all the
3975 function calls with the setting of REG. */
3977 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
3979 if (REG_NOTE_KIND (link) != 0)
3980 continue;
3981 set = single_set (XEXP (link, 0));
3982 if (set && rtx_equal_p (reg, SET_DEST (set)))
3984 link = XEXP (link, 0);
3985 break;
3989 if (! link)
3991 /* ??? Sometimes global register allocation will have
3992 deleted the insn pointed to by LOG_LINKS. Try
3993 scanning backward to find where the register is set. */
3994 for (scan = PREV_INSN (insn);
3995 scan && GET_CODE (scan) != CODE_LABEL;
3996 scan = PREV_INSN (scan))
3998 if (! INSN_P (scan))
3999 continue;
4001 if (! reg_mentioned_p (reg, scan))
4002 continue;
4004 if (noncall_uses_reg (reg, scan, &set))
4005 break;
4007 if (set)
4009 link = scan;
4010 break;
4015 if (! link)
4016 continue;
4018 /* The register is set at LINK. */
4020 /* We can only optimize the function call if the register is
4021 being set to a symbol. In theory, we could sometimes
4022 optimize calls to a constant location, but the assembler
4023 and linker do not support that at present. */
4024 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4025 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4026 continue;
4028 /* Scan forward from LINK to the place where REG dies, and
4029 make sure that the only insns which use REG are
4030 themselves function calls. */
4032 /* ??? This doesn't work for call targets that were allocated
4033 by reload, since there may not be a REG_DEAD note for the
4034 register. */
4036 dies = NULL_RTX;
4037 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4039 rtx scanset;
4041 /* Don't try to trace forward past a CODE_LABEL if we haven't
4042 seen INSN yet. Ordinarily, we will only find the setting insn
4043 in LOG_LINKS if it is in the same basic block. However,
4044 cross-jumping can insert code labels in between the load and
4045 the call, and can result in situations where a single call
4046 insn may have two targets depending on where we came from. */
4048 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4049 break;
4051 if (! INSN_P (scan))
4052 continue;
4054 /* Don't try to trace forward past a JUMP. To optimize
4055 safely, we would have to check that all the
4056 instructions at the jump destination did not use REG. */
4058 if (GET_CODE (scan) == JUMP_INSN)
4059 break;
4061 if (! reg_mentioned_p (reg, scan))
4062 continue;
4064 if (noncall_uses_reg (reg, scan, &scanset))
4065 break;
4067 if (scan == insn)
4068 foundinsn = 1;
4070 if (scan != insn
4071 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4073 /* There is a function call to this register other
4074 than the one we are checking. If we optimize
4075 this call, we need to rescan again below. */
4076 rescan = 1;
4079 /* ??? We shouldn't have to worry about SCANSET here.
4080 We should just be able to check for a REG_DEAD note
4081 on a function call. However, the REG_DEAD notes are
4082 apparently not dependable around libcalls; c-torture
4083 execute/920501-2 is a test case. If SCANSET is set,
4084 then this insn sets the register, so it must have
4085 died earlier. Unfortunately, this will only handle
4086 the cases in which the register is, in fact, set in a
4087 later insn. */
4089 /* ??? We shouldn't have to use FOUNDINSN here.
4090 However, the LOG_LINKS fields are apparently not
4091 entirely reliable around libcalls;
4092 newlib/libm/math/e_pow.c is a test case. Sometimes
4093 an insn will appear in LOG_LINKS even though it is
4094 not the most recent insn which sets the register. */
4096 if (foundinsn
4097 && (scanset
4098 || find_reg_note (scan, REG_DEAD, reg)))
4100 dies = scan;
4101 break;
4105 if (! dies)
4107 /* Either there was a branch, or some insn used REG
4108 other than as a function call address. */
4109 continue;
4112 /* Create a code label, and put it in a REG_LABEL note on
4113 the insn which sets the register, and on each call insn
4114 which uses the register. In final_prescan_insn we look
4115 for the REG_LABEL notes, and output the appropriate label
4116 or pseudo-op. */
4118 label = gen_label_rtx ();
4119 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
4120 REG_NOTES (link));
4121 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
4122 REG_NOTES (insn));
4123 if (rescan)
4125 scan = link;
4128 rtx reg2;
4130 scan = NEXT_INSN (scan);
4131 if (scan != insn
4132 && ((GET_CODE (scan) == CALL_INSN
4133 && reg_mentioned_p (reg, scan))
4134 || ((reg2 = sfunc_uses_reg (scan))
4135 && REGNO (reg2) == REGNO (reg))))
4136 REG_NOTES (scan)
4137 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
4139 while (scan != dies);
4144 if (TARGET_SH2)
4145 fixup_addr_diff_vecs (first);
4147 if (optimize)
4149 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4150 shorten_branches (first);
4152 /* Scan the function looking for move instructions which have to be
4153 changed to pc-relative loads and insert the literal tables. */
4155 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4156 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4158 if (mova_p (insn))
4160 /* ??? basic block reordering can move a switch table dispatch
4161 below the switch table. Check if that has happened.
4162 We only have the addresses available when optimizing; but then,
4163 this check shouldn't be needed when not optimizing. */
4164 rtx label_ref = XVECEXP (SET_SRC (PATTERN (insn)), 0, 0);
4165 if (optimize
4166 && (INSN_ADDRESSES (INSN_UID (insn))
4167 > INSN_ADDRESSES (INSN_UID (XEXP (label_ref, 0)))))
4169 /* Change the mova into a load.
4170 broken_move will then return true for it. */
4171 fixup_mova (insn);
4173 else if (! num_mova++)
4174 mova = insn;
4176 else if (GET_CODE (insn) == JUMP_INSN
4177 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4178 && num_mova)
4180 rtx scan;
4181 int total;
4183 num_mova--;
4185 /* Some code might have been inserted between the mova and
4186 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4187 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4188 total += get_attr_length (scan);
4190 /* The range of mova is 1020; add 4 because pc counts from the address of
4191 the second instruction after this one, and subtract 2 in case pc is
4192 2-byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
4193 cancels out with alignment effects of the mova itself. */
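/* That is, 1020 + 4 - 2 == 1022, the threshold tested below.  */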
4194 if (total > 1022)
4196 /* Change the mova into a load, and restart scanning
4197 there. broken_move will then return true for mova. */
4198 fixup_mova (mova);
4199 insn = mova;
4202 if (broken_move (insn)
4203 || (GET_CODE (insn) == INSN
4204 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
4206 rtx scan;
4207 /* Scan ahead looking for a barrier to stick the constant table
4208 behind. */
4209 rtx barrier = find_barrier (num_mova, mova, insn);
4210 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4211 int need_aligned_label = 0;
4213 if (num_mova && ! mova_p (mova))
4215 /* find_barrier had to change the first mova into a
4216 pcload; thus, we have to start with this new pcload. */
4217 insn = mova;
4218 num_mova = 0;
4220 /* Now find all the moves between the points and modify them. */
4221 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
4223 if (GET_CODE (scan) == CODE_LABEL)
4224 last_float = 0;
4225 if (GET_CODE (scan) == INSN
4226 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
4227 need_aligned_label = 1;
4228 if (broken_move (scan))
4230 rtx *patp = &PATTERN (scan), pat = *patp;
4231 rtx src, dst;
4232 rtx lab;
4233 rtx newsrc;
4234 enum machine_mode mode;
4236 if (GET_CODE (pat) == PARALLEL)
4237 patp = &XVECEXP (pat, 0, 0), pat = *patp;
4238 src = SET_SRC (pat);
4239 dst = SET_DEST (pat);
4240 mode = GET_MODE (dst);
4242 if (mode == SImode && hi_const (src)
4243 && REGNO (dst) != FPUL_REG)
4245 int offset = 0;
4247 mode = HImode;
4248 while (GET_CODE (dst) == SUBREG)
4250 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
4251 GET_MODE (SUBREG_REG (dst)),
4252 SUBREG_BYTE (dst),
4253 GET_MODE (dst));
4254 dst = SUBREG_REG (dst);
4256 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
4258 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
4260 /* This must be an insn that clobbers r0. */
4261 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
4262 XVECLEN (PATTERN (scan), 0)
4263 - 1);
4264 rtx clobber = *clobberp;
4266 if (GET_CODE (clobber) != CLOBBER
4267 || ! rtx_equal_p (XEXP (clobber, 0), r0_rtx))
4268 abort ();
4270 if (last_float
4271 && reg_set_between_p (r0_rtx, last_float_move, scan))
4272 last_float = 0;
4273 if (last_float
4274 && TARGET_SHCOMPACT
4275 && GET_MODE_SIZE (mode) != 4
4276 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
4277 last_float = 0;
4278 lab = add_constant (src, mode, last_float);
4279 if (lab)
4280 emit_insn_before (gen_mova (lab), scan);
4281 else
4283 /* There will be a REG_UNUSED note for r0 on
4284 LAST_FLOAT_MOVE; we have to change it to REG_INC,
4285 otherwise reorg's mark_target_live_regs will not
4286 consider r0 to be used, and we would end up with a delay
4287 slot insn in front of SCAN that clobbers r0. */
4288 rtx note
4289 = find_regno_note (last_float_move, REG_UNUSED, 0);
4291 /* If we are not optimizing, then there may not be
4292 a note. */
4293 if (note)
4294 PUT_MODE (note, REG_INC);
4296 *last_float_addr = r0_inc_rtx;
4298 last_float_move = scan;
4299 last_float = src;
4300 newsrc = gen_rtx_MEM (mode,
4301 (((TARGET_SH4 && ! TARGET_FMOVD)
4302 || REGNO (dst) == FPUL_REG)
4303 ? r0_inc_rtx
4304 : r0_rtx));
4305 last_float_addr = &XEXP (newsrc, 0);
4307 /* Remove the clobber of r0. */
4308 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
4309 gen_rtx_SCRATCH (Pmode));
4310 RTX_UNCHANGING_P (newsrc) = 1;
4312 /* This is a mova needing a label. Create it. */
4313 else if (GET_CODE (src) == UNSPEC
4314 && XINT (src, 1) == UNSPEC_MOVA
4315 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
4317 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
4318 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4319 newsrc = gen_rtx_UNSPEC (SImode,
4320 gen_rtvec (1, newsrc),
4321 UNSPEC_MOVA);
4323 else
4325 lab = add_constant (src, mode, 0);
4326 newsrc = gen_rtx_MEM (mode,
4327 gen_rtx_LABEL_REF (VOIDmode, lab));
4328 RTX_UNCHANGING_P (newsrc) = 1;
4330 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
4331 INSN_CODE (scan) = -1;
4334 dump_table (need_aligned_label ? insn : 0, barrier);
4335 insn = barrier;
4339 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
4340 INSN_ADDRESSES_FREE ();
4341 split_branches (first);
4343 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4344 also has an effect on the register that holds the address of the sfunc.
4345 Insert an extra dummy insn in front of each sfunc that pretends to
4346 use this register. */
4347 if (flag_delayed_branch)
4349 for (insn = first; insn; insn = NEXT_INSN (insn))
4351 rtx reg = sfunc_uses_reg (insn);
4353 if (! reg)
4354 continue;
4355 emit_insn_before (gen_use_sfunc_addr (reg), insn);
4358 #if 0
4359 /* fpscr is not actually a user variable, but we pretend it is for the
4360 sake of the previous optimization passes, since we want it handled like
4361 one. However, we don't have any debugging information for it, so turn
4362 it into a non-user variable now. */
4363 if (TARGET_SH4)
4364 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4365 #endif
4366 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
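/* Return the UID of the insn that a branch to LABEL will actually reach,
   suitable for indexing the uid_branch array in split_branches below.
   Return 0 if LABEL is undefined or the destination is a return.  */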
4370 get_dest_uid (rtx label, int max_uid)
4372 rtx dest = next_real_insn (label);
4373 int dest_uid;
4374 if (! dest)
4375 /* This can happen for an undefined label. */
4376 return 0;
4377 dest_uid = INSN_UID (dest);
4378 /* If this is a newly created branch redirection blocking instruction,
4379 we cannot index the branch_uid or insn_addresses arrays with its
4380 uid. But then, we won't need to, because the actual destination is
4381 the following branch. */
4382 while (dest_uid >= max_uid)
4384 dest = NEXT_INSN (dest);
4385 dest_uid = INSN_UID (dest);
4387 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
4388 return 0;
4389 return dest_uid;
4392 /* Split condbranches that are out of range. Also add clobbers for
4393 scratch registers that are needed in far jumps.
4394 We do this before delay slot scheduling, so that it can take our
4395 newly created instructions into account. It also allows us to
4396 find branches with common targets more easily. */
4398 static void
4399 split_branches (rtx first)
4401 rtx insn;
4402 struct far_branch **uid_branch, *far_branch_list = 0;
4403 int max_uid = get_max_uid ();
4405 /* Find out which branches are out of range. */
4406 shorten_branches (first);
4408 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
4409 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
4411 for (insn = first; insn; insn = NEXT_INSN (insn))
4412 if (! INSN_P (insn))
4413 continue;
4414 else if (INSN_DELETED_P (insn))
4416 /* Shorten_branches would split this instruction again,
4417 so transform it into a note. */
4418 PUT_CODE (insn, NOTE);
4419 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4420 NOTE_SOURCE_FILE (insn) = 0;
4422 else if (GET_CODE (insn) == JUMP_INSN
4423 /* Don't mess with ADDR_DIFF_VEC */
4424 && (GET_CODE (PATTERN (insn)) == SET
4425 || GET_CODE (PATTERN (insn)) == RETURN))
4427 enum attr_type type = get_attr_type (insn);
4428 if (type == TYPE_CBRANCH)
4430 rtx next, beyond;
4432 if (get_attr_length (insn) > 4)
4434 rtx src = SET_SRC (PATTERN (insn));
4435 rtx olabel = XEXP (XEXP (src, 1), 0);
4436 int addr = INSN_ADDRESSES (INSN_UID (insn));
4437 rtx label = 0;
4438 int dest_uid = get_dest_uid (olabel, max_uid);
4439 struct far_branch *bp = uid_branch[dest_uid];
4441 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4442 the label if the LABEL_NUSES count drops to zero. There is
4443 always a jump_optimize pass that sets these values, but it
4444 proceeds to delete unreferenced code, and then if not
4445 optimizing, to un-delete the deleted instructions, thus
4446 leaving labels with use counts that are too low. */
4447 if (! optimize)
4449 JUMP_LABEL (insn) = olabel;
4450 LABEL_NUSES (olabel)++;
4452 if (! bp)
4454 bp = (struct far_branch *) alloca (sizeof *bp);
4455 uid_branch[dest_uid] = bp;
4456 bp->prev = far_branch_list;
4457 far_branch_list = bp;
4458 bp->far_label
4459 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
4460 LABEL_NUSES (bp->far_label)++;
4462 else
4464 label = bp->near_label;
4465 if (! label && bp->address - addr >= CONDJUMP_MIN)
4467 rtx block = bp->insert_place;
4469 if (GET_CODE (PATTERN (block)) == RETURN)
4470 block = PREV_INSN (block);
4471 else
4472 block = gen_block_redirect (block,
4473 bp->address, 2);
4474 label = emit_label_after (gen_label_rtx (),
4475 PREV_INSN (block));
4476 bp->near_label = label;
4478 else if (label && ! NEXT_INSN (label))
4480 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4481 bp->insert_place = insn;
4482 else
4483 gen_far_branch (bp);
4486 if (! label
4487 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4489 bp->near_label = label = gen_label_rtx ();
4490 bp->insert_place = insn;
4491 bp->address = addr;
4493 if (! redirect_jump (insn, label, 1))
4494 abort ();
4496 else
4498 /* get_attr_length (insn) == 2 */
4499 /* Check if we have a pattern where reorg wants to redirect
4500 the branch to a label from an unconditional branch that
4501 is too far away. */
4502 /* We can't use JUMP_LABEL here because it might be undefined
4503 when not optimizing. */
4504 /* A syntax error might cause beyond to be NULL_RTX. */
4505 beyond
4506 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4507 0));
4509 if (beyond
4510 && (GET_CODE (beyond) == JUMP_INSN
4511 || ((beyond = next_active_insn (beyond))
4512 && GET_CODE (beyond) == JUMP_INSN))
4513 && GET_CODE (PATTERN (beyond)) == SET
4514 && recog_memoized (beyond) == CODE_FOR_jump_compact
4515 && ((INSN_ADDRESSES
4516 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4517 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4518 > 252 + 258 + 2))
4519 gen_block_redirect (beyond,
4520 INSN_ADDRESSES (INSN_UID (beyond)), 1);
4523 next = next_active_insn (insn);
4525 if ((GET_CODE (next) == JUMP_INSN
4526 || GET_CODE (next = next_active_insn (next)) == JUMP_INSN)
4527 && GET_CODE (PATTERN (next)) == SET
4528 && recog_memoized (next) == CODE_FOR_jump_compact
4529 && ((INSN_ADDRESSES
4530 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
4531 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4532 > 252 + 258 + 2))
4533 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
4535 else if (type == TYPE_JUMP || type == TYPE_RETURN)
4537 int addr = INSN_ADDRESSES (INSN_UID (insn));
4538 rtx far_label = 0;
4539 int dest_uid = 0;
4540 struct far_branch *bp;
4542 if (type == TYPE_JUMP)
4544 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
4545 dest_uid = get_dest_uid (far_label, max_uid);
4546 if (! dest_uid)
4548 /* Parse errors can lead to labels outside
4549 the insn stream. */
4550 if (! NEXT_INSN (far_label))
4551 continue;
4553 if (! optimize)
4555 JUMP_LABEL (insn) = far_label;
4556 LABEL_NUSES (far_label)++;
4558 redirect_jump (insn, NULL_RTX, 1);
4559 far_label = 0;
4562 bp = uid_branch[dest_uid];
4563 if (! bp)
4565 bp = (struct far_branch *) alloca (sizeof *bp);
4566 uid_branch[dest_uid] = bp;
4567 bp->prev = far_branch_list;
4568 far_branch_list = bp;
4569 bp->near_label = 0;
4570 bp->far_label = far_label;
4571 if (far_label)
4572 LABEL_NUSES (far_label)++;
4574 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
4575 if (addr - bp->address <= CONDJUMP_MAX)
4576 emit_label_after (bp->near_label, PREV_INSN (insn));
4577 else
4579 gen_far_branch (bp);
4580 bp->near_label = 0;
4582 else
4583 bp->near_label = 0;
4584 bp->address = addr;
4585 bp->insert_place = insn;
4586 if (! far_label)
4587 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
4588 else
4589 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
4592 /* Generate all pending far branches,
4593 and free our references to the far labels. */
4594 while (far_branch_list)
4596 if (far_branch_list->near_label
4597 && ! NEXT_INSN (far_branch_list->near_label))
4598 gen_far_branch (far_branch_list);
4599 if (optimize
4600 && far_branch_list->far_label
4601 && ! --LABEL_NUSES (far_branch_list->far_label))
4602 delete_insn (far_branch_list->far_label);
4603 far_branch_list = far_branch_list->prev;
4606 /* Instruction length information is no longer valid due to the new
4607 instructions that have been generated. */
4608 init_insn_lengths ();
4611 /* Dump out instruction addresses, which is useful for debugging the
4612 constant pool table stuff.
4614 If relaxing, output the label and pseudo-ops used to link together
4615 calls and the instruction which set the registers. */
4617 /* ??? The addresses printed by this routine for insns are nonsense for
4618 insns which are inside of a sequence where none of the inner insns have
4619 variable length. This is because the second pass of shorten_branches
4620 does not bother to update them. */
4622 void
4623 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
4624 int noperands ATTRIBUTE_UNUSED)
4626 if (TARGET_DUMPISIZE)
4627 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
4629 if (TARGET_RELAX)
4631 rtx note;
4633 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
4634 if (note)
4636 rtx pattern;
4638 pattern = PATTERN (insn);
4639 if (GET_CODE (pattern) == PARALLEL)
4640 pattern = XVECEXP (pattern, 0, 0);
4641 if (GET_CODE (pattern) == CALL
4642 || (GET_CODE (pattern) == SET
4643 && (GET_CODE (SET_SRC (pattern)) == CALL
4644 || get_attr_type (insn) == TYPE_SFUNC)))
4645 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
4646 CODE_LABEL_NUMBER (XEXP (note, 0)));
4647 else if (GET_CODE (pattern) == SET)
4648 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4649 CODE_LABEL_NUMBER (XEXP (note, 0)));
4650 else
4651 abort ();
4656 /* Dump out any constants accumulated in the final pass. These will
4657 only be labels. */
4659 const char *
4660 output_jump_label_table (void)
4662 int i;
4664 if (pool_size)
4666 fprintf (asm_out_file, "\t.align 2\n");
4667 for (i = 0; i < pool_size; i++)
4669 pool_node *p = &pool_vector[i];
4671 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4672 CODE_LABEL_NUMBER (p->label));
4673 output_asm_insn (".long %O0", &p->value);
4675 pool_size = 0;
4678 return "";
4681 /* A full frame looks like:
4683 arg-5
4684 arg-4
4685 [ if current_function_anonymous_args
4686 arg-3
4687 arg-2
4688 arg-1
4689 arg-0 ]
4690 saved-fp
4691 saved-r10
4692 saved-r11
4693 saved-r12
4694 saved-pr
4695 local-n
4697 local-1
4698 local-0 <- fp points here. */
4700 /* Number of bytes pushed for anonymous args, used to pass information
4701 between expand_prologue and expand_epilogue. */
4703 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
4704 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
4705 for an epilogue and a negative value means that it's for a sibcall
4706 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
4707 all the registers that are about to be restored, and hence dead. */
4709 static void
4710 output_stack_adjust (int size, rtx reg, int epilogue_p,
4711 HARD_REG_SET *live_regs_mask)
4713 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
4714 if (size)
4716 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
4718 if (size % align)
4719 abort ();
4721 if (CONST_OK_FOR_ADD (size))
4722 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
4723 /* Try to do it with two partial adjustments; however, we must make
4724 sure that the stack is properly aligned at all times, in case
4725 an interrupt occurs between the two partial adjustments. */
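/* For instance, assuming a non-SHmedia add-immediate range of -128..127,
   an adjustment of -192 that does not fit in one add can be emitted as
   two adds of -96, each of them a multiple of the stack alignment.  */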
4726 else if (CONST_OK_FOR_ADD (size / 2 & -align)
4727 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
4729 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
4730 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
4732 else
4734 rtx const_reg;
4735 rtx insn;
4736 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
4737 int i;
4739 /* If TEMP is invalid, we could temporarily save a general
4740 register to MACL. However, there is currently no need
4741 to handle this case, so just abort when we see it. */
4742 if (epilogue_p < 0
4743 || current_function_interrupt
4744 || ! call_used_regs[temp] || fixed_regs[temp])
4745 temp = -1;
4746 if (temp < 0 && ! current_function_interrupt
4747 && (TARGET_SHMEDIA || epilogue_p >= 0))
4749 HARD_REG_SET temps;
4750 COPY_HARD_REG_SET (temps, call_used_reg_set);
4751 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
4752 if (epilogue_p > 0)
4754 int nreg = 0;
4755 if (current_function_return_rtx)
4757 enum machine_mode mode;
4758 mode = GET_MODE (current_function_return_rtx);
4759 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
4760 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
4762 for (i = 0; i < nreg; i++)
4763 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
4764 if (current_function_calls_eh_return)
4766 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
4767 for (i = 0; i <= 3; i++)
4768 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
4771 if (TARGET_SHMEDIA && epilogue_p < 0)
4772 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
4773 CLEAR_HARD_REG_BIT (temps, i);
4774 if (epilogue_p <= 0)
4776 for (i = FIRST_PARM_REG;
4777 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
4778 CLEAR_HARD_REG_BIT (temps, i);
4779 if (cfun->static_chain_decl != NULL)
4780 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
4782 temp = scavenge_reg (&temps);
4784 if (temp < 0 && live_regs_mask)
4785 temp = scavenge_reg (live_regs_mask);
4786 if (temp < 0)
4788 /* If we reach here, the most likely case is the (sibcall)
4789 epilogue for non-SHmedia. Put a special push/pop sequence
4790 for such a case as a last resort. This looks lengthy but
4791 would not be a problem because it seems to be very rare. */
4792 if (! TARGET_SHMEDIA && epilogue_p)
4794 rtx adj_reg, tmp_reg, mem;
4796 /* ??? There is still the slight possibility that r4 or r5
4797 have been reserved as fixed registers or assigned as
4798 global registers, and they change during an interrupt.
4799 There are possible ways to handle this:
4800 - If we are adjusting the frame pointer (r14), we can do
4801 with a single temp register and an ordinary push / pop
4802 on the stack.
4803 - Grab any call-used or call-saved registers (i.e. not
4804 fixed or globals) for the temps we need. We might
4805 also grab r14 if we are adjusting the stack pointer.
4806 If we can't find enough available registers, issue
4807 a diagnostic and abort - the user must have reserved
4808 way too many registers.
4809 But since all this is rather unlikely to happen and
4810 would require extra testing, we just abort if r4 / r5
4811 are not available. */
4812 if (fixed_regs[4] || fixed_regs[5]
4813 || global_regs[4] || global_regs[5])
4814 abort ();
4816 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
4817 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
4818 emit_move_insn (gen_rtx_MEM (Pmode, reg), adj_reg);
4819 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
4820 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
4821 mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
4822 emit_move_insn (mem, tmp_reg);
4823 emit_move_insn (tmp_reg, gen_rtx_MEM (Pmode, reg));
4824 mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
4825 emit_move_insn (mem, tmp_reg);
4826 emit_move_insn (reg, adj_reg);
4827 mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
4828 emit_move_insn (adj_reg, mem);
4829 mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
4830 emit_move_insn (tmp_reg, mem);
4831 return;
4833 else
4834 abort ();
4836 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
4838 /* If SIZE is negative, subtract the positive value.
4839 This sometimes allows a constant pool entry to be shared
4840 between prologue and epilogue code. */
4841 if (size < 0)
4843 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
4844 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
4846 else
4848 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
4849 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
4851 if (! epilogue_p)
4852 REG_NOTES (insn)
4853 = (gen_rtx_EXPR_LIST
4854 (REG_FRAME_RELATED_EXPR,
4855 gen_rtx_SET (VOIDmode, reg,
4856 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
4857 REG_NOTES (insn)));
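/* Emit insn X and mark it as frame related, so that dwarf2 call frame
   information is generated for it.  */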
4862 static rtx
4863 frame_insn (rtx x)
4865 x = emit_insn (x);
4866 RTX_FRAME_RELATED_P (x) = 1;
4867 return x;
4870 /* Output RTL to push register RN onto the stack. */
4872 static rtx
4873 push (int rn)
4875 rtx x;
4876 if (rn == FPUL_REG)
4877 x = gen_push_fpul ();
4878 else if (rn == FPSCR_REG)
4879 x = gen_push_fpscr ();
4880 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4881 && FP_OR_XD_REGISTER_P (rn))
4883 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4884 return NULL_RTX;
4885 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
4887 else if (TARGET_SH2E && FP_REGISTER_P (rn))
4888 x = gen_push_e (gen_rtx_REG (SFmode, rn));
4889 else
4890 x = gen_push (gen_rtx_REG (SImode, rn));
4892 x = frame_insn (x);
4893 REG_NOTES (x)
4894 = gen_rtx_EXPR_LIST (REG_INC,
4895 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4896 return x;
4899 /* Output RTL to pop register RN from the stack. */
4901 static void
4902 pop (int rn)
4904 rtx x;
4905 if (rn == FPUL_REG)
4906 x = gen_pop_fpul ();
4907 else if (rn == FPSCR_REG)
4908 x = gen_pop_fpscr ();
4909 else if (TARGET_SH4 && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4910 && FP_OR_XD_REGISTER_P (rn))
4912 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4913 return;
4914 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
4916 else if (TARGET_SH2E && FP_REGISTER_P (rn))
4917 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
4918 else
4919 x = gen_pop (gen_rtx_REG (SImode, rn));
4921 x = emit_insn (x);
4922 REG_NOTES (x)
4923 = gen_rtx_EXPR_LIST (REG_INC,
4924 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4927 /* Generate code to push the regs specified in the mask. */
4929 static void
4930 push_regs (HARD_REG_SET *mask, int interrupt_handler)
4932 int i;
4933 int skip_fpscr = 0;
4935 /* Push PR last; this gives better latencies after the prologue, and
4936 candidates for the return delay slot when there are no general
4937 registers pushed. */
4938 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
4940 /* If this is an interrupt handler, and the SZ bit varies,
4941 and we have to push any floating point register, we need
4942 to switch to the correct precision first. */
4943 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
4944 && hard_regs_intersect_p (mask, &reg_class_contents[DF_REGS]))
4946 HARD_REG_SET unsaved;
4948 push (FPSCR_REG);
4949 COMPL_HARD_REG_SET (unsaved, *mask);
4950 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
4951 skip_fpscr = 1;
4953 if (i != PR_REG
4954 && (i != FPSCR_REG || ! skip_fpscr)
4955 && TEST_HARD_REG_BIT (*mask, i))
4956 push (i);
4958 if (TEST_HARD_REG_BIT (*mask, PR_REG))
4959 push (PR_REG);
4962 /* Calculate how much extra space is needed to save all callee-saved
4963 target registers.
4964 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
4966 static int
4967 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
4969 int reg;
4970 int stack_space = 0;
4971 int interrupt_handler = sh_cfun_interrupt_handler_p ();
4973 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
4974 if ((! call_used_regs[reg] || interrupt_handler)
4975 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
4976 /* Leave space to save this target register on the stack,
4977 in case target register allocation wants to use it. */
4978 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
4979 return stack_space;
4982 /* Decide whether we should reserve space for callee-save target registers,
4983 in case target register allocation wants to use them. REGS_SAVED is
4984 the space, in bytes, that is already required for register saves.
4985 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
4987 static int
4988 shmedia_reserve_space_for_target_registers_p (int regs_saved,
4989 HARD_REG_SET *live_regs_mask)
4991 if (optimize_size)
4992 return 0;
4993 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
4996 /* Decide how much space to reserve for callee-save target registers
4997 in case target register allocation wants to use them.
4998 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5000 static int
5001 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5003 if (shmedia_space_reserved_for_target_registers)
5004 return shmedia_target_regs_stack_space (live_regs_mask);
5005 else
5006 return 0;
5009 /* Work out the registers which need to be saved, both as a mask and a
5010 count of saved words. Return the count.
5012 If doing a pragma interrupt function, then push all regs used by the
5013 function, and if we call another function (we can tell by looking at PR),
5014 make sure that all the regs it clobbers are safe too. */
5016 static int
5017 calc_live_regs (HARD_REG_SET *live_regs_mask)
5019 int reg;
5020 int count;
5021 int interrupt_handler;
5022 int pr_live, has_call;
5024 interrupt_handler = sh_cfun_interrupt_handler_p ();
5026 CLEAR_HARD_REG_SET (*live_regs_mask);
5027 if (TARGET_SH4 && TARGET_FMOVD && interrupt_handler
5028 && regs_ever_live[FPSCR_REG])
5029 target_flags &= ~FPU_SINGLE_BIT;
5030 /* If we can save a lot of saves by switching to double mode, do that. */
5031 else if (TARGET_SH4 && TARGET_FMOVD && TARGET_FPU_SINGLE)
5032 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5033 if (regs_ever_live[reg] && regs_ever_live[reg+1]
5034 && (! call_used_regs[reg] || (interrupt_handler && ! pragma_trapa))
5035 && ++count > 2)
5037 target_flags &= ~FPU_SINGLE_BIT;
5038 break;
5040 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5041 knows how to use it. That means the pseudo originally allocated for
5042 the initial value can become the PR_MEDIA_REG hard register, as seen for
5043 execute/20010122-1.c:test9. */
5044 if (TARGET_SHMEDIA)
5045 /* ??? this function is called from initial_elimination_offset, hence we
5046 can't use the result of sh_media_register_for_return here. */
5047 pr_live = sh_pr_n_sets ();
5048 else
5050 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5051 pr_live = (pr_initial
5052 ? (GET_CODE (pr_initial) != REG
5053 || REGNO (pr_initial) != (PR_REG))
5054 : regs_ever_live[PR_REG]);
5055 /* For SHcompact, if not optimizing, we end up with a memory reference
5056 using the return address pointer for __builtin_return_address even
5057 though there is no actual need to put the PR register on the stack. */
5058 pr_live |= regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM];
5060 /* Force PR to be live if the prologue has to call the SHmedia
5061 argument decoder or register saver. */
5062 if (TARGET_SHCOMPACT
5063 && ((current_function_args_info.call_cookie
5064 & ~ CALL_COOKIE_RET_TRAMP (1))
5065 || current_function_has_nonlocal_label))
5066 pr_live = 1;
5067 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
5068 for (count = 0, reg = FIRST_PSEUDO_REGISTER - 1; reg >= 0; reg--)
5070 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5071 ? pr_live
5072 : (interrupt_handler && ! pragma_trapa)
5073 ? (/* Need to save all the regs ever live. */
5074 (regs_ever_live[reg]
5075 || (call_used_regs[reg]
5076 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG)
5077 && has_call)
5078 || (has_call && REGISTER_NATURAL_MODE (reg) == SImode
5079 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5080 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5081 && reg != RETURN_ADDRESS_POINTER_REGNUM
5082 && reg != T_REG && reg != GBR_REG
5083 /* Push fpscr only on targets which have an FPU.  */
5084 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5085 : (/* Only push those regs which are used and need to be saved. */
5086 (TARGET_SHCOMPACT
5087 && flag_pic
5088 && current_function_args_info.call_cookie
5089 && reg == (int) PIC_OFFSET_TABLE_REGNUM)
5090 || (regs_ever_live[reg] && ! call_used_regs[reg])
5091 || (current_function_calls_eh_return
5092 && (reg == (int) EH_RETURN_DATA_REGNO (0)
5093 || reg == (int) EH_RETURN_DATA_REGNO (1)
5094 || reg == (int) EH_RETURN_DATA_REGNO (2)
5095 || reg == (int) EH_RETURN_DATA_REGNO (3)))
5096 || ((reg == MACL_REG || reg == MACH_REG)
5097 && regs_ever_live[reg]
5098 && sh_cfun_attr_renesas_p ())
5101 SET_HARD_REG_BIT (*live_regs_mask, reg);
5102 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5104 if ((TARGET_SH4 || TARGET_SH5) && TARGET_FMOVD
5105 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
5107 if (FP_REGISTER_P (reg))
5109 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
5111 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5112 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
5115 else if (XD_REGISTER_P (reg))
5117 /* Must switch to double mode to access these registers. */
5118 target_flags &= ~FPU_SINGLE_BIT;
5123 /* If we have a target register optimization pass after prologue / epilogue
5124 threading, we need to assume all target registers will be live even if
5125 they aren't now. */
5126 if (flag_branch_target_load_optimize2
5127 && TARGET_SAVE_ALL_TARGET_REGS
5128 && shmedia_space_reserved_for_target_registers)
5129 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5130 if ((! call_used_regs[reg] || interrupt_handler)
5131 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5133 SET_HARD_REG_BIT (*live_regs_mask, reg);
5134 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5136 /* If this is an interrupt handler, we don't have any call-clobbered
5137 registers we can conveniently use for target register save/restore.
5138 Make sure we save at least one general purpose register when we need
5139 to save target registers. */
5140 if (interrupt_handler
5141 && hard_regs_intersect_p (live_regs_mask,
5142 &reg_class_contents[TARGET_REGS])
5143 && ! hard_regs_intersect_p (live_regs_mask,
5144 &reg_class_contents[GENERAL_REGS]))
5146 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
5147 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
5150 return count;
5153 /* Code to generate prologue and epilogue sequences */
5155 /* PUSHED is the number of bytes that are being pushed on the
5156 stack for register saves. Return the frame size, padded
5157 appropriately so that the stack stays properly aligned. */
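/* For example, if STACK_BOUNDARY were 64 bits (8-byte alignment), a
   13-byte frame with 12 bytes already pushed would be rounded up to 20
   bytes, so the 32-byte total stays properly aligned.  */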
5158 static HOST_WIDE_INT
5159 rounded_frame_size (int pushed)
5161 HOST_WIDE_INT size = get_frame_size ();
5162 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5164 return ((size + pushed + align - 1) & -align) - pushed;
5167 /* Choose a call-clobbered target-branch register that remains
5168 unchanged along the whole function. We set it up as the return
5169 value in the prologue. */
5171 sh_media_register_for_return (void)
5173 int regno;
5174 int tr0_used;
5176 if (! current_function_is_leaf)
5177 return -1;
5178 if (lookup_attribute ("interrupt_handler",
5179 DECL_ATTRIBUTES (current_function_decl)))
5180 return -1;
5182 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
5184 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
5185 if (call_used_regs[regno] && ! regs_ever_live[regno])
5186 return regno;
5188 return -1;
5191 /* The maximum registers we need to save are:
5192 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
5193 - 32 floating point registers (for each pair, we save none,
5194 one single precision value, or a double precision value).
5195 - 8 target registers
5196 - add 1 entry for a delimiter. */
5197 #define MAX_SAVED_REGS (62+32+8)
5199 typedef struct save_entry_s
5201 unsigned char reg;
5202 unsigned char mode;
5203 short offset;
5204 } save_entry;
5206 #define MAX_TEMPS 4
5208 /* There will be a delimiter entry with VOIDmode both at the start and the
5209 end of a filled in schedule. The end delimiter has the offset of the
5210 save with the smallest (i.e. most negative) offset. */
5211 typedef struct save_schedule_s
5213 save_entry entries[MAX_SAVED_REGS + 2];
5214 int temps[MAX_TEMPS+1];
5215 } save_schedule;
5217 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
5218 use reverse order. Returns the last entry written to (not counting
5219 the delimiter). OFFSET_BASE is a number to be added to all offset
5220 entries. */
5222 static save_entry *
5223 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
5224 int offset_base)
5226 int align, i;
5227 save_entry *entry = schedule->entries;
5228 int tmpx = 0;
5229 int offset;
5231 if (! current_function_interrupt)
5232 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
5233 if (call_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
5234 && ! FUNCTION_ARG_REGNO_P (i)
5235 && i != FIRST_RET_REG
5236 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
5237 && ! (current_function_calls_eh_return
5238 && (i == EH_RETURN_STACKADJ_REGNO
5239 || ((unsigned) i <= EH_RETURN_DATA_REGNO (0)
5240 && (unsigned) i >= EH_RETURN_DATA_REGNO (3)))))
5241 schedule->temps[tmpx++] = i;
5242 entry->reg = -1;
5243 entry->mode = VOIDmode;
5244 entry->offset = offset_base;
5245 entry++;
5246 /* We loop twice: first, we save 8-byte aligned registers in the
5247 higher addresses, which are known to be aligned. Then, we
5248 proceed to saving 32-bit registers that don't need 8-byte
5249 alignment.
5250 If this is an interrupt function, all registers that need saving
5251 need to be saved in full. Moreover, we need to postpone saving
5252 target registers until we have saved some general purpose registers
5253 we can then use as scratch registers. */
5254 offset = offset_base;
5255 for (align = 1; align >= 0; align--)
5257 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
5258 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5260 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5261 int reg = i;
5263 if (current_function_interrupt)
5265 if (TARGET_REGISTER_P (i))
5266 continue;
5267 if (GENERAL_REGISTER_P (i))
5268 mode = DImode;
5270 if (mode == SFmode && (i % 2) == 1
5271 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5272 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
5274 mode = DFmode;
5275 i--;
5276 reg--;
5279 /* If we're doing the aligned pass and this is not aligned,
5280 or we're doing the unaligned pass and this is aligned,
5281 skip it. */
5282 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
5283 != align)
5284 continue;
5286 if (current_function_interrupt
5287 && GENERAL_REGISTER_P (i)
5288 && tmpx < MAX_TEMPS)
5289 schedule->temps[tmpx++] = i;
5291 offset -= GET_MODE_SIZE (mode);
5292 entry->reg = i;
5293 entry->mode = mode;
5294 entry->offset = offset;
5295 entry++;
5297 if (align && current_function_interrupt)
5298 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
5299 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5301 offset -= GET_MODE_SIZE (DImode);
5302 entry->reg = i;
5303 entry->mode = DImode;
5304 entry->offset = offset;
5305 entry++;
5308 entry->reg = -1;
5309 entry->mode = VOIDmode;
5310 entry->offset = offset;
5311 schedule->temps[tmpx] = -1;
5312 return entry - 1;
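/* Expand RTL for the function prologue.  */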
5315 void
5316 sh_expand_prologue (void)
5318 HARD_REG_SET live_regs_mask;
5319 int d, i;
5320 int d_rounding = 0;
5321 int save_flags = target_flags;
5322 int pretend_args;
5324 current_function_interrupt = sh_cfun_interrupt_handler_p ();
5326 /* We have pretend args if we had an object sent partially in registers
5327 and partially on the stack, e.g. a large structure. */
5328 pretend_args = current_function_pretend_args_size;
5329 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
5330 && (NPARM_REGS(SImode)
5331 > current_function_args_info.arg_count[(int) SH_ARG_INT]))
5332 pretend_args = 0;
5333 output_stack_adjust (-pretend_args
5334 - current_function_args_info.stack_regs * 8,
5335 stack_pointer_rtx, 0, NULL);
5337 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
5338 /* We're going to use the PIC register to load the address of the
5339 incoming-argument decoder and/or of the return trampoline from
5340 the GOT, so make sure the PIC register is preserved and
5341 initialized. */
5342 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5344 if (TARGET_SHCOMPACT
5345 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5347 int reg;
5349 /* First, make all registers with incoming arguments that will
5350 be pushed onto the stack live, so that register renaming
5351 doesn't overwrite them. */
5352 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
5353 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
5354 >= NPARM_REGS (SImode) - reg)
5355 for (; reg < NPARM_REGS (SImode); reg++)
5356 emit_insn (gen_shcompact_preserve_incoming_args
5357 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5358 else if (CALL_COOKIE_INT_REG_GET
5359 (current_function_args_info.call_cookie, reg) == 1)
5360 emit_insn (gen_shcompact_preserve_incoming_args
5361 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5363 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
5364 stack_pointer_rtx);
5365 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
5366 GEN_INT (current_function_args_info.call_cookie));
5367 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
5368 gen_rtx_REG (SImode, R0_REG));
5370 else if (TARGET_SHMEDIA)
5372 int tr = sh_media_register_for_return ();
5374 if (tr >= 0)
5376 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
5377 gen_rtx_REG (DImode, PR_MEDIA_REG));
5379 /* ??? We should suppress saving pr when we don't need it, but this
5380 is tricky because of builtin_return_address. */
5382 /* If this function only exits with sibcalls, this copy
5383 will be flagged as dead. */
5384 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5385 const0_rtx,
5386 REG_NOTES (insn));
5390 /* Emit the code for SETUP_VARARGS. */
5391 if (current_function_stdarg)
5393 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
5395 /* Push arg regs as if they'd been provided by caller in stack. */
5396 for (i = 0; i < NPARM_REGS(SImode); i++)
5398 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
5399 rtx insn;
5401 if (i >= (NPARM_REGS(SImode)
5402 - current_function_args_info.arg_count[(int) SH_ARG_INT]
5404 break;
5405 insn = push (rn);
5406 RTX_FRAME_RELATED_P (insn) = 0;
5411 /* If we're supposed to switch stacks at function entry, do so now. */
5412 if (sp_switch)
5413 emit_insn (gen_sp_switch_1 ());
5415 d = calc_live_regs (&live_regs_mask);
5416 /* ??? Maybe we could save some switching if we can move a mode switch
5417 that already happens to be at the function start into the prologue. */
5418 if (target_flags != save_flags && ! current_function_interrupt)
5419 emit_insn (gen_toggle_sz ());
5421 if (TARGET_SH5)
5423 int offset_base, offset;
5424 rtx r0 = NULL_RTX;
5425 int offset_in_r0 = -1;
5426 int sp_in_r0 = 0;
5427 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5428 int total_size, save_size;
5429 save_schedule schedule;
5430 save_entry *entry;
5431 int *tmp_pnt;
5433 if (call_used_regs[R0_REG] && ! fixed_regs[R0_REG]
5434 && ! current_function_interrupt)
5435 r0 = gen_rtx_REG (Pmode, R0_REG);
5437 /* D is the actual number of bytes that we need for saving registers;
5438 however, in initial_elimination_offset we have committed to using
5439 an additional TREGS_SPACE bytes. In order to keep both the
5440 addresses of arguments supplied by the caller and local variables
5441 valid, we must keep this gap. Place it between the incoming
5442 arguments and the actually saved registers in a bid to optimize
5443 locality of reference. */
5444 total_size = d + tregs_space;
5445 total_size += rounded_frame_size (total_size);
5446 save_size = total_size - rounded_frame_size (d);
5447 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
5448 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5449 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
5451 /* If adjusting the stack in a single step costs nothing extra, do so.
5452 I.e. either if a single addi is enough, or we need a movi anyway,
5453 and we don't exceed the maximum offset range (the test for the
5454 latter is conservative for simplicity). */
5455 if (TARGET_SHMEDIA
5456 && (CONST_OK_FOR_I10 (-total_size)
5457 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
5458 && total_size <= 2044)))
5459 d_rounding = total_size - save_size;
5461 offset_base = d + d_rounding;
5463 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
5464 0, NULL);
5466 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
5467 tmp_pnt = schedule.temps;
5468 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
5470 enum machine_mode mode = entry->mode;
5471 int reg = entry->reg;
5472 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
5474 offset = entry->offset;
5476 reg_rtx = gen_rtx_REG (mode, reg);
5478 mem_rtx = gen_rtx_MEM (mode,
5479 gen_rtx_PLUS (Pmode,
5480 stack_pointer_rtx,
5481 GEN_INT (offset)));
5483 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
5485 if (! r0)
5486 abort ();
5487 mem_rtx = NULL_RTX;
5489 try_pre_dec:
5491 if (HAVE_PRE_DECREMENT
5492 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
5493 || mem_rtx == NULL_RTX
5494 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5496 pre_dec = gen_rtx_MEM (mode,
5497 gen_rtx_PRE_DEC (Pmode, r0));
5499 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
5500 pre_dec_ok);
5502 pre_dec = NULL_RTX;
5504 break;
5506 pre_dec_ok:
5507 mem_rtx = NULL_RTX;
5508 offset += GET_MODE_SIZE (mode);
5510 while (0);
5512 if (mem_rtx != NULL_RTX)
5513 goto addr_ok;
5515 if (offset_in_r0 == -1)
5517 emit_move_insn (r0, GEN_INT (offset));
5518 offset_in_r0 = offset;
5520 else if (offset != offset_in_r0)
5522 emit_move_insn (r0,
5523 gen_rtx_PLUS
5524 (Pmode, r0,
5525 GEN_INT (offset - offset_in_r0)));
5526 offset_in_r0 += offset - offset_in_r0;
5529 if (pre_dec != NULL_RTX)
5531 if (! sp_in_r0)
5533 emit_move_insn (r0,
5534 gen_rtx_PLUS
5535 (Pmode, r0, stack_pointer_rtx));
5536 sp_in_r0 = 1;
5539 offset -= GET_MODE_SIZE (mode);
5540 offset_in_r0 -= GET_MODE_SIZE (mode);
5542 mem_rtx = pre_dec;
5544 else if (sp_in_r0)
5545 mem_rtx = gen_rtx_MEM (mode, r0);
5546 else
5547 mem_rtx = gen_rtx_MEM (mode,
5548 gen_rtx_PLUS (Pmode,
5549 stack_pointer_rtx,
5550 r0));
5552 /* We must not use an r0-based address for target-branch
5553 registers or for special registers without pre-dec
5554 memory addresses, since we store their values in r0
5555 first. */
5556 if (TARGET_REGISTER_P (reg)
5557 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5558 && mem_rtx != pre_dec))
5559 abort ();
5561 addr_ok:
5562 if (TARGET_REGISTER_P (reg)
5563 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5564 && mem_rtx != pre_dec))
5566 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
5568 emit_move_insn (tmp_reg, reg_rtx);
5570 if (REGNO (tmp_reg) == R0_REG)
5572 offset_in_r0 = -1;
5573 sp_in_r0 = 0;
5574 if (refers_to_regno_p (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0))
5575 abort ();
5578 if (*++tmp_pnt <= 0)
5579 tmp_pnt = schedule.temps;
5581 reg_rtx = tmp_reg;
5584 rtx insn;
5586 /* Mark as interesting for dwarf cfi generator */
5587 insn = emit_move_insn (mem_rtx, reg_rtx);
5588 RTX_FRAME_RELATED_P (insn) = 1;
5590 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
5592 rtx reg_rtx = gen_rtx_REG (mode, reg);
5593 rtx set, note_rtx;
5594 rtx mem_rtx = gen_rtx_MEM (mode,
5595 gen_rtx_PLUS (Pmode,
5596 stack_pointer_rtx,
5597 GEN_INT (offset)));
5599 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
5600 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
5601 REG_NOTES (insn));
5602 REG_NOTES (insn) = note_rtx;
5607 if (entry->offset != d_rounding)
5608 abort ();
5610 else
5611 push_regs (&live_regs_mask, current_function_interrupt);
5613 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
5615 rtx insn = get_last_insn ();
5616 rtx last = emit_insn (gen_GOTaddr2picreg ());
5618 /* Mark these insns as possibly dead. Sometimes, flow2 may
5619 delete all uses of the PIC register. In this case, let it
5620 delete the initialization too. */
5623 insn = NEXT_INSN (insn);
5625 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5626 const0_rtx,
5627 REG_NOTES (insn));
5629 while (insn != last);
5632 if (SHMEDIA_REGS_STACK_ADJUST ())
5634 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5635 function_symbol (TARGET_FPU_ANY
5636 ? "__GCC_push_shmedia_regs"
5637 : "__GCC_push_shmedia_regs_nofpu"));
5638 /* This must NOT go through the PLT, otherwise mach and macl
5639 may be clobbered. */
5640 emit_insn (gen_shmedia_save_restore_regs_compact
5641 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
5644 if (target_flags != save_flags && ! current_function_interrupt)
5646 rtx insn = emit_insn (gen_toggle_sz ());
5648 /* If we're lucky, a mode switch in the function body will
5649 overwrite fpscr, turning this insn dead. Tell flow this
5650 insn is ok to delete. */
5651 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5652 const0_rtx,
5653 REG_NOTES (insn));
5656 target_flags = save_flags;
5658 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
5659 stack_pointer_rtx, 0, NULL);
5661 if (frame_pointer_needed)
5662 frame_insn (GEN_MOV (frame_pointer_rtx, stack_pointer_rtx));
5664 if (TARGET_SHCOMPACT
5665 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5667 /* This must NOT go through the PLT, otherwise mach and macl
5668 may be clobbered. */
5669 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5670 function_symbol ("__GCC_shcompact_incoming_args"));
5671 emit_insn (gen_shcompact_incoming_args ());
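/* Expand RTL for the function epilogue.  SIBCALL_P is true when this
   epilogue precedes a sibling call rather than an ordinary return.  */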
5675 void
5676 sh_expand_epilogue (bool sibcall_p)
5678 HARD_REG_SET live_regs_mask;
5679 int d, i;
5680 int d_rounding = 0;
5682 int save_flags = target_flags;
5683 int frame_size, save_size;
5684 int fpscr_deferred = 0;
5685 int e = sibcall_p ? -1 : 1;
5687 d = calc_live_regs (&live_regs_mask);
5689 save_size = d;
5690 frame_size = rounded_frame_size (d);
5692 if (TARGET_SH5)
5694 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5695 int total_size;
5696 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
5697 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5698 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
5700 total_size = d + tregs_space;
5701 total_size += rounded_frame_size (total_size);
5702 save_size = total_size - frame_size;
5704 /* If adjusting the stack in a single step costs nothing extra, do so.
5705 I.e. either if a single addi is enough, or we need a movi anyway,
5706 and we don't exceed the maximum offset range (the test for the
5707 latter is conservative for simplicity). */
5708 if (TARGET_SHMEDIA
5709 && ! frame_pointer_needed
5710 && (CONST_OK_FOR_I10 (total_size)
5711 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
5712 && total_size <= 2044)))
5713 d_rounding = frame_size;
5715 frame_size -= d_rounding;
5718 if (frame_pointer_needed)
5720 output_stack_adjust (frame_size, frame_pointer_rtx, e, &live_regs_mask);
5722 /* We must avoid moving the stack pointer adjustment past code
5723 which reads from the local frame, else an interrupt could
5724 occur after the SP adjustment and clobber data in the local
5725 frame. */
5726 emit_insn (gen_blockage ());
5727 emit_insn (GEN_MOV (stack_pointer_rtx, frame_pointer_rtx));
5729 else if (frame_size)
5731 /* We must avoid moving the stack pointer adjustment past code
5732 which reads from the local frame, else an interrupt could
5733 occur after the SP adjustment and clobber data in the local
5734 frame. */
5735 emit_insn (gen_blockage ());
5736 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
5739 if (SHMEDIA_REGS_STACK_ADJUST ())
5741 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5742 function_symbol (TARGET_FPU_ANY
5743 ? "__GCC_pop_shmedia_regs"
5744 : "__GCC_pop_shmedia_regs_nofpu"));
5745 /* This must NOT go through the PLT, otherwise mach and macl
5746 may be clobbered. */
5747 emit_insn (gen_shmedia_save_restore_regs_compact
5748 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
5751 /* Pop all the registers. */
5753 if (target_flags != save_flags && ! current_function_interrupt)
5754 emit_insn (gen_toggle_sz ());
5755 if (TARGET_SH5)
5757 int offset_base, offset;
5758 int offset_in_r0 = -1;
5759 int sp_in_r0 = 0;
5760 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
5761 save_schedule schedule;
5762 save_entry *entry;
5763 int *tmp_pnt;
5765 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
5766 offset_base = -entry[1].offset + d_rounding;
5767 tmp_pnt = schedule.temps;
5768 for (; entry->mode != VOIDmode; entry--)
5770 enum machine_mode mode = entry->mode;
5771 int reg = entry->reg;
5772 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
5774 offset = offset_base + entry->offset;
5775 reg_rtx = gen_rtx_REG (mode, reg);
5777 mem_rtx = gen_rtx_MEM (mode,
5778 gen_rtx_PLUS (Pmode,
5779 stack_pointer_rtx,
5780 GEN_INT (offset)));
5782 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
5784 mem_rtx = NULL_RTX;
5786 try_post_inc:
5788 if (HAVE_POST_INCREMENT
5789 && (offset == offset_in_r0
5790 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
5791 && mem_rtx == NULL_RTX)
5792 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5794 post_inc = gen_rtx_MEM (mode,
5795 gen_rtx_POST_INC (Pmode, r0));
5797 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
5798 post_inc_ok);
5800 post_inc = NULL_RTX;
5802 break;
5804 post_inc_ok:
5805 mem_rtx = NULL_RTX;
5807 while (0);
5809 if (mem_rtx != NULL_RTX)
5810 goto addr_ok;
5812 if (offset_in_r0 == -1)
5814 emit_move_insn (r0, GEN_INT (offset));
5815 offset_in_r0 = offset;
5817 else if (offset != offset_in_r0)
5819 emit_move_insn (r0,
5820 gen_rtx_PLUS
5821 (Pmode, r0,
5822 GEN_INT (offset - offset_in_r0)));
5823 offset_in_r0 += offset - offset_in_r0;
5826 if (post_inc != NULL_RTX)
5828 if (! sp_in_r0)
5830 emit_move_insn (r0,
5831 gen_rtx_PLUS
5832 (Pmode, r0, stack_pointer_rtx));
5833 sp_in_r0 = 1;
5836 mem_rtx = post_inc;
5838 offset_in_r0 += GET_MODE_SIZE (mode);
5840 else if (sp_in_r0)
5841 mem_rtx = gen_rtx_MEM (mode, r0);
5842 else
5843 mem_rtx = gen_rtx_MEM (mode,
5844 gen_rtx_PLUS (Pmode,
5845 stack_pointer_rtx,
5846 r0));
5848 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5849 && mem_rtx != post_inc)
5850 abort ();
5852 addr_ok:
5853 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5854 && mem_rtx != post_inc)
5856 insn = emit_move_insn (r0, mem_rtx);
5857 mem_rtx = r0;
5859 else if (TARGET_REGISTER_P (reg))
5861 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
5863 /* Give the scheduler a bit of freedom by using up to
5864 MAX_TEMPS registers in a round-robin fashion. */
5865 insn = emit_move_insn (tmp_reg, mem_rtx);
5866 mem_rtx = tmp_reg;
5867 if (*++tmp_pnt < 0)
5868 tmp_pnt = schedule.temps;
5871 insn = emit_move_insn (reg_rtx, mem_rtx);
5872 if (reg == PR_MEDIA_REG && sh_media_register_for_return () >= 0)
5873 /* This is dead, unless we return with a sibcall. */
5874 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5875 const0_rtx,
5876 REG_NOTES (insn));
5879 if (entry->offset + offset_base != d + d_rounding)
5880 abort ();
5882 else /* ! TARGET_SH5 */
5884 save_size = 0;
5885 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
5886 pop (PR_REG);
5887 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5889 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
5891 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
5892 && hard_regs_intersect_p (&live_regs_mask,
5893 &reg_class_contents[DF_REGS]))
5894 fpscr_deferred = 1;
5895 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
5896 pop (j);
5897 if (j == FIRST_FP_REG && fpscr_deferred)
5898 pop (FPSCR_REG);
5902 if (target_flags != save_flags && ! current_function_interrupt)
5903 emit_insn (gen_toggle_sz ());
5904 target_flags = save_flags;
5906 output_stack_adjust (current_function_pretend_args_size
5907 + save_size + d_rounding
5908 + current_function_args_info.stack_regs * 8,
5909 stack_pointer_rtx, e, NULL);
5911 if (current_function_calls_eh_return)
5912 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
5913 EH_RETURN_STACKADJ_RTX));
5915 /* Switch back to the normal stack if necessary. */
5916 if (sp_switch)
5917 emit_insn (gen_sp_switch_2 ());
5919 /* Tell flow the insn that pops PR isn't dead. */
5920 /* PR_REG will never be live in SHmedia mode, and we don't need to
5921 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
5922 by the return pattern. */
5923 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
5924 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
5927 static int sh_need_epilogue_known = 0;
5930 sh_need_epilogue (void)
5932 if (! sh_need_epilogue_known)
5934 rtx epilogue;
5936 start_sequence ();
5937 sh_expand_epilogue (0);
5938 epilogue = get_insns ();
5939 end_sequence ();
5940 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
5942 return sh_need_epilogue_known > 0;
5945 /* Emit code to change the current function's return address to RA.
5946 TEMP is available as a scratch register, if needed. */
5948 void
5949 sh_set_return_address (rtx ra, rtx tmp)
5951 HARD_REG_SET live_regs_mask;
5952 int d;
5953 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
5954 int pr_offset;
5956 d = calc_live_regs (&live_regs_mask);
5958 /* If pr_reg isn't live, we can set it (or the register given in
5959 sh_media_register_for_return) directly. */
5960 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
5962 rtx rr;
5964 if (TARGET_SHMEDIA)
5966 int rr_regno = sh_media_register_for_return ();
5968 if (rr_regno < 0)
5969 rr_regno = pr_reg;
5971 rr = gen_rtx_REG (DImode, rr_regno);
5973 else
5974 rr = gen_rtx_REG (SImode, pr_reg);
5976 emit_insn (GEN_MOV (rr, ra));
5977 /* Tell flow the register for return isn't dead. */
5978 emit_insn (gen_rtx_USE (VOIDmode, rr));
5979 return;
5982 if (TARGET_SH5)
5984 int offset;
5985 save_schedule schedule;
5986 save_entry *entry;
5988 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
5989 offset = entry[1].offset;
5990 for (; entry->mode != VOIDmode; entry--)
5991 if (entry->reg == pr_reg)
5992 goto found;
5994 /* We can't find the PR register.  */
5995 abort ();
5997 found:
5998 offset = entry->offset - offset;
5999 pr_offset = (rounded_frame_size (d) + offset
6000 + SHMEDIA_REGS_STACK_ADJUST ());
6002 else
6003 pr_offset = rounded_frame_size (d);
6005 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6006 emit_insn (GEN_ADD3 (tmp, tmp, frame_pointer_rtx));
6008 tmp = gen_rtx_MEM (Pmode, tmp);
6009 emit_insn (GEN_MOV (tmp, ra));
6012 /* Clear variables at function end. */
6014 static void
6015 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6016 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6018 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
6019 sh_need_epilogue_known = 0;
6020 sp_switch = NULL_RTX;
6023 static rtx
6024 sh_builtin_saveregs (void)
6026 /* First unnamed integer register. */
6027 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
6028 /* Number of integer registers we need to save. */
6029 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6030 /* First unnamed SFmode float reg */
6031 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6032 /* Number of SFmode float regs to save. */
6033 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6034 rtx regbuf, fpregs;
6035 int bufsize, regno;
6036 HOST_WIDE_INT alias_set;
6038 if (TARGET_SH5)
6040 if (n_intregs)
6042 int pushregs = n_intregs;
6044 while (pushregs < NPARM_REGS (SImode) - 1
6045 && (CALL_COOKIE_INT_REG_GET
6046 (current_function_args_info.call_cookie,
6047 NPARM_REGS (SImode) - pushregs)
6048 == 1))
6050 current_function_args_info.call_cookie
6051 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6052 - pushregs, 1);
6053 pushregs++;
6056 if (pushregs == NPARM_REGS (SImode))
6057 current_function_args_info.call_cookie
6058 |= (CALL_COOKIE_INT_REG (0, 1)
6059 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6060 else
6061 current_function_args_info.call_cookie
6062 |= CALL_COOKIE_STACKSEQ (pushregs);
6064 current_function_pretend_args_size += 8 * n_intregs;
6066 if (TARGET_SHCOMPACT)
6067 return const0_rtx;
6070 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
6072 error ("__builtin_saveregs not supported by this subtarget");
6073 return const0_rtx;
6076 if (TARGET_SHMEDIA)
6077 n_floatregs = 0;
6079 /* Allocate block of memory for the regs. */
6080 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6081 Or can assign_stack_local accept a 0 SIZE argument? */
6082 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
6084 if (TARGET_SHMEDIA)
6085 regbuf = gen_rtx_MEM (BLKmode,
6086 gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
6087 else if (n_floatregs & 1)
6089 rtx addr;
6091 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6092 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
6093 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6094 regbuf = change_address (regbuf, BLKmode, addr);
6096 else
6097 regbuf = assign_stack_local (BLKmode, bufsize, 0);
6098 alias_set = get_varargs_alias_set ();
6099 set_mem_alias_set (regbuf, alias_set);
6101 /* Save int args.
6102 This is optimized to only save the regs that are necessary. Explicitly
6103 named args need not be saved. */
6104 if (n_intregs > 0)
6105 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
6106 adjust_address (regbuf, BLKmode,
6107 n_floatregs * UNITS_PER_WORD),
6108 n_intregs);
6110 if (TARGET_SHMEDIA)
6111 /* Return the address of the regbuf. */
6112 return XEXP (regbuf, 0);
6114 /* Save float args.
6115 This is optimized to only save the regs that are necessary. Explicitly
6116 named args need not be saved.
6117 We explicitly build a pointer to the buffer because it halves the insn
6118 count when not optimizing (otherwise the pointer is built for each reg
6119 saved).
6120 We emit the moves in reverse order so that we can use predecrement. */
6122 fpregs = gen_reg_rtx (Pmode);
6123 emit_move_insn (fpregs, XEXP (regbuf, 0));
6124 emit_insn (gen_addsi3 (fpregs, fpregs,
6125 GEN_INT (n_floatregs * UNITS_PER_WORD)));
6126 if (TARGET_SH4)
6128 rtx mem;
6129 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
6131 emit_insn (gen_addsi3 (fpregs, fpregs,
6132 GEN_INT (-2 * UNITS_PER_WORD)));
6133 mem = gen_rtx_MEM (DFmode, fpregs);
6134 set_mem_alias_set (mem, alias_set);
6135 emit_move_insn (mem,
6136 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
6138 regno = first_floatreg;
6139 if (regno & 1)
6141 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6142 mem = gen_rtx_MEM (SFmode, fpregs);
6143 set_mem_alias_set (mem, alias_set);
6144 emit_move_insn (mem,
6145 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
6146 - (TARGET_LITTLE_ENDIAN != 0)));
6149 else
6150 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
6152 rtx mem;
6154 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6155 mem = gen_rtx_MEM (SFmode, fpregs);
6156 set_mem_alias_set (mem, alias_set);
6157 emit_move_insn (mem,
6158 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
6161 /* Return the address of the regbuf. */
6162 return XEXP (regbuf, 0);
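/* A sketch of the buffer built above in the non-SHmedia case, as implied
   by the stores above (UNITS_PER_WORD assumed to be 4):

     [regbuf + 0, regbuf + n_floatregs * 4)        unnamed FP arg registers,
                                                    filled in reverse order
     [regbuf + n_floatregs * 4, regbuf + bufsize)  unnamed integer arg registers

   The address of this buffer is the value returned; sh_va_start below
   points __va_next_fp at its start and __va_next_o at the integer area.  */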
6165 /* Define the `__builtin_va_list' type for the ABI. */
6167 static tree
6168 sh_build_builtin_va_list (void)
6170 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6171 tree record;
6173 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
6174 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6175 return ptr_type_node;
6177 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6179 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
6180 ptr_type_node);
6181 f_next_o_limit = build_decl (FIELD_DECL,
6182 get_identifier ("__va_next_o_limit"),
6183 ptr_type_node);
6184 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
6185 ptr_type_node);
6186 f_next_fp_limit = build_decl (FIELD_DECL,
6187 get_identifier ("__va_next_fp_limit"),
6188 ptr_type_node);
6189 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
6190 ptr_type_node);
6192 DECL_FIELD_CONTEXT (f_next_o) = record;
6193 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
6194 DECL_FIELD_CONTEXT (f_next_fp) = record;
6195 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
6196 DECL_FIELD_CONTEXT (f_next_stack) = record;
6198 TYPE_FIELDS (record) = f_next_o;
6199 TREE_CHAIN (f_next_o) = f_next_o_limit;
6200 TREE_CHAIN (f_next_o_limit) = f_next_fp;
6201 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
6202 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
6204 layout_type (record);
6206 return record;
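/* For reference, the record built above amounts to the following C
   declaration (a sketch only; the typedef name is made up, and the field
   roles are taken from sh_va_start / sh_va_arg below):  */
#if 0
typedef struct
{
  void *__va_next_o;         /* next saved integer argument */
  void *__va_next_o_limit;   /* end of the saved integer argument area */
  void *__va_next_fp;        /* next saved floating-point argument */
  void *__va_next_fp_limit;  /* end of the saved FP argument area */
  void *__va_next_stack;     /* next argument passed on the stack */
} sh_va_list_sketch;
#endif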
6209 /* Implement `va_start' for varargs and stdarg. */
6211 void
6212 sh_va_start (tree valist, rtx nextarg)
6214 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6215 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6216 tree t, u;
6217 int nfp, nint;
6219 if (TARGET_SH5)
6221 expand_builtin_saveregs ();
6222 std_expand_builtin_va_start (valist, nextarg);
6223 return;
6226 if ((! TARGET_SH2E && ! TARGET_SH4)
6227 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6229 std_expand_builtin_va_start (valist, nextarg);
6230 return;
6233 f_next_o = TYPE_FIELDS (va_list_type_node);
6234 f_next_o_limit = TREE_CHAIN (f_next_o);
6235 f_next_fp = TREE_CHAIN (f_next_o_limit);
6236 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6237 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6239 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6240 NULL_TREE);
6241 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6242 valist, f_next_o_limit, NULL_TREE);
6243 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
6244 NULL_TREE);
6245 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6246 valist, f_next_fp_limit, NULL_TREE);
6247 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6248 valist, f_next_stack, NULL_TREE);
6250 /* Call __builtin_saveregs. */
6251 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
6252 t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
6253 TREE_SIDE_EFFECTS (t) = 1;
6254 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6256 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
6257 if (nfp < 8)
6258 nfp = 8 - nfp;
6259 else
6260 nfp = 0;
6261 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6262 build_int_2 (UNITS_PER_WORD * nfp, 0)));
6263 t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
6264 TREE_SIDE_EFFECTS (t) = 1;
6265 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6267 t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
6268 TREE_SIDE_EFFECTS (t) = 1;
6269 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6271 nint = current_function_args_info.arg_count[SH_ARG_INT];
6272 if (nint < 4)
6273 nint = 4 - nint;
6274 else
6275 nint = 0;
6276 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6277 build_int_2 (UNITS_PER_WORD * nint, 0)));
6278 t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
6279 TREE_SIDE_EFFECTS (t) = 1;
6280 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6282 u = make_tree (ptr_type_node, nextarg);
6283 t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
6284 TREE_SIDE_EFFECTS (t) = 1;
6285 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
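/* In summary, the assignments above leave the fields as follows
   (UNITS_PER_WORD assumed to be 4; the "unnamed" counts are clamped at 0):

     __va_next_fp       = __builtin_saveregs ();
     __va_next_fp_limit = __va_next_fp + 4 * (8 - named FP args);
     __va_next_o        = __va_next_fp_limit;
     __va_next_o_limit  = __va_next_o + 4 * (4 - named int args);
     __va_next_stack    = nextarg;  */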
6288 /* Implement `va_arg'. */
6291 sh_va_arg (tree valist, tree type)
6293 HOST_WIDE_INT size, rsize;
6294 tree tmp, pptr_type_node;
6295 rtx addr_rtx, r;
6296 rtx result_ptr, result = NULL_RTX;
6297 int pass_by_ref = MUST_PASS_IN_STACK (TYPE_MODE (type), type);
6298 rtx lab_over = NULL_RTX;
6300 size = int_size_in_bytes (type);
6301 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6302 pptr_type_node = build_pointer_type (ptr_type_node);
6304 if (pass_by_ref)
6305 type = build_pointer_type (type);
6307 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
6308 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
6310 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6311 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6312 int pass_as_float;
6313 rtx lab_false;
6315 f_next_o = TYPE_FIELDS (va_list_type_node);
6316 f_next_o_limit = TREE_CHAIN (f_next_o);
6317 f_next_fp = TREE_CHAIN (f_next_o_limit);
6318 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6319 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6321 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6322 NULL_TREE);
6323 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6324 valist, f_next_o_limit, NULL_TREE);
6325 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
6326 valist, f_next_fp, NULL_TREE);
6327 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6328 valist, f_next_fp_limit, NULL_TREE);
6329 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6330 valist, f_next_stack, NULL_TREE);
6332 /* Structures with a single member with a distinct mode are passed
6333 like their member. This is relevant if the latter has a REAL_TYPE
6334 or COMPLEX_TYPE type. */
6335 if (TREE_CODE (type) == RECORD_TYPE
6336 && TYPE_FIELDS (type)
6337 && TREE_CODE (TYPE_FIELDS (type)) == FIELD_DECL
6338 && (TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == REAL_TYPE
6339 || TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == COMPLEX_TYPE)
6340 && TREE_CHAIN (TYPE_FIELDS (type)) == NULL_TREE)
6341 type = TREE_TYPE (TYPE_FIELDS (type));
6342 if (TARGET_SH4)
6344 pass_as_float = ((TREE_CODE (type) == REAL_TYPE && size <= 8)
6345 || (TREE_CODE (type) == COMPLEX_TYPE
6346 && TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
6347 && size <= 16));
6349 else
6351 pass_as_float = (TREE_CODE (type) == REAL_TYPE && size == 4);
6354 addr_rtx = gen_reg_rtx (Pmode);
6355 lab_false = gen_label_rtx ();
6356 lab_over = gen_label_rtx ();
6358 tmp = make_tree (pptr_type_node, addr_rtx);
6359 valist = build1 (INDIRECT_REF, ptr_type_node, tmp);
6361 if (pass_as_float)
6363 int first_floatreg
6364 = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6365 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6367 emit_cmp_and_jump_insns (expand_expr (next_fp, NULL_RTX, Pmode,
6368 EXPAND_NORMAL),
6369 expand_expr (next_fp_limit, NULL_RTX,
6370 Pmode, EXPAND_NORMAL),
6371 GE, const1_rtx, Pmode, 1, lab_false);
6373 if (TYPE_ALIGN (type) > BITS_PER_WORD
6374 || (((TREE_CODE (type) == REAL_TYPE && size == 8) || size == 16)
6375 && (n_floatregs & 1)))
6377 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp,
6378 build_int_2 (UNITS_PER_WORD, 0));
6379 tmp = build (PLUS_EXPR, ptr_type_node, next_fp, tmp);
6380 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp, tmp);
6381 TREE_SIDE_EFFECTS (tmp) = 1;
6382 expand_expr (tmp, const0_rtx, VOIDmode, EXPAND_NORMAL);
6385 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
6386 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
6387 if (r != addr_rtx)
6388 emit_move_insn (addr_rtx, r);
6390 #ifdef FUNCTION_ARG_SCmode_WART
6391 if (TYPE_MODE (type) == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
6393 rtx addr, real, imag, result_value, slot;
6394 tree subtype = TREE_TYPE (type);
6396 addr = std_expand_builtin_va_arg (valist, subtype);
6397 #ifdef POINTERS_EXTEND_UNSIGNED
6398 if (GET_MODE (addr) != Pmode)
6399 addr = convert_memory_address (Pmode, addr);
6400 #endif
6401 imag = gen_rtx_MEM (TYPE_MODE (type), addr);
6402 set_mem_alias_set (imag, get_varargs_alias_set ());
6404 addr = std_expand_builtin_va_arg (valist, subtype);
6405 #ifdef POINTERS_EXTEND_UNSIGNED
6406 if (GET_MODE (addr) != Pmode)
6407 addr = convert_memory_address (Pmode, addr);
6408 #endif
6409 real = gen_rtx_MEM (TYPE_MODE (type), addr);
6410 set_mem_alias_set (real, get_varargs_alias_set ());
6412 result_value = gen_rtx_CONCAT (SCmode, real, imag);
6413 /* ??? this interface is stupid - why require a pointer? */
6414 result = gen_reg_rtx (Pmode);
6415 slot = assign_stack_temp (SCmode, 8, 0);
6416 emit_move_insn (slot, result_value);
6417 emit_move_insn (result, XEXP (slot, 0));
6419 #endif /* FUNCTION_ARG_SCmode_WART */
6421 emit_jump_insn (gen_jump (lab_over));
6422 emit_barrier ();
6423 emit_label (lab_false);
6425 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6426 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
6427 if (r != addr_rtx)
6428 emit_move_insn (addr_rtx, r);
6430 else
6432 tmp = build (PLUS_EXPR, ptr_type_node, next_o,
6433 build_int_2 (rsize, 0));
6435 emit_cmp_and_jump_insns (expand_expr (tmp, NULL_RTX, Pmode,
6436 EXPAND_NORMAL),
6437 expand_expr (next_o_limit, NULL_RTX,
6438 Pmode, EXPAND_NORMAL),
6439 GT, const1_rtx, Pmode, 1, lab_false);
6441 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
6442 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
6443 if (r != addr_rtx)
6444 emit_move_insn (addr_rtx, r);
6446 emit_jump_insn (gen_jump (lab_over));
6447 emit_barrier ();
6448 emit_label (lab_false);
6450 if (size > 4 && ! TARGET_SH4)
6452 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
6453 TREE_SIDE_EFFECTS (tmp) = 1;
6454 expand_expr (tmp, const0_rtx, VOIDmode, EXPAND_NORMAL);
6457 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6458 r = expand_expr (tmp, addr_rtx, Pmode, EXPAND_NORMAL);
6459 if (r != addr_rtx)
6460 emit_move_insn (addr_rtx, r);
6463 if (! result)
6464 emit_label (lab_over);
6467 /* ??? In va-sh.h, there had been code to make values larger than
6468 size 8 indirect. This does not match the FUNCTION_ARG macros. */
6470 result_ptr = std_expand_builtin_va_arg (valist, type);
6471 if (result)
6473 emit_move_insn (result, result_ptr);
6474 emit_label (lab_over);
6476 else
6477 result = result_ptr;
6479 if (pass_by_ref)
6481 #ifdef POINTERS_EXTEND_UNSIGNED
7482 if (GET_MODE (result) != Pmode)
7483 result = convert_memory_address (Pmode, result);
6484 #endif
6485 result = gen_rtx_MEM (ptr_mode, force_reg (Pmode, result));
6486 set_mem_alias_set (result, get_varargs_alias_set ());
6488 /* ??? expand_builtin_va_arg will also set the alias set of the dereferenced
6489 argument to the varargs alias set. */
6490 return result;
6493 bool
6494 sh_promote_prototypes (tree type)
6496 if (TARGET_HITACHI)
6497 return 0;
6498 if (! type)
6499 return 1;
6500 return ! sh_attr_renesas_p (type);
6503 /* Define where to put the arguments to a function.
6504 Value is zero to push the argument on the stack,
6505 or a hard register in which to store the argument.
6507 MODE is the argument's machine mode.
6508 TYPE is the data type of the argument (as a tree).
6509 This is null for libcalls where that information may
6510 not be available.
6511 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6512 the preceding args and about the function being called.
6513 NAMED is nonzero if this argument is a named parameter
6514 (otherwise it is an extra parameter matching an ellipsis).
6516 On SH the first args are normally in registers
6517 and the rest are pushed. Any arg that starts within the first
6518 NPARM_REGS words is at least partially passed in a register unless
6519 its data type forbids. */
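/* As a concrete illustration, relying on the usual SH register numbering
   (FIRST_PARM_REG being r4 and NPARM_REGS (SImode) being 4, which matches
   the arithmetic in sh_va_start above but is not spelled out here): for a
   prototyped call f (1, 2, 3, 4, 5) the first four ints are passed in
   r4..r7 and the fifth is pushed on the stack.  */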
6523 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
6524 tree type, int named)
6526 if (! TARGET_SH5 && mode == VOIDmode)
6527 return GEN_INT (ca->renesas_abi ? 1 : 0);
6529 if (! TARGET_SH5
6530 && PASS_IN_REG_P (*ca, mode, type)
6531 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
6533 int regno;
6535 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
6536 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
6538 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
6539 gen_rtx_REG (SFmode,
6540 BASE_ARG_REG (mode)
6541 + (ROUND_REG (*ca, mode) ^ 1)),
6542 const0_rtx);
6543 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
6544 gen_rtx_REG (SFmode,
6545 BASE_ARG_REG (mode)
6546 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
6547 GEN_INT (4));
6548 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
6551 /* If the alignment of a DF value causes an SF register to be
6552 skipped, we will use that skipped register for the next SF
6553 value. */
6554 if ((TARGET_HITACHI || ca->renesas_abi)
6555 && ca->free_single_fp_reg
6556 && mode == SFmode)
6557 return gen_rtx_REG (mode, ca->free_single_fp_reg);
6559 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
6560 ^ (mode == SFmode && TARGET_SH4
6561 && TARGET_LITTLE_ENDIAN != 0
6562 && ! TARGET_HITACHI && ! ca->renesas_abi);
6563 return gen_rtx_REG (mode, regno);
6567 if (TARGET_SH5)
6569 if (mode == VOIDmode && TARGET_SHCOMPACT)
6570 return GEN_INT (ca->call_cookie);
6572 /* The following test assumes unnamed arguments are promoted to
6573 DFmode. */
6574 if (mode == SFmode && ca->free_single_fp_reg)
6575 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
6577 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
6578 && (named || ! ca->prototype_p)
6579 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
6581 if (! ca->prototype_p && TARGET_SHMEDIA)
6582 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
6584 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
6585 FIRST_FP_PARM_REG
6586 + ca->arg_count[(int) SH_ARG_FLOAT]);
6589 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
6590 && (! TARGET_SHCOMPACT
6591 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
6592 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
6593 type, named))))
6595 return gen_rtx_REG (mode, (FIRST_PARM_REG
6596 + ca->arg_count[(int) SH_ARG_INT]));
6599 return 0;
6602 return 0;
6605 /* Update the data in CUM to advance over an argument
6606 of mode MODE and data type TYPE.
6607 (TYPE is null for libcalls where that information may not be
6608 available.) */
6610 void
6611 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
6612 tree type, int named)
6614 if (ca->force_mem)
6615 ca->force_mem = 0;
6616 else if (TARGET_SH5)
6618 tree type2 = (ca->byref && type
6619 ? TREE_TYPE (type)
6620 : type);
6621 enum machine_mode mode2 = (ca->byref && type
6622 ? TYPE_MODE (type2)
6623 : mode);
6624 int dwords = ((ca->byref
6625 ? ca->byref
6626 : mode2 == BLKmode
6627 ? int_size_in_bytes (type2)
6628 : GET_MODE_SIZE (mode2)) + 7) / 8;
6629 int numregs = MIN (dwords, NPARM_REGS (SImode)
6630 - ca->arg_count[(int) SH_ARG_INT]);
6632 if (numregs)
6634 ca->arg_count[(int) SH_ARG_INT] += numregs;
6635 if (TARGET_SHCOMPACT
6636 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
6638 ca->call_cookie
6639 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6640 - numregs, 1);
6641 /* N.B. We want this also for outgoing. */
6642 ca->stack_regs += numregs;
6644 else if (ca->byref)
6646 if (! ca->outgoing)
6647 ca->stack_regs += numregs;
6648 ca->byref_regs += numregs;
6649 ca->byref = 0;
6651 ca->call_cookie
6652 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6653 - numregs, 2);
6654 while (--numregs);
6655 ca->call_cookie
6656 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6657 - 1, 1);
6659 else if (dwords > numregs)
6661 int pushregs = numregs;
6663 if (TARGET_SHCOMPACT)
6664 ca->stack_regs += numregs;
6665 while (pushregs < NPARM_REGS (SImode) - 1
6666 && (CALL_COOKIE_INT_REG_GET
6667 (ca->call_cookie,
6668 NPARM_REGS (SImode) - pushregs)
6669 == 1))
6671 ca->call_cookie
6672 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6673 - pushregs, 1);
6674 pushregs++;
6676 if (numregs == NPARM_REGS (SImode))
6677 ca->call_cookie
6678 |= CALL_COOKIE_INT_REG (0, 1)
6679 | CALL_COOKIE_STACKSEQ (numregs - 1);
6680 else
6681 ca->call_cookie
6682 |= CALL_COOKIE_STACKSEQ (numregs);
6685 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
6686 && (named || ! ca->prototype_p))
6688 if (mode2 == SFmode && ca->free_single_fp_reg)
6689 ca->free_single_fp_reg = 0;
6690 else if (ca->arg_count[(int) SH_ARG_FLOAT]
6691 < NPARM_REGS (SFmode))
6693 int numfpregs
6694 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
6695 NPARM_REGS (SFmode)
6696 - ca->arg_count[(int) SH_ARG_FLOAT]);
6698 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
6700 if (TARGET_SHCOMPACT && ! ca->prototype_p)
6702 if (ca->outgoing && numregs > 0)
6705 ca->call_cookie
6706 |= (CALL_COOKIE_INT_REG
6707 (ca->arg_count[(int) SH_ARG_INT]
6708 - numregs + ((numfpregs - 2) / 2),
6709 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
6710 - numfpregs) / 2));
6712 while (numfpregs -= 2);
6714 else if (mode2 == SFmode && (named)
6715 && (ca->arg_count[(int) SH_ARG_FLOAT]
6716 < NPARM_REGS (SFmode)))
6717 ca->free_single_fp_reg
6718 = FIRST_FP_PARM_REG - numfpregs
6719 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
6722 return;
6725 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
6727 /* Note that we've used the skipped register. */
6728 if (mode == SFmode && ca->free_single_fp_reg)
6730 ca->free_single_fp_reg = 0;
6731 return;
6733 /* When we have a DF after an SF, there's an SF register that gets
6734 skipped in order to align the DF value. We note this skipped
6735 register, because the next SF value will use it, and not the
6736 SF that follows the DF. */
6737 if (mode == DFmode
6738 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
6740 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
6741 + BASE_ARG_REG (mode));
6745 if (! (TARGET_SH4 || ca->renesas_abi)
6746 || PASS_IN_REG_P (*ca, mode, type))
6747 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
6748 = (ROUND_REG (*ca, mode)
6749 + (mode == BLKmode
6750 ? ROUND_ADVANCE (int_size_in_bytes (type))
6751 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
6754 /* The Renesas calling convention doesn't quite fit into this scheme since
6755 the address is passed like an invisible argument, but one that is always
6756 passed in memory. */
6757 static rtx
6758 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
6760 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
6761 return 0;
6762 return gen_rtx_REG (Pmode, 2);
6765 /* Worker function for TARGET_RETURN_IN_MEMORY. */
6767 static bool
6768 sh_return_in_memory (tree type, tree fndecl)
6770 if (TARGET_SH5)
6772 if (TYPE_MODE (type) == BLKmode)
6773 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
6774 else
6775 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
6777 else
6779 return (TYPE_MODE (type) == BLKmode
6780 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
6781 && TREE_CODE (type) == RECORD_TYPE));
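/* E.g., with the rules above: on SH5 a 16-byte BLKmode struct is returned
   in memory while an 8-byte DImode value comes back in a register; with
   the Renesas ABI on non-SH5 targets, every RECORD_TYPE return goes
   through memory.  */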
6785 /* We actually emit the code in sh_expand_prologue. We used to use
6786 a static variable to flag that we need to emit this code, but that
6787 doesn't work when inlining, when functions are deferred and then emitted
6788 later. Fortunately, we already have two flags that are part of struct
6789 function that tell if a function uses varargs or stdarg. */
6790 static void
6791 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
6792 enum machine_mode mode,
6793 tree type,
6794 int *pretend_arg_size,
6795 int second_time ATTRIBUTE_UNUSED)
6797 if (! current_function_stdarg)
6798 abort ();
6799 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6801 int named_parm_regs, anon_parm_regs;
6803 named_parm_regs = (ROUND_REG (*ca, mode)
6804 + (mode == BLKmode
6805 ? ROUND_ADVANCE (int_size_in_bytes (type))
6806 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
6807 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
6808 if (anon_parm_regs > 0)
6809 *pretend_arg_size = anon_parm_regs * 4;
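/* Worked example (assuming NPARM_REGS (SImode) is 4, consistent with
   sh_va_start above): for void f (int a, ...) the single named int
   argument occupies one register, so anon_parm_regs == 3 and
   *pretend_arg_size == 12.  */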
6813 static bool
6814 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
6816 return TARGET_SH5;
6819 static bool
6820 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
6822 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
6826 /* Define the offset between two registers, one to be eliminated, and
6827 the other its replacement, at the start of a routine. */
6830 initial_elimination_offset (int from, int to)
6832 int regs_saved;
6833 int regs_saved_rounding = 0;
6834 int total_saved_regs_space;
6835 int total_auto_space;
6836 int save_flags = target_flags;
6837 int copy_flags;
6838 HARD_REG_SET live_regs_mask;
6840 shmedia_space_reserved_for_target_registers = false;
6841 regs_saved = calc_live_regs (&live_regs_mask);
6842 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
6844 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
6846 shmedia_space_reserved_for_target_registers = true;
6847 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
6850 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
6851 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6852 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
6854 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
6855 copy_flags = target_flags;
6856 target_flags = save_flags;
6858 total_saved_regs_space = regs_saved + regs_saved_rounding;
6860 if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
6861 return total_saved_regs_space + total_auto_space
6862 + current_function_args_info.byref_regs * 8;
6864 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
6865 return total_saved_regs_space + total_auto_space
6866 + current_function_args_info.byref_regs * 8;
6868 /* Initial gap between fp and sp is 0. */
6869 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
6870 return 0;
6872 if (from == RETURN_ADDRESS_POINTER_REGNUM
6873 && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM))
6875 if (TARGET_SH5)
6877 int n = total_saved_regs_space;
6878 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6879 save_schedule schedule;
6880 save_entry *entry;
6882 n += total_auto_space;
6884 /* If it wasn't saved, there's not much we can do. */
6885 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6886 return n;
6888 target_flags = copy_flags;
6890 sh5_schedule_saves (&live_regs_mask, &schedule, n);
6891 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
6892 if (entry->reg == pr_reg)
6894 target_flags = save_flags;
6895 return entry->offset;
6897 abort ();
6899 else
6900 return total_auto_space;
6903 abort ();
6906 /* Handle machine specific pragmas to be semi-compatible with Renesas
6907 compiler. */
6909 void
6910 sh_pr_interrupt (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
6912 pragma_interrupt = 1;
6915 void
6916 sh_pr_trapa (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
6918 pragma_interrupt = pragma_trapa = 1;
6921 void
6922 sh_pr_nosave_low_regs (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
6924 pragma_nosave_low_regs = 1;
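/* A usage sketch (hypothetical source; the pragma spellings are an
   assumption based on the handler names above):  */
#if 0
#pragma interrupt
void irq_handler (void);        /* picks up the interrupt_handler attribute
                                   via sh_insert_attributes below */
#pragma trapa
void trapa_handler (void);      /* as above, but with pragma_trapa also set */

#pragma nosave_low_regs
void banked_irq_handler (void); /* low registers not saved */
#endif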
6927 /* Generate the 'interrupt_handler' attribute for decls.  */
6929 static void
6930 sh_insert_attributes (tree node, tree *attributes)
6932 if (! pragma_interrupt
6933 || TREE_CODE (node) != FUNCTION_DECL)
6934 return;
6936 /* We are only interested in declarations.  */
6937 if (TREE_CODE_CLASS (TREE_CODE (node)) != 'd')
6938 return;
6940 /* Add an 'interrupt_handler' attribute.  */
6941 * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
6943 return;
6946 /* Supported attributes:
6948 interrupt_handler -- specifies this function is an interrupt handler.
6950 sp_switch -- specifies an alternate stack for an interrupt handler
6951 to run on.
6953 trap_exit -- use a trapa to exit an interrupt function instead of
6954 an rte instruction.
6956 renesas -- use Renesas calling/layout conventions (functions and
6957 structures).
6961 const struct attribute_spec sh_attribute_table[] =
6963 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
6964 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
6965 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
6966 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
6967 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
6968 { NULL, 0, 0, false, false, false, NULL }
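/* A usage sketch for the table above (hypothetical declarations; per the
   handlers below, sp_switch takes a string constant, trap_exit an integer
   constant, and both are only accepted on interrupt functions):  */
#if 0
void isr (void) __attribute__ ((interrupt_handler));

#pragma interrupt
void isr2 (void) __attribute__ ((sp_switch ("alt_stack"), trap_exit (12)));

int renesas_abi_fn (int) __attribute__ ((renesas));
#endif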
6971 /* Handle an "interrupt_handler" attribute; arguments as in
6972 struct attribute_spec.handler. */
6973 static tree
6974 sh_handle_interrupt_handler_attribute (tree *node, tree name,
6975 tree args ATTRIBUTE_UNUSED,
6976 int flags ATTRIBUTE_UNUSED,
6977 bool *no_add_attrs)
6979 if (TREE_CODE (*node) != FUNCTION_DECL)
6981 warning ("`%s' attribute only applies to functions",
6982 IDENTIFIER_POINTER (name));
6983 *no_add_attrs = true;
6985 else if (TARGET_SHCOMPACT)
6987 error ("attribute interrupt_handler is not compatible with -m5-compact");
6988 *no_add_attrs = true;
6991 return NULL_TREE;
6994 /* Handle an "sp_switch" attribute; arguments as in
6995 struct attribute_spec.handler. */
6996 static tree
6997 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
6998 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7000 if (TREE_CODE (*node) != FUNCTION_DECL)
7002 warning ("`%s' attribute only applies to functions",
7003 IDENTIFIER_POINTER (name));
7004 *no_add_attrs = true;
7006 else if (!pragma_interrupt)
7008 /* The sp_switch attribute only has meaning for interrupt functions. */
7009 warning ("`%s' attribute only applies to interrupt functions",
7010 IDENTIFIER_POINTER (name));
7011 *no_add_attrs = true;
7013 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
7015 /* The argument must be a constant string. */
7016 warning ("`%s' attribute argument not a string constant",
7017 IDENTIFIER_POINTER (name));
7018 *no_add_attrs = true;
7020 else
7022 sp_switch = gen_rtx_SYMBOL_REF (VOIDmode,
7023 TREE_STRING_POINTER (TREE_VALUE (args)));
7026 return NULL_TREE;
7029 /* Handle a "trap_exit" attribute; arguments as in
7030 struct attribute_spec.handler. */
7031 static tree
7032 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
7033 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7035 if (TREE_CODE (*node) != FUNCTION_DECL)
7037 warning ("`%s' attribute only applies to functions",
7038 IDENTIFIER_POINTER (name));
7039 *no_add_attrs = true;
7041 else if (!pragma_interrupt)
7043 /* The trap_exit attribute only has meaning for interrupt functions. */
7044 warning ("`%s' attribute only applies to interrupt functions",
7045 IDENTIFIER_POINTER (name));
7046 *no_add_attrs = true;
7048 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
7050 /* The argument must be a constant integer. */
7051 warning ("`%s' attribute argument not an integer constant",
7052 IDENTIFIER_POINTER (name));
7053 *no_add_attrs = true;
7055 else
7057 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
7060 return NULL_TREE;
7063 static tree
7064 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
7065 tree name ATTRIBUTE_UNUSED,
7066 tree args ATTRIBUTE_UNUSED,
7067 int flags ATTRIBUTE_UNUSED,
7068 bool *no_add_attrs ATTRIBUTE_UNUSED)
7070 return NULL_TREE;
7073 /* True if __attribute__((renesas)) or -mrenesas. */
7075 sh_attr_renesas_p (tree td)
7077 if (TARGET_HITACHI)
7078 return 1;
7079 if (td == 0)
7080 return 0;
7081 if (DECL_P (td))
7082 td = TREE_TYPE (td);
7083 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
7084 != NULL_TREE);
7087 /* True if __attribute__((renesas)) or -mrenesas, for the current
7088 function. */
7090 sh_cfun_attr_renesas_p (void)
7092 return sh_attr_renesas_p (current_function_decl);
7096 sh_cfun_interrupt_handler_p (void)
7098 return (lookup_attribute ("interrupt_handler",
7099 DECL_ATTRIBUTES (current_function_decl))
7100 != NULL_TREE);
7103 /* ??? target_switches in toplev.c is static, hence we have to duplicate it. */
7104 static const struct
7106 const char *const name;
7107 const int value;
7108 const char *const description;
7110 sh_target_switches[] = TARGET_SWITCHES;
7111 #define target_switches sh_target_switches
7113 /* Like default_pch_valid_p, but take flag_mask into account. */
7114 const char *
7115 sh_pch_valid_p (const void *data_p, size_t len)
7117 const char *data = (const char *)data_p;
7118 const char *flag_that_differs = NULL;
7119 size_t i;
7120 int old_flags;
7121 int flag_mask
7122 = (SH1_BIT | SH2_BIT | SH3_BIT | SH_E_BIT | HARD_SH4_BIT | FPU_SINGLE_BIT
7123 | SH4_BIT | HITACHI_BIT | LITTLE_ENDIAN_BIT);
7125 /* -fpic and -fpie also usually make a PCH invalid. */
7126 if (data[0] != flag_pic)
7127 return _("created and used with different settings of -fpic");
7128 if (data[1] != flag_pie)
7129 return _("created and used with different settings of -fpie");
7130 data += 2;
7132 /* Check target_flags. */
7133 memcpy (&old_flags, data, sizeof (target_flags));
7134 if (((old_flags ^ target_flags) & flag_mask) != 0)
7136 for (i = 0; i < ARRAY_SIZE (target_switches); i++)
7138 int bits;
7140 bits = target_switches[i].value;
7141 if (bits < 0)
7142 bits = -bits;
7143 bits &= flag_mask;
7144 if ((target_flags & bits) != (old_flags & bits))
7146 flag_that_differs = target_switches[i].name;
7147 goto make_message;
7150 abort ();
7152 data += sizeof (target_flags);
7153 len -= sizeof (target_flags);
7155 /* Check string options. */
7156 #ifdef TARGET_OPTIONS
7157 for (i = 0; i < ARRAY_SIZE (target_options); i++)
7159 const char *str = *target_options[i].variable;
7160 size_t l;
7161 if (! str)
7162 str = "";
7163 l = strlen (str) + 1;
7164 if (len < l || memcmp (data, str, l) != 0)
7166 flag_that_differs = target_options[i].prefix;
7167 goto make_message;
7169 data += l;
7170 len -= l;
7172 #endif
7174 return NULL;
7176 make_message:
7178 char *r;
7179 asprintf (&r, _("created and used with differing settings of `-m%s'"),
7180 flag_that_differs);
7181 if (r == NULL)
7182 return _("out of memory");
7183 return r;
7187 /* Predicates used by the templates. */
7189 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
7190 Used only in general_movsrc_operand. */
7193 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7195 switch (REGNO (op))
7197 case PR_REG:
7198 case MACL_REG:
7199 case MACH_REG:
7200 return 1;
7202 return 0;
7205 /* Returns 1 if OP can be source of a simple move operation.
7206 Same as general_operand, but a LABEL_REF is valid, PRE_DEC is
7207 invalid as are subregs of system registers. */
7210 general_movsrc_operand (rtx op, enum machine_mode mode)
7212 if (GET_CODE (op) == MEM)
7214 rtx inside = XEXP (op, 0);
7215 if (GET_CODE (inside) == CONST)
7216 inside = XEXP (inside, 0);
7218 if (GET_CODE (inside) == LABEL_REF)
7219 return 1;
7221 if (GET_CODE (inside) == PLUS
7222 && GET_CODE (XEXP (inside, 0)) == LABEL_REF
7223 && GET_CODE (XEXP (inside, 1)) == CONST_INT)
7224 return 1;
7226 /* Only post inc allowed. */
7227 if (GET_CODE (inside) == PRE_DEC)
7228 return 0;
7231 if ((mode == QImode || mode == HImode)
7232 && (GET_CODE (op) == SUBREG
7233 && GET_CODE (XEXP (op, 0)) == REG
7234 && system_reg_operand (XEXP (op, 0), mode)))
7235 return 0;
7237 return general_operand (op, mode);
7240 /* Returns 1 if OP can be a destination of a move.
7241 Same as general_operand, but no preinc allowed. */
7244 general_movdst_operand (rtx op, enum machine_mode mode)
7246 /* Only pre dec allowed. */
7247 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC)
7248 return 0;
7250 return general_operand (op, mode);
7253 /* Returns 1 if OP is a normal arithmetic register. */
7256 arith_reg_operand (rtx op, enum machine_mode mode)
7258 if (register_operand (op, mode))
7260 int regno;
7262 if (GET_CODE (op) == REG)
7263 regno = REGNO (op);
7264 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7265 regno = REGNO (SUBREG_REG (op));
7266 else
7267 return 1;
7269 return (regno != T_REG && regno != PR_REG
7270 && ! TARGET_REGISTER_P (regno)
7271 && (regno != FPUL_REG || TARGET_SH4)
7272 && regno != MACH_REG && regno != MACL_REG);
7274 return 0;
7277 /* Like above, but for DImode destinations: forbid paradoxical DImode subregs,
7278 because this would lead to missing sign extensions when truncating from
7279 DImode to SImode. */
7281 arith_reg_dest (rtx op, enum machine_mode mode)
7283 if (mode == DImode && GET_CODE (op) == SUBREG
7284 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8)
7285 return 0;
7286 return arith_reg_operand (op, mode);
7290 int_gpr_dest (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7292 enum machine_mode op_mode = GET_MODE (op);
7294 if (GET_MODE_CLASS (op_mode) != MODE_INT
7295 || GET_MODE_SIZE (op_mode) >= UNITS_PER_WORD)
7296 return 0;
7297 if (! reload_completed)
7298 return 0;
7299 return true_regnum (op) <= LAST_GENERAL_REG;
7303 fp_arith_reg_operand (rtx op, enum machine_mode mode)
7305 if (register_operand (op, mode))
7307 int regno;
7309 if (GET_CODE (op) == REG)
7310 regno = REGNO (op);
7311 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7312 regno = REGNO (SUBREG_REG (op));
7313 else
7314 return 1;
7316 return (regno >= FIRST_PSEUDO_REGISTER
7317 || FP_REGISTER_P (regno));
7319 return 0;
7322 /* Returns 1 if OP is a valid source operand for an arithmetic insn. */
7325 arith_operand (rtx op, enum machine_mode mode)
7327 if (arith_reg_operand (op, mode))
7328 return 1;
7330 if (TARGET_SHMEDIA)
7332 /* FIXME: We should be checking whether the CONST_INT fits in a
7333 CONST_OK_FOR_I16 here, but this causes reload_cse to crash when
7334 attempting to transform a sequence of two 64-bit sets of the
7335 same register from literal constants into a set and an add,
7336 when the difference is too wide for an add. */
7337 if (GET_CODE (op) == CONST_INT
7338 || EXTRA_CONSTRAINT_C16 (op))
7339 return 1;
7340 else
7341 return 0;
7343 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I08 (INTVAL (op)))
7344 return 1;
7346 return 0;
7349 /* Returns 1 if OP is a valid source operand for a compare insn. */
7352 arith_reg_or_0_operand (rtx op, enum machine_mode mode)
7354 if (arith_reg_operand (op, mode))
7355 return 1;
7357 if (EXTRA_CONSTRAINT_Z (op))
7358 return 1;
7360 return 0;
7363 /* Return 1 if OP is a valid source operand for an SHmedia operation
7364 that takes either a register or a 6-bit immediate. */
7367 shmedia_6bit_operand (rtx op, enum machine_mode mode)
7369 return (arith_reg_operand (op, mode)
7370 || (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I06 (INTVAL (op))));
7373 /* Returns 1 if OP is a valid source operand for a logical operation. */
7376 logical_operand (rtx op, enum machine_mode mode)
7378 if (arith_reg_operand (op, mode))
7379 return 1;
7381 if (TARGET_SHMEDIA)
7383 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I10 (INTVAL (op)))
7384 return 1;
7385 else
7386 return 0;
7388 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_K08 (INTVAL (op)))
7389 return 1;
7391 return 0;
7395 and_operand (rtx op, enum machine_mode mode)
7397 if (logical_operand (op, mode))
7398 return 1;
7400 /* Check mshflo.l / mshflhi.l opportunities. */
7401 if (TARGET_SHMEDIA
7402 && mode == DImode
7403 && GET_CODE (op) == CONST_INT
7404 && CONST_OK_FOR_J16 (INTVAL (op)))
7405 return 1;
7407 return 0;
7410 /* Nonzero if OP is a floating point value with value 0.0. */
7413 fp_zero_operand (rtx op)
7415 REAL_VALUE_TYPE r;
7417 if (GET_MODE (op) != SFmode)
7418 return 0;
7420 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7421 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
7424 /* Nonzero if OP is a floating point value with value 1.0. */
7427 fp_one_operand (rtx op)
7429 REAL_VALUE_TYPE r;
7431 if (GET_MODE (op) != SFmode)
7432 return 0;
7434 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7435 return REAL_VALUES_EQUAL (r, dconst1);
7438 /* For -m4 and -m4-single-only, mode switching is used. If we are
7439 compiling without -mfmovd, movsf_ie isn't taken into account for
7440 mode switching. We could check in machine_dependent_reorg for
7441 cases where we know we are in single precision mode, but there is
7442 no interface to find that out during reload, so we must avoid
7443 choosing an fldi alternative during reload and thus failing to
7444 allocate a scratch register for the constant loading. */
7446 fldi_ok (void)
7448 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
7452 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7454 enum rtx_code code = GET_CODE (op);
7455 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
7459 fpscr_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7461 return (GET_CODE (op) == REG && REGNO (op) == FPSCR_REG
7462 && GET_MODE (op) == PSImode);
7466 fpul_operand (rtx op, enum machine_mode mode)
7468 if (TARGET_SHMEDIA)
7469 return fp_arith_reg_operand (op, mode);
7471 return (GET_CODE (op) == REG
7472 && (REGNO (op) == FPUL_REG || REGNO (op) >= FIRST_PSEUDO_REGISTER)
7473 && GET_MODE (op) == mode);
7477 symbol_ref_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7479 return (GET_CODE (op) == SYMBOL_REF);
7482 /* Return the TLS type for TLS symbols, 0 for otherwise. */
7484 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7486 if (GET_CODE (op) != SYMBOL_REF)
7487 return 0;
7488 return SYMBOL_REF_TLS_MODEL (op);
7492 commutative_float_operator (rtx op, enum machine_mode mode)
7494 if (GET_MODE (op) != mode)
7495 return 0;
7496 switch (GET_CODE (op))
7498 case PLUS:
7499 case MULT:
7500 return 1;
7501 default:
7502 break;
7504 return 0;
7508 noncommutative_float_operator (rtx op, enum machine_mode mode)
7510 if (GET_MODE (op) != mode)
7511 return 0;
7512 switch (GET_CODE (op))
7514 case MINUS:
7515 case DIV:
7516 return 1;
7517 default:
7518 break;
7520 return 0;
7524 unary_float_operator (rtx op, enum machine_mode mode)
7526 if (GET_MODE (op) != mode)
7527 return 0;
7528 switch (GET_CODE (op))
7530 case ABS:
7531 case NEG:
7532 case SQRT:
7533 return 1;
7534 default:
7535 break;
7537 return 0;
7541 binary_float_operator (rtx op, enum machine_mode mode)
7543 if (GET_MODE (op) != mode)
7544 return 0;
7545 switch (GET_CODE (op))
7547 case PLUS:
7548 case MINUS:
7549 case MULT:
7550 case DIV:
7551 return 1;
7552 default:
7553 break;
7555 return 0;
7559 binary_logical_operator (rtx op, enum machine_mode mode)
7561 if (GET_MODE (op) != mode)
7562 return 0;
7563 switch (GET_CODE (op))
7565 case IOR:
7566 case AND:
7567 case XOR:
7568 return 1;
7569 default:
7570 break;
7572 return 0;
7576 equality_comparison_operator (rtx op, enum machine_mode mode)
7578 return ((mode == VOIDmode || GET_MODE (op) == mode)
7579 && (GET_CODE (op) == EQ || GET_CODE (op) == NE));
7583 greater_comparison_operator (rtx op, enum machine_mode mode)
7585 if (mode != VOIDmode && GET_MODE (op) != mode)
7586 return 0;
7587 switch (GET_CODE (op))
7589 case GT:
7590 case GE:
7591 case GTU:
7592 case GEU:
7593 return 1;
7594 default:
7595 return 0;
7600 less_comparison_operator (rtx op, enum machine_mode mode)
7602 if (mode != VOIDmode && GET_MODE (op) != mode)
7603 return 0;
7604 switch (GET_CODE (op))
7606 case LT:
7607 case LE:
7608 case LTU:
7609 case LEU:
7610 return 1;
7611 default:
7612 return 0;
7616 /* Accept pseudos and branch target registers. */
7618 target_reg_operand (rtx op, enum machine_mode mode)
7620 if (mode != DImode
7621 || GET_MODE (op) != DImode)
7622 return 0;
7624 if (GET_CODE (op) == SUBREG)
7625 op = XEXP (op, 0);
7627 if (GET_CODE (op) != REG)
7628 return 0;
7630 /* We must protect ourselves from matching pseudos that are virtual
7631 registers, because they will eventually be replaced with hardware
7632 registers that aren't branch-target registers. */
7633 if (REGNO (op) > LAST_VIRTUAL_REGISTER
7634 || TARGET_REGISTER_P (REGNO (op)))
7635 return 1;
7637 return 0;
7640 /* Same as target_reg_operand, except that label_refs and symbol_refs
7641 are accepted before reload. */
7643 target_operand (rtx op, enum machine_mode mode)
7645 if (mode != DImode)
7646 return 0;
7648 if ((GET_MODE (op) == DImode || GET_MODE (op) == VOIDmode)
7649 && EXTRA_CONSTRAINT_Csy (op))
7650 return ! reload_completed;
7652 return target_reg_operand (op, mode);
7656 mextr_bit_offset (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7658 HOST_WIDE_INT i;
7660 if (GET_CODE (op) != CONST_INT)
7661 return 0;
7662 i = INTVAL (op);
7663 return i >= 1 * 8 && i <= 7 * 8 && (i & 7) == 0;
7667 extend_reg_operand (rtx op, enum machine_mode mode)
7669 return (GET_CODE (op) == TRUNCATE
7670 ? arith_operand
7671 : arith_reg_operand) (op, mode);
7675 trunc_hi_operand (rtx op, enum machine_mode mode)
7677 enum machine_mode op_mode = GET_MODE (op);
7679 if (op_mode != SImode && op_mode != DImode
7680 && op_mode != V4HImode && op_mode != V2SImode)
7681 return 0;
7682 return extend_reg_operand (op, mode);
7686 extend_reg_or_0_operand (rtx op, enum machine_mode mode)
7688 return (GET_CODE (op) == TRUNCATE
7689 ? arith_operand
7690 : arith_reg_or_0_operand) (op, mode);
7694 general_extend_operand (rtx op, enum machine_mode mode)
7696 return (GET_CODE (op) == TRUNCATE
7697 ? arith_operand
7698 : nonimmediate_operand) (op, mode);
7702 inqhi_operand (rtx op, enum machine_mode mode)
7704 if (GET_CODE (op) != TRUNCATE || mode != GET_MODE (op))
7705 return 0;
7706 op = XEXP (op, 0);
7707 /* Can't use true_regnum here because copy_cost wants to know about
7708 SECONDARY_INPUT_RELOAD_CLASS. */
7709 return GET_CODE (op) == REG && FP_REGISTER_P (REGNO (op));
7713 sh_rep_vec (rtx v, enum machine_mode mode)
7715 int i;
7716 rtx x, y;
7718 if ((GET_CODE (v) != CONST_VECTOR && GET_CODE (v) != PARALLEL)
7719 || (GET_MODE (v) != mode && mode != VOIDmode))
7720 return 0;
7721 i = XVECLEN (v, 0) - 2;
7722 x = XVECEXP (v, 0, i + 1);
7723 if (GET_MODE_UNIT_SIZE (mode) == 1)
7725 y = XVECEXP (v, 0, i);
7726 for (i -= 2; i >= 0; i -= 2)
7727 if (! rtx_equal_p (XVECEXP (v, 0, i + 1), x)
7728 || ! rtx_equal_p (XVECEXP (v, 0, i), y))
7729 return 0;
7731 else
7732 for (; i >= 0; i--)
7733 if (XVECEXP (v, 0, i) != x)
7734 return 0;
7735 return 1;
7738 /* Determine if V is a constant vector matching MODE with only one element
7739 that is not a sign extension. Two byte-sized elements count as one. */
7741 sh_1el_vec (rtx v, enum machine_mode mode)
7743 int unit_size;
7744 int i, last, least, sign_ix;
7745 rtx sign;
7747 if (GET_CODE (v) != CONST_VECTOR
7748 || (GET_MODE (v) != mode && mode != VOIDmode))
7749 return 0;
7750 /* Determine numbers of last and of least significant elements. */
7751 last = XVECLEN (v, 0) - 1;
7752 least = TARGET_LITTLE_ENDIAN ? 0 : last;
7753 if (GET_CODE (XVECEXP (v, 0, least)) != CONST_INT)
7754 return 0;
7755 sign_ix = least;
7756 if (GET_MODE_UNIT_SIZE (mode) == 1)
7757 sign_ix = TARGET_LITTLE_ENDIAN ? 1 : last - 1;
7758 if (GET_CODE (XVECEXP (v, 0, sign_ix)) != CONST_INT)
7759 return 0;
7760 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (v));
7761 sign = (INTVAL (XVECEXP (v, 0, sign_ix)) >> (unit_size * BITS_PER_UNIT - 1)
7762 ? constm1_rtx : const0_rtx);
7763 i = XVECLEN (v, 0) - 1;
7765 if (i != least && i != sign_ix && XVECEXP (v, 0, i) != sign)
7766 return 0;
7767 while (--i);
7768 return 1;
7772 sh_const_vec (rtx v, enum machine_mode mode)
7774 int i;
7776 if (GET_CODE (v) != CONST_VECTOR
7777 || (GET_MODE (v) != mode && mode != VOIDmode))
7778 return 0;
7779 i = XVECLEN (v, 0) - 1;
7780 for (; i >= 0; i--)
7781 if (GET_CODE (XVECEXP (v, 0, i)) != CONST_INT)
7782 return 0;
7783 return 1;
7786 /* Return the destination address of a branch. */
7788 static int
7789 branch_dest (rtx branch)
7791 rtx dest = SET_SRC (PATTERN (branch));
7792 int dest_uid;
7794 if (GET_CODE (dest) == IF_THEN_ELSE)
7795 dest = XEXP (dest, 1);
7796 dest = XEXP (dest, 0);
7797 dest_uid = INSN_UID (dest);
7798 return INSN_ADDRESSES (dest_uid);
7801 /* Return nonzero if REG is not used after INSN.
7802 We assume REG is a reload reg, and therefore does
7803 not live past labels. It may live past calls or jumps though. */
7805 reg_unused_after (rtx reg, rtx insn)
7807 enum rtx_code code;
7808 rtx set;
7810 /* If the reg is set by this instruction, then it is safe for our
7811 case. Disregard the case where this is a store to memory, since
7812 we are checking a register used in the store address. */
7813 set = single_set (insn);
7814 if (set && GET_CODE (SET_DEST (set)) != MEM
7815 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7816 return 1;
7818 while ((insn = NEXT_INSN (insn)))
7820 rtx set;
7821 if (!INSN_P (insn))
7822 continue;
7824 code = GET_CODE (insn);
7826 #if 0
7827 /* If this is a label that existed before reload, then the register
7828 is dead here. However, if this is a label added by reorg, then
7829 the register may still be live here. We can't tell the difference,
7830 so we just ignore labels completely. */
7831 if (code == CODE_LABEL)
7832 return 1;
7833 /* else */
7834 #endif
7836 if (code == JUMP_INSN)
7837 return 0;
7839 /* If this is a sequence, we must handle them all at once.
7840 We could have for instance a call that sets the target register,
7841 and an insn in a delay slot that uses the register. In this case,
7842 we must return 0. */
7843 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
7845 int i;
7846 int retval = 0;
7848 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
7850 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
7851 rtx set = single_set (this_insn);
7853 if (GET_CODE (this_insn) == CALL_INSN)
7854 code = CALL_INSN;
7855 else if (GET_CODE (this_insn) == JUMP_INSN)
7857 if (INSN_ANNULLED_BRANCH_P (this_insn))
7858 return 0;
7859 code = JUMP_INSN;
7862 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7863 return 0;
7864 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7866 if (GET_CODE (SET_DEST (set)) != MEM)
7867 retval = 1;
7868 else
7869 return 0;
7871 if (set == 0
7872 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
7873 return 0;
7875 if (retval == 1)
7876 return 1;
7877 else if (code == JUMP_INSN)
7878 return 0;
7881 set = single_set (insn);
7882 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7883 return 0;
7884 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7885 return GET_CODE (SET_DEST (set)) != MEM;
7886 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
7887 return 0;
7889 if (code == CALL_INSN && call_used_regs[REGNO (reg)])
7890 return 1;
7892 return 1;
7895 #include "ggc.h"
7897 static GTY(()) rtx fpscr_rtx;
7899 get_fpscr_rtx (void)
7901 if (! fpscr_rtx)
7903 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
7904 REG_USERVAR_P (fpscr_rtx) = 1;
7905 mark_user_reg (fpscr_rtx);
7907 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
7908 mark_user_reg (fpscr_rtx);
7909 return fpscr_rtx;
7912 void
7913 emit_sf_insn (rtx pat)
7915 emit_insn (pat);
7918 void
7919 emit_df_insn (rtx pat)
7921 emit_insn (pat);
7924 void
7925 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
7927 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7930 void
7931 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
7933 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
7934 get_fpscr_rtx ()));
7937 void
7938 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
7940 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7943 void
7944 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
7946 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
7947 get_fpscr_rtx ()));
7950 /* ??? gcc does flow analysis strictly after common subexpression
7951 elimination. As a result, common subexpression elimination fails
7952 when there are some intervening statements setting the same register.
7953 If we did nothing about this, this would hurt the precision switching
7954 for SH4 badly. There is some cse after reload, but it is unable to
7955 undo the extra register pressure from the unused instructions, and
7956 it cannot remove auto-increment loads.
7958 A C code example that shows this flow/cse weakness for (at least) SH
7959 and sparc (as of gcc ss-970706) is this:
7961 double
7962 f(double a)
7964 double d;
7965 d = 0.1;
7966 a += d;
7967 d = 1.1;
7968 d = 0.1;
7969 a *= d;
7970 return a;
7973 So we add another pass before common subexpression elimination, to
7974 remove assignments that are dead due to a following assignment in the
7975 same basic block. */
7977 static void
7978 mark_use (rtx x, rtx *reg_set_block)
7980 enum rtx_code code;
7982 if (! x)
7983 return;
7984 code = GET_CODE (x);
7985 switch (code)
7987 case REG:
7989 int regno = REGNO (x);
7990 int nregs = (regno < FIRST_PSEUDO_REGISTER
7991 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
7992 : 1);
7995 reg_set_block[regno + nregs - 1] = 0;
7997 while (--nregs);
7998 break;
8000 case SET:
8002 rtx dest = SET_DEST (x);
8004 if (GET_CODE (dest) == SUBREG)
8005 dest = SUBREG_REG (dest);
8006 if (GET_CODE (dest) != REG)
8007 mark_use (dest, reg_set_block);
8008 mark_use (SET_SRC (x), reg_set_block);
8009 break;
8011 case CLOBBER:
8012 break;
8013 default:
8015 const char *fmt = GET_RTX_FORMAT (code);
8016 int i, j;
8017 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8019 if (fmt[i] == 'e')
8020 mark_use (XEXP (x, i), reg_set_block);
8021 else if (fmt[i] == 'E')
8022 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8023 mark_use (XVECEXP (x, i, j), reg_set_block);
8025 break;
8030 static rtx get_free_reg (HARD_REG_SET);
8032 /* This function returns a register to use to load the address from which
8033 to load the fpscr. Currently it always returns r1 or r7, but when we are
8034 able to use pseudo registers after combine, or have a better mechanism
8035 for choosing a register, it should be done here. */
8036 /* REGS_LIVE is the liveness information for the point for which we
8037 need this allocation. In some bare-bones exit blocks, r1 is live at the
8038 start. We can even have all of r0..r3 being live:
8039 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8040 The insn before which new insns are placed will clobber the register
8041 we return. If a basic block consists only of setting the return value
8042 register to a pseudo and using that register, the return value is not
8043 live before or after this block, yet we'll insert our insns right in
8044 the middle. */
8046 static rtx
8047 get_free_reg (HARD_REG_SET regs_live)
8049 if (! TEST_HARD_REG_BIT (regs_live, 1))
8050 return gen_rtx_REG (Pmode, 1);
8052 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8053 there shouldn't be anything but a jump before the function end. */
8054 if (! TEST_HARD_REG_BIT (regs_live, 7))
8055 return gen_rtx_REG (Pmode, 7);
8057 abort ();
8060 /* This function will set the fpscr from memory.
8061 MODE is the mode we are setting it to. */
8062 void
8063 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8065 enum attr_fp_mode fp_mode = mode;
8066 rtx addr_reg = get_free_reg (regs_live);
8068 if (fp_mode == (enum attr_fp_mode) ACTUAL_NORMAL_MODE (FP_MODE))
8069 emit_insn (gen_fpu_switch1 (addr_reg));
8070 else
8071 emit_insn (gen_fpu_switch0 (addr_reg));
8074 /* Is the given character a logical line separator for the assembler? */
8075 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8076 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
8077 #endif
8080 sh_insn_length_adjustment (rtx insn)
8082 /* Instructions with unfilled delay slots take up an extra two bytes for
8083 the nop in the delay slot. */
8084 if (((GET_CODE (insn) == INSN
8085 && GET_CODE (PATTERN (insn)) != USE
8086 && GET_CODE (PATTERN (insn)) != CLOBBER)
8087 || GET_CODE (insn) == CALL_INSN
8088 || (GET_CODE (insn) == JUMP_INSN
8089 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8090 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8091 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8092 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8093 return 2;
8095 /* SH2e has a bug that prevents the use of annulled branches, so if
8096 the delay slot is not filled, we'll have to put a NOP in it. */
8097 if (sh_cpu == CPU_SH2E
8098 && GET_CODE (insn) == JUMP_INSN
8099 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8100 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8101 && get_attr_type (insn) == TYPE_CBRANCH
8102 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8103 return 2;
8105 /* sh-dsp parallel processing insns take four bytes instead of two. */
8107 if (GET_CODE (insn) == INSN)
8109 int sum = 0;
8110 rtx body = PATTERN (insn);
8111 const char *template;
8112 char c;
8113 int maybe_label = 1;
8115 if (GET_CODE (body) == ASM_INPUT)
8116 template = XSTR (body, 0);
8117 else if (asm_noperands (body) >= 0)
8118 template
8119 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
8120 else
8121 return 0;
8124 int ppi_adjust = 0;
8127 c = *template++;
8128 while (c == ' ' || c == '\t');
8129 /* all sh-dsp parallel-processing insns start with p.
8130 The only non-ppi sh insn starting with p is pref.
8131 The only ppi starting with pr is prnd. */
8132 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
8133 ppi_adjust = 2;
8134 /* The repeat pseudo-insn expands to three insns, a total of
8135 six bytes in size. */
8136 else if ((c == 'r' || c == 'R')
8137 && ! strncasecmp ("epeat", template, 5))
8138 ppi_adjust = 4;
8139 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
8141 /* If this is a label, it is obviously not a ppi insn. */
8142 if (c == ':' && maybe_label)
8144 ppi_adjust = 0;
8145 break;
8147 else if (c == '\'' || c == '"')
8148 maybe_label = 0;
8149 c = *template++;
8151 sum += ppi_adjust;
8152 maybe_label = c != ':';
8154 while (c);
8155 return sum;
8157 return 0;
8160 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8161 isn't protected by a PIC unspec. */
8163 nonpic_symbol_mentioned_p (rtx x)
8165 register const char *fmt;
8166 register int i;
8168 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8169 || GET_CODE (x) == PC)
8170 return 1;
8172 /* We don't want to look into the possible MEM location of a
8173 CONST_DOUBLE, since we're not going to use it, in general. */
8174 if (GET_CODE (x) == CONST_DOUBLE)
8175 return 0;
8177 if (GET_CODE (x) == UNSPEC
8178 && (XINT (x, 1) == UNSPEC_PIC
8179 || XINT (x, 1) == UNSPEC_GOT
8180 || XINT (x, 1) == UNSPEC_GOTOFF
8181 || XINT (x, 1) == UNSPEC_GOTPLT
8182 || XINT (x, 1) == UNSPEC_GOTTPOFF
8183 || XINT (x, 1) == UNSPEC_DTPOFF
8184 || XINT (x, 1) == UNSPEC_PLT))
8185 return 0;
8187 fmt = GET_RTX_FORMAT (GET_CODE (x));
8188 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8190 if (fmt[i] == 'E')
8192 register int j;
8194 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8195 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8196 return 1;
8198 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8199 return 1;
8202 return 0;
8205 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8206 @GOTOFF in `reg'. */
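/* A rough sketch of the cases handled below, with hypothetical C-level
   examples of the symbols involved: a locally binding symbol (e.g. a
   "static int s;") or a LABEL_REF goes through symGOTOFF2reg and is
   addressed as an @GOTOFF offset from the GOT base; any other SYMBOL_REF
   (e.g. a possibly preemptible "extern int e;") has its address loaded
   from a @GOT slot via symGOT2reg; TLS symbols are returned unchanged and
   legitimized elsewhere. */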
8208 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8209 rtx reg)
8211 if (tls_symbolic_operand (orig, Pmode))
8212 return orig;
8214 if (GET_CODE (orig) == LABEL_REF
8215 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8217 if (reg == 0)
8218 reg = gen_reg_rtx (Pmode);
8220 emit_insn (gen_symGOTOFF2reg (reg, orig));
8221 return reg;
8223 else if (GET_CODE (orig) == SYMBOL_REF)
8225 if (reg == 0)
8226 reg = gen_reg_rtx (Pmode);
8228 emit_insn (gen_symGOT2reg (reg, orig));
8229 return reg;
8231 return orig;
8234 /* Mark the use of a constant in the literal table. If the constant
8235 has multiple labels, make it unique. */
8236 static rtx
8237 mark_constant_pool_use (rtx x)
8239 rtx insn, lab, pattern;
8241 if (x == NULL)
8242 return x;
8244 switch (GET_CODE (x))
8246 case LABEL_REF:
8247 x = XEXP (x, 0);
8248 case CODE_LABEL:
8249 break;
8250 default:
8251 return x;
8254 /* Get the first label in the list of labels for the same constant
8255 and delete the other labels in the list. */
8256 lab = x;
8257 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8259 if (GET_CODE (insn) != CODE_LABEL
8260 || LABEL_REFS (insn) != NEXT_INSN (insn))
8261 break;
8262 lab = insn;
8265 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8266 INSN_DELETED_P (insn) = 1;
8268 /* Mark constants in a window. */
8269 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8271 if (GET_CODE (insn) != INSN)
8272 continue;
8274 pattern = PATTERN (insn);
8275 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8276 continue;
8278 switch (XINT (pattern, 1))
8280 case UNSPECV_CONST2:
8281 case UNSPECV_CONST4:
8282 case UNSPECV_CONST8:
8283 XVECEXP (pattern, 0, 1) = const1_rtx;
8284 break;
8285 case UNSPECV_WINDOW_END:
8286 if (XVECEXP (pattern, 0, 0) == x)
8287 return lab;
8288 break;
8289 case UNSPECV_CONST_END:
8290 return lab;
8291 default:
8292 break;
8296 return lab;
8299 /* Return true if it's possible to redirect BRANCH1 to the destination
8300 of an unconditional jump BRANCH2. We only want to do this if the
8301 resulting branch will have a short displacement. */
8302 int
8303 sh_can_redirect_branch (rtx branch1, rtx branch2)
8305 if (flag_expensive_optimizations && simplejump_p (branch2))
8307 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8308 rtx insn;
8309 int distance;
8311 for (distance = 0, insn = NEXT_INSN (branch1);
8312 insn && distance < 256;
8313 insn = PREV_INSN (insn))
8315 if (insn == dest)
8316 return 1;
8317 else
8318 distance += get_attr_length (insn);
8320 for (distance = 0, insn = NEXT_INSN (branch1);
8321 insn && distance < 256;
8322 insn = NEXT_INSN (insn))
8324 if (insn == dest)
8325 return 1;
8326 else
8327 distance += get_attr_length (insn);
8330 return 0;
8333 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8335 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8336 unsigned int new_reg)
8338 /* Interrupt functions can only use registers that have already been
8339 saved by the prologue, even if they would normally be
8340 call-clobbered. */
8342 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
8343 return 0;
8345 return 1;
8348 /* Function to update the integer COST
8349 based on the relationship between INSN that is dependent on
8350 DEP_INSN through the dependence LINK. The default is to make no
8351 adjustment to COST. This can be used for example to specify to
8352 the scheduler that an output- or anti-dependence does not incur
8353 the same cost as a data-dependence. The return value should be
8354 the new value for COST. */
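/* A few of the adjustments made below, as concrete examples rather than an
   exhaustive list: on SHmedia, anti- and output-dependences get a cost of 0
   (1 between two mac-media insns); on SH4, a load feeding the shift amount
   of a SHAD/SHLD pays one extra cycle, and an LS-group insn with latency
   below 3 that feeds a double-precision FP insn, FIPR or FTRV has its
   latency raised to 3. */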
8355 static int
8356 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8358 rtx reg, use_pat;
8360 if (TARGET_SHMEDIA)
8362 /* On SHmedia, if the dependence is an anti-dependence or
8363 output-dependence, there is no cost. */
8364 if (REG_NOTE_KIND (link) != 0)
8365 cost = 0;
8367 if (get_attr_is_mac_media (insn)
8368 && get_attr_is_mac_media (dep_insn))
8369 cost = 1;
8371 else if (REG_NOTE_KIND (link) == 0)
8373 enum attr_type dep_type, type;
8375 if (recog_memoized (insn) < 0
8376 || recog_memoized (dep_insn) < 0)
8377 return cost;
8379 dep_type = get_attr_type (dep_insn);
8380 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
8381 cost--;
8382 if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
8383 && (type = get_attr_type (insn)) != TYPE_CALL
8384 && type != TYPE_SFUNC)
8385 cost--;
8387 /* The only input for a call that is timing-critical is the
8388 function's address. */
8389 if (GET_CODE(insn) == CALL_INSN)
8391 rtx call = PATTERN (insn);
8393 if (GET_CODE (call) == PARALLEL)
8394 call = XVECEXP (call, 0 ,0);
8395 if (GET_CODE (call) == SET)
8396 call = SET_SRC (call);
8397 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
8398 && ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn))
8399 cost = 0;
8401 /* Likewise, the most timing critical input for an sfuncs call
8402 is the function address. However, sfuncs typically start
8403 using their arguments pretty quickly.
8404 Assume a four cycle delay before they are needed. */
8405 /* All sfunc calls are parallels with at least four components.
8406 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8407 else if (GET_CODE (PATTERN (insn)) == PARALLEL
8408 && XVECLEN (PATTERN (insn), 0) >= 4
8409 && (reg = sfunc_uses_reg (insn)))
8411 if (! reg_set_p (reg, dep_insn))
8412 cost -= 4;
8414 /* When the preceding instruction loads the shift amount of
8415 the following SHAD/SHLD, the latency of the load is increased
8416 by 1 cycle. */
8417 else if (TARGET_SH4
8418 && get_attr_type (insn) == TYPE_DYN_SHIFT
8419 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
8420 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
8421 XEXP (SET_SRC (single_set (insn)),
8422 1)))
8423 cost++;
8424 /* When an LS group instruction with a latency of less than
8425 3 cycles is followed by a double-precision floating-point
8426 instruction, FIPR, or FTRV, the latency of the first
8427 instruction is increased to 3 cycles. */
8428 else if (cost < 3
8429 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
8430 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
8431 cost = 3;
8432 /* The lsw register of a double-precision computation is ready one
8433 cycle earlier. */
8434 else if (reload_completed
8435 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
8436 && (use_pat = single_set (insn))
8437 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
8438 SET_SRC (use_pat)))
8439 cost -= 1;
8441 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
8442 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
8443 cost -= 1;
8445 /* An anti-dependence penalty of two applies if the first insn is a double
8446 precision fadd / fsub / fmul. */
8447 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8448 && recog_memoized (dep_insn) >= 0
8449 && get_attr_type (dep_insn) == TYPE_DFP_ARITH
8450 /* A lot of alleged anti-flow dependences are fake,
8451 so check this one is real. */
8452 && flow_dependent_p (dep_insn, insn))
8453 cost = 2;
8456 return cost;
8459 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
8460 if DEP_INSN is anti-flow dependent on INSN. */
8461 static int
8462 flow_dependent_p (rtx insn, rtx dep_insn)
8464 rtx tmp = PATTERN (insn);
8466 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
8467 return tmp == NULL_RTX;
8470 /* A helper function for flow_dependent_p called through note_stores. */
8471 static void
8472 flow_dependent_p_1 (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
8474 rtx * pinsn = (rtx *) data;
8476 if (*pinsn && reg_referenced_p (x, *pinsn))
8477 *pinsn = NULL_RTX;
8480 /* For use by ALLOCATE_INITIAL_VALUE. Note that sh.md contains some
8481 'special function' patterns (type sfunc) that clobber pr, but that
8482 do not look like function calls to leaf_function_p. Hence we must
8483 do this extra check. */
8485 sh_pr_n_sets (void)
8487 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
8490 /* This function returns nonzero if the DFA-based scheduler interface
8491 is to be used. At present this is only supported properly for the SH4.
8492 For the SH1 the current DFA model is just the converted form of the old
8493 pipeline model description. */
8494 static int
8495 sh_use_dfa_interface (void)
8497 if (TARGET_SH1)
8498 return 1;
8499 else
8500 return 0;
8503 /* This function returns "2" to indicate dual issue for the SH4
8504 processor. To be used by the DFA pipeline description. */
8505 static int
8506 sh_issue_rate (void)
8508 if (TARGET_SUPERSCALAR)
8509 return 2;
8510 else
8511 return 1;
8514 /* Functions for ready queue reordering for sched1. */
8516 /* Get weight for mode for a set x. */
8517 static short
8518 find_set_regmode_weight (rtx x, enum machine_mode mode)
8520 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
8521 return 1;
8522 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
8524 if (GET_CODE (SET_DEST (x)) == REG)
8526 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
8527 return 1;
8528 else
8529 return 0;
8531 return 1;
8533 return 0;
8536 /* Get regmode weight for insn. */
8537 static short
8538 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
8540 short reg_weight = 0;
8541 rtx x;
8543 /* Increment weight for each register born here. */
8544 x = PATTERN (insn);
8545 reg_weight += find_set_regmode_weight (x, mode);
8546 if (GET_CODE (x) == PARALLEL)
8548 int j;
8549 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
8551 x = XVECEXP (PATTERN (insn), 0, j);
8552 reg_weight += find_set_regmode_weight (x, mode);
8555 /* Decrement weight for each register that dies here. */
8556 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
8558 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
8560 rtx note = XEXP (x, 0);
8561 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
8562 reg_weight--;
8565 return reg_weight;
8568 /* Calculate regmode weights for all insns of a basic block. */
8569 static void
8570 find_regmode_weight (int b, enum machine_mode mode)
8572 rtx insn, next_tail, head, tail;
8574 get_block_head_tail (b, &head, &tail);
8575 next_tail = NEXT_INSN (tail);
8577 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
8579 /* Handle register life information. */
8580 if (!INSN_P (insn))
8581 continue;
8583 if (mode == SFmode)
8584 INSN_REGMODE_WEIGHT (insn, mode) =
8585 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
8586 else if (mode == SImode)
8587 INSN_REGMODE_WEIGHT (insn, mode) =
8588 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
8592 /* Comparison function for ready queue sorting. */
8593 static int
8594 rank_for_reorder (const void *x, const void *y)
8596 rtx tmp = *(const rtx *) y;
8597 rtx tmp2 = *(const rtx *) x;
8599 /* The insn in a schedule group should be issued first. */
8600 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
8601 return SCHED_GROUP_P (tmp2) ? 1 : -1;
8603 /* If insns are equally good, sort by INSN_LUID (original insn order); this
8604 minimizes instruction movement, thus minimizing sched's effect on
8605 register pressure. */
8606 return INSN_LUID (tmp) - INSN_LUID (tmp2);
8609 /* Resort the array A in which only the element at index N may be out of order. */
8610 static void
8611 swap_reorder (rtx *a, int n)
8613 rtx insn = a[n - 1];
8614 int i = n - 2;
8616 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
8618 a[i + 1] = a[i];
8619 i -= 1;
8621 a[i + 1] = insn;
8624 #define SCHED_REORDER(READY, N_READY) \
8625 do \
8627 if ((N_READY) == 2) \
8628 swap_reorder (READY, N_READY); \
8629 else if ((N_READY) > 2) \
8630 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
8632 while (0)
8634 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
8635 macro. */
8636 static void
8637 ready_reorder (rtx *ready, int nready)
8639 SCHED_REORDER (ready, nready);
8642 /* Calculate regmode weights for all insns of all basic blocks. */
8643 static void
8644 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
8645 int verbose ATTRIBUTE_UNUSED,
8646 int old_max_uid)
8648 basic_block b;
8650 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
8651 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
8653 FOR_EACH_BB_REVERSE (b)
8655 find_regmode_weight (b->index, SImode);
8656 find_regmode_weight (b->index, SFmode);
8659 CURR_REGMODE_PRESSURE (SImode) = 0;
8660 CURR_REGMODE_PRESSURE (SFmode) = 0;
8664 /* Cleanup. */
8665 static void
8666 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
8667 int verbose ATTRIBUTE_UNUSED)
8669 if (regmode_weight[0])
8671 free (regmode_weight[0]);
8672 regmode_weight[0] = NULL;
8674 if (regmode_weight[1])
8676 free (regmode_weight[1]);
8677 regmode_weight[1] = NULL;
8681 /* Cache the can_issue_more so that we can return it from reorder2. Also,
8682 keep count of register pressures on SImode and SFmode. */
8683 static int
8684 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
8685 int sched_verbose ATTRIBUTE_UNUSED,
8686 rtx insn,
8687 int can_issue_more)
8689 if (GET_CODE (PATTERN (insn)) != USE
8690 && GET_CODE (PATTERN (insn)) != CLOBBER)
8691 cached_can_issue_more = can_issue_more - 1;
8692 else
8693 cached_can_issue_more = can_issue_more;
8695 if (reload_completed)
8696 return cached_can_issue_more;
8698 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
8699 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
8701 return cached_can_issue_more;
8704 static void
8705 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
8706 int verbose ATTRIBUTE_UNUSED,
8707 int veclen ATTRIBUTE_UNUSED)
8709 CURR_REGMODE_PRESSURE (SImode) = 0;
8710 CURR_REGMODE_PRESSURE (SFmode) = 0;
8713 /* Some magic numbers. */
8714 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8715 functions that already have high pressure on r0. */
8716 #define R0_MAX_LIFE_REGIONS 2
8717 #define R0_MAX_LIVE_LENGTH 12
8718 /* Register Pressure thresholds for SImode and SFmode registers. */
8719 #define SIMODE_MAX_WEIGHT 5
8720 #define SFMODE_MAX_WEIGHT 10
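/* A sketch of how these thresholds are used: each insn gets a per-mode
   weight (a DFmode result counting as two SFmode registers, a DImode result
   as two SImode registers, and a REG_DEAD or REG_UNUSED note subtracting
   one); sh_variable_issue accumulates those weights into
   CURR_REGMODE_PRESSURE as insns are issued, and once the running total for
   a mode exceeds the corresponding *_MAX_WEIGHT above, high_pressure ()
   returns nonzero, making sh_reorder resort the ready queue and sh_reorder2
   start skipping cycles. */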
8722 /* Return true if the pressure is high for MODE. */
8723 static short
8724 high_pressure (enum machine_mode mode)
8726 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8727 functions that already have high pressure on r0. */
8728 if ((REG_N_SETS (0) - REG_N_DEATHS (0)) >= R0_MAX_LIFE_REGIONS
8729 && REG_LIVE_LENGTH (0) >= R0_MAX_LIVE_LENGTH)
8730 return 1;
8732 if (mode == SFmode)
8733 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
8734 else
8735 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
8738 /* Reorder ready queue if register pressure is high. */
8739 static int
8740 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
8741 int sched_verbose ATTRIBUTE_UNUSED,
8742 rtx *ready,
8743 int *n_readyp,
8744 int clock_var ATTRIBUTE_UNUSED)
8746 if (reload_completed)
8747 return sh_issue_rate ();
8749 if (high_pressure (SFmode) || high_pressure (SImode))
8751 ready_reorder (ready, *n_readyp);
8754 return sh_issue_rate ();
8757 /* Skip cycles if the current register pressure is high. */
8758 static int
8759 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
8760 int sched_verbose ATTRIBUTE_UNUSED,
8761 rtx *ready ATTRIBUTE_UNUSED,
8762 int *n_readyp ATTRIBUTE_UNUSED,
8763 int clock_var ATTRIBUTE_UNUSED)
8765 if (reload_completed)
8766 return cached_can_issue_more;
8768 if (high_pressure(SFmode) || high_pressure (SImode))
8769 skip_cycles = 1;
8771 return cached_can_issue_more;
8774 /* Skip cycles without sorting the ready queue. This will move insns from
8775 Q->R. If this is the last cycle we are skipping, allow sorting of the ready
8776 queue by sh_reorder. */
8778 /* Generally, skipping this many cycles is sufficient for all insns to move
8779 from Q -> R. */
8780 #define MAX_SKIPS 8
8782 static int
8783 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
8784 int sched_verbose ATTRIBUTE_UNUSED,
8785 rtx insn ATTRIBUTE_UNUSED,
8786 int last_clock_var,
8787 int clock_var,
8788 int *sort_p)
8790 if (reload_completed)
8791 return 0;
8793 if (skip_cycles)
8795 if ((clock_var - last_clock_var) < MAX_SKIPS)
8797 *sort_p = 0;
8798 return 1;
8800 /* If this is the last cycle we are skipping, allow reordering of R. */
8801 if ((clock_var - last_clock_var) == MAX_SKIPS)
8803 *sort_p = 1;
8804 return 1;
8808 skip_cycles = 0;
8810 return 0;
8813 /* SHmedia requires registers for branches, so we can't generate new
8814 branches past reload. */
8815 static bool
8816 sh_cannot_modify_jumps_p (void)
8818 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
8821 static int
8822 sh_target_reg_class (void)
8824 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
8827 static bool
8828 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
8830 return (shmedia_space_reserved_for_target_registers
8831 && (! after_prologue_epilogue_gen || TARGET_SAVE_ALL_TARGET_REGS));
8834 static bool
8835 sh_ms_bitfield_layout_p (tree record_type ATTRIBUTE_UNUSED)
8837 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
8841 On the SH1..SH4, the trampoline looks like
8842 2 0002 D202 mov.l l2,r2
8843 1 0000 D301 mov.l l1,r3
8844 3 0004 422B jmp @r2
8845 4 0006 0009 nop
8846 5 0008 00000000 l1: .long area
8847 6 000c 00000000 l2: .long function
8849 SH5 (compact) uses r1 instead of r3 for the static chain. */
8852 /* Emit RTL insns to initialize the variable parts of a trampoline.
8853 FNADDR is an RTX for the address of the function's pure code.
8854 CXT is an RTX for the static chain value for the function. */
8856 void
8857 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
8859 if (TARGET_SHMEDIA64)
8861 rtx tramp_templ;
8862 int fixed_len;
8864 rtx movi1 = GEN_INT (0xcc000010);
8865 rtx shori1 = GEN_INT (0xc8000010);
8866 rtx src, dst;
8868 /* The following trampoline works within a +- 128 KB range for cxt:
8869 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
8870 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
8871 gettr tr1,r1; blink tr0,r63 */
8872 /* Address rounding makes it hard to compute the exact bounds of the
8873 offset for this trampoline, but we have a rather generous offset
8874 range, so frame_offset should do fine as an upper bound. */
8875 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
8877 /* ??? could optimize this trampoline initialization
8878 by writing DImode words with two insns each. */
8879 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
8880 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
8881 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
8882 insn = gen_rtx_AND (DImode, insn, mask);
8883 /* Or in ptb/u .,tr1 pattern */
8884 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
8885 insn = force_operand (insn, NULL_RTX);
8886 insn = gen_lowpart (SImode, insn);
8887 emit_move_insn (gen_rtx_MEM (SImode, tramp), insn);
8888 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
8889 insn = gen_rtx_AND (DImode, insn, mask);
8890 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
8891 insn = gen_lowpart (SImode, insn);
8892 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)), insn);
8893 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
8894 insn = gen_rtx_AND (DImode, insn, mask);
8895 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
8896 insn = gen_lowpart (SImode, insn);
8897 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)), insn);
8898 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
8899 insn = gen_rtx_AND (DImode, insn, mask);
8900 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
8901 insn = gen_lowpart (SImode, insn);
8902 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
8903 insn);
8904 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
8905 insn = gen_rtx_AND (DImode, insn, mask);
8906 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
8907 insn = gen_lowpart (SImode, insn);
8908 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 16)),
8909 insn);
8910 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 20)),
8911 GEN_INT (0x6bf10600));
8912 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 24)),
8913 GEN_INT (0x4415fc10));
8914 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 28)),
8915 GEN_INT (0x4401fff0));
8916 emit_insn (gen_ic_invalidate_line (tramp));
8917 return;
8919 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
8920 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
8922 tramp_templ = gen_datalabel_ref (tramp_templ);
8923 dst = gen_rtx_MEM (BLKmode, tramp);
8924 src = gen_rtx_MEM (BLKmode, tramp_templ);
8925 set_mem_align (dst, 256);
8926 set_mem_align (src, 64);
8927 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
8929 emit_move_insn (gen_rtx_MEM (Pmode, plus_constant (tramp, fixed_len)),
8930 fnaddr);
8931 emit_move_insn (gen_rtx_MEM (Pmode,
8932 plus_constant (tramp,
8933 fixed_len
8934 + GET_MODE_SIZE (Pmode))),
8935 cxt);
8936 emit_insn (gen_ic_invalidate_line (tramp));
8937 return;
8939 else if (TARGET_SHMEDIA)
8941 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
8942 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
8943 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
8944 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
8945 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
8946 rotated 10 right, and the higher 16 bits of every 32 selected. */
8947 rtx movishori
8948 = force_reg (V2HImode, (simplify_gen_subreg
8949 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
8950 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
8951 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
8953 tramp = force_reg (Pmode, tramp);
8954 fnaddr = force_reg (SImode, fnaddr);
8955 cxt = force_reg (SImode, cxt);
8956 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
8957 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
8958 movishori));
8959 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
8960 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
8961 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
8962 emit_move_insn (gen_rtx_MEM (DImode, tramp), quad0);
8963 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
8964 gen_rtx_SUBREG (V2HImode, cxt, 0),
8965 movishori));
8966 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
8967 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
8968 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
8969 if (TARGET_LITTLE_ENDIAN)
8971 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
8972 emit_insn (gen_mextr4 (quad2, cxtload, blink));
8974 else
8976 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
8977 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
8979 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 8)), quad1);
8980 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), quad2);
8981 emit_insn (gen_ic_invalidate_line (tramp));
8982 return;
8984 else if (TARGET_SHCOMPACT)
8986 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
8987 return;
8989 emit_move_insn (gen_rtx_MEM (SImode, tramp),
8990 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
8991 SImode));
8992 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
8993 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
8994 SImode));
8995 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
8996 cxt);
8997 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
8998 fnaddr);
8999 if (TARGET_HARVARD)
9001 if (TARGET_USERMODE)
9002 emit_library_call (function_symbol ("__ic_invalidate"),
9003 0, VOIDmode, 1, tramp, SImode);
9004 else
9005 emit_insn (gen_ic_invalidate_line (tramp));
9009 /* FIXME: This is overly conservative. A SHcompact function that
9010 receives arguments ``by reference'' will have them stored in its
9011 own stack frame, so it must not pass pointers or references to
9012 these arguments to other functions by means of sibling calls. */
9013 static bool
9014 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9016 return (decl
9017 && (! TARGET_SHCOMPACT
9018 || current_function_args_info.stack_regs == 0)
9019 && ! sh_cfun_interrupt_handler_p ());
9022 /* Machine specific built-in functions. */
9024 struct builtin_description
9026 const enum insn_code icode;
9027 const char *const name;
9028 int signature;
9031 /* Describe number and signedness of arguments; arg[0] == result
9032 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
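/* For example, the SH_BLTIN_SH_HI entry { 4, 4, 1 } below describes builtins
   such as __builtin_sh_media_MPERM_W: the result and the first argument get
   their type from the insn pattern (signedness "don't care"), while the last
   argument (the control or shift value) is unsigned, matching the note about
   mperm further down.  A leading 0, as in SH_BLTIN_PV, means the builtin
   returns void. */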
9033 static const char signature_args[][4] =
9035 #define SH_BLTIN_V2SI2 0
9036 { 4, 4 },
9037 #define SH_BLTIN_V4HI2 1
9038 { 4, 4 },
9039 #define SH_BLTIN_V2SI3 2
9040 { 4, 4, 4 },
9041 #define SH_BLTIN_V4HI3 3
9042 { 4, 4, 4 },
9043 #define SH_BLTIN_V8QI3 4
9044 { 4, 4, 4 },
9045 #define SH_BLTIN_MAC_HISI 5
9046 { 1, 4, 4, 1 },
9047 #define SH_BLTIN_SH_HI 6
9048 { 4, 4, 1 },
9049 #define SH_BLTIN_SH_SI 7
9050 { 4, 4, 1 },
9051 #define SH_BLTIN_V4HI2V2SI 8
9052 { 4, 4, 4 },
9053 #define SH_BLTIN_V4HI2V8QI 9
9054 { 4, 4, 4 },
9055 #define SH_BLTIN_SISF 10
9056 { 4, 2 },
9057 #define SH_BLTIN_LDUA_L 11
9058 { 2, 8 },
9059 #define SH_BLTIN_LDUA_Q 12
9060 { 1, 8 },
9061 #define SH_BLTIN_STUA_L 13
9062 { 0, 8, 2 },
9063 #define SH_BLTIN_STUA_Q 14
9064 { 0, 8, 1 },
9065 #define SH_BLTIN_UDI 15
9066 { 0, 8, 1 },
9067 #define SH_BLTIN_NUM_SHARED_SIGNATURES 16
9068 #define SH_BLTIN_2 16
9069 #define SH_BLTIN_SU 16
9070 { 1, 2 },
9071 #define SH_BLTIN_3 17
9072 #define SH_BLTIN_SUS 17
9073 { 2, 2, 1 },
9074 #define SH_BLTIN_PSSV 18
9075 { 0, 8, 2, 2 },
9076 #define SH_BLTIN_XXUU 19
9077 #define SH_BLTIN_UUUU 19
9078 { 1, 1, 1, 1 },
9079 #define SH_BLTIN_PV 20
9080 { 0, 8 },
9082 /* mcmv: operands considered unsigned. */
9083 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9084 /* mperm: control value considered unsigned int. */
9085 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9086 /* mshards_q: returns signed short. */
9087 /* nsb: takes long long arg, returns unsigned char. */
9088 static const struct builtin_description bdesc[] =
9090 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
9091 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
9092 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
9093 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
9094 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
9095 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
9096 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
9097 #if 0
9098 { CODE_FOR_alloco32, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9099 { CODE_FOR_alloco64, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9100 #endif
9101 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
9102 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
9103 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
9104 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
9105 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
9106 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
9107 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
9108 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
9109 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
9110 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
9111 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_UDI },
9112 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_UDI },
9113 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_UDI },
9114 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_UDI },
9115 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_UDI },
9116 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_UDI },
9117 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_UDI },
9118 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
9119 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
9120 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
9121 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
9122 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
9123 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
9124 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
9125 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
9126 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
9127 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
9128 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
9129 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
9130 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
9131 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
9132 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
9133 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
9134 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
9135 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
9136 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
9137 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
9138 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
9139 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
9140 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
9141 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
9142 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
9143 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
9144 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
9145 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
9146 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
9147 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
9148 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
9149 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
9150 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
9151 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
9152 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
9153 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
9154 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
9155 #if 0
9156 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9157 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9158 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9159 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9160 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9161 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9162 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9163 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9164 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9165 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9166 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9167 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9168 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9169 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9170 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9171 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9172 #endif
9173 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
9174 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
9175 #if 0
9176 { CODE_FOR_prefetch32,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
9177 { CODE_FOR_prefetch64,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV }
9178 #endif
9181 static void
9182 sh_media_init_builtins (void)
9184 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
9185 const struct builtin_description *d;
9187 memset (shared, 0, sizeof shared);
9188 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
9190 tree type, arg_type;
9191 int signature = d->signature;
9192 int i;
9194 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
9195 type = shared[signature];
9196 else
9198 int has_result = signature_args[signature][0] != 0;
9200 if (signature_args[signature][1] == 8
9201 && (insn_data[d->icode].operand[has_result].mode != Pmode))
9202 continue;
9203 if (! TARGET_FPU_ANY
9204 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
9205 continue;
9206 type = void_list_node;
9207 for (i = 3; ; i--)
9209 int arg = signature_args[signature][i];
9210 int opno = i - 1 + has_result;
9212 if (arg == 8)
9213 arg_type = ptr_type_node;
9214 else if (arg)
9215 arg_type = ((*lang_hooks.types.type_for_mode)
9216 (insn_data[d->icode].operand[opno].mode,
9217 (arg & 1)));
9218 else if (i)
9219 continue;
9220 else
9221 arg_type = void_type_node;
9222 if (i == 0)
9223 break;
9224 type = tree_cons (NULL_TREE, arg_type, type);
9226 type = build_function_type (arg_type, type);
9227 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
9228 shared[signature] = type;
9230 builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
9231 NULL, NULL_TREE);
9235 static void
9236 sh_init_builtins (void)
9238 if (TARGET_SHMEDIA)
9239 sh_media_init_builtins ();
9242 /* Expand an expression EXP that calls a built-in function,
9243 with result going to TARGET if that's convenient
9244 (and in mode MODE if that's convenient).
9245 SUBTARGET may be used as the target for computing one of EXP's operands.
9246 IGNORE is nonzero if the value is to be ignored. */
9248 static rtx
9249 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9250 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
9252 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
9253 tree arglist = TREE_OPERAND (exp, 1);
9254 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9255 const struct builtin_description *d = &bdesc[fcode];
9256 enum insn_code icode = d->icode;
9257 int signature = d->signature;
9258 enum machine_mode tmode = VOIDmode;
9259 int nop = 0, i;
9260 rtx op[4];
9261 rtx pat;
9263 if (signature_args[signature][0])
9265 if (ignore)
9266 return 0;
9268 tmode = insn_data[icode].operand[0].mode;
9269 if (! target
9270 || GET_MODE (target) != tmode
9271 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9272 target = gen_reg_rtx (tmode);
9273 op[nop++] = target;
9275 else
9276 target = 0;
9278 for (i = 1; i <= 3; i++, nop++)
9280 tree arg;
9281 enum machine_mode opmode, argmode;
9283 if (! signature_args[signature][i])
9284 break;
9285 arg = TREE_VALUE (arglist);
9286 if (arg == error_mark_node)
9287 return const0_rtx;
9288 arglist = TREE_CHAIN (arglist);
9289 opmode = insn_data[icode].operand[nop].mode;
9290 argmode = TYPE_MODE (TREE_TYPE (arg));
9291 if (argmode != opmode)
9292 arg = build1 (NOP_EXPR,
9293 (*lang_hooks.types.type_for_mode) (opmode, 0), arg);
9294 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
9295 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
9296 op[nop] = copy_to_mode_reg (opmode, op[nop]);
9299 switch (nop)
9301 case 1:
9302 pat = (*insn_data[d->icode].genfun) (op[0]);
9303 break;
9304 case 2:
9305 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
9306 break;
9307 case 3:
9308 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
9309 break;
9310 case 4:
9311 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
9312 break;
9313 default:
9314 abort ();
9316 if (! pat)
9317 return 0;
9318 emit_insn (pat);
9319 return target;
9322 void
9323 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
9325 rtx sel0 = const0_rtx;
9326 rtx sel1 = const1_rtx;
9327 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
9328 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
9330 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
9331 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
9334 void
9335 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
9337 rtx sel0 = const0_rtx;
9338 rtx sel1 = const1_rtx;
9339 rtx (*fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
9340 = gen_binary_sf_op;
9341 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
9343 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
9344 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
9347 /* Return the class of registers for which a mode change from FROM to TO
9348 is invalid. */
9349 bool
9350 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9351 enum reg_class class)
9353 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
9355 if (TARGET_LITTLE_ENDIAN)
9357 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
9358 return reg_classes_intersect_p (DF_REGS, class);
9360 else
9362 if (GET_MODE_SIZE (from) < 8)
9363 return reg_classes_intersect_p (DF_HI_REGS, class);
9366 return 0;
9370 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
9371 that label is used. */
9373 void
9374 sh_mark_label (rtx address, int nuses)
9376 if (GOTOFF_P (address))
9378 /* Extract the label or symbol. */
9379 address = XEXP (address, 0);
9380 if (GET_CODE (address) == PLUS)
9381 address = XEXP (address, 0);
9382 address = XVECEXP (address, 0, 0);
9384 if (GET_CODE (address) == LABEL_REF
9385 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
9386 LABEL_NUSES (XEXP (address, 0)) += nuses;
9389 /* Compute extra cost of moving data between one register class
9390 and another. */
9392 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
9393 uses this information. Hence, the general register <-> floating point
9394 register information here is not used for SFmode. */
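/* Some sample values implied by the code below, as a rough guide: a move
   within MAC_REGS costs 4; a DFmode move between a general register and a
   floating point register costs 12 on SH4 without -mfmovd and 8 with it;
   a move between a general register and FPUL costs 5; a move to T_REGS or
   PR_REGS costs 10. */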
9397 sh_register_move_cost (enum machine_mode mode,
9398 enum reg_class srcclass, enum reg_class dstclass)
9400 if (dstclass == T_REGS || dstclass == PR_REGS)
9401 return 10;
9403 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
9404 return 4;
9406 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
9407 && REGCLASS_HAS_FP_REG (srcclass)
9408 && REGCLASS_HAS_FP_REG (dstclass))
9409 return 4;
9411 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
9412 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
9413 return 9;
9415 if ((REGCLASS_HAS_FP_REG (dstclass)
9416 && REGCLASS_HAS_GENERAL_REG (srcclass))
9417 || (REGCLASS_HAS_GENERAL_REG (dstclass)
9418 && REGCLASS_HAS_FP_REG (srcclass)))
9419 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
9420 * ((GET_MODE_SIZE (mode) + 7) / 8U));
9422 if ((dstclass == FPUL_REGS
9423 && REGCLASS_HAS_GENERAL_REG (srcclass))
9424 || (srcclass == FPUL_REGS
9425 && REGCLASS_HAS_GENERAL_REG (dstclass)))
9426 return 5;
9428 if ((dstclass == FPUL_REGS
9429 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
9430 || (srcclass == FPUL_REGS
9431 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
9432 return 7;
9434 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9435 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9436 return 20;
9438 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9439 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9440 return 4;
9442 if (TARGET_SHMEDIA
9443 || (TARGET_FMOVD
9444 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
9445 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
9446 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
9448 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
9451 /* Like register_operand, but take into account that SHMEDIA can use
9452 the constant zero like a general register. */
9454 sh_register_operand (rtx op, enum machine_mode mode)
9456 if (op == CONST0_RTX (mode) && TARGET_SHMEDIA)
9457 return 1;
9458 return register_operand (op, mode);
9462 cmpsi_operand (rtx op, enum machine_mode mode)
9464 if (GET_CODE (op) == REG && REGNO (op) == T_REG
9465 && GET_MODE (op) == SImode)
9466 return 1;
9467 return arith_operand (op, mode);
9470 static rtx emit_load_ptr (rtx, rtx);
9472 static rtx
9473 emit_load_ptr (rtx reg, rtx addr)
9475 rtx mem = gen_rtx_MEM (ptr_mode, addr);
9477 if (Pmode != ptr_mode)
9478 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
9479 return emit_move_insn (reg, mem);
9482 void
9483 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
9484 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
9485 tree function)
9487 CUMULATIVE_ARGS cum;
9488 int structure_value_byref = 0;
9489 rtx this, this_value, sibcall, insns, funexp;
9490 tree funtype = TREE_TYPE (function);
9491 int simple_add = CONST_OK_FOR_ADD (delta);
9492 int did_load = 0;
9493 rtx scratch0, scratch1, scratch2;
9495 reload_completed = 1;
9496 epilogue_completed = 1;
9497 no_new_pseudos = 1;
9498 current_function_uses_only_leaf_regs = 1;
9499 reset_block_changes ();
9501 emit_note (NOTE_INSN_PROLOGUE_END);
9503 /* Find the "this" pointer. We have such a wide range of ABIs for the
9504 SH that it's best to do this completely machine independently.
9505 "this" is passed as first argument, unless a structure return pointer
9506 comes first, in which case "this" comes second. */
9507 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
9508 #ifndef PCC_STATIC_STRUCT_RETURN
9509 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
9510 structure_value_byref = 1;
9511 #endif /* not PCC_STATIC_STRUCT_RETURN */
9512 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
9514 tree ptype = build_pointer_type (TREE_TYPE (funtype));
9516 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
9518 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
9520 /* For SHcompact, we only have r0 for a scratch register: r1 is the
9521 static chain pointer (even if you can't have nested virtual functions
9522 right now, someone might implement them sometime), and the rest of the
9523 registers are used for argument passing, are callee-saved, or reserved. */
9524 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
9525 if (! TARGET_SH5)
9527 scratch1 = gen_rtx_REG (ptr_mode, 1);
9528 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
9529 to the location where struct values are returned. */
9530 scratch2 = gen_rtx_REG (Pmode, 3);
9532 else if (TARGET_SHMEDIA)
9534 scratch1 = gen_rtx_REG (ptr_mode, 21);
9535 scratch2 = gen_rtx_REG (Pmode, TR0_REG);
9538 this_value = plus_constant (this, delta);
9539 if (vcall_offset
9540 && (simple_add || scratch0 != scratch1)
9541 && strict_memory_address_p (ptr_mode, this_value))
9543 emit_load_ptr (scratch0, this_value);
9544 did_load = 1;
9547 if (!delta)
9548 ; /* Do nothing. */
9549 else if (simple_add)
9550 emit_move_insn (this, this_value);
9551 else
9553 emit_move_insn (scratch1, GEN_INT (delta));
9554 emit_insn (gen_add2_insn (this, scratch1));
9557 if (vcall_offset)
9559 rtx offset_addr;
9561 if (!did_load)
9562 emit_load_ptr (scratch0, this);
9564 offset_addr = plus_constant (scratch0, vcall_offset);
9565 if (strict_memory_address_p (ptr_mode, offset_addr))
9566 ; /* Do nothing. */
9567 else if (! TARGET_SH5)
9569 /* scratch0 != scratch1, and we have indexed loads. Get a better
9570 schedule by loading the offset into r1 and using an indexed
9571 load - then the load of r1 can issue before the load from
9572 (this + delta) finishes. */
9573 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9574 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
9576 else if (CONST_OK_FOR_ADD (vcall_offset))
9578 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
9579 offset_addr = scratch0;
9581 else if (scratch0 != scratch1)
9583 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9584 emit_insn (gen_add2_insn (scratch0, scratch1));
9585 offset_addr = scratch0;
9587 else
9588 abort (); /* FIXME */
9589 emit_load_ptr (scratch0, offset_addr);
9591 if (Pmode != ptr_mode)
9592 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
9593 emit_insn (gen_add2_insn (this, scratch0));
9596 /* Generate a tail call to the target function. */
9597 if (! TREE_USED (function))
9599 assemble_external (function);
9600 TREE_USED (function) = 1;
9602 funexp = XEXP (DECL_RTL (function), 0);
9603 emit_move_insn (scratch2, funexp);
9604 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
9605 sibcall = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
9606 SIBLING_CALL_P (sibcall) = 1;
9607 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
9608 emit_barrier ();
9610 /* Run just enough of rest_of_compilation to do scheduling and get
9611 the insns emitted. Note that use_thunk calls
9612 assemble_start_function and assemble_end_function. */
9614 insn_locators_initialize ();
9615 insns = get_insns ();
9617 if (optimize > 0 && flag_schedule_insns_after_reload)
9619 find_basic_blocks (insns, max_reg_num (), dump_file);
9620 life_analysis (dump_file, PROP_FINAL);
9622 split_all_insns (1);
9624 schedule_insns (dump_file);
9627 sh_reorg ();
9629 if (optimize > 0 && flag_delayed_branch)
9630 dbr_schedule (insns, dump_file);
9631 shorten_branches (insns);
9632 final_start_function (insns, file, 1);
9633 final (insns, file, 1, 0);
9634 final_end_function ();
9636 if (optimize > 0 && flag_schedule_insns_after_reload)
9638 /* Release all memory allocated by flow. */
9639 free_basic_block_vars ();
9641 /* Release all memory held by regsets now. */
9642 regset_release_memory ();
9645 reload_completed = 0;
9646 epilogue_completed = 0;
9647 no_new_pseudos = 0;
9651 function_symbol (const char *name)
9653 rtx sym = gen_rtx_SYMBOL_REF (Pmode, name);
9654 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
9655 return sym;
9658 /* Find the number of a general purpose register in S. */
9659 static int
9660 scavenge_reg (HARD_REG_SET *s)
9662 int r;
9663 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
9664 if (TEST_HARD_REG_BIT (*s, r))
9665 return r;
9666 return -1;
9670 sh_get_pr_initial_val (void)
9672 rtx val;
9674 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
9675 PR register on SHcompact, because it might be clobbered by the prologue.
9676 We check first if that is known to be the case. */
9677 if (TARGET_SHCOMPACT
9678 && ((current_function_args_info.call_cookie
9679 & ~ CALL_COOKIE_RET_TRAMP (1))
9680 || current_function_has_nonlocal_label))
9681 return gen_rtx_MEM (SImode, return_address_pointer_rtx);
9683 /* If we haven't finished rtl generation, there might be a nonlocal label
9684 that we haven't seen yet.
9685 ??? get_hard_reg_initial_val fails if it is called while no_new_pseudos
9686 is set, unless it has been called before for the same register. And even
9687 then, we end up in trouble if we didn't use the register in the same
9688 basic block before. So call get_hard_reg_initial_val now and wrap it
9689 in an unspec if we might need to replace it. */
9690 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
9691 combine can put the pseudo returned by get_hard_reg_initial_val into
9692 instructions that need a general purpose register, which will fail to
9693 be recognized when the pseudo becomes allocated to PR. */
9695 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
9696 if (TARGET_SH1)
9697 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
9698 return val;
9702 sh_expand_t_scc (enum rtx_code code, rtx target)
9704 rtx result = target;
9705 HOST_WIDE_INT val;
9707 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
9708 || GET_CODE (sh_compare_op1) != CONST_INT)
9709 return 0;
9710 if (GET_CODE (result) != REG)
9711 result = gen_reg_rtx (SImode);
9712 val = INTVAL (sh_compare_op1);
9713 if ((code == EQ && val == 1) || (code == NE && val == 0))
9714 emit_insn (gen_movt (result));
9715 else if ((code == EQ && val == 0) || (code == NE && val == 1))
9717 emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
9718 emit_insn (gen_subc (result, result, result));
9719 emit_insn (gen_addsi3 (result, result, const1_rtx));
9721 else if (code == EQ || code == NE)
9722 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
9723 else
9724 return 0;
9725 if (result != target)
9726 emit_move_insn (target, result);
9727 return 1;
9730 /* INSN is an sfunc; return the rtx that describes the address used. */
9731 static rtx
9732 extract_sfunc_addr (rtx insn)
9734 rtx pattern, part = NULL_RTX;
9735 int len, i;
9737 pattern = PATTERN (insn);
9738 len = XVECLEN (pattern, 0);
9739 for (i = 0; i < len; i++)
9741 part = XVECEXP (pattern, 0, i);
9742 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
9743 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
9744 return XEXP (part, 0);
9746 if (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE)
9747 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
9748 abort ();
9751 /* Verify that the register in use_sfunc_addr still agrees with the address
9752 used in the sfunc. This prevents fill_slots_from_thread from changing
9753 use_sfunc_addr.
9754 INSN is the use_sfunc_addr instruction, and REG is the register it
9755 guards. */
9757 check_use_sfunc_addr (rtx insn, rtx reg)
9759 /* Search for the sfunc. It should really come right after INSN. */
9760 while ((insn = NEXT_INSN (insn)))
9762 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
9763 break;
9764 if (! INSN_P (insn))
9765 continue;
9767 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
9768 insn = XVECEXP (PATTERN (insn), 0, 0);
9769 if (GET_CODE (PATTERN (insn)) != PARALLEL
9770 || get_attr_type (insn) != TYPE_SFUNC)
9771 continue;
9772 return rtx_equal_p (extract_sfunc_addr (insn), reg);
9774 abort ();
9777 #include "gt-sh.h"