Introduce sh2a support.
[official-gcc.git] / gcc / config / sh / sh.c
1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "insn-config.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "flags.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "function.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "output.h"
38 #include "insn-attr.h"
39 #include "toplev.h"
40 #include "recog.h"
41 #include "c-pragma.h"
42 #include "integrate.h"
43 #include "tm_p.h"
44 #include "target.h"
45 #include "target-def.h"
46 #include "real.h"
47 #include "langhooks.h"
48 #include "basic-block.h"
49 #include "ra.h"
50 #include "cfglayout.h"
51 #include "intl.h"
52 #include "sched-int.h"
53 #include "ggc.h"
54 #include "tree-gimple.h"
57 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
59 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
60 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
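/* Illustrative example (not from the original source): for a DImode value
   held in the register pair r4/r5 on a little-endian target, REGNO + LSW
   selects r4 and REGNO + MSW selects r5; on big-endian the roles swap.
   See the 'R'/'S' cases in print_operand below.  */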
62 /* These are some macros to abstract register modes. */
63 #define CONST_OK_FOR_ADD(size) \
64 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
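/* Rough illustration, assuming the usual meaning of the constraint names
   (the digits give the number of signed bits): CONST_OK_FOR_I08 accepts the
   8-bit range of the SH "add #imm,Rn" immediate, while CONST_OK_FOR_I10
   accepts the wider 10-bit SHmedia ADDI range, so e.g. 300 is only directly
   addable on SHmedia.  */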
65 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
66 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
67 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
69 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
70 int current_function_interrupt;
72 /* ??? The pragma interrupt support will not work for SH3. */
73 /* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
74 output code for the next function appropriate for an interrupt handler. */
75 int pragma_interrupt;
77 /* This is set by the trap_exit attribute for functions. It specifies
78 a trap number to be used in a trapa instruction at function exit
79 (instead of an rte instruction). */
80 int trap_exit;
82 /* This is used by the sp_switch attribute for functions. It specifies
83 a variable holding the address of the stack the interrupt function
84 should switch to/from at entry/exit. */
85 rtx sp_switch;
87 /* This is set by #pragma trapa, and is similar to the above, except that
88 the compiler doesn't emit code to preserve all registers. */
89 static int pragma_trapa;
91 /* This is set by #pragma nosave_low_regs. This is useful on the SH3,
92 which has a separate set of low regs for User and Supervisor modes.
93 This should only be used for the lowest level of interrupts. Higher levels
94 of interrupts must save the registers in case they themselves are
95 interrupted. */
96 int pragma_nosave_low_regs;
98 /* This is used for communication between TARGET_SETUP_INCOMING_VARARGS and
99 sh_expand_prologue. */
100 int current_function_anonymous_args;
102 /* Global variables for machine-dependent things. */
104 /* Which cpu are we scheduling for.  */
105 enum processor_type sh_cpu;
107 /* Definitions used in ready queue reordering for first scheduling pass. */
109 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
110 static short *regmode_weight[2];
112 /* Total SFmode and SImode weights of scheduled insns. */
113 static int curr_regmode_pressure[2];
115 /* If true, skip cycles for Q -> R movement. */
116 static int skip_cycles = 0;
118 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
119 and returned from sh_reorder2. */
120 static short cached_can_issue_more;
122 /* Saved operands from the last compare to use when we generate an scc
123 or bcc insn. */
125 rtx sh_compare_op0;
126 rtx sh_compare_op1;
128 /* Provides the class number of the smallest class containing
129 reg number. */
131 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
133 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
137 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
138 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
139 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
140 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
141 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
142 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
143 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
144 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
145 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
146 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
147 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
148 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
149 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
153 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
154 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
155 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
156 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
157 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
158 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
159 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
160 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
161 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
162 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
163 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
164 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
165 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
166 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
167 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
168 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
169 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
170 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
171 GENERAL_REGS,
174 char sh_register_names[FIRST_PSEUDO_REGISTER] \
175 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
177 char sh_additional_register_names[ADDREGNAMES_SIZE] \
178 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
179 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
181 /* Provide reg_class from a letter such as appears in the machine
182 description. *: target independently reserved letter.
183 reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */
185 enum reg_class reg_class_from_letter[] =
187 /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
188 /* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
189 /* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
190 /* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
191 /* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
192 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
193 /* y */ FPUL_REGS, /* z */ R0_REGS
196 int assembler_dialect;
198 static bool shmedia_space_reserved_for_target_registers;
200 static void split_branches (rtx);
201 static int branch_dest (rtx);
202 static void force_into (rtx, rtx);
203 static void print_slot (rtx);
204 static rtx add_constant (rtx, enum machine_mode, rtx);
205 static void dump_table (rtx, rtx);
206 static int hi_const (rtx);
207 static int broken_move (rtx);
208 static int mova_p (rtx);
209 static rtx find_barrier (int, rtx, rtx);
210 static int noncall_uses_reg (rtx, rtx, rtx *);
211 static rtx gen_block_redirect (rtx, int, int);
212 static void sh_reorg (void);
213 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
214 static rtx frame_insn (rtx);
215 static rtx push (int);
216 static void pop (int);
217 static void push_regs (HARD_REG_SET *, int);
218 static int calc_live_regs (HARD_REG_SET *);
219 static void mark_use (rtx, rtx *);
220 static HOST_WIDE_INT rounded_frame_size (int);
221 static rtx mark_constant_pool_use (rtx);
222 const struct attribute_spec sh_attribute_table[];
223 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
224 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
225 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
226 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
227 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
228 static void sh_insert_attributes (tree, tree *);
229 static int sh_adjust_cost (rtx, rtx, rtx, int);
230 static int sh_issue_rate (void);
231 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
232 static short find_set_regmode_weight (rtx, enum machine_mode);
233 static short find_insn_regmode_weight (rtx, enum machine_mode);
234 static void find_regmode_weight (int, enum machine_mode);
235 static void sh_md_init_global (FILE *, int, int);
236 static void sh_md_finish_global (FILE *, int);
237 static int rank_for_reorder (const void *, const void *);
238 static void swap_reorder (rtx *, int);
239 static void ready_reorder (rtx *, int);
240 static short high_pressure (enum machine_mode);
241 static int sh_reorder (FILE *, int, rtx *, int *, int);
242 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
243 static void sh_md_init (FILE *, int, int);
244 static int sh_variable_issue (FILE *, int, rtx, int);
246 static bool sh_function_ok_for_sibcall (tree, tree);
248 static bool sh_cannot_modify_jumps_p (void);
249 static int sh_target_reg_class (void);
250 static bool sh_optimize_target_register_callee_saved (bool);
251 static bool sh_ms_bitfield_layout_p (tree);
253 static void sh_init_builtins (void);
254 static void sh_media_init_builtins (void);
255 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
256 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
257 static void sh_file_start (void);
258 static int flow_dependent_p (rtx, rtx);
259 static void flow_dependent_p_1 (rtx, rtx, void *);
260 static int shiftcosts (rtx);
261 static int andcosts (rtx);
262 static int addsubcosts (rtx);
263 static int multcosts (rtx);
264 static bool unspec_caller_rtx_p (rtx);
265 static bool sh_cannot_copy_insn_p (rtx);
266 static bool sh_rtx_costs (rtx, int, int, int *);
267 static int sh_address_cost (rtx);
268 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
269 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
270 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
271 static int scavenge_reg (HARD_REG_SET *s);
272 struct save_schedule_s;
273 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
274 struct save_schedule_s *, int);
276 static rtx sh_struct_value_rtx (tree, int);
277 static bool sh_return_in_memory (tree, tree);
278 static rtx sh_builtin_saveregs (void);
279 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
280 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
281 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
282 static tree sh_build_builtin_va_list (void);
283 static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
284 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
285 tree, bool);
288 /* Initialize the GCC target structure. */
289 #undef TARGET_ATTRIBUTE_TABLE
290 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
292 /* The next two are used for debug info when compiling with -gdwarf. */
293 #undef TARGET_ASM_UNALIGNED_HI_OP
294 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
295 #undef TARGET_ASM_UNALIGNED_SI_OP
296 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
298 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
299 #undef TARGET_ASM_UNALIGNED_DI_OP
300 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
301 #undef TARGET_ASM_ALIGNED_DI_OP
302 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
304 #undef TARGET_ASM_FUNCTION_EPILOGUE
305 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
307 #undef TARGET_ASM_OUTPUT_MI_THUNK
308 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
310 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
311 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
313 #undef TARGET_ASM_FILE_START
314 #define TARGET_ASM_FILE_START sh_file_start
315 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
316 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
318 #undef TARGET_INSERT_ATTRIBUTES
319 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
321 #undef TARGET_SCHED_ADJUST_COST
322 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
324 #undef TARGET_SCHED_ISSUE_RATE
325 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
327 /* The next 5 hooks have been implemented for reenabling sched1. With the
328 help of these macros we are limiting the movement of insns in sched1 to
329 reduce the register pressure. The overall idea is to keep count of SImode
330 and SFmode regs required by already scheduled insns. When these counts
331 cross some threshold values, give priority to insns that free registers.
332 The insn that frees registers is most likely to be the insn with lowest
333 LUID (original insn order); but such an insn might be there in the stalled
334 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
335 up to a max of 8 cycles so that such insns may move from Q -> R.
337 The descriptions of the hooks are as below:
339 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
340 scheduler; it is called inside the sched_init function just after
341 find_insn_reg_weights function call. It is used to calculate the SImode
342 and SFmode weights of insns of basic blocks, much like what
343 find_insn_reg_weights does.
344 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
346 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
347 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
348 (Q)->(R).
350 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
351 high, reorder the ready queue so that the insn with the lowest LUID will be
352 issued next.
354 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
355 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
357 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
358 can be returned from TARGET_SCHED_REORDER2.
360 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
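/* A rough sketch of the intended interaction, for illustration only:
   suppose the SImode pressure threshold is crossed while scheduling a
   block.  sh_reorder then sorts the ready queue by LUID, sh_reorder2
   notices that pressure is still high and requests cycle skipping, and
   sh_dfa_new_cycle then burns up to 8 empty cycles so that the
   register-freeing insn can migrate from the stalled queue (Q) into the
   ready queue (R).  */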
362 #undef TARGET_SCHED_DFA_NEW_CYCLE
363 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
365 #undef TARGET_SCHED_INIT_GLOBAL
366 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
368 #undef TARGET_SCHED_FINISH_GLOBAL
369 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
371 #undef TARGET_SCHED_VARIABLE_ISSUE
372 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
374 #undef TARGET_SCHED_REORDER
375 #define TARGET_SCHED_REORDER sh_reorder
377 #undef TARGET_SCHED_REORDER2
378 #define TARGET_SCHED_REORDER2 sh_reorder2
380 #undef TARGET_SCHED_INIT
381 #define TARGET_SCHED_INIT sh_md_init
383 #undef TARGET_CANNOT_MODIFY_JUMPS_P
384 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
385 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
386 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
387 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
388 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
389 sh_optimize_target_register_callee_saved
391 #undef TARGET_MS_BITFIELD_LAYOUT_P
392 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
394 #undef TARGET_INIT_BUILTINS
395 #define TARGET_INIT_BUILTINS sh_init_builtins
396 #undef TARGET_EXPAND_BUILTIN
397 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
399 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
400 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
402 #undef TARGET_CANNOT_COPY_INSN_P
403 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
404 #undef TARGET_RTX_COSTS
405 #define TARGET_RTX_COSTS sh_rtx_costs
406 #undef TARGET_ADDRESS_COST
407 #define TARGET_ADDRESS_COST sh_address_cost
409 #undef TARGET_MACHINE_DEPENDENT_REORG
410 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
412 #ifdef HAVE_AS_TLS
413 #undef TARGET_HAVE_TLS
414 #define TARGET_HAVE_TLS true
415 #endif
417 #undef TARGET_PROMOTE_PROTOTYPES
418 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
419 #undef TARGET_PROMOTE_FUNCTION_ARGS
420 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
421 #undef TARGET_PROMOTE_FUNCTION_RETURN
422 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
424 #undef TARGET_STRUCT_VALUE_RTX
425 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
426 #undef TARGET_RETURN_IN_MEMORY
427 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
429 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
430 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
431 #undef TARGET_SETUP_INCOMING_VARARGS
432 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
433 #undef TARGET_STRICT_ARGUMENT_NAMING
434 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
435 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
436 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
437 #undef TARGET_MUST_PASS_IN_STACK
438 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
439 #undef TARGET_PASS_BY_REFERENCE
440 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
442 #undef TARGET_BUILD_BUILTIN_VA_LIST
443 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
444 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
445 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
447 #undef TARGET_PCH_VALID_P
448 #define TARGET_PCH_VALID_P sh_pch_valid_p
450 /* Return regmode weight for insn. */
451 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
453 /* Return current register pressure for regmode. */
454 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
456 #ifdef SYMBIAN
458 #undef TARGET_ENCODE_SECTION_INFO
459 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
460 #undef TARGET_STRIP_NAME_ENCODING
461 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
462 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
463 #define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class
465 #endif /* SYMBIAN */
467 struct gcc_target targetm = TARGET_INITIALIZER;
469 /* Print the operand address in x to the stream. */
471 void
472 print_operand_address (FILE *stream, rtx x)
474 switch (GET_CODE (x))
476 case REG:
477 case SUBREG:
478 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
479 break;
481 case PLUS:
483 rtx base = XEXP (x, 0);
484 rtx index = XEXP (x, 1);
486 switch (GET_CODE (index))
488 case CONST_INT:
489 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
490 reg_names[true_regnum (base)]);
491 break;
493 case REG:
494 case SUBREG:
496 int base_num = true_regnum (base);
497 int index_num = true_regnum (index);
499 fprintf (stream, "@(r0,%s)",
500 reg_names[MAX (base_num, index_num)]);
501 break;
504 default:
505 debug_rtx (x);
506 abort ();
509 break;
511 case PRE_DEC:
512 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
513 break;
515 case POST_INC:
516 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
517 break;
519 default:
520 x = mark_constant_pool_use (x);
521 output_addr_const (stream, x);
522 break;
526 /* Print operand x (an rtx) in assembler syntax to file stream
527 according to modifier code.
529 '.' print a .s if insn needs delay slot
530 ',' print LOCAL_LABEL_PREFIX
531 '@' print trap, rte or rts depending upon pragma interruptness
532 '#' output a nop if there is nothing to put in the delay slot
533 ''' print likelihood suffix (/u for unlikely).
534 'O' print a constant without the #
535 'R' print the LSW of a dp value - changes if in little endian
536 'S' print the MSW of a dp value - changes if in little endian
537 'T' print the next word of a dp value - same as 'R' in big endian mode.
538 'M' print an `x' if `m' will print `base,index'.
539 'N' print 'r63' if the operand is (const_int 0).
540 'd' print a V2SF reg as dN instead of fpN.
541 'm' print a pair `base,offset' or `base,index', for LD and ST.
542 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
543 'o' output an operator. */
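/* Illustrative examples of the modifiers above: with a DFmode value in the
   general register pair r2/r3 on a little-endian target, '%S0' prints the
   MSW register r3 and '%R0' the LSW register r2; '%d0' on a V2SFmode value
   in fr0 prints "dr0"; and '%u' on (const_int 0x12345) prints 9029, the low
   16 bits taken as unsigned.  */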
545 void
546 print_operand (FILE *stream, rtx x, int code)
548 switch (code)
550 case '.':
551 if (final_sequence
552 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
553 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
554 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
555 break;
556 case ',':
557 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
558 break;
559 case '@':
560 if (trap_exit)
561 fprintf (stream, "trapa #%d", trap_exit);
562 else if (sh_cfun_interrupt_handler_p ())
563 fprintf (stream, "rte");
564 else
565 fprintf (stream, "rts");
566 break;
567 case '#':
568 /* Output a nop if there's nothing in the delay slot. */
569 if (dbr_sequence_length () == 0)
570 fprintf (stream, "\n\tnop");
571 break;
572 case '\'':
574 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
576 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
577 fputs ("/u", stream);
578 break;
580 case 'O':
581 x = mark_constant_pool_use (x);
582 output_addr_const (stream, x);
583 break;
584 case 'R':
585 fputs (reg_names[REGNO (x) + LSW], (stream));
586 break;
587 case 'S':
588 fputs (reg_names[REGNO (x) + MSW], (stream));
589 break;
590 case 'T':
591 /* Next word of a double. */
592 switch (GET_CODE (x))
594 case REG:
595 fputs (reg_names[REGNO (x) + 1], (stream));
596 break;
597 case MEM:
598 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
599 && GET_CODE (XEXP (x, 0)) != POST_INC)
600 x = adjust_address (x, SImode, 4);
601 print_operand_address (stream, XEXP (x, 0));
602 break;
603 default:
604 break;
606 break;
607 case 'o':
608 switch (GET_CODE (x))
610 case PLUS: fputs ("add", stream); break;
611 case MINUS: fputs ("sub", stream); break;
612 case MULT: fputs ("mul", stream); break;
613 case DIV: fputs ("div", stream); break;
614 case EQ: fputs ("eq", stream); break;
615 case NE: fputs ("ne", stream); break;
616 case GT: case LT: fputs ("gt", stream); break;
617 case GE: case LE: fputs ("ge", stream); break;
618 case GTU: case LTU: fputs ("gtu", stream); break;
619 case GEU: case LEU: fputs ("geu", stream); break;
620 default:
621 break;
623 break;
624 case 'M':
625 if (GET_CODE (x) == MEM
626 && GET_CODE (XEXP (x, 0)) == PLUS
627 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
628 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
629 fputc ('x', stream);
630 break;
632 case 'm':
633 if (GET_CODE (x) != MEM)
634 abort ();
635 x = XEXP (x, 0);
636 switch (GET_CODE (x))
638 case REG:
639 case SUBREG:
640 print_operand (stream, x, 0);
641 fputs (", 0", stream);
642 break;
644 case PLUS:
645 print_operand (stream, XEXP (x, 0), 0);
646 fputs (", ", stream);
647 print_operand (stream, XEXP (x, 1), 0);
648 break;
650 default:
651 abort ();
653 break;
655 case 'd':
656 if (GET_CODE (x) != REG || GET_MODE (x) != V2SFmode)
657 abort ();
659 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
660 break;
662 case 'N':
663 if (x == CONST0_RTX (GET_MODE (x)))
665 fprintf ((stream), "r63");
666 break;
668 goto default_output;
669 case 'u':
670 if (GET_CODE (x) == CONST_INT)
672 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
673 break;
675 /* Fall through. */
677 default_output:
678 default:
679 switch (GET_CODE (x))
681 /* FIXME: We need this on SHmedia32 because reload generates
682 some sign-extended HI or QI loads into DImode registers
683 but, because Pmode is SImode, the address ends up with a
684 subreg:SI of the DImode register. Maybe reload should be
685 fixed so as to apply alter_subreg to such loads? */
686 case SUBREG:
687 if (SUBREG_BYTE (x) != 0
688 || GET_CODE (SUBREG_REG (x)) != REG)
689 abort ();
691 x = SUBREG_REG (x);
692 /* Fall through. */
694 case REG:
695 if (FP_REGISTER_P (REGNO (x))
696 && GET_MODE (x) == V16SFmode)
697 fprintf ((stream), "mtrx%s", reg_names[REGNO (x)] + 2);
698 else if (FP_REGISTER_P (REGNO (x))
699 && GET_MODE (x) == V4SFmode)
700 fprintf ((stream), "fv%s", reg_names[REGNO (x)] + 2);
701 else if (GET_CODE (x) == REG
702 && GET_MODE (x) == V2SFmode)
703 fprintf ((stream), "fp%s", reg_names[REGNO (x)] + 2);
704 else if (FP_REGISTER_P (REGNO (x))
705 && GET_MODE_SIZE (GET_MODE (x)) > 4)
706 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
707 else
708 fputs (reg_names[REGNO (x)], (stream));
709 break;
711 case MEM:
712 output_address (XEXP (x, 0));
713 break;
715 case CONST:
716 if (TARGET_SHMEDIA
717 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
718 && GET_MODE (XEXP (x, 0)) == DImode
719 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
720 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
722 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
724 fputc ('(', stream);
725 if (GET_CODE (val) == ASHIFTRT)
727 fputc ('(', stream);
728 if (GET_CODE (XEXP (val, 0)) == CONST)
729 fputc ('(', stream);
730 output_addr_const (stream, XEXP (val, 0));
731 if (GET_CODE (XEXP (val, 0)) == CONST)
732 fputc (')', stream);
733 fputs (" >> ", stream);
734 output_addr_const (stream, XEXP (val, 1));
735 fputc (')', stream);
737 else
739 if (GET_CODE (val) == CONST)
740 fputc ('(', stream);
741 output_addr_const (stream, val);
742 if (GET_CODE (val) == CONST)
743 fputc (')', stream);
745 fputs (" & 65535)", stream);
746 break;
749 /* Fall through. */
750 default:
751 if (TARGET_SH1)
752 fputc ('#', stream);
753 output_addr_const (stream, x);
754 break;
756 break;
760 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
761 static void
762 force_into (rtx value, rtx target)
764 value = force_operand (value, target);
765 if (! rtx_equal_p (value, target))
766 emit_insn (gen_move_insn (target, value));
769 /* Emit code to perform a block move. Choose the best method.
771 OPERANDS[0] is the destination.
772 OPERANDS[1] is the source.
773 OPERANDS[2] is the size.
774 OPERANDS[3] is the alignment safe to use. */
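/* For example (a sketch of what the code below emits): a constant 12-byte
   copy with 4-byte alignment on SH4 loads the address of __movmemSI12_i4
   into a register, forces the destination and source addresses into r4 and
   r5, and emits a block_move_real_i4 call; a non-constant length or
   alignment below 4 simply returns 0 and leaves the copy to generic code.  */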
777 expand_block_move (rtx *operands)
779 int align = INTVAL (operands[3]);
780 int constp = (GET_CODE (operands[2]) == CONST_INT);
781 int bytes = (constp ? INTVAL (operands[2]) : 0);
783 if (! constp)
784 return 0;
786 /* If we could use mov.l to move words and dest is word-aligned, we
787 can use movua.l for loads and still generate a relatively short
788 and efficient sequence. */
789 if (TARGET_SH4A_ARCH && align < 4
790 && MEM_ALIGN (operands[0]) >= 32
791 && can_move_by_pieces (bytes, 32))
793 rtx dest = copy_rtx (operands[0]);
794 rtx src = copy_rtx (operands[1]);
795 /* We could use different pseudos for each copied word, but
796 since movua can only load into r0, it's kind of
797 pointless. */
798 rtx temp = gen_reg_rtx (SImode);
799 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
800 int copied = 0;
802 while (copied + 4 <= bytes)
804 rtx to = adjust_address (dest, SImode, copied);
805 rtx from = adjust_automodify_address (src, SImode, src_addr, copied);
807 emit_insn (gen_movua (temp, from));
808 emit_move_insn (src_addr, plus_constant (src_addr, 4));
809 emit_move_insn (to, temp);
810 copied += 4;
813 if (copied < bytes)
814 move_by_pieces (adjust_address (dest, BLKmode, copied),
815 adjust_automodify_address (src, BLKmode,
816 src_addr, copied),
817 bytes - copied, align, 0);
819 return 1;
822 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
823 alignment, or if it isn't a multiple of 4 bytes, then fail. */
824 if (align < 4 || (bytes % 4 != 0))
825 return 0;
827 if (TARGET_HARD_SH4)
829 if (bytes < 12)
830 return 0;
831 else if (bytes == 12)
833 tree entry_name;
834 rtx sym;
835 rtx func_addr_rtx;
836 rtx r4 = gen_rtx_REG (SImode, 4);
837 rtx r5 = gen_rtx_REG (SImode, 5);
839 entry_name = get_identifier ("__movmemSI12_i4");
841 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
842 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
843 force_into (XEXP (operands[0], 0), r4);
844 force_into (XEXP (operands[1], 0), r5);
845 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
846 return 1;
848 else if (! TARGET_SMALLCODE)
850 tree entry_name;
851 rtx sym;
852 rtx func_addr_rtx;
853 int dwords;
854 rtx r4 = gen_rtx_REG (SImode, 4);
855 rtx r5 = gen_rtx_REG (SImode, 5);
856 rtx r6 = gen_rtx_REG (SImode, 6);
858 entry_name = get_identifier (bytes & 4
859 ? "__movmem_i4_odd"
860 : "__movmem_i4_even");
861 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
862 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
863 force_into (XEXP (operands[0], 0), r4);
864 force_into (XEXP (operands[1], 0), r5);
866 dwords = bytes >> 3;
867 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
868 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
869 return 1;
871 else
872 return 0;
874 if (bytes < 64)
876 char entry[30];
877 tree entry_name;
878 rtx sym;
879 rtx func_addr_rtx;
880 rtx r4 = gen_rtx_REG (SImode, 4);
881 rtx r5 = gen_rtx_REG (SImode, 5);
883 sprintf (entry, "__movmemSI%d", bytes);
884 entry_name = get_identifier (entry);
885 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
886 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
887 force_into (XEXP (operands[0], 0), r4);
888 force_into (XEXP (operands[1], 0), r5);
889 emit_insn (gen_block_move_real (func_addr_rtx));
890 return 1;
893 /* This is the same number of bytes as a memcpy call, but to a different
894 less common function name, so this will occasionally use more space. */
895 if (! TARGET_SMALLCODE)
897 tree entry_name;
898 rtx sym;
899 rtx func_addr_rtx;
900 int final_switch, while_loop;
901 rtx r4 = gen_rtx_REG (SImode, 4);
902 rtx r5 = gen_rtx_REG (SImode, 5);
903 rtx r6 = gen_rtx_REG (SImode, 6);
905 entry_name = get_identifier ("__movmem");
906 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
907 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
908 force_into (XEXP (operands[0], 0), r4);
909 force_into (XEXP (operands[1], 0), r5);
911 /* r6 controls the size of the move. 16 is decremented from it
912 for each 64 bytes moved. Then the negative bit left over is used
913 as an index into a list of move instructions. e.g., a 72 byte move
914 would be set up with size(r6) = 14, for one iteration through the
915 big while loop, and a switch of -2 for the last part. */
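	  /* Worked example for the 72 byte case mentioned above:
	     bytes / 4 = 18 words, so final_switch = 16 - (18 % 16) = 14 and
	     while_loop = (18 / 16 - 1) * 16 = 0, giving r6 = 14.  After the
	     library routine moves 64 bytes it subtracts 16, leaving -2, which
	     indexes the two-word (8 byte) tail: 64 + 8 = 72.  */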
917 final_switch = 16 - ((bytes / 4) % 16);
918 while_loop = ((bytes / 4) / 16 - 1) * 16;
919 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
920 emit_insn (gen_block_lump_real (func_addr_rtx));
921 return 1;
924 return 0;
927 /* Prepare operands for a move define_expand; specifically, one of the
928 operands must be in a register. */
931 prepare_move_operands (rtx operands[], enum machine_mode mode)
933 if ((mode == SImode || mode == DImode)
934 && flag_pic
935 && ! ((mode == Pmode || mode == ptr_mode)
936 && tls_symbolic_operand (operands[1], Pmode) != 0))
938 rtx temp;
939 if (SYMBOLIC_CONST_P (operands[1]))
941 if (GET_CODE (operands[0]) == MEM)
942 operands[1] = force_reg (Pmode, operands[1]);
943 else if (TARGET_SHMEDIA
944 && GET_CODE (operands[1]) == LABEL_REF
945 && target_reg_operand (operands[0], mode))
946 /* It's ok. */;
947 else
949 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
950 operands[1] = legitimize_pic_address (operands[1], mode, temp);
953 else if (GET_CODE (operands[1]) == CONST
954 && GET_CODE (XEXP (operands[1], 0)) == PLUS
955 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
957 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
958 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
959 mode, temp);
960 operands[1] = expand_binop (mode, add_optab, temp,
961 XEXP (XEXP (operands[1], 0), 1),
962 no_new_pseudos ? temp
963 : gen_reg_rtx (Pmode),
964 0, OPTAB_LIB_WIDEN);
968 if (! reload_in_progress && ! reload_completed)
970 /* Copy the source to a register if neither operand is a register. */
971 if (! register_operand (operands[0], mode)
972 && ! sh_register_operand (operands[1], mode))
973 operands[1] = copy_to_mode_reg (mode, operands[1]);
975 if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
977 /* This is like change_address_1 (operands[0], mode, 0, 1),
978 except that we can't use that function because it is static. */
979 rtx new = change_address (operands[0], mode, 0);
980 MEM_COPY_ATTRIBUTES (new, operands[0]);
981 operands[0] = new;
984 /* This case can happen while generating code to move the result
985 of a library call to the target. Reject `st r0,@(rX,rY)' because
986 reload will fail to find a spill register for rX, since r0 is already
987 being used for the source. */
988 else if (refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
989 && GET_CODE (operands[0]) == MEM
990 && GET_CODE (XEXP (operands[0], 0)) == PLUS
991 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
992 operands[1] = copy_to_mode_reg (mode, operands[1]);
995 if (mode == Pmode || mode == ptr_mode)
997 rtx op0, op1;
998 enum tls_model tls_kind;
1000 op0 = operands[0];
1001 op1 = operands[1];
1002 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
1004 rtx tga_op1, tga_ret, tmp, tmp2;
1007 switch (tls_kind)
1009 case TLS_MODEL_GLOBAL_DYNAMIC:
1010 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1011 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1012 op1 = tga_ret;
1013 break;
1015 case TLS_MODEL_LOCAL_DYNAMIC:
1016 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1017 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1019 tmp = gen_reg_rtx (Pmode);
1020 emit_move_insn (tmp, tga_ret);
1022 if (register_operand (op0, Pmode))
1023 tmp2 = op0;
1024 else
1025 tmp2 = gen_reg_rtx (Pmode);
1027 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1028 op1 = tmp2;
1029 break;
1031 case TLS_MODEL_INITIAL_EXEC:
1032 if (! flag_pic)
1033 emit_insn (gen_GOTaddr2picreg ());
1034 tga_op1 = gen_reg_rtx (Pmode);
1035 tmp = gen_sym2GOTTPOFF (op1);
1036 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1037 op1 = tga_op1;
1038 break;
1040 case TLS_MODEL_LOCAL_EXEC:
1041 tmp2 = gen_reg_rtx (Pmode);
1042 emit_insn (gen_load_gbr (tmp2));
1043 tmp = gen_reg_rtx (Pmode);
1044 emit_insn (gen_symTPOFF2reg (tmp, op1));
1045 RTX_UNCHANGING_P (tmp) = 1;
1047 if (register_operand (op0, Pmode))
1048 op1 = op0;
1049 else
1050 op1 = gen_reg_rtx (Pmode);
1052 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1053 break;
1055 default:
1056 abort ();
1058 operands[1] = op1;
1062 return 0;
1065 /* Prepare the operands for an scc instruction; make sure that the
1066 compare has been done. */
1068 prepare_scc_operands (enum rtx_code code)
1070 rtx t_reg = gen_rtx_REG (SImode, T_REG);
1071 enum rtx_code oldcode = code;
1072 enum machine_mode mode;
1074 /* First need a compare insn. */
1075 switch (code)
1077 case NE:
1078 /* It isn't possible to handle this case. */
1079 abort ();
1080 case LT:
1081 code = GT;
1082 break;
1083 case LE:
1084 code = GE;
1085 break;
1086 case LTU:
1087 code = GTU;
1088 break;
1089 case LEU:
1090 code = GEU;
1091 break;
1092 default:
1093 break;
1095 if (code != oldcode)
1097 rtx tmp = sh_compare_op0;
1098 sh_compare_op0 = sh_compare_op1;
1099 sh_compare_op1 = tmp;
1102 mode = GET_MODE (sh_compare_op0);
1103 if (mode == VOIDmode)
1104 mode = GET_MODE (sh_compare_op1);
1106 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1107 if ((code != EQ && code != NE
1108 && (sh_compare_op1 != const0_rtx
1109 || code == GTU || code == GEU || code == LTU || code == LEU))
1110 || (mode == DImode && sh_compare_op1 != const0_rtx)
1111 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1112 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1114 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1115 (mode == SFmode ? emit_sf_insn : emit_df_insn)
1116 (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
1117 gen_rtx_SET (VOIDmode, t_reg,
1118 gen_rtx_fmt_ee (code, SImode,
1119 sh_compare_op0, sh_compare_op1)),
1120 gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
1121 else
1122 emit_insn (gen_rtx_SET (VOIDmode, t_reg,
1123 gen_rtx_fmt_ee (code, SImode,
1124 sh_compare_op0, sh_compare_op1)));
1126 return t_reg;
1129 /* Called from the md file, set up the operands of a compare instruction. */
1131 void
1132 from_compare (rtx *operands, int code)
1134 enum machine_mode mode = GET_MODE (sh_compare_op0);
1135 rtx insn;
1136 if (mode == VOIDmode)
1137 mode = GET_MODE (sh_compare_op1);
1138 if (code != EQ
1139 || mode == DImode
1140 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1142 /* Force args into regs, since we can't use constants here. */
1143 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1144 if (sh_compare_op1 != const0_rtx
1145 || code == GTU || code == GEU
1146 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1147 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1149 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
1151 from_compare (operands, GT);
1152 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
1154 else
1155 insn = gen_rtx_SET (VOIDmode,
1156 gen_rtx_REG (SImode, T_REG),
1157 gen_rtx_fmt_ee (code, SImode,
1158 sh_compare_op0, sh_compare_op1));
1159 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1161 insn = gen_rtx_PARALLEL (VOIDmode,
1162 gen_rtvec (2, insn,
1163 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
1164 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
1166 else
1167 emit_insn (insn);
1170 /* Functions to output assembly code. */
1172 /* Return a sequence of instructions to perform a DI or DF move.
1174 Since the SH cannot move a DI or DF in one instruction, we have
1175 to take care when we see overlapping source and dest registers. */
1177 const char *
1178 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
1179 enum machine_mode mode)
1181 rtx dst = operands[0];
1182 rtx src = operands[1];
1184 if (GET_CODE (dst) == MEM
1185 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
1186 return "mov.l %T1,%0\n\tmov.l %1,%0";
1188 if (register_operand (dst, mode)
1189 && register_operand (src, mode))
1191 if (REGNO (src) == MACH_REG)
1192 return "sts mach,%S0\n\tsts macl,%R0";
1194 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1195 when mov.d r1,r0 do r1->r0 then r2->r1. */
1197 if (REGNO (src) + 1 == REGNO (dst))
1198 return "mov %T1,%T0\n\tmov %1,%0";
1199 else
1200 return "mov %1,%0\n\tmov %T1,%T0";
1202 else if (GET_CODE (src) == CONST_INT)
1204 if (INTVAL (src) < 0)
1205 output_asm_insn ("mov #-1,%S0", operands);
1206 else
1207 output_asm_insn ("mov #0,%S0", operands);
1209 return "mov %1,%R0";
1211 else if (GET_CODE (src) == MEM)
1213 int ptrreg = -1;
1214 int dreg = REGNO (dst);
1215 rtx inside = XEXP (src, 0);
1217 if (GET_CODE (inside) == REG)
1218 ptrreg = REGNO (inside);
1219 else if (GET_CODE (inside) == SUBREG)
1220 ptrreg = subreg_regno (inside);
1221 else if (GET_CODE (inside) == PLUS)
1223 ptrreg = REGNO (XEXP (inside, 0));
1224 /* ??? A r0+REG address shouldn't be possible here, because it isn't
1225 an offsettable address. Unfortunately, offsettable addresses use
1226 QImode to check the offset, and a QImode offsettable address
1227 requires r0 for the other operand, which is not currently
1228 supported, so we can't use the 'o' constraint.
1229 Thus we must check for and handle r0+REG addresses here.
1230 We punt for now, since this is likely very rare. */
1231 if (GET_CODE (XEXP (inside, 1)) == REG)
1232 abort ();
1234 else if (GET_CODE (inside) == LABEL_REF)
1235 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1236 else if (GET_CODE (inside) == POST_INC)
1237 return "mov.l %1,%0\n\tmov.l %1,%T0";
1238 else
1239 abort ();
1241 /* Work out the safe way to copy. Copy into the second half first. */
1242 if (dreg == ptrreg)
1243 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1246 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1249 /* Print an instruction which would have gone into a delay slot after
1250 another instruction, but couldn't because the other instruction expanded
1251 into a sequence where putting the slot insn at the end wouldn't work. */
1253 static void
1254 print_slot (rtx insn)
1256 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1, NULL);
1258 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
1261 const char *
1262 output_far_jump (rtx insn, rtx op)
1264 struct { rtx lab, reg, op; } this;
1265 rtx braf_base_lab = NULL_RTX;
1266 const char *jump;
1267 int far;
1268 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1269 rtx prev;
1271 this.lab = gen_label_rtx ();
1273 if (TARGET_SH2
1274 && offset >= -32764
1275 && offset - get_attr_length (insn) <= 32766)
1277 far = 0;
1278 jump = "mov.w %O0,%1; braf %1";
1280 else
1282 far = 1;
1283 if (flag_pic)
1285 if (TARGET_SH2)
1286 jump = "mov.l %O0,%1; braf %1";
1287 else
1288 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1290 else
1291 jump = "mov.l %O0,%1; jmp @%1";
1293 /* If we have a scratch register available, use it. */
1294 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1295 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1297 this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1298 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1299 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1300 output_asm_insn (jump, &this.lab);
1301 if (dbr_sequence_length ())
1302 print_slot (final_sequence);
1303 else
1304 output_asm_insn ("nop", 0);
1306 else
1308 /* Output the delay slot insn first if any. */
1309 if (dbr_sequence_length ())
1310 print_slot (final_sequence);
1312 this.reg = gen_rtx_REG (SImode, 13);
1313 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1314 Fortunately, MACL is fixed and call-clobbered, and we never
1315 need its value across jumps, so save r13 in it instead of in
1316 the stack. */
1317 if (TARGET_SH5)
1318 output_asm_insn ("lds r13, macl", 0);
1319 else
1320 output_asm_insn ("mov.l r13,@-r15", 0);
1321 output_asm_insn (jump, &this.lab);
1322 if (TARGET_SH5)
1323 output_asm_insn ("sts macl, r13", 0);
1324 else
1325 output_asm_insn ("mov.l @r15+,r13", 0);
1327 if (far && flag_pic && TARGET_SH2)
1329 braf_base_lab = gen_label_rtx ();
1330 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1331 CODE_LABEL_NUMBER (braf_base_lab));
1333 if (far)
1334 output_asm_insn (".align 2", 0);
1335 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1336 this.op = op;
1337 if (far && flag_pic)
1339 if (TARGET_SH2)
1340 this.lab = braf_base_lab;
1341 output_asm_insn (".long %O2-%O0", &this.lab);
1343 else
1344 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
1345 return "";
1348 /* Local label counter, used for constants in the pool and inside
1349 pattern branches. */
1351 static int lf = 100;
1353 /* Output code for ordinary branches. */
1355 const char *
1356 output_branch (int logic, rtx insn, rtx *operands)
1358 switch (get_attr_length (insn))
1360 case 6:
1361 /* This can happen if filling the delay slot has caused a forward
1362 branch to exceed its range (we could reverse it, but only
1363 when we know we won't overextend other branches; this should
1364 best be handled by relaxation).
1365 It can also happen when other condbranches hoist delay slot insn
1366 from their destination, thus leading to code size increase.
1367 But the branch will still be in the range -4092..+4098 bytes. */
1369 if (! TARGET_RELAX)
1371 int label = lf++;
1372 /* The call to print_slot will clobber the operands. */
1373 rtx op0 = operands[0];
1375 /* If the instruction in the delay slot is annulled (true), then
1376 there is no delay slot where we can put it now. The only safe
1377 place for it is after the label. final will do that by default. */
1379 if (final_sequence
1380 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
1382 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
1383 ASSEMBLER_DIALECT ? "/" : ".", label);
1384 print_slot (final_sequence);
1386 else
1387 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
1389 output_asm_insn ("bra\t%l0", &op0);
1390 fprintf (asm_out_file, "\tnop\n");
1391 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1393 return "";
1395 /* When relaxing, handle this like a short branch. The linker
1396 will fix it up if it still doesn't fit after relaxation. */
1397 case 2:
1398 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
1400 /* These are for SH2e, in which we have to account for the
1401 extra nop because of the hardware bug in annulled branches. */
1402 case 8:
1403 if (! TARGET_RELAX)
1405 int label = lf++;
1407 if (final_sequence
1408 && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
1409 abort ();
1410 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
1411 logic ? "f" : "t",
1412 ASSEMBLER_DIALECT ? "/" : ".", label);
1413 fprintf (asm_out_file, "\tnop\n");
1414 output_asm_insn ("bra\t%l0", operands);
1415 fprintf (asm_out_file, "\tnop\n");
1416 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1418 return "";
1420 /* When relaxing, fall through. */
1421 case 4:
1423 char buffer[10];
1425 sprintf (buffer, "b%s%ss\t%%l0",
1426 logic ? "t" : "f",
1427 ASSEMBLER_DIALECT ? "/" : ".");
1428 output_asm_insn (buffer, &operands[0]);
1429 return "nop";
1432 default:
1433 /* There should be no longer branches now - that would
1434 indicate that something has destroyed the branches set
1435 up in machine_dependent_reorg. */
1436 abort ();
1440 const char *
1441 output_branchy_insn (enum rtx_code code, const char *template,
1442 rtx insn, rtx *operands)
1444 rtx next_insn = NEXT_INSN (insn);
1446 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
1448 rtx src = SET_SRC (PATTERN (next_insn));
1449 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
1451 /* Following branch not taken */
1452 operands[9] = gen_label_rtx ();
1453 emit_label_after (operands[9], next_insn);
1454 INSN_ADDRESSES_NEW (operands[9],
1455 INSN_ADDRESSES (INSN_UID (next_insn))
1456 + get_attr_length (next_insn));
1457 return template;
1459 else
1461 int offset = (branch_dest (next_insn)
1462 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
1463 if (offset >= -252 && offset <= 258)
1465 if (GET_CODE (src) == IF_THEN_ELSE)
1466 /* branch_true */
1467 src = XEXP (src, 1);
1468 operands[9] = src;
1469 return template;
1473 operands[9] = gen_label_rtx ();
1474 emit_label_after (operands[9], insn);
1475 INSN_ADDRESSES_NEW (operands[9],
1476 INSN_ADDRESSES (INSN_UID (insn))
1477 + get_attr_length (insn));
1478 return template;
1481 const char *
1482 output_ieee_ccmpeq (rtx insn, rtx *operands)
1484 return output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);
1487 /* Output the start of the assembler file. */
1489 static void
1490 sh_file_start (void)
1492 default_file_start ();
1494 #ifdef SYMBIAN
1495 /* Declare the .directive section before it is used. */
1496 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
1497 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
1498 #endif
1500 if (TARGET_ELF)
1501 /* We need to show the text section with the proper
1502 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
1503 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
1504 will complain. We can teach GAS specifically about the
1505 default attributes for our choice of text section, but
1506 then we would have to change GAS again if/when we change
1507 the text section name. */
1508 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
1509 else
1510 /* Switch to the data section so that the coffsem symbol
1511 isn't in the text section. */
1512 data_section ();
1514 if (TARGET_LITTLE_ENDIAN)
1515 fputs ("\t.little\n", asm_out_file);
1517 if (!TARGET_ELF)
1519 if (TARGET_SHCOMPACT)
1520 fputs ("\t.mode\tSHcompact\n", asm_out_file);
1521 else if (TARGET_SHMEDIA)
1522 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1523 TARGET_SHMEDIA64 ? 64 : 32);
1527 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
1529 static bool
1530 unspec_caller_rtx_p (rtx pat)
1532 switch (GET_CODE (pat))
1534 case CONST:
1535 return unspec_caller_rtx_p (XEXP (pat, 0));
1536 case PLUS:
1537 case MINUS:
1538 if (unspec_caller_rtx_p (XEXP (pat, 0)))
1539 return true;
1540 return unspec_caller_rtx_p (XEXP (pat, 1));
1541 case UNSPEC:
1542 if (XINT (pat, 1) == UNSPEC_CALLER)
1543 return true;
1544 default:
1545 break;
1548 return false;
1551 /* Indicate that INSN cannot be duplicated. This is true for an insn
1552 that generates a unique label. */
1554 static bool
1555 sh_cannot_copy_insn_p (rtx insn)
1557 rtx pat;
1559 if (!reload_completed || !flag_pic)
1560 return false;
1562 if (GET_CODE (insn) != INSN)
1563 return false;
1564 if (asm_noperands (insn) >= 0)
1565 return false;
1567 pat = PATTERN (insn);
1568 if (GET_CODE (pat) != SET)
1569 return false;
1570 pat = SET_SRC (pat);
1572 if (unspec_caller_rtx_p (pat))
1573 return true;
1575 return false;
1578 /* Actual number of instructions used to make a shift by N. */
1579 static const char ashiftrt_insns[] =
1580 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
1582 /* Left shift and logical right shift are the same. */
1583 static const char shift_insns[] =
1584 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1586 /* Individual shift amounts needed to get the above length sequences.
1587 One bit right shifts clobber the T bit, so when possible, put one bit
1588 shifts in the middle of the sequence, so the ends are eligible for
1589 branch delay slots. */
1590 static const short shift_amounts[32][5] = {
1591 {0}, {1}, {2}, {2, 1},
1592 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
1593 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1594 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
1595 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1596 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1597 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1598 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
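/* Illustration: a left shift by 13 uses shift_amounts[13] = {8, 2, 1, 2},
   i.e. shll8, shll2, shll, shll2 -- four insns, matching
   shift_insns[13] == 4.  A negative entry flips the shift direction, so
   30 = {16, -2, 16} becomes shll16, shlr2, shll16 (see gen_ashift below).  */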
1600 /* Likewise, but for shift amounts < 16, up to three highmost bits
1601 might be clobbered. This is typically used when combined with some
1602 kind of sign or zero extension. */
1604 static const char ext_shift_insns[] =
1605 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1607 static const short ext_shift_amounts[32][4] = {
1608 {0}, {1}, {2}, {2, 1},
1609 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
1610 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1611 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
1612 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1613 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1614 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1615 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1617 /* Assuming we have a value that has been sign-extended by at least one bit,
1618 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
1619 to shift it by N without data loss, and quicker than by other means? */
1620 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
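/* EXT_SHIFT_SIGNED (n) is true exactly for n == 7 and n == 15, since
   (n | 8) == 15 requires the low three bits of n to be set and every bit
   above bit 3 to be clear; these correspond to the {8, -1} and {16, -1}
   entries of ext_shift_amounts above.  */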
1622 /* This is used in length attributes in sh.md to help compute the length
1623 of arbitrary constant shift instructions. */
1626 shift_insns_rtx (rtx insn)
1628 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1629 int shift_count = INTVAL (XEXP (set_src, 1));
1630 enum rtx_code shift_code = GET_CODE (set_src);
1632 switch (shift_code)
1634 case ASHIFTRT:
1635 return ashiftrt_insns[shift_count];
1636 case LSHIFTRT:
1637 case ASHIFT:
1638 return shift_insns[shift_count];
1639 default:
1640 abort ();
1644 /* Return the cost of a shift. */
1646 static inline int
1647 shiftcosts (rtx x)
1649 int value;
1651 if (TARGET_SHMEDIA)
1652 return 1;
1654 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
1656 if (GET_MODE (x) == DImode
1657 && GET_CODE (XEXP (x, 1)) == CONST_INT
1658 && INTVAL (XEXP (x, 1)) == 1)
1659 return 2;
1661 /* Everything else is invalid, because there is no pattern for it. */
1662 return 10000;
1664 /* If shift by a non constant, then this will be expensive. */
1665 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1666 return SH_DYNAMIC_SHIFT_COST;
1668 value = INTVAL (XEXP (x, 1));
1670 /* Otherwise, return the true cost in instructions. */
1671 if (GET_CODE (x) == ASHIFTRT)
1673 int cost = ashiftrt_insns[value];
1674 /* If SH3, then we put the constant in a reg and use shad. */
1675 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1676 cost = 1 + SH_DYNAMIC_SHIFT_COST;
1677 return cost;
1679 else
1680 return shift_insns[value];
1683 /* Return the cost of an AND operation. */
1685 static inline int
1686 andcosts (rtx x)
1688 int i;
1690 /* Anding with a register is a single-cycle `and' instruction. */
1691 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1692 return 1;
1694 i = INTVAL (XEXP (x, 1));
1696 if (TARGET_SHMEDIA)
1698 if ((GET_CODE (XEXP (x, 1)) == CONST_INT
1699 && CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1700 || EXTRA_CONSTRAINT_C16 (XEXP (x, 1)))
1701 return 1;
1702 else
1703 return 2;
1706 /* These constants are single cycle extu.[bw] instructions. */
1707 if (i == 0xff || i == 0xffff)
1708 return 1;
1709 /* Constants that can be used in an and immediate instruction in a single
1710 cycle, but this requires r0, so make it a little more expensive. */
1711 if (CONST_OK_FOR_K08 (i))
1712 return 2;
1713 /* Constants that can be loaded with a mov immediate and an and.
1714 This case is probably unnecessary. */
1715 if (CONST_OK_FOR_I08 (i))
1716 return 2;
1717 /* Any other constant requires a 2 cycle pc-relative load plus an and.
1718 This case is probably unnecessary. */
1719 return 3;
1722 /* Return the cost of an addition or a subtraction. */
1724 static inline int
1725 addsubcosts (rtx x)
1727 /* Adding a register is a single cycle insn. */
1728 if (GET_CODE (XEXP (x, 1)) == REG
1729 || GET_CODE (XEXP (x, 1)) == SUBREG)
1730 return 1;
1732 /* Likewise for small constants. */
1733 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1734 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
1735 return 1;
1737 if (TARGET_SHMEDIA)
1738 switch (GET_CODE (XEXP (x, 1)))
1740 case CONST:
1741 case LABEL_REF:
1742 case SYMBOL_REF:
1743 return TARGET_SHMEDIA64 ? 5 : 3;
1745 case CONST_INT:
1746 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1747 return 2;
1748 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
1749 return 3;
1750 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
1751 return 4;
1753 /* Fall through. */
1754 default:
1755 return 5;
1758 /* Any other constant requires a 2 cycle pc-relative load plus an
1759 addition. */
1760 return 3;
1763 /* Return the cost of a multiply. */
1764 static inline int
1765 multcosts (rtx x ATTRIBUTE_UNUSED)
1767 if (TARGET_SHMEDIA)
1768 return 3;
1770 if (TARGET_SH2)
1772 /* We have a mul insn, so we can never take more than the mul and the
1773 read of the mac reg, but count more because of the latency and extra
1774 reg usage. */
1775 if (TARGET_SMALLCODE)
1776 return 2;
1777 return 3;
1780 /* If we're aiming at small code, then just count the number of
1781 insns in a multiply call sequence. */
1782 if (TARGET_SMALLCODE)
1783 return 5;
1785 /* Otherwise count all the insns in the routine we'd be calling too. */
1786 return 20;
1789 /* Compute a (partial) cost for rtx X. Return true if the complete
1790 cost has been computed, and false if subexpressions should be
1791 scanned. In either case, *TOTAL contains the cost result. */
1793 static bool
1794 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
1796 switch (code)
1798 case CONST_INT:
1799 if (TARGET_SHMEDIA)
1801 if (INTVAL (x) == 0)
1802 *total = 0;
1803 else if (outer_code == AND && and_operand ((x), DImode))
1804 *total = 0;
1805 else if ((outer_code == IOR || outer_code == XOR
1806 || outer_code == PLUS)
1807 && CONST_OK_FOR_I10 (INTVAL (x)))
1808 *total = 0;
1809 else if (CONST_OK_FOR_I16 (INTVAL (x)))
1810 *total = COSTS_N_INSNS (outer_code != SET);
1811 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
1812 *total = COSTS_N_INSNS (2);
1813 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
1814 *total = COSTS_N_INSNS (3);
1815 else
1816 *total = COSTS_N_INSNS (4);
1817 return true;
1819 if (CONST_OK_FOR_I08 (INTVAL (x)))
1820 *total = 0;
1821 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
1822 && CONST_OK_FOR_K08 (INTVAL (x)))
1823 *total = 1;
1824 else
1825 *total = 8;
1826 return true;
1828 case CONST:
1829 case LABEL_REF:
1830 case SYMBOL_REF:
1831 if (TARGET_SHMEDIA64)
1832 *total = COSTS_N_INSNS (4);
1833 else if (TARGET_SHMEDIA32)
1834 *total = COSTS_N_INSNS (2);
1835 else
1836 *total = 5;
1837 return true;
1839 case CONST_DOUBLE:
1840 if (TARGET_SHMEDIA)
1841 *total = COSTS_N_INSNS (4);
1842 else
1843 *total = 10;
1844 return true;
1846 case PLUS:
1847 *total = COSTS_N_INSNS (addsubcosts (x));
1848 return true;
1850 case AND:
1851 *total = COSTS_N_INSNS (andcosts (x));
1852 return true;
1854 case MULT:
1855 *total = COSTS_N_INSNS (multcosts (x));
1856 return true;
1858 case ASHIFT:
1859 case ASHIFTRT:
1860 case LSHIFTRT:
1861 *total = COSTS_N_INSNS (shiftcosts (x));
1862 return true;
1864 case DIV:
1865 case UDIV:
1866 case MOD:
1867 case UMOD:
1868 *total = COSTS_N_INSNS (20);
1869 return true;
1871 case FLOAT:
1872 case FIX:
1873 *total = 100;
1874 return true;
1876 default:
1877 return false;
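/* Worked examples for the non-SHmedia CONST_INT case above (illustrative
   only): in (plus (reg) (const_int 100)) the constant satisfies
   CONST_OK_FOR_I08 and is free; in (and (reg) (const_int 200)) it only
   satisfies CONST_OK_FOR_K08, so it is costed 1; a value such as 0x12345
   gets the expensive cost of 8, hinting that it has to come from the
   constant pool.  */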
1881 /* Compute the cost of an address. For the SH, all valid addresses are
1882 the same cost; however, use a slightly higher cost for reg + reg
1883 addressing, since it increases pressure on r0. */
1885 static int
1886 sh_address_cost (rtx X)
1888 return (GET_CODE (X) == PLUS
1889 && ! CONSTANT_P (XEXP (X, 1))
1890 && ! TARGET_SHMEDIA ? 1 : 0);
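/* For instance (illustrative only): @r4 and @(8,r4) are costed 0, while the
   indexed form @(r0,r4) is costed 1 because it competes for r0; on SHmedia
   every valid address is costed 0.  */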
1893 /* Code to expand a shift. */
1895 void
1896 gen_ashift (int type, int n, rtx reg)
1898 /* Negative values here come from the shift_amounts array. */
1899 if (n < 0)
1901 if (type == ASHIFT)
1902 type = LSHIFTRT;
1903 else
1904 type = ASHIFT;
1905 n = -n;
1908 switch (type)
1910 case ASHIFTRT:
1911 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
1912 break;
1913 case LSHIFTRT:
1914 if (n == 1)
1915 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
1916 else
1917 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
1918 break;
1919 case ASHIFT:
1920 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
1921 break;
1925 /* Same for HImode */
1927 void
1928 gen_ashift_hi (int type, int n, rtx reg)
1930 /* Negative values here come from the shift_amounts array. */
1931 if (n < 0)
1933 if (type == ASHIFT)
1934 type = LSHIFTRT;
1935 else
1936 type = ASHIFT;
1937 n = -n;
1940 switch (type)
1942 case ASHIFTRT:
1943 case LSHIFTRT:
1944 /* We don't have HImode right shift operations because using the
1945 ordinary 32 bit shift instructions for that doesn't generate proper
1946 zero/sign extension.
1947 gen_ashift_hi is only called in contexts where we know that the
1948 sign extension works out correctly. */
1950 int offset = 0;
1951 if (GET_CODE (reg) == SUBREG)
1953 offset = SUBREG_BYTE (reg);
1954 reg = SUBREG_REG (reg);
1956 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
1957 break;
1959 case ASHIFT:
1960 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
1961 break;
1965 /* Output RTL to split a constant shift into its component SH constant
1966 shift instructions. */
1968 void
1969 gen_shifty_op (int code, rtx *operands)
1971 int value = INTVAL (operands[2]);
1972 int max, i;
1974 /* Truncate the shift count in case it is out of bounds. */
1975 value = value & 0x1f;
1977 if (value == 31)
1979 if (code == LSHIFTRT)
1981 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
1982 emit_insn (gen_movt (operands[0]));
1983 return;
1985 else if (code == ASHIFT)
1987 /* There is a two instruction sequence for 31 bit left shifts,
1988 but it requires r0. */
1989 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
1991 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
1992 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
1993 return;
1997 else if (value == 0)
1999 /* This can happen when not optimizing. We must output something here
2000 to prevent the compiler from aborting in final.c after the try_split
2001 call. */
2002 emit_insn (gen_nop ());
2003 return;
2006 max = shift_insns[value];
2007 for (i = 0; i < max; i++)
2008 gen_ashift (code, shift_amounts[value][i], operands[0]);
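/* A sketch of the decomposition performed above (the exact shift_amounts
   table is defined elsewhere in this file): SH1 only shifts by a constant
   1, 2, 8 or 16, so e.g. a left shift by 10 is emitted as shll8 followed by
   shll2, while a negative table entry makes gen_ashift shift back in the
   opposite direction after overshooting with a cheaper count.  */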
2011 /* Same as above, but optimized for values where the topmost bits don't
2012 matter. */
2014 void
2015 gen_shifty_hi_op (int code, rtx *operands)
2017 int value = INTVAL (operands[2]);
2018 int max, i;
2019 void (*gen_fun) (int, int, rtx);
2021 /* This operation is used by and_shl for SImode values with a few
2022 high bits known to be cleared. */
2023 value &= 31;
2024 if (value == 0)
2026 emit_insn (gen_nop ());
2027 return;
2030 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2031 if (code == ASHIFT)
2033 max = ext_shift_insns[value];
2034 for (i = 0; i < max; i++)
2035 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2037 else
2038 /* When shifting right, emit the shifts in reverse order, so that
2039 solitary negative values come first. */
2040 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2041 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2044 /* Output RTL for an arithmetic right shift. */
2046 /* ??? Rewrite to use super-optimizer sequences. */
2049 expand_ashiftrt (rtx *operands)
2051 rtx sym;
2052 rtx wrk;
2053 char func[18];
2054 tree func_name;
2055 int value;
2057 if (TARGET_SH3)
2059 if (GET_CODE (operands[2]) != CONST_INT)
2061 rtx count = copy_to_mode_reg (SImode, operands[2]);
2062 emit_insn (gen_negsi2 (count, count));
2063 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2064 return 1;
2066 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2067 > 1 + SH_DYNAMIC_SHIFT_COST)
2069 rtx count
2070 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2071 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2072 return 1;
2075 if (GET_CODE (operands[2]) != CONST_INT)
2076 return 0;
2078 value = INTVAL (operands[2]) & 31;
2080 if (value == 31)
2082 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2083 return 1;
2085 else if (value >= 16 && value <= 19)
2087 wrk = gen_reg_rtx (SImode);
2088 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2089 value -= 16;
2090 while (value--)
2091 gen_ashift (ASHIFTRT, 1, wrk);
2092 emit_move_insn (operands[0], wrk);
2093 return 1;
2095 /* Expand a short sequence inline; for longer ones, call a magic routine. */
2096 else if (value <= 5)
2098 wrk = gen_reg_rtx (SImode);
2099 emit_move_insn (wrk, operands[1]);
2100 while (value--)
2101 gen_ashift (ASHIFTRT, 1, wrk);
2102 emit_move_insn (operands[0], wrk);
2103 return 1;
2106 wrk = gen_reg_rtx (Pmode);
2108 /* Load the value into an arg reg and call a helper. */
2109 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2110 sprintf (func, "__ashiftrt_r4_%d", value);
2111 func_name = get_identifier (func);
2112 sym = function_symbol (IDENTIFIER_POINTER (func_name));
2113 emit_move_insn (wrk, sym);
2114 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2115 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
2116 return 1;
2120 sh_dynamicalize_shift_p (rtx count)
2122 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
2125 /* Try to find a good way to implement the combiner pattern
2126 [(set (match_operand:SI 0 "register_operand" "r")
2127 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2128 (match_operand:SI 2 "const_int_operand" "n"))
2129 (match_operand:SI 3 "const_int_operand" "n"))) .
2130 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2131 return 0 for simple right / left or left/right shift combination.
2132 return 1 for a combination of shifts with zero_extend.
2133 return 2 for a combination of shifts with an AND that needs r0.
2134 return 3 for a combination of shifts with an AND that needs an extra
2135 scratch register, when the three highmost bits of the AND mask are clear.
2136 return 4 for a combination of shifts with an AND that needs an extra
2137 scratch register, when any of the three highmost bits of the AND mask
2138 is set.
2139 If ATTRP is set, store an initial right shift width in ATTRP[0],
2140 and the instruction length in ATTRP[1] . These values are not valid
2141 when returning 0.
2142 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2143 shift_amounts for the last shift value that is to be used before the
2144 sign extend. */
2146 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2148 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2149 int left = INTVAL (left_rtx), right;
2150 int best = 0;
2151 int cost, best_cost = 10000;
2152 int best_right = 0, best_len = 0;
2153 int i;
2154 int can_ext;
2156 if (left < 0 || left > 31)
2157 return 0;
2158 if (GET_CODE (mask_rtx) == CONST_INT)
2159 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2160 else
2161 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2162 /* Can this be expressed as a right shift / left shift pair? */
2163 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2164 right = exact_log2 (lsb);
2165 mask2 = ~(mask + lsb - 1);
2166 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2167 /* mask has no zeroes other than trailing zeroes <==> ! mask2 */
2168 if (! mask2)
2169 best_cost = shift_insns[right] + shift_insns[right + left];
2170 /* mask has no trailing zeroes <==> ! right */
2171 else if (! right && mask2 == ~(lsb2 - 1))
2173 int late_right = exact_log2 (lsb2);
2174 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2176 /* Try to use zero extend. */
2177 if (mask2 == ~(lsb2 - 1))
2179 int width, first;
2181 for (width = 8; width <= 16; width += 8)
2183 /* Can we zero-extend right away? */
2184 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2186 cost
2187 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2188 if (cost < best_cost)
2190 best = 1;
2191 best_cost = cost;
2192 best_right = right;
2193 best_len = cost;
2194 if (attrp)
2195 attrp[2] = -1;
2197 continue;
2199 /* ??? Could try to put zero extend into initial right shift,
2200 or even shift a bit left before the right shift. */
2201 /* Determine value of first part of left shift, to get to the
2202 zero extend cut-off point. */
2203 first = width - exact_log2 (lsb2) + right;
2204 if (first >= 0 && right + left - first >= 0)
2206 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2207 + ext_shift_insns[right + left - first];
2208 if (cost < best_cost)
2210 best = 1;
2211 best_cost = cost;
2212 best_right = right;
2213 best_len = cost;
2214 if (attrp)
2215 attrp[2] = first;
2220 /* Try to use r0 AND pattern */
2221 for (i = 0; i <= 2; i++)
2223 if (i > right)
2224 break;
2225 if (! CONST_OK_FOR_K08 (mask >> i))
2226 continue;
2227 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2228 if (cost < best_cost)
2230 best = 2;
2231 best_cost = cost;
2232 best_right = i;
2233 best_len = cost - 1;
2236 /* Try to use a scratch register to hold the AND operand. */
2237 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2238 for (i = 0; i <= 2; i++)
2240 if (i > right)
2241 break;
2242 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2243 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2244 if (cost < best_cost)
2246 best = 4 - can_ext;
2247 best_cost = cost;
2248 best_right = i;
2249 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2253 if (attrp)
2255 attrp[0] = best_right;
2256 attrp[1] = best_len;
2258 return best;
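/* Worked example (illustrative only): for (x << 2) & 0x3fc the mask shifted
   right by the left count is 0xff, i.e. the expression is (x & 0xff) << 2.
   The zero extend alternative, extu.b plus one shll2, beats a plain
   right/left shift pair, so shl_and_kind returns 1 with attrp[0], the
   initial right shift, set to 0.  */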
2261 /* This is used in length attributes of the unnamed instructions
2262 corresponding to shl_and_kind return values of 1 and 2. */
2264 shl_and_length (rtx insn)
2266 rtx set_src, left_rtx, mask_rtx;
2267 int attributes[3];
2269 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2270 left_rtx = XEXP (XEXP (set_src, 0), 1);
2271 mask_rtx = XEXP (set_src, 1);
2272 shl_and_kind (left_rtx, mask_rtx, attributes);
2273 return attributes[1];
2276 /* This is used in the length attribute of the and_shl_scratch instruction. */
2279 shl_and_scr_length (rtx insn)
2281 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2282 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2283 rtx op = XEXP (set_src, 0);
2284 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2285 op = XEXP (XEXP (op, 0), 0);
2286 return len + shift_insns[INTVAL (XEXP (op, 1))];
2289 /* Generating rtl? */
2290 extern int rtx_equal_function_value_matters;
2292 /* Generate rtl for instructions for which shl_and_kind advised a particular
2293 method of generating them, i.e. returned nonzero. */
2296 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2298 int attributes[3];
2299 unsigned HOST_WIDE_INT mask;
2300 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2301 int right, total_shift;
2302 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2304 right = attributes[0];
2305 total_shift = INTVAL (left_rtx) + right;
2306 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2307 switch (kind)
2309 default:
2310 return -1;
2311 case 1:
2313 int first = attributes[2];
2314 rtx operands[3];
2316 if (first < 0)
2318 emit_insn ((mask << right) <= 0xff
2319 ? gen_zero_extendqisi2 (dest,
2320 gen_lowpart (QImode, source))
2321 : gen_zero_extendhisi2 (dest,
2322 gen_lowpart (HImode, source)));
2323 source = dest;
2325 if (source != dest)
2326 emit_insn (gen_movsi (dest, source));
2327 operands[0] = dest;
2328 if (right)
2330 operands[2] = GEN_INT (right);
2331 gen_shifty_hi_op (LSHIFTRT, operands);
2333 if (first > 0)
2335 operands[2] = GEN_INT (first);
2336 gen_shifty_hi_op (ASHIFT, operands);
2337 total_shift -= first;
2338 mask <<= first;
2340 if (first >= 0)
2341 emit_insn (mask <= 0xff
2342 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
2343 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2344 if (total_shift > 0)
2346 operands[2] = GEN_INT (total_shift);
2347 gen_shifty_hi_op (ASHIFT, operands);
2349 break;
2351 case 4:
2352 shift_gen_fun = gen_shifty_op;
2353 case 3:
2354 /* If the topmost bit that matters is set, set the topmost bits
2355 that don't matter. This way, we might be able to get a shorter
2356 signed constant. */
2357 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
2358 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
2359 case 2:
2360 /* Don't expand fine-grained when combining, because that will
2361 make the pattern fail. */
2362 if (rtx_equal_function_value_matters
2363 || reload_in_progress || reload_completed)
2365 rtx operands[3];
2367 /* Cases 3 and 4 should be handled by this split
2368 only while combining */
2369 if (kind > 2)
2370 abort ();
2371 if (right)
2373 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2374 source = dest;
2376 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
2377 if (total_shift)
2379 operands[0] = dest;
2380 operands[1] = dest;
2381 operands[2] = GEN_INT (total_shift);
2382 shift_gen_fun (ASHIFT, operands);
2384 break;
2386 else
2388 int neg = 0;
2389 if (kind != 4 && total_shift < 16)
2391 neg = -ext_shift_amounts[total_shift][1];
2392 if (neg > 0)
2393 neg -= ext_shift_amounts[total_shift][2];
2394 else
2395 neg = 0;
2397 emit_insn (gen_and_shl_scratch (dest, source,
2398 GEN_INT (right),
2399 GEN_INT (mask),
2400 GEN_INT (total_shift + neg),
2401 GEN_INT (neg)));
2402 emit_insn (gen_movsi (dest, dest));
2403 break;
2406 return 0;
2409 /* Try to find a good way to implement the combiner pattern
2410 [(set (match_operand:SI 0 "register_operand" "=r")
2411 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2412 (match_operand:SI 2 "const_int_operand" "n")
2413 (match_operand:SI 3 "const_int_operand" "n")
2414 (const_int 0)))
2415 (clobber (reg:SI T_REG))]
2416 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2417 return 0 for simple left / right shift combination.
2418 return 1 for left shift / 8 bit sign extend / left shift.
2419 return 2 for left shift / 16 bit sign extend / left shift.
2420 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2421 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2422 return 5 for left shift / 16 bit sign extend / right shift
2423 return 6 for < 8 bit sign extend / left shift.
2424 return 7 for < 8 bit sign extend / left shift / single right shift.
2425 If COSTP is nonzero, assign the calculated cost to *COSTP. */
2428 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
2430 int left, size, insize, ext;
2431 int cost = 0, best_cost;
2432 int kind;
2434 left = INTVAL (left_rtx);
2435 size = INTVAL (size_rtx);
2436 insize = size - left;
2437 if (insize <= 0)
2438 abort ();
2439 /* Default to left / right shift. */
2440 kind = 0;
2441 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
2442 if (size <= 16)
2444 /* 16 bit shift / sign extend / 16 bit shift */
2445 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2446 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2447 below, by alternative 3 or something even better. */
2448 if (cost < best_cost)
2450 kind = 5;
2451 best_cost = cost;
2454 /* Try a plain sign extend between two shifts. */
2455 for (ext = 16; ext >= insize; ext -= 8)
2457 if (ext <= size)
2459 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2460 if (cost < best_cost)
2462 kind = ext / (unsigned) 8;
2463 best_cost = cost;
2466 /* Check if we can do a sloppy shift with a final signed shift
2467 restoring the sign. */
2468 if (EXT_SHIFT_SIGNED (size - ext))
2469 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2470 /* If not, maybe it's still cheaper to do the second shift sloppy,
2471 and do a final sign extend? */
2472 else if (size <= 16)
2473 cost = ext_shift_insns[ext - insize] + 1
2474 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2475 else
2476 continue;
2477 if (cost < best_cost)
2479 kind = ext / (unsigned) 8 + 2;
2480 best_cost = cost;
2483 /* Check if we can sign extend in r0 */
2484 if (insize < 8)
2486 cost = 3 + shift_insns[left];
2487 if (cost < best_cost)
2489 kind = 6;
2490 best_cost = cost;
2492 /* Try the same with a final signed shift. */
2493 if (left < 31)
2495 cost = 3 + ext_shift_insns[left + 1] + 1;
2496 if (cost < best_cost)
2498 kind = 7;
2499 best_cost = cost;
2503 if (TARGET_SH3)
2505 /* Try to use a dynamic shift. */
2506 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2507 if (cost < best_cost)
2509 kind = 0;
2510 best_cost = cost;
2513 if (costp)
2514 *costp = cost;
2515 return kind;
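/* Worked example (illustrative only): a 12 bit sign_extract of a value that
   was shifted left by 4 gives insize == 8, so kind 1 applies: exts.b
   followed by a left shift of 4, which is normally cheaper than the default
   pair of a left shift by 24 and an arithmetic right shift by 20 (kind 0).  */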
2518 /* Function to be used in the length attribute of the instructions
2519 implementing this pattern. */
2522 shl_sext_length (rtx insn)
2524 rtx set_src, left_rtx, size_rtx;
2525 int cost;
2527 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2528 left_rtx = XEXP (XEXP (set_src, 0), 1);
2529 size_rtx = XEXP (set_src, 1);
2530 shl_sext_kind (left_rtx, size_rtx, &cost);
2531 return cost;
2534 /* Generate rtl for this pattern */
2537 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
2539 int kind;
2540 int left, size, insize, cost;
2541 rtx operands[3];
2543 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2544 left = INTVAL (left_rtx);
2545 size = INTVAL (size_rtx);
2546 insize = size - left;
2547 switch (kind)
2549 case 1:
2550 case 2:
2551 case 3:
2552 case 4:
2554 int ext = kind & 1 ? 8 : 16;
2555 int shift2 = size - ext;
2557 /* Don't expand fine-grained when combining, because that will
2558 make the pattern fail. */
2559 if (! rtx_equal_function_value_matters
2560 && ! reload_in_progress && ! reload_completed)
2562 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2563 emit_insn (gen_movsi (dest, source));
2564 break;
2566 if (dest != source)
2567 emit_insn (gen_movsi (dest, source));
2568 operands[0] = dest;
2569 if (ext - insize)
2571 operands[2] = GEN_INT (ext - insize);
2572 gen_shifty_hi_op (ASHIFT, operands);
2574 emit_insn (kind & 1
2575 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2576 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2577 if (kind <= 2)
2579 if (shift2)
2581 operands[2] = GEN_INT (shift2);
2582 gen_shifty_op (ASHIFT, operands);
2585 else
2587 if (shift2 > 0)
2589 if (EXT_SHIFT_SIGNED (shift2))
2591 operands[2] = GEN_INT (shift2 + 1);
2592 gen_shifty_op (ASHIFT, operands);
2593 operands[2] = const1_rtx;
2594 gen_shifty_op (ASHIFTRT, operands);
2595 break;
2597 operands[2] = GEN_INT (shift2);
2598 gen_shifty_hi_op (ASHIFT, operands);
2600 else if (shift2)
2602 operands[2] = GEN_INT (-shift2);
2603 gen_shifty_hi_op (LSHIFTRT, operands);
2605 emit_insn (size <= 8
2606 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2607 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2609 break;
2611 case 5:
2613 int i = 16 - size;
2614 if (! rtx_equal_function_value_matters
2615 && ! reload_in_progress && ! reload_completed)
2616 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2617 else
2619 operands[0] = dest;
2620 operands[2] = GEN_INT (16 - insize);
2621 gen_shifty_hi_op (ASHIFT, operands);
2622 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2624 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2625 while (--i >= 0)
2626 gen_ashift (ASHIFTRT, 1, dest);
2627 break;
2629 case 6:
2630 case 7:
2631 /* Don't expand fine-grained when combining, because that will
2632 make the pattern fail. */
2633 if (! rtx_equal_function_value_matters
2634 && ! reload_in_progress && ! reload_completed)
2636 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2637 emit_insn (gen_movsi (dest, source));
2638 break;
2640 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2641 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2642 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2643 operands[0] = dest;
2644 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2645 gen_shifty_op (ASHIFT, operands);
2646 if (kind == 7)
2647 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
2648 break;
2649 default:
2650 return -1;
2652 return 0;
2655 /* Prefix a symbol_ref name with "datalabel". */
2658 gen_datalabel_ref (rtx sym)
2660 if (GET_CODE (sym) == LABEL_REF)
2661 return gen_rtx_CONST (GET_MODE (sym),
2662 gen_rtx_UNSPEC (GET_MODE (sym),
2663 gen_rtvec (1, sym),
2664 UNSPEC_DATALABEL));
2666 if (GET_CODE (sym) != SYMBOL_REF)
2667 abort ();
2669 return sym;
2673 /* The SH cannot load a large constant into a register; constants have to
2674 come from a pc relative load. The reference of a pc relative load
2675 instruction must be less than 1k in front of the instruction. This
2676 means that we often have to dump a constant inside a function, and
2677 generate code to branch around it.
2679 It is important to minimize this, since the branches will slow things
2680 down and make things bigger.
2682 Worst case code looks like:
2684 mov.l L1,rn
2685 bra L2
2687 align
2688 L1: .long value
2692 mov.l L3,rn
2693 bra L4
2695 align
2696 L3: .long value
2700 We fix this by performing a scan before scheduling, which notices which
2701 instructions need to have their operands fetched from the constant table
2702 and builds the table.
2704 The algorithm is:
2706 Scan to find an instruction which needs a pcrel move. Look forward, find
2707 the last barrier which is within MAX_COUNT bytes of the requirement.
2708 If there isn't one, make one. Process all the instructions between
2709 the found instruction and the barrier.
2711 In the above example, we can tell that L3 is within 1k of L1, so
2712 the first move can be shrunk from the 3 insn+constant sequence into
2713 just 1 insn, and the constant moved to L3 to make:
2715 mov.l L1,rn
2717 mov.l L3,rn
2718 bra L4
2720 align
2721 L3:.long value
2722 L4:.long value
2724 Then the second move becomes the target for the shortening process. */
2726 typedef struct
2728 rtx value; /* Value in table. */
2729 rtx label; /* Label of value. */
2730 rtx wend; /* End of window. */
2731 enum machine_mode mode; /* Mode of value. */
2733 /* True if this constant is accessed as part of a post-increment
2734 sequence. Note that HImode constants are never accessed in this way. */
2735 bool part_of_sequence_p;
2736 } pool_node;
2738 /* The maximum number of constants that can fit into one pool, since
2739 the pc relative range is 0...1020 bytes and constants are at least 4
2740 bytes long. */
2742 #define MAX_POOL_SIZE (1020/4)
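/* That is, 1020 bytes of pc-relative reach divided by 4 byte entries gives
   room for 255 pool entries.  */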
2743 static pool_node pool_vector[MAX_POOL_SIZE];
2744 static int pool_size;
2745 static rtx pool_window_label;
2746 static int pool_window_last;
2748 /* ??? If we need a constant in HImode which is the truncated value of a
2749 constant we need in SImode, we could combine the two entries thus saving
2750 two bytes. Is this common enough to be worth the effort of implementing
2751 it? */
2753 /* ??? This stuff should be done at the same time that we shorten branches.
2754 As it is now, we must assume that all branches are the maximum size, and
2755 this causes us to almost always output constant pools sooner than
2756 necessary. */
2758 /* Add a constant to the pool and return its label. */
2760 static rtx
2761 add_constant (rtx x, enum machine_mode mode, rtx last_value)
2763 int i;
2764 rtx lab, new, ref, newref;
2766 /* First see if we've already got it. */
2767 for (i = 0; i < pool_size; i++)
2769 if (x->code == pool_vector[i].value->code
2770 && mode == pool_vector[i].mode)
2772 if (x->code == CODE_LABEL)
2774 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
2775 continue;
2777 if (rtx_equal_p (x, pool_vector[i].value))
2779 lab = new = 0;
2780 if (! last_value
2781 || ! i
2782 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
2784 new = gen_label_rtx ();
2785 LABEL_REFS (new) = pool_vector[i].label;
2786 pool_vector[i].label = lab = new;
2788 if (lab && pool_window_label)
2790 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2791 ref = pool_vector[pool_window_last].wend;
2792 LABEL_NEXTREF (newref) = ref;
2793 pool_vector[pool_window_last].wend = newref;
2795 if (new)
2796 pool_window_label = new;
2797 pool_window_last = i;
2798 return lab;
2803 /* Need a new one. */
2804 pool_vector[pool_size].value = x;
2805 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
2807 lab = 0;
2808 pool_vector[pool_size - 1].part_of_sequence_p = true;
2810 else
2811 lab = gen_label_rtx ();
2812 pool_vector[pool_size].mode = mode;
2813 pool_vector[pool_size].label = lab;
2814 pool_vector[pool_size].wend = NULL_RTX;
2815 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
2816 if (lab && pool_window_label)
2818 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2819 ref = pool_vector[pool_window_last].wend;
2820 LABEL_NEXTREF (newref) = ref;
2821 pool_vector[pool_window_last].wend = newref;
2823 if (lab)
2824 pool_window_label = lab;
2825 pool_window_last = pool_size;
2826 pool_size++;
2827 return lab;
2830 /* Output the literal table. START, if nonzero, is the first instruction
2831 this table is needed for, and also indicates that there is at least one
2832 casesi_worker_2 instruction; we have to emit the operand3 labels from
2833 these insns at a 4-byte aligned position. BARRIER is the barrier
2834 after which we are to place the table. */
2836 static void
2837 dump_table (rtx start, rtx barrier)
2839 rtx scan = barrier;
2840 int i;
2841 int need_align = 1;
2842 rtx lab, ref;
2843 int have_df = 0;
2845 /* Do two passes; on the first pass, dump out the HI sized constants. */
2847 for (i = 0; i < pool_size; i++)
2849 pool_node *p = &pool_vector[i];
2851 if (p->mode == HImode)
2853 if (need_align)
2855 scan = emit_insn_after (gen_align_2 (), scan);
2856 need_align = 0;
2858 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2859 scan = emit_label_after (lab, scan);
2860 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
2861 scan);
2862 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2864 lab = XEXP (ref, 0);
2865 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2868 else if (p->mode == DFmode)
2869 have_df = 1;
2872 need_align = 1;
2874 if (start)
2876 scan = emit_insn_after (gen_align_4 (), scan);
2877 need_align = 0;
2878 for (; start != barrier; start = NEXT_INSN (start))
2879 if (GET_CODE (start) == INSN
2880 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
2882 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
2883 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
2885 scan = emit_label_after (lab, scan);
2888 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
2890 rtx align_insn = NULL_RTX;
2892 scan = emit_label_after (gen_label_rtx (), scan);
2893 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2894 need_align = 0;
2896 for (i = 0; i < pool_size; i++)
2898 pool_node *p = &pool_vector[i];
2900 switch (p->mode)
2902 case HImode:
2903 break;
2904 case SImode:
2905 case SFmode:
2906 if (align_insn && !p->part_of_sequence_p)
2908 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2909 emit_label_before (lab, align_insn);
2910 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
2911 align_insn);
2912 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2914 lab = XEXP (ref, 0);
2915 emit_insn_before (gen_consttable_window_end (lab),
2916 align_insn);
2918 delete_insn (align_insn);
2919 align_insn = NULL_RTX;
2920 continue;
2922 else
2924 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2925 scan = emit_label_after (lab, scan);
2926 scan = emit_insn_after (gen_consttable_4 (p->value,
2927 const0_rtx), scan);
2928 need_align = ! need_align;
2930 break;
2931 case DFmode:
2932 if (need_align)
2934 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2935 align_insn = scan;
2936 need_align = 0;
2938 case DImode:
2939 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2940 scan = emit_label_after (lab, scan);
2941 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2942 scan);
2943 break;
2944 default:
2945 abort ();
2946 break;
2949 if (p->mode != HImode)
2951 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2953 lab = XEXP (ref, 0);
2954 scan = emit_insn_after (gen_consttable_window_end (lab),
2955 scan);
2960 pool_size = 0;
2963 for (i = 0; i < pool_size; i++)
2965 pool_node *p = &pool_vector[i];
2967 switch (p->mode)
2969 case HImode:
2970 break;
2971 case SImode:
2972 case SFmode:
2973 if (need_align)
2975 need_align = 0;
2976 scan = emit_label_after (gen_label_rtx (), scan);
2977 scan = emit_insn_after (gen_align_4 (), scan);
2979 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2980 scan = emit_label_after (lab, scan);
2981 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
2982 scan);
2983 break;
2984 case DFmode:
2985 case DImode:
2986 if (need_align)
2988 need_align = 0;
2989 scan = emit_label_after (gen_label_rtx (), scan);
2990 scan = emit_insn_after (gen_align_4 (), scan);
2992 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2993 scan = emit_label_after (lab, scan);
2994 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2995 scan);
2996 break;
2997 default:
2998 abort ();
2999 break;
3002 if (p->mode != HImode)
3004 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3006 lab = XEXP (ref, 0);
3007 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3012 scan = emit_insn_after (gen_consttable_end (), scan);
3013 scan = emit_barrier_after (scan);
3014 pool_size = 0;
3015 pool_window_label = NULL_RTX;
3016 pool_window_last = 0;
3019 /* Return nonzero if constant would be an ok source for a
3020 mov.w instead of a mov.l. */
3022 static int
3023 hi_const (rtx src)
3025 return (GET_CODE (src) == CONST_INT
3026 && INTVAL (src) >= -32768
3027 && INTVAL (src) <= 32767);
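/* For example (illustrative only): 1000 and -5 fit in 16 bits and can be
   fetched with a sign-extending mov.w from a 2 byte pool entry, whereas
   0x12345 needs a 4 byte entry and a mov.l.  */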
3030 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3032 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
3033 CONST_DOUBLE input value is CONST_OK_FOR_I08. For an SFmode move, we don't
3034 need to fix it if the input value is CONST_OK_FOR_I08. */
3036 static int
3037 broken_move (rtx insn)
3039 if (GET_CODE (insn) == INSN)
3041 rtx pat = PATTERN (insn);
3042 if (GET_CODE (pat) == PARALLEL)
3043 pat = XVECEXP (pat, 0, 0);
3044 if (GET_CODE (pat) == SET
3045 /* We can load any 8 bit value if we don't care what the high
3046 order bits end up as. */
3047 && GET_MODE (SET_DEST (pat)) != QImode
3048 && (CONSTANT_P (SET_SRC (pat))
3049 /* Match mova_const. */
3050 || (GET_CODE (SET_SRC (pat)) == UNSPEC
3051 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3052 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
3053 && ! (TARGET_SH2E
3054 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3055 && (fp_zero_operand (SET_SRC (pat))
3056 || fp_one_operand (SET_SRC (pat)))
3057 /* ??? If this is a -m4 or -m4-single compilation, in general
3058 we don't know the current setting of fpscr, so disable fldi.
3059 There is an exception if this was a register-register move
3060 before reload - and hence it was ascertained that we have
3061 single precision setting - and in a post-reload optimization
3062 we changed this to do a constant load. In that case
3063 we don't have an r0 clobber, hence we must use fldi. */
3064 && (! TARGET_SH4 || TARGET_FMOVD
3065 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3066 == SCRATCH))
3067 && GET_CODE (SET_DEST (pat)) == REG
3068 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3069 && ! (TARGET_SH2A
3070 && GET_MODE (SET_DEST (pat)) == SImode
3071 && GET_CODE (SET_SRC (pat)) == CONST_INT
3072 && CONST_OK_FOR_I20 (INTVAL (SET_SRC (pat))))
3073 && (GET_CODE (SET_SRC (pat)) != CONST_INT
3074 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
3075 return 1;
3078 return 0;
3081 static int
3082 mova_p (rtx insn)
3084 return (GET_CODE (insn) == INSN
3085 && GET_CODE (PATTERN (insn)) == SET
3086 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3087 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3088 /* Don't match mova_const. */
3089 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
3092 /* Fix up a mova from a switch that went out of range. */
3093 static void
3094 fixup_mova (rtx mova)
3096 if (! flag_pic)
3098 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3099 INSN_CODE (mova) = -1;
3101 else
3103 rtx worker = mova;
3104 rtx lab = gen_label_rtx ();
3105 rtx wpat, wpat0, wpat1, wsrc, diff;
3109 worker = NEXT_INSN (worker);
3110 if (! worker
3111 || GET_CODE (worker) == CODE_LABEL
3112 || GET_CODE (worker) == JUMP_INSN)
3113 abort ();
3114 } while (recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3115 wpat = PATTERN (worker);
3116 wpat0 = XVECEXP (wpat, 0, 0);
3117 wpat1 = XVECEXP (wpat, 0, 1);
3118 wsrc = SET_SRC (wpat0);
3119 PATTERN (worker) = (gen_casesi_worker_2
3120 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3121 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3122 XEXP (wpat1, 0)));
3123 INSN_CODE (worker) = -1;
3124 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3125 gen_rtx_LABEL_REF (Pmode, lab));
3126 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3127 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3128 INSN_CODE (mova) = -1;
3132 /* Find the last barrier from insn FROM which is close enough to hold the
3133 constant pool. If we can't find one, then create one near the end of
3134 the range. */
3136 static rtx
3137 find_barrier (int num_mova, rtx mova, rtx from)
3139 int count_si = 0;
3140 int count_hi = 0;
3141 int found_hi = 0;
3142 int found_si = 0;
3143 int found_di = 0;
3144 int hi_align = 2;
3145 int si_align = 2;
3146 int leading_mova = num_mova;
3147 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3148 int si_limit;
3149 int hi_limit;
3151 /* For HImode: range is 510, add 4 because pc counts from address of
3152 second instruction after this one, subtract 2 for the jump instruction
3153 that we may need to emit before the table, subtract 2 for the instruction
3154 that fills the jump delay slot (in very rare cases, reorg will take an
3155 instruction from after the constant pool or will leave the delay slot
3156 empty). This gives 510.
3157 For SImode: range is 1020, add 4 because pc counts from address of
3158 second instruction after this one, subtract 2 in case pc is 2 byte
3159 aligned, subtract 2 for the jump instruction that we may need to emit
3160 before the table, subtract 2 for the instruction that fills the jump
3161 delay slot. This gives 1018. */
3163 /* The branch will always be shortened now that the reference address for
3164 forward branches is the successor address, thus we need no longer make
3165 adjustments to the [sh]i_limit for -O0. */
3167 si_limit = 1018;
3168 hi_limit = 510;
3170 while (from && count_si < si_limit && count_hi < hi_limit)
3172 int inc = get_attr_length (from);
3173 int new_align = 1;
3175 if (GET_CODE (from) == CODE_LABEL)
3177 if (optimize)
3178 new_align = 1 << label_to_alignment (from);
3179 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3180 new_align = 1 << barrier_align (from);
3181 else
3182 new_align = 1;
3183 inc = 0;
3186 if (GET_CODE (from) == BARRIER)
3189 found_barrier = from;
3191 /* If we are at the end of the function, or in front of an alignment
3192 instruction, we need not insert an extra alignment. We prefer
3193 this kind of barrier. */
3194 if (barrier_align (from) > 2)
3195 good_barrier = from;
3198 if (broken_move (from))
3200 rtx pat, src, dst;
3201 enum machine_mode mode;
3203 pat = PATTERN (from);
3204 if (GET_CODE (pat) == PARALLEL)
3205 pat = XVECEXP (pat, 0, 0);
3206 src = SET_SRC (pat);
3207 dst = SET_DEST (pat);
3208 mode = GET_MODE (dst);
3210 /* We must explicitly check the mode, because sometimes the
3211 front end will generate code to load unsigned constants into
3212 HImode targets without properly sign extending them. */
3213 if (mode == HImode
3214 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3216 found_hi += 2;
3217 /* We put the short constants before the long constants, so
3218 we must count the length of short constants in the range
3219 for the long constants. */
3220 /* ??? This isn't optimal, but is easy to do. */
3221 si_limit -= 2;
3223 else
3225 /* We dump DF/DI constants before SF/SI ones, because
3226 the limit is the same, but the alignment requirements
3227 are higher. We may waste up to 4 additional bytes
3228 for alignment, and the DF/DI constant may have
3229 another SF/SI constant placed before it. */
3230 if (TARGET_SHCOMPACT
3231 && ! found_di
3232 && (mode == DFmode || mode == DImode))
3234 found_di = 1;
3235 si_limit -= 8;
3237 while (si_align > 2 && found_si + si_align - 2 > count_si)
3238 si_align >>= 1;
3239 if (found_si > count_si)
3240 count_si = found_si;
3241 found_si += GET_MODE_SIZE (mode);
3242 if (num_mova)
3243 si_limit -= GET_MODE_SIZE (mode);
3246 /* See the code in machine_dependent_reorg, which has a similar if
3247 statement that generates a new mova insn in many cases. */
3248 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
3249 inc += 2;
3252 if (mova_p (from))
3254 if (! num_mova++)
3256 leading_mova = 0;
3257 mova = from;
3258 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
3260 if (found_si > count_si)
3261 count_si = found_si;
3263 else if (GET_CODE (from) == JUMP_INSN
3264 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3265 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3267 if (num_mova)
3268 num_mova--;
3269 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3271 /* We have just passed the barrier in front of the
3272 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3273 the ADDR_DIFF_VEC is accessed as data, just like our pool
3274 constants, this is a good opportunity to accommodate what
3275 we have gathered so far.
3276 If we waited any longer, we could end up at a barrier in
3277 front of code, which gives worse cache usage for separated
3278 instruction / data caches. */
3279 good_barrier = found_barrier;
3280 break;
3282 else
3284 rtx body = PATTERN (from);
3285 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
3288 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3289 else if (GET_CODE (from) == JUMP_INSN
3290 && ! TARGET_SH2
3291 && ! TARGET_SMALLCODE)
3292 new_align = 4;
3294 if (found_si)
3296 count_si += inc;
3297 if (new_align > si_align)
3299 si_limit -= (count_si - 1) & (new_align - si_align);
3300 si_align = new_align;
3302 count_si = (count_si + new_align - 1) & -new_align;
3304 if (found_hi)
3306 count_hi += inc;
3307 if (new_align > hi_align)
3309 hi_limit -= (count_hi - 1) & (new_align - hi_align);
3310 hi_align = new_align;
3312 count_hi = (count_hi + new_align - 1) & -new_align;
3314 from = NEXT_INSN (from);
3317 if (num_mova)
3319 if (leading_mova)
3321 /* Try as we might, the leading mova is out of range. Change
3322 it into a load (which will become a pcload) and retry. */
3323 fixup_mova (mova);
3324 return find_barrier (0, 0, mova);
3326 else
3328 /* Insert the constant pool table before the mova instruction,
3329 to prevent the mova label reference from going out of range. */
3330 from = mova;
3331 good_barrier = found_barrier = barrier_before_mova;
3335 if (found_barrier)
3337 if (good_barrier && next_real_insn (found_barrier))
3338 found_barrier = good_barrier;
3340 else
3342 /* We didn't find a barrier in time to dump our stuff,
3343 so we'll make one. */
3344 rtx label = gen_label_rtx ();
3346 /* If we exceeded the range, then we must back up over the last
3347 instruction we looked at. Otherwise, we just need to undo the
3348 NEXT_INSN at the end of the loop. */
3349 if (count_hi > hi_limit || count_si > si_limit)
3350 from = PREV_INSN (PREV_INSN (from));
3351 else
3352 from = PREV_INSN (from);
3354 /* Walk back to be just before any jump or label.
3355 Putting it before a label reduces the number of times the branch
3356 around the constant pool table will be hit. Putting it before
3357 a jump makes it more likely that the bra delay slot will be
3358 filled. */
3359 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
3360 || GET_CODE (from) == CODE_LABEL)
3361 from = PREV_INSN (from);
3363 from = emit_jump_insn_after (gen_jump (label), from);
3364 JUMP_LABEL (from) = label;
3365 LABEL_NUSES (label) = 1;
3366 found_barrier = emit_barrier_after (from);
3367 emit_label_after (label, found_barrier);
3370 return found_barrier;
3373 /* If the instruction INSN is implemented by a special function, and we can
3374 positively find the register that is used to call the sfunc, and this
3375 register is not used anywhere else in this instruction - except as the
3376 destination of a set, return this register; else, return 0. */
3378 sfunc_uses_reg (rtx insn)
3380 int i;
3381 rtx pattern, part, reg_part, reg;
3383 if (GET_CODE (insn) != INSN)
3384 return 0;
3385 pattern = PATTERN (insn);
3386 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
3387 return 0;
3389 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3391 part = XVECEXP (pattern, 0, i);
3392 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
3393 reg_part = part;
3395 if (! reg_part)
3396 return 0;
3397 reg = XEXP (reg_part, 0);
3398 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
3400 part = XVECEXP (pattern, 0, i);
3401 if (part == reg_part || GET_CODE (part) == CLOBBER)
3402 continue;
3403 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
3404 && GET_CODE (SET_DEST (part)) == REG)
3405 ? SET_SRC (part) : part)))
3406 return 0;
3408 return reg;
3411 /* See if the only way in which INSN uses REG is by calling it, or by
3412 setting it while calling it. Set *SET to a SET rtx if the register
3413 is set by INSN. */
3415 static int
3416 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
3418 rtx pattern, reg2;
3420 *set = NULL_RTX;
3422 reg2 = sfunc_uses_reg (insn);
3423 if (reg2 && REGNO (reg2) == REGNO (reg))
3425 pattern = single_set (insn);
3426 if (pattern
3427 && GET_CODE (SET_DEST (pattern)) == REG
3428 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3429 *set = pattern;
3430 return 0;
3432 if (GET_CODE (insn) != CALL_INSN)
3434 /* We don't use rtx_equal_p because we don't care if the mode is
3435 different. */
3436 pattern = single_set (insn);
3437 if (pattern
3438 && GET_CODE (SET_DEST (pattern)) == REG
3439 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3441 rtx par, part;
3442 int i;
3444 *set = pattern;
3445 par = PATTERN (insn);
3446 if (GET_CODE (par) == PARALLEL)
3447 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
3449 part = XVECEXP (par, 0, i);
3450 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
3451 return 1;
3453 return reg_mentioned_p (reg, SET_SRC (pattern));
3456 return 1;
3459 pattern = PATTERN (insn);
3461 if (GET_CODE (pattern) == PARALLEL)
3463 int i;
3465 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3466 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
3467 return 1;
3468 pattern = XVECEXP (pattern, 0, 0);
3471 if (GET_CODE (pattern) == SET)
3473 if (reg_mentioned_p (reg, SET_DEST (pattern)))
3475 /* We don't use rtx_equal_p, because we don't care if the
3476 mode is different. */
3477 if (GET_CODE (SET_DEST (pattern)) != REG
3478 || REGNO (reg) != REGNO (SET_DEST (pattern)))
3479 return 1;
3481 *set = pattern;
3484 pattern = SET_SRC (pattern);
3487 if (GET_CODE (pattern) != CALL
3488 || GET_CODE (XEXP (pattern, 0)) != MEM
3489 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
3490 return 1;
3492 return 0;
3495 /* Given a X, a pattern of an insn or a part of it, return a mask of used
3496 general registers. Bits 0..15 mean that the respective registers
3497 are used as inputs in the instruction. Bits 16..31 mean that the
3498 registers 0..15, respectively, are used as outputs, or are clobbered.
3499 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
3501 regs_used (rtx x, int is_dest)
3503 enum rtx_code code;
3504 const char *fmt;
3505 int i, used = 0;
3507 if (! x)
3508 return used;
3509 code = GET_CODE (x);
3510 switch (code)
3512 case REG:
3513 if (REGNO (x) < 16)
3514 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3515 << (REGNO (x) + is_dest));
3516 return 0;
3517 case SUBREG:
3519 rtx y = SUBREG_REG (x);
3521 if (GET_CODE (y) != REG)
3522 break;
3523 if (REGNO (y) < 16)
3524 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3525 << (REGNO (y) +
3526 subreg_regno_offset (REGNO (y),
3527 GET_MODE (y),
3528 SUBREG_BYTE (x),
3529 GET_MODE (x)) + is_dest));
3530 return 0;
3532 case SET:
3533 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
3534 case RETURN:
3535 /* If there was a return value, it must have been indicated with USE. */
3536 return 0x00ffff00;
3537 case CLOBBER:
3538 is_dest = 1;
3539 break;
3540 case MEM:
3541 is_dest = 0;
3542 break;
3543 case CALL:
3544 used |= 0x00ff00f0;
3545 break;
3546 default:
3547 break;
3550 fmt = GET_RTX_FORMAT (code);
3552 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3554 if (fmt[i] == 'E')
3556 register int j;
3557 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3558 used |= regs_used (XVECEXP (x, i, j), is_dest);
3560 else if (fmt[i] == 'e')
3561 used |= regs_used (XEXP (x, i), is_dest);
3563 return used;
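/* Worked example (illustrative only): for the pattern of "mov r4,r1", i.e.
   (set (reg:SI 1) (reg:SI 4)), the result is (1 << 4) | (1 << 17); bit 4
   records r4 as an input and bit 17 (16 + 1) records r1 as an output.  The
   CALL case adds 0x00ff00f0, i.e. r4..r7 as inputs and r0..r7 as
   clobbered.  */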
3566 /* Create an instruction that prevents redirection of a conditional branch
3567 to the destination of the JUMP with address ADDR.
3568 If the branch needs to be implemented as an indirect jump, try to find
3569 a scratch register for it.
3570 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3571 If any preceding insn that doesn't fit into a delay slot is good enough,
3572 pass 1. Pass 2 if a definite blocking insn is needed.
3573 -1 is used internally to avoid deep recursion.
3574 If a blocking instruction is made or recognized, return it. */
3576 static rtx
3577 gen_block_redirect (rtx jump, int addr, int need_block)
3579 int dead = 0;
3580 rtx prev = prev_nonnote_insn (jump);
3581 rtx dest;
3583 /* First, check if we already have an instruction that satisfies our need. */
3584 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3586 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3587 return prev;
3588 if (GET_CODE (PATTERN (prev)) == USE
3589 || GET_CODE (PATTERN (prev)) == CLOBBER
3590 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3591 prev = jump;
3592 else if ((need_block &= ~1) < 0)
3593 return prev;
3594 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
3595 need_block = 0;
3597 if (GET_CODE (PATTERN (jump)) == RETURN)
3599 if (! need_block)
3600 return prev;
3601 /* Reorg even does nasty things with return insns that cause branches
3602 to go out of range - see find_end_label and callers. */
3603 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
3605 /* We can't use JUMP_LABEL here because it might be undefined
3606 when not optimizing. */
3607 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
3608 /* If the branch is out of range, try to find a scratch register for it. */
3609 if (optimize
3610 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3611 > 4092 + 4098))
3613 rtx scan;
3614 /* Don't look for the stack pointer as a scratch register;
3615 it would cause trouble if an interrupt occurred. */
3616 unsigned try = 0x7fff, used;
3617 int jump_left = flag_expensive_optimizations + 1;
3619 /* It is likely that the most recent eligible instruction is wanted for
3620 the delay slot. Therefore, find out which registers it uses, and
3621 try to avoid using them. */
3623 for (scan = jump; (scan = PREV_INSN (scan)); )
3625 enum rtx_code code;
3627 if (INSN_DELETED_P (scan))
3628 continue;
3629 code = GET_CODE (scan);
3630 if (code == CODE_LABEL || code == JUMP_INSN)
3631 break;
3632 if (code == INSN
3633 && GET_CODE (PATTERN (scan)) != USE
3634 && GET_CODE (PATTERN (scan)) != CLOBBER
3635 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
3637 try &= ~regs_used (PATTERN (scan), 0);
3638 break;
3641 for (used = dead = 0, scan = JUMP_LABEL (jump);
3642 (scan = NEXT_INSN (scan)); )
3644 enum rtx_code code;
3646 if (INSN_DELETED_P (scan))
3647 continue;
3648 code = GET_CODE (scan);
3649 if (INSN_P (scan))
3651 used |= regs_used (PATTERN (scan), 0);
3652 if (code == CALL_INSN)
3653 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
3654 dead |= (used >> 16) & ~used;
3655 if (dead & try)
3657 dead &= try;
3658 break;
3660 if (code == JUMP_INSN)
3662 if (jump_left-- && simplejump_p (scan))
3663 scan = JUMP_LABEL (scan);
3664 else
3665 break;
3669 /* Mask out the stack pointer again, in case it was
3670 the only 'free' register we have found. */
3671 dead &= 0x7fff;
3673 /* If the immediate destination is still in range, check for possible
3674 threading with a jump beyond the delay slot insn.
3675 Don't check if we are called recursively; in that case, the jump has been
3676 or will be checked in a different invocation. */
3678 else if (optimize && need_block >= 0)
3680 rtx next = next_active_insn (next_active_insn (dest));
3681 if (next && GET_CODE (next) == JUMP_INSN
3682 && GET_CODE (PATTERN (next)) == SET
3683 && recog_memoized (next) == CODE_FOR_jump_compact)
3685 dest = JUMP_LABEL (next);
3686 if (dest
3687 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3688 > 4092 + 4098))
3689 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
3693 if (dead)
3695 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
3697 /* It would be nice if we could convert the jump into an indirect
3698 jump / far branch right now, thus exposing all constituent
3699 instructions to further optimization. However, reorg uses
3700 simplejump_p to determine if there is an unconditional jump where
3701 it should try to schedule instructions from the target of the
3702 branch; simplejump_p fails for indirect jumps even if they have
3703 a JUMP_LABEL. */
3704 rtx insn = emit_insn_before (gen_indirect_jump_scratch
3705 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
3706 , jump);
3707 /* ??? We would like this to have the scope of the jump, but that
3708 scope will change when a delay slot insn of an inner scope is added.
3709 Hence, after delay slot scheduling, we'll have to expect
3710 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
3711 the jump. */
3713 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
3714 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
3715 return insn;
3717 else if (need_block)
3718 /* We can't use JUMP_LABEL here because it might be undefined
3719 when not optimizing. */
3720 return emit_insn_before (gen_block_branch_redirect
3721 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
3722 , jump);
3723 return prev;
3726 #define CONDJUMP_MIN -252
3727 #define CONDJUMP_MAX 262
3728 struct far_branch
3730 /* A label (to be placed) in front of the jump
3731 that jumps to our ultimate destination. */
3732 rtx near_label;
3733 /* Where we are going to insert it if we cannot move the jump any farther,
3734 or the jump itself if we have picked up an existing jump. */
3735 rtx insert_place;
3736 /* The ultimate destination. */
3737 rtx far_label;
3738 struct far_branch *prev;
3739 /* If the branch has already been created, its address;
3740 else the address of its first prospective user. */
3741 int address;
3744 static void gen_far_branch (struct far_branch *);
3745 enum mdep_reorg_phase_e mdep_reorg_phase;
3746 static void
3747 gen_far_branch (struct far_branch *bp)
3749 rtx insn = bp->insert_place;
3750 rtx jump;
3751 rtx label = gen_label_rtx ();
3753 emit_label_after (label, insn);
3754 if (bp->far_label)
3756 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
3757 LABEL_NUSES (bp->far_label)++;
3759 else
3760 jump = emit_jump_insn_after (gen_return (), insn);
3761 /* Emit a barrier so that reorg knows that any following instructions
3762 are not reachable via a fall-through path.
3763 But don't do this when not optimizing, since we wouldn't suppress the
3764 alignment for the barrier then, and could end up with out-of-range
3765 pc-relative loads. */
3766 if (optimize)
3767 emit_barrier_after (jump);
3768 emit_label_after (bp->near_label, insn);
3769 JUMP_LABEL (jump) = bp->far_label;
3770 if (! invert_jump (insn, label, 1))
3771 abort ();
3772 /* If we are branching around a jump (rather than a return), prevent
3773 reorg from using an insn from the jump target as the delay slot insn -
3774 when reorg did this, it pessimized code (we'd rather hide the delay slot)
3775 and it could cause branches to go out of range. */
3776 if (bp->far_label)
3777 (emit_insn_after
3778 (gen_stuff_delay_slot
3779 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
3780 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
3781 insn));
3782 /* Prevent reorg from undoing our splits. */
3783 gen_block_redirect (jump, bp->address += 2, 2);
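/* A sketch of the rewrite gen_far_branch performs (register and label names
   are made up): an out-of-range "bt .L_far" ends up as

	bf	.L_skip		! inverted condition, short range
   .L_near:
	bra	.L_far		! unconditional branch, longer range
	 nop
   .L_skip:

   (or an rts instead of the bra when the ultimate destination is a return);
   .L_near is the near_label that other out-of-range branches to the same
   destination can be redirected to.  */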
3786 /* Fix up ADDR_DIFF_VECs. */
3787 void
3788 fixup_addr_diff_vecs (rtx first)
3790 rtx insn;
3792 for (insn = first; insn; insn = NEXT_INSN (insn))
3794 rtx vec_lab, pat, prev, prevpat, x, braf_label;
3796 if (GET_CODE (insn) != JUMP_INSN
3797 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
3798 continue;
3799 pat = PATTERN (insn);
3800 vec_lab = XEXP (XEXP (pat, 0), 0);
3802 /* Search the matching casesi_jump_2. */
3803 for (prev = vec_lab; ; prev = PREV_INSN (prev))
3805 if (GET_CODE (prev) != JUMP_INSN)
3806 continue;
3807 prevpat = PATTERN (prev);
3808 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
3809 continue;
3810 x = XVECEXP (prevpat, 0, 1);
3811 if (GET_CODE (x) != USE)
3812 continue;
3813 x = XEXP (x, 0);
3814 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
3815 break;
3817 /* FIXME: This is a bug in the optimizer, but it seems harmless
3818 to just avoid panicking. */
3819 if (!prev)
3820 continue;
3822 /* Emit the reference label of the braf where it belongs, right after
3823 the casesi_jump_2 (i.e. braf). */
3824 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
3825 emit_label_after (braf_label, prev);
3827 /* Fix up the ADDR_DIFF_VEC to be relative
3828 to the reference address of the braf. */
3829 XEXP (XEXP (pat, 0), 0) = braf_label;
3833 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
3834 a barrier. Return the base 2 logarithm of the desired alignment. */
3836 barrier_align (rtx barrier_or_label)
3838 rtx next = next_real_insn (barrier_or_label), pat, prev;
3839 int slot, credit, jump_to_next = 0;
3841 if (! next)
3842 return 0;
3844 pat = PATTERN (next);
3846 if (GET_CODE (pat) == ADDR_DIFF_VEC)
3847 return 2;
3849 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
3850 /* This is a barrier in front of a constant table. */
3851 return 0;
3853 prev = prev_real_insn (barrier_or_label);
3854 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
3856 pat = PATTERN (prev);
3857 /* If this is a very small table, we want to keep the alignment after
3858 the table to the minimum for proper code alignment. */
3859 return ((TARGET_SMALLCODE
3860 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
3861 <= (unsigned) 1 << (CACHE_LOG - 2)))
3862 ? 1 << TARGET_SHMEDIA : align_jumps_log);
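/* Editorial note (illustrative; assumes CACHE_LOG is 5, i.e. a 32-byte
   cache line): XVECLEN (pat, 1) is the number of table entries and
   GET_MODE_SIZE (GET_MODE (pat)) the size of each entry (2 for HImode,
   4 for SImode), so a table of at most 1 << (5 - 2) = 8 bytes - e.g.
   four HImode entries - gets only the minimal alignment.  The value
   returned is a log2: 1 << TARGET_SHMEDIA is 1 (2-byte alignment) for
   SH1-4 and 2 (4-byte alignment) for SHmedia.  */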
3865 if (TARGET_SMALLCODE)
3866 return 0;
3868 if (! TARGET_SH2 || ! optimize)
3869 return align_jumps_log;
3871 /* When fixing up pcloads, a constant table might be inserted just before
3872 the basic block that ends with the barrier. Thus, we can't trust the
3873 instruction lengths before that. */
3874 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
3876 /* Check if there is an immediately preceding branch to the insn beyond
3877 the barrier. We must weigh the cost of discarding useful information
3878 from the current cache line when executing this branch and there is
3879 an alignment, against that of fetching unneeded insns in front of the
3880 branch target when there is no alignment. */
3882 /* There are two delay_slot cases to consider. One is the simple case
3883 where the preceding branch is to the insn beyond the barrier (simple
3884 delay slot filling), and the other is where the preceding branch has
3885 a delay slot that is a duplicate of the insn after the barrier
3886 (fill_eager_delay_slots) and the branch is to the insn after the insn
3887 after the barrier. */
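/* Editorial sketch of the decision made below, with a hypothetical
   CACHE_LOG of 5: the loop walks backwards from the barrier with a byte
   budget of (1 << (CACHE_LOG - 2)) + 2 = 10, subtracting each insn's
   length; if the preceding branch (directly, or via a delay slot filled
   with the insn at its target) effectively jumps to the insn just past
   the barrier and the budget has not run out, barrier_align returns 0,
   i.e. the fall-through bytes already fetched are considered cheaper
   than emitting alignment padding.  */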
3889 /* PREV is presumed to be the JUMP_INSN for the barrier under
3890 investigation. Skip to the insn before it. */
3891 prev = prev_real_insn (prev);
3893 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
3894 credit >= 0 && prev && GET_CODE (prev) == INSN;
3895 prev = prev_real_insn (prev))
3897 jump_to_next = 0;
3898 if (GET_CODE (PATTERN (prev)) == USE
3899 || GET_CODE (PATTERN (prev)) == CLOBBER)
3900 continue;
3901 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
3903 prev = XVECEXP (PATTERN (prev), 0, 1);
3904 if (INSN_UID (prev) == INSN_UID (next))
3906 /* Delay slot was filled with insn at jump target. */
3907 jump_to_next = 1;
3908 continue;
3912 if (slot &&
3913 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3914 slot = 0;
3915 credit -= get_attr_length (prev);
3917 if (prev
3918 && GET_CODE (prev) == JUMP_INSN
3919 && JUMP_LABEL (prev))
3921 rtx x;
3922 if (jump_to_next
3923 || next_real_insn (JUMP_LABEL (prev)) == next
3924 /* If relax_delay_slots() decides NEXT was redundant
3925 with some previous instruction, it will have
3926 redirected PREV's jump to the following insn. */
3927 || JUMP_LABEL (prev) == next_nonnote_insn (next)
3928 /* There is no upper bound on redundant instructions
3929 that might have been skipped, but we must not put an
3930 alignment where none had been before. */
3931 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
3932 (INSN_P (x)
3933 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
3934 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
3935 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
3937 rtx pat = PATTERN (prev);
3938 if (GET_CODE (pat) == PARALLEL)
3939 pat = XVECEXP (pat, 0, 0);
3940 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
3941 return 0;
3946 return align_jumps_log;
3949 /* If we are inside a phony loop, almost any kind of label can turn up as the
3950 first one in the loop. Aligning a braf label causes incorrect switch
3951 destination addresses; we can detect braf labels because they are
3952 followed by a BARRIER.
3953 Applying loop alignment to small constant or switch tables is a waste
3954 of space, so we suppress this too. */
3956 sh_loop_align (rtx label)
3958 rtx next = label;
3961 next = next_nonnote_insn (next);
3962 while (next && GET_CODE (next) == CODE_LABEL);
3964 if (! next
3965 || ! INSN_P (next)
3966 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
3967 || recog_memoized (next) == CODE_FOR_consttable_2)
3968 return 0;
3970 return align_loops_log;
3973 /* Do a final pass over the function, just before delayed branch
3974 scheduling. */
3976 static void
3977 sh_reorg (void)
3979 rtx first, insn, mova = NULL_RTX;
3980 int num_mova;
3981 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
3982 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
3984 first = get_insns ();
3986 /* We must split call insns before introducing `mova's. If we're
3987 optimizing, they'll have already been split. Otherwise, make
3988 sure we don't split them too late. */
3989 if (! optimize)
3990 split_all_insns_noflow ();
3992 if (TARGET_SHMEDIA)
3993 return;
3995 /* If relaxing, generate pseudo-ops to associate function calls with
3996 the symbols they call. It does no harm to not generate these
3997 pseudo-ops. However, when we can generate them, it enables the
3998 linker to potentially relax the jsr to a bsr, and eliminate the
3999 register load and, possibly, the constant pool entry. */
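/* Editorial illustration - a rough sketch with hypothetical labels, not
   verbatim compiler output: with -mrelax, final_prescan_insn (below)
   prints an internal label in front of the insn that loads the callee's
   address and a .uses directive in front of each call through that
   register, roughly

       .L42:
            mov.l   .LC0,r1     ! load address of the callee
            ...
            .uses   .L42
            jsr     @r1
            nop

   so the linker can see which load feeds the jsr, relax it to a bsr,
   and drop the load and possibly the constant pool entry.  */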
4001 mdep_reorg_phase = SH_INSERT_USES_LABELS;
4002 if (TARGET_RELAX)
4004 /* Remove all REG_LABEL notes. We want to use them for our own
4005 purposes. This works because none of the remaining passes
4006 need to look at them.
4008 ??? But it may break in the future. We should use a machine
4009 dependent REG_NOTE, or some other approach entirely. */
4010 for (insn = first; insn; insn = NEXT_INSN (insn))
4012 if (INSN_P (insn))
4014 rtx note;
4016 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
4017 remove_note (insn, note);
4021 for (insn = first; insn; insn = NEXT_INSN (insn))
4023 rtx pattern, reg, link, set, scan, dies, label;
4024 int rescan = 0, foundinsn = 0;
4026 if (GET_CODE (insn) == CALL_INSN)
4028 pattern = PATTERN (insn);
4030 if (GET_CODE (pattern) == PARALLEL)
4031 pattern = XVECEXP (pattern, 0, 0);
4032 if (GET_CODE (pattern) == SET)
4033 pattern = SET_SRC (pattern);
4035 if (GET_CODE (pattern) != CALL
4036 || GET_CODE (XEXP (pattern, 0)) != MEM)
4037 continue;
4039 reg = XEXP (XEXP (pattern, 0), 0);
4041 else
4043 reg = sfunc_uses_reg (insn);
4044 if (! reg)
4045 continue;
4048 if (GET_CODE (reg) != REG)
4049 continue;
4051 /* This is a function call via REG. If the only uses of REG
4052 between the time that it is set and the time that it dies
4053 are in function calls, then we can associate all the
4054 function calls with the setting of REG. */
4056 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
4058 if (REG_NOTE_KIND (link) != 0)
4059 continue;
4060 set = single_set (XEXP (link, 0));
4061 if (set && rtx_equal_p (reg, SET_DEST (set)))
4063 link = XEXP (link, 0);
4064 break;
4068 if (! link)
4070 /* ??? Sometimes global register allocation will have
4071 deleted the insn pointed to by LOG_LINKS. Try
4072 scanning backward to find where the register is set. */
4073 for (scan = PREV_INSN (insn);
4074 scan && GET_CODE (scan) != CODE_LABEL;
4075 scan = PREV_INSN (scan))
4077 if (! INSN_P (scan))
4078 continue;
4080 if (! reg_mentioned_p (reg, scan))
4081 continue;
4083 if (noncall_uses_reg (reg, scan, &set))
4084 break;
4086 if (set)
4088 link = scan;
4089 break;
4094 if (! link)
4095 continue;
4097 /* The register is set at LINK. */
4099 /* We can only optimize the function call if the register is
4100 being set to a symbol. In theory, we could sometimes
4101 optimize calls to a constant location, but the assembler
4102 and linker do not support that at present. */
4103 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4104 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4105 continue;
4107 /* Scan forward from LINK to the place where REG dies, and
4108 make sure that the only insns which use REG are
4109 themselves function calls. */
4111 /* ??? This doesn't work for call targets that were allocated
4112 by reload, since there may not be a REG_DEAD note for the
4113 register. */
4115 dies = NULL_RTX;
4116 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4118 rtx scanset;
4120 /* Don't try to trace forward past a CODE_LABEL if we haven't
4121 seen INSN yet. Ordinarily, we will only find the setting insn
4122 in LOG_LINKS if it is in the same basic block. However,
4123 cross-jumping can insert code labels in between the load and
4124 the call, and can result in situations where a single call
4125 insn may have two targets depending on where we came from. */
4127 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4128 break;
4130 if (! INSN_P (scan))
4131 continue;
4133 /* Don't try to trace forward past a JUMP. To optimize
4134 safely, we would have to check that all the
4135 instructions at the jump destination did not use REG. */
4137 if (GET_CODE (scan) == JUMP_INSN)
4138 break;
4140 if (! reg_mentioned_p (reg, scan))
4141 continue;
4143 if (noncall_uses_reg (reg, scan, &scanset))
4144 break;
4146 if (scan == insn)
4147 foundinsn = 1;
4149 if (scan != insn
4150 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4152 /* There is a function call to this register other
4153 than the one we are checking. If we optimize
4154 this call, we need to rescan again below. */
4155 rescan = 1;
4158 /* ??? We shouldn't have to worry about SCANSET here.
4159 We should just be able to check for a REG_DEAD note
4160 on a function call. However, the REG_DEAD notes are
4161 apparently not dependable around libcalls; c-torture
4162 execute/920501-2 is a test case. If SCANSET is set,
4163 then this insn sets the register, so it must have
4164 died earlier. Unfortunately, this will only handle
4165 the cases in which the register is, in fact, set in a
4166 later insn. */
4168 /* ??? We shouldn't have to use FOUNDINSN here.
4169 However, the LOG_LINKS fields are apparently not
4170 entirely reliable around libcalls;
4171 newlib/libm/math/e_pow.c is a test case. Sometimes
4172 an insn will appear in LOG_LINKS even though it is
4173 not the most recent insn which sets the register. */
4175 if (foundinsn
4176 && (scanset
4177 || find_reg_note (scan, REG_DEAD, reg)))
4179 dies = scan;
4180 break;
4184 if (! dies)
4186 /* Either there was a branch, or some insn used REG
4187 other than as a function call address. */
4188 continue;
4191 /* Create a code label, and put it in a REG_LABEL note on
4192 the insn which sets the register, and on each call insn
4193 which uses the register. In final_prescan_insn we look
4194 for the REG_LABEL notes, and output the appropriate label
4195 or pseudo-op. */
4197 label = gen_label_rtx ();
4198 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
4199 REG_NOTES (link));
4200 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
4201 REG_NOTES (insn));
4202 if (rescan)
4204 scan = link;
4207 rtx reg2;
4209 scan = NEXT_INSN (scan);
4210 if (scan != insn
4211 && ((GET_CODE (scan) == CALL_INSN
4212 && reg_mentioned_p (reg, scan))
4213 || ((reg2 = sfunc_uses_reg (scan))
4214 && REGNO (reg2) == REGNO (reg))))
4215 REG_NOTES (scan)
4216 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
4218 while (scan != dies);
4223 if (TARGET_SH2)
4224 fixup_addr_diff_vecs (first);
4226 if (optimize)
4228 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4229 shorten_branches (first);
4231 /* Scan the function looking for move instructions which have to be
4232 changed to pc-relative loads and insert the literal tables. */
4234 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4235 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4237 if (mova_p (insn))
4239 /* ??? basic block reordering can move a switch table dispatch
4240 below the switch table. Check if that has happened.
4241 We only have the addresses available when optimizing; but then,
4242 this check shouldn't be needed when not optimizing. */
4243 rtx label_ref = XVECEXP (SET_SRC (PATTERN (insn)), 0, 0);
4244 if (optimize
4245 && (INSN_ADDRESSES (INSN_UID (insn))
4246 > INSN_ADDRESSES (INSN_UID (XEXP (label_ref, 0)))))
4248 /* Change the mova into a load.
4249 broken_move will then return true for it. */
4250 fixup_mova (insn);
4252 else if (! num_mova++)
4253 mova = insn;
4255 else if (GET_CODE (insn) == JUMP_INSN
4256 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4257 && num_mova)
4259 rtx scan;
4260 int total;
4262 num_mova--;
4264 /* Some code might have been inserted between the mova and
4265 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4266 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4267 total += get_attr_length (scan);
4269 /* The range of mova is 1020; add 4 because the pc counts from the address
4270 of the second instruction after this one, and subtract 2 in case the pc
4271 is 2-byte aligned. Any alignment needed for the ADDR_DIFF_VEC
4272 cancels out with the alignment effects of the mova itself. */
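/* Editorial note: 1020 + 4 - 2 = 1022, which is the bound tested just
   below; once the accumulated length exceeds it, the mova can no longer
   reach its ADDR_DIFF_VEC and is rewritten by fixup_mova.  */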
4273 if (total > 1022)
4275 /* Change the mova into a load, and restart scanning
4276 there. broken_move will then return true for mova. */
4277 fixup_mova (mova);
4278 insn = mova;
4281 if (broken_move (insn)
4282 || (GET_CODE (insn) == INSN
4283 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
4285 rtx scan;
4286 /* Scan ahead looking for a barrier to stick the constant table
4287 behind. */
4288 rtx barrier = find_barrier (num_mova, mova, insn);
4289 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4290 int need_aligned_label = 0;
4292 if (num_mova && ! mova_p (mova))
4294 /* find_barrier had to change the first mova into a
4295 pcload; thus, we have to start with this new pcload. */
4296 insn = mova;
4297 num_mova = 0;
4299 /* Now find all the moves between the points and modify them. */
4300 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
4302 if (GET_CODE (scan) == CODE_LABEL)
4303 last_float = 0;
4304 if (GET_CODE (scan) == INSN
4305 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
4306 need_aligned_label = 1;
4307 if (broken_move (scan))
4309 rtx *patp = &PATTERN (scan), pat = *patp;
4310 rtx src, dst;
4311 rtx lab;
4312 rtx newsrc;
4313 enum machine_mode mode;
4315 if (GET_CODE (pat) == PARALLEL)
4316 patp = &XVECEXP (pat, 0, 0), pat = *patp;
4317 src = SET_SRC (pat);
4318 dst = SET_DEST (pat);
4319 mode = GET_MODE (dst);
4321 if (mode == SImode && hi_const (src)
4322 && REGNO (dst) != FPUL_REG)
4324 int offset = 0;
4326 mode = HImode;
4327 while (GET_CODE (dst) == SUBREG)
4329 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
4330 GET_MODE (SUBREG_REG (dst)),
4331 SUBREG_BYTE (dst),
4332 GET_MODE (dst));
4333 dst = SUBREG_REG (dst);
4335 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
4337 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
4339 /* This must be an insn that clobbers r0. */
4340 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
4341 XVECLEN (PATTERN (scan), 0)
4342 - 1);
4343 rtx clobber = *clobberp;
4345 if (GET_CODE (clobber) != CLOBBER
4346 || ! rtx_equal_p (XEXP (clobber, 0), r0_rtx))
4347 abort ();
4349 if (last_float
4350 && reg_set_between_p (r0_rtx, last_float_move, scan))
4351 last_float = 0;
4352 if (last_float
4353 && TARGET_SHCOMPACT
4354 && GET_MODE_SIZE (mode) != 4
4355 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
4356 last_float = 0;
4357 lab = add_constant (src, mode, last_float);
4358 if (lab)
4359 emit_insn_before (gen_mova (lab), scan);
4360 else
4362 /* There will be a REG_UNUSED note for r0 on
4363 LAST_FLOAT_MOVE; we have to change it to REG_INC,
4364 otherwise reorg:mark_target_live_regs will not
4365 consider r0 to be used, and we could end up with a delay
4366 slot insn in front of SCAN that clobbers r0. */
4367 rtx note
4368 = find_regno_note (last_float_move, REG_UNUSED, 0);
4370 /* If we are not optimizing, then there may not be
4371 a note. */
4372 if (note)
4373 PUT_MODE (note, REG_INC);
4375 *last_float_addr = r0_inc_rtx;
4377 last_float_move = scan;
4378 last_float = src;
4379 newsrc = gen_rtx_MEM (mode,
4380 (((TARGET_SH4 && ! TARGET_FMOVD)
4381 || REGNO (dst) == FPUL_REG)
4382 ? r0_inc_rtx
4383 : r0_rtx));
4384 last_float_addr = &XEXP (newsrc, 0);
4386 /* Remove the clobber of r0. */
4387 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
4388 gen_rtx_SCRATCH (Pmode));
4389 RTX_UNCHANGING_P (newsrc) = 1;
4391 /* This is a mova needing a label. Create it. */
4392 else if (GET_CODE (src) == UNSPEC
4393 && XINT (src, 1) == UNSPEC_MOVA
4394 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
4396 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
4397 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4398 newsrc = gen_rtx_UNSPEC (SImode,
4399 gen_rtvec (1, newsrc),
4400 UNSPEC_MOVA);
4402 else
4404 lab = add_constant (src, mode, 0);
4405 newsrc = gen_rtx_MEM (mode,
4406 gen_rtx_LABEL_REF (VOIDmode, lab));
4407 RTX_UNCHANGING_P (newsrc) = 1;
4409 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
4410 INSN_CODE (scan) = -1;
4413 dump_table (need_aligned_label ? insn : 0, barrier);
4414 insn = barrier;
4418 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
4419 INSN_ADDRESSES_FREE ();
4420 split_branches (first);
4422 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4423 also has an effect on the register that holds the address of the sfunc.
4424 Insert an extra dummy insn in front of each sfunc that pretends to
4425 use this register. */
4426 if (flag_delayed_branch)
4428 for (insn = first; insn; insn = NEXT_INSN (insn))
4430 rtx reg = sfunc_uses_reg (insn);
4432 if (! reg)
4433 continue;
4434 emit_insn_before (gen_use_sfunc_addr (reg), insn);
4437 #if 0
4438 /* fpscr is not actually a user variable, but we pretend it is for the
4439 sake of the previous optimization passes, since we want it handled like
4440 one. However, we don't have any debugging information for it, so turn
4441 it into a non-user variable now. */
4442 if (TARGET_SH4)
4443 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4444 #endif
4445 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
4449 get_dest_uid (rtx label, int max_uid)
4451 rtx dest = next_real_insn (label);
4452 int dest_uid;
4453 if (! dest)
4454 /* This can happen for an undefined label. */
4455 return 0;
4456 dest_uid = INSN_UID (dest);
4457 /* If this is a newly created branch redirection blocking instruction,
4458 we cannot index the branch_uid or insn_addresses arrays with its
4459 uid. But then, we won't need to, because the actual destination is
4460 the following branch. */
4461 while (dest_uid >= max_uid)
4463 dest = NEXT_INSN (dest);
4464 dest_uid = INSN_UID (dest);
4466 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
4467 return 0;
4468 return dest_uid;
4471 /* Split condbranches that are out of range. Also add clobbers for
4472 scratch registers that are needed in far jumps.
4473 We do this before delay slot scheduling, so that it can take our
4474 newly created instructions into account. It also allows us to
4475 find branches with common targets more easily. */
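/* Editorial background, from general SH knowledge rather than this file:
   the conditional branches bt/bf use an 8-bit pc-relative displacement
   (roughly -256..+254 bytes) while bra uses a 12-bit one (roughly +/-4 KB).
   A conditional branch that shorten_branches reports as too long is
   therefore inverted to skip over an unconditional jump, or redirected to
   a shared near_label in front of a far branch built by gen_far_branch.  */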
4477 static void
4478 split_branches (rtx first)
4480 rtx insn;
4481 struct far_branch **uid_branch, *far_branch_list = 0;
4482 int max_uid = get_max_uid ();
4484 /* Find out which branches are out of range. */
4485 shorten_branches (first);
4487 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
4488 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
4490 for (insn = first; insn; insn = NEXT_INSN (insn))
4491 if (! INSN_P (insn))
4492 continue;
4493 else if (INSN_DELETED_P (insn))
4495 /* Shorten_branches would split this instruction again,
4496 so transform it into a note. */
4497 PUT_CODE (insn, NOTE);
4498 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4499 NOTE_SOURCE_FILE (insn) = 0;
4501 else if (GET_CODE (insn) == JUMP_INSN
4502 /* Don't mess with ADDR_DIFF_VEC */
4503 && (GET_CODE (PATTERN (insn)) == SET
4504 || GET_CODE (PATTERN (insn)) == RETURN))
4506 enum attr_type type = get_attr_type (insn);
4507 if (type == TYPE_CBRANCH)
4509 rtx next, beyond;
4511 if (get_attr_length (insn) > 4)
4513 rtx src = SET_SRC (PATTERN (insn));
4514 rtx olabel = XEXP (XEXP (src, 1), 0);
4515 int addr = INSN_ADDRESSES (INSN_UID (insn));
4516 rtx label = 0;
4517 int dest_uid = get_dest_uid (olabel, max_uid);
4518 struct far_branch *bp = uid_branch[dest_uid];
4520 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4521 the label if the LABEL_NUSES count drops to zero. There is
4522 always a jump_optimize pass that sets these values, but it
4523 proceeds to delete unreferenced code, and then if not
4524 optimizing, to un-delete the deleted instructions, thus
4525 leaving labels with use counts that are too low. */
4526 if (! optimize)
4528 JUMP_LABEL (insn) = olabel;
4529 LABEL_NUSES (olabel)++;
4531 if (! bp)
4533 bp = (struct far_branch *) alloca (sizeof *bp);
4534 uid_branch[dest_uid] = bp;
4535 bp->prev = far_branch_list;
4536 far_branch_list = bp;
4537 bp->far_label
4538 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
4539 LABEL_NUSES (bp->far_label)++;
4541 else
4543 label = bp->near_label;
4544 if (! label && bp->address - addr >= CONDJUMP_MIN)
4546 rtx block = bp->insert_place;
4548 if (GET_CODE (PATTERN (block)) == RETURN)
4549 block = PREV_INSN (block);
4550 else
4551 block = gen_block_redirect (block,
4552 bp->address, 2);
4553 label = emit_label_after (gen_label_rtx (),
4554 PREV_INSN (block));
4555 bp->near_label = label;
4557 else if (label && ! NEXT_INSN (label))
4559 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4560 bp->insert_place = insn;
4561 else
4562 gen_far_branch (bp);
4565 if (! label
4566 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4568 bp->near_label = label = gen_label_rtx ();
4569 bp->insert_place = insn;
4570 bp->address = addr;
4572 if (! redirect_jump (insn, label, 1))
4573 abort ();
4575 else
4577 /* get_attr_length (insn) == 2 */
4578 /* Check if we have a pattern where reorg wants to redirect
4579 the branch to a label from an unconditional branch that
4580 is too far away. */
4581 /* We can't use JUMP_LABEL here because it might be undefined
4582 when not optimizing. */
4583 /* A syntax error might cause beyond to be NULL_RTX. */
4584 beyond
4585 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4586 0));
4588 if (beyond
4589 && (GET_CODE (beyond) == JUMP_INSN
4590 || ((beyond = next_active_insn (beyond))
4591 && GET_CODE (beyond) == JUMP_INSN))
4592 && GET_CODE (PATTERN (beyond)) == SET
4593 && recog_memoized (beyond) == CODE_FOR_jump_compact
4594 && ((INSN_ADDRESSES
4595 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4596 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4597 > 252 + 258 + 2))
4598 gen_block_redirect (beyond,
4599 INSN_ADDRESSES (INSN_UID (beyond)), 1);
4602 next = next_active_insn (insn);
4604 if ((GET_CODE (next) == JUMP_INSN
4605 || GET_CODE (next = next_active_insn (next)) == JUMP_INSN)
4606 && GET_CODE (PATTERN (next)) == SET
4607 && recog_memoized (next) == CODE_FOR_jump_compact
4608 && ((INSN_ADDRESSES
4609 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
4610 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4611 > 252 + 258 + 2))
4612 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
4614 else if (type == TYPE_JUMP || type == TYPE_RETURN)
4616 int addr = INSN_ADDRESSES (INSN_UID (insn));
4617 rtx far_label = 0;
4618 int dest_uid = 0;
4619 struct far_branch *bp;
4621 if (type == TYPE_JUMP)
4623 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
4624 dest_uid = get_dest_uid (far_label, max_uid);
4625 if (! dest_uid)
4627 /* Parse errors can lead to labels outside
4628 the insn stream. */
4629 if (! NEXT_INSN (far_label))
4630 continue;
4632 if (! optimize)
4634 JUMP_LABEL (insn) = far_label;
4635 LABEL_NUSES (far_label)++;
4637 redirect_jump (insn, NULL_RTX, 1);
4638 far_label = 0;
4641 bp = uid_branch[dest_uid];
4642 if (! bp)
4644 bp = (struct far_branch *) alloca (sizeof *bp);
4645 uid_branch[dest_uid] = bp;
4646 bp->prev = far_branch_list;
4647 far_branch_list = bp;
4648 bp->near_label = 0;
4649 bp->far_label = far_label;
4650 if (far_label)
4651 LABEL_NUSES (far_label)++;
4653 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
4654 if (addr - bp->address <= CONDJUMP_MAX)
4655 emit_label_after (bp->near_label, PREV_INSN (insn));
4656 else
4658 gen_far_branch (bp);
4659 bp->near_label = 0;
4661 else
4662 bp->near_label = 0;
4663 bp->address = addr;
4664 bp->insert_place = insn;
4665 if (! far_label)
4666 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
4667 else
4668 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
4671 /* Generate all pending far branches,
4672 and free our references to the far labels. */
4673 while (far_branch_list)
4675 if (far_branch_list->near_label
4676 && ! NEXT_INSN (far_branch_list->near_label))
4677 gen_far_branch (far_branch_list);
4678 if (optimize
4679 && far_branch_list->far_label
4680 && ! --LABEL_NUSES (far_branch_list->far_label))
4681 delete_insn (far_branch_list->far_label);
4682 far_branch_list = far_branch_list->prev;
4685 /* Instruction length information is no longer valid due to the new
4686 instructions that have been generated. */
4687 init_insn_lengths ();
4690 /* Dump out instruction addresses, which is useful for debugging the
4691 constant pool table stuff.
4693 If relaxing, output the label and pseudo-ops used to link together
4694 calls and the instruction which set the registers. */
4696 /* ??? The addresses printed by this routine for insns are nonsense for
4697 insns which are inside of a sequence where none of the inner insns have
4698 variable length. This is because the second pass of shorten_branches
4699 does not bother to update them. */
4701 void
4702 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
4703 int noperands ATTRIBUTE_UNUSED)
4705 if (TARGET_DUMPISIZE)
4706 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
4708 if (TARGET_RELAX)
4710 rtx note;
4712 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
4713 if (note)
4715 rtx pattern;
4717 pattern = PATTERN (insn);
4718 if (GET_CODE (pattern) == PARALLEL)
4719 pattern = XVECEXP (pattern, 0, 0);
4720 if (GET_CODE (pattern) == CALL
4721 || (GET_CODE (pattern) == SET
4722 && (GET_CODE (SET_SRC (pattern)) == CALL
4723 || get_attr_type (insn) == TYPE_SFUNC)))
4724 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
4725 CODE_LABEL_NUMBER (XEXP (note, 0)));
4726 else if (GET_CODE (pattern) == SET)
4727 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4728 CODE_LABEL_NUMBER (XEXP (note, 0)));
4729 else
4730 abort ();
4735 /* Dump out any constants accumulated in the final pass. These will
4736 only be labels. */
4738 const char *
4739 output_jump_label_table (void)
4741 int i;
4743 if (pool_size)
4745 fprintf (asm_out_file, "\t.align 2\n");
4746 for (i = 0; i < pool_size; i++)
4748 pool_node *p = &pool_vector[i];
4750 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4751 CODE_LABEL_NUMBER (p->label));
4752 output_asm_insn (".long %O0", &p->value);
4754 pool_size = 0;
4757 return "";
4760 /* A full frame looks like:
4762 arg-5
4763 arg-4
4764 [ if current_function_anonymous_args
4765 arg-3
4766 arg-2
4767 arg-1
4768 arg-0 ]
4769 saved-fp
4770 saved-r10
4771 saved-r11
4772 saved-r12
4773 saved-pr
4774 local-n
4776 local-1
4777 local-0 <- fp points here. */
4779 /* Number of bytes pushed for anonymous args, used to pass information
4780 between expand_prologue and expand_epilogue. */
4782 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
4783 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
4784 for an epilogue and a negative value means that it's for a sibcall
4785 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
4786 all the registers that are about to be restored, and hence dead. */
4788 static void
4789 output_stack_adjust (int size, rtx reg, int epilogue_p,
4790 HARD_REG_SET *live_regs_mask)
4792 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
4793 if (size)
4795 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
4797 /* This test is bogus, as output_stack_adjust is used to re-align the
4798 stack. */
4799 #if 0
4800 if (size % align)
4801 abort ();
4802 #endif
4804 if (CONST_OK_FOR_ADD (size))
4805 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
4806 /* Try to do it with two partial adjustments; however, we must make
4807 sure that the stack is properly aligned at all times, in case
4808 an interrupt occurs between the two partial adjustments. */
4809 else if (CONST_OK_FOR_ADD (size / 2 & -align)
4810 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
4812 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
4813 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
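/* Editorial example with hypothetical numbers: for size = 200 and
   align = 4 (and assuming the SH1-4 add #imm range of -128..127 behind
   CONST_OK_FOR_ADD), the two chunks are 200 / 2 & -4 = 100 and
   200 - 100 = 100; each fits the immediate range even though 200 itself
   does not, and the first chunk is a multiple of align, so the stack
   pointer stays aligned between the two adds.  */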
4815 else
4817 rtx const_reg;
4818 rtx insn;
4819 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
4820 int i;
4822 /* If TEMP is invalid, we could temporarily save a general
4823 register to MACL. However, there is currently no need
4824 to handle this case, so just abort when we see it. */
4825 if (epilogue_p < 0
4826 || current_function_interrupt
4827 || ! call_used_regs[temp] || fixed_regs[temp])
4828 temp = -1;
4829 if (temp < 0 && ! current_function_interrupt
4830 && (TARGET_SHMEDIA || epilogue_p >= 0))
4832 HARD_REG_SET temps;
4833 COPY_HARD_REG_SET (temps, call_used_reg_set);
4834 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
4835 if (epilogue_p > 0)
4837 int nreg = 0;
4838 if (current_function_return_rtx)
4840 enum machine_mode mode;
4841 mode = GET_MODE (current_function_return_rtx);
4842 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
4843 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
4845 for (i = 0; i < nreg; i++)
4846 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
4847 if (current_function_calls_eh_return)
4849 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
4850 for (i = 0; i <= 3; i++)
4851 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
4854 if (TARGET_SHMEDIA && epilogue_p < 0)
4855 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
4856 CLEAR_HARD_REG_BIT (temps, i);
4857 if (epilogue_p <= 0)
4859 for (i = FIRST_PARM_REG;
4860 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
4861 CLEAR_HARD_REG_BIT (temps, i);
4862 if (cfun->static_chain_decl != NULL)
4863 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
4865 temp = scavenge_reg (&temps);
4867 if (temp < 0 && live_regs_mask)
4868 temp = scavenge_reg (live_regs_mask);
4869 if (temp < 0)
4871 /* If we reach here, the most likely case is the (sibcall)
4872 epilogue for non-SHmedia. Put a special push/pop sequence
4873 for such a case as a last resort. This looks lengthy, but it
4874 is not a problem in practice because this case is very rare. */
4875 if (! TARGET_SHMEDIA && epilogue_p)
4877 rtx adj_reg, tmp_reg, mem;
4879 /* ??? There is still the slight possibility that r4 or r5
4880 have been reserved as fixed registers or assigned as
4881 global registers, and they change during an interrupt.
4882 There are possible ways to handle this:
4883 - If we are adjusting the frame pointer (r14), we can do
4884 with a single temp register and an ordinary push / pop
4885 on the stack.
4886 - Grab any call-used or call-saved registers (i.e. not
4887 fixed or globals) for the temps we need. We might
4888 also grab r14 if we are adjusting the stack pointer.
4889 If we can't find enough available registers, issue
4890 a diagnostic and abort - the user must have reserved
4891 way too many registers.
4892 But since all this is rather unlikely to happen and
4893 would require extra testing, we just abort if r4 / r5
4894 are not available. */
4895 if (fixed_regs[4] || fixed_regs[5]
4896 || global_regs[4] || global_regs[5])
4897 abort ();
4899 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
4900 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
4901 emit_move_insn (gen_rtx_MEM (Pmode, reg), adj_reg);
4902 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
4903 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
4904 mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
4905 emit_move_insn (mem, tmp_reg);
4906 emit_move_insn (tmp_reg, gen_rtx_MEM (Pmode, reg));
4907 mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
4908 emit_move_insn (mem, tmp_reg);
4909 emit_move_insn (reg, adj_reg);
4910 mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
4911 emit_move_insn (adj_reg, mem);
4912 mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
4913 emit_move_insn (tmp_reg, mem);
4914 return;
4916 else
4917 abort ();
4919 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
4921 /* If SIZE is negative, subtract the positive value.
4922 This sometimes allows a constant pool entry to be shared
4923 between prologue and epilogue code. */
4924 if (size < 0)
4926 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
4927 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
4929 else
4931 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
4932 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
4934 if (! epilogue_p)
4935 REG_NOTES (insn)
4936 = (gen_rtx_EXPR_LIST
4937 (REG_FRAME_RELATED_EXPR,
4938 gen_rtx_SET (VOIDmode, reg,
4939 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
4940 REG_NOTES (insn)));
4945 static rtx
4946 frame_insn (rtx x)
4948 x = emit_insn (x);
4949 RTX_FRAME_RELATED_P (x) = 1;
4950 return x;
4953 /* Output RTL to push register RN onto the stack. */
4955 static rtx
4956 push (int rn)
4958 rtx x;
4959 if (rn == FPUL_REG)
4960 x = gen_push_fpul ();
4961 else if (rn == FPSCR_REG)
4962 x = gen_push_fpscr ();
4963 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4964 && FP_OR_XD_REGISTER_P (rn))
4966 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4967 return NULL_RTX;
4968 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
4970 else if (TARGET_SH2E && FP_REGISTER_P (rn))
4971 x = gen_push_e (gen_rtx_REG (SFmode, rn));
4972 else
4973 x = gen_push (gen_rtx_REG (SImode, rn));
4975 x = frame_insn (x);
4976 REG_NOTES (x)
4977 = gen_rtx_EXPR_LIST (REG_INC,
4978 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4979 return x;
4982 /* Output RTL to pop register RN from the stack. */
4984 static void
4985 pop (int rn)
4987 rtx x;
4988 if (rn == FPUL_REG)
4989 x = gen_pop_fpul ();
4990 else if (rn == FPSCR_REG)
4991 x = gen_pop_fpscr ();
4992 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4993 && FP_OR_XD_REGISTER_P (rn))
4995 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4996 return;
4997 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
4999 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5000 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
5001 else
5002 x = gen_pop (gen_rtx_REG (SImode, rn));
5004 x = emit_insn (x);
5005 REG_NOTES (x)
5006 = gen_rtx_EXPR_LIST (REG_INC,
5007 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5010 /* Generate code to push the regs specified in the mask. */
5012 static void
5013 push_regs (HARD_REG_SET *mask, int interrupt_handler)
5015 int i;
5016 int skip_fpscr = 0;
5018 /* Push PR last; this gives better latencies after the prologue, and
5019 candidates for the return delay slot when there are no general
5020 registers pushed. */
5021 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5023 /* If this is an interrupt handler, and the SZ bit varies,
5024 and we have to push any floating point register, we need
5025 to switch to the correct precision first. */
5026 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
5027 && hard_regs_intersect_p (mask, &reg_class_contents[DF_REGS]))
5029 HARD_REG_SET unsaved;
5031 push (FPSCR_REG);
5032 COMPL_HARD_REG_SET (unsaved, *mask);
5033 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
5034 skip_fpscr = 1;
5036 if (i != PR_REG
5037 && (i != FPSCR_REG || ! skip_fpscr)
5038 && TEST_HARD_REG_BIT (*mask, i))
5039 push (i);
5041 if (TEST_HARD_REG_BIT (*mask, PR_REG))
5042 push (PR_REG);
5045 /* Calculate how much extra space is needed to save all callee-saved
5046 target registers.
5047 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5049 static int
5050 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
5052 int reg;
5053 int stack_space = 0;
5054 int interrupt_handler = sh_cfun_interrupt_handler_p ();
5056 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5057 if ((! call_used_regs[reg] || interrupt_handler)
5058 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5059 /* Leave space to save this target register on the stack,
5060 in case target register allocation wants to use it. */
5061 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5062 return stack_space;
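/* Editorial note: assuming REGISTER_NATURAL_MODE of a target register is
   DImode (as the DImode saves in sh5_schedule_saves below suggest), each
   reserved register costs 8 bytes; e.g. five callee-saved target
   registers not already in LIVE_REGS_MASK would reserve 40 bytes.  */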
5065 /* Decide whether we should reserve space for callee-save target registers,
5066 in case target register allocation wants to use them. REGS_SAVED is
5067 the space, in bytes, that is already required for register saves.
5068 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5070 static int
5071 shmedia_reserve_space_for_target_registers_p (int regs_saved,
5072 HARD_REG_SET *live_regs_mask)
5074 if (optimize_size)
5075 return 0;
5076 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
5079 /* Decide how much space to reserve for callee-save target registers
5080 in case target register allocation wants to use them.
5081 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5083 static int
5084 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5086 if (shmedia_space_reserved_for_target_registers)
5087 return shmedia_target_regs_stack_space (live_regs_mask);
5088 else
5089 return 0;
5092 /* Work out the registers which need to be saved, both as a mask and a
5093 count of saved words. Return the count.
5095 If doing a pragma interrupt function, then push all regs used by the
5096 function, and if we call another function (we can tell by looking at PR),
5097 make sure that all the regs it clobbers are safe too. */
5099 static int
5100 calc_live_regs (HARD_REG_SET *live_regs_mask)
5102 int reg;
5103 int count;
5104 int interrupt_handler;
5105 int pr_live, has_call;
5107 interrupt_handler = sh_cfun_interrupt_handler_p ();
5109 CLEAR_HARD_REG_SET (*live_regs_mask);
5110 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
5111 && regs_ever_live[FPSCR_REG])
5112 target_flags &= ~FPU_SINGLE_BIT;
5113 /* If we can save a lot of saves by switching to double mode, do that. */
5114 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
5115 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5116 if (regs_ever_live[reg] && regs_ever_live[reg+1]
5117 && (! call_used_regs[reg] || (interrupt_handler && ! pragma_trapa))
5118 && ++count > 2)
5120 target_flags &= ~FPU_SINGLE_BIT;
5121 break;
5123 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5124 knows how to use it. That means the pseudo originally allocated for
5125 the initial value can become the PR_MEDIA_REG hard register, as seen for
5126 execute/20010122-1.c:test9. */
5127 if (TARGET_SHMEDIA)
5128 /* ??? this function is called from initial_elimination_offset, hence we
5129 can't use the result of sh_media_register_for_return here. */
5130 pr_live = sh_pr_n_sets ();
5131 else
5133 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5134 pr_live = (pr_initial
5135 ? (GET_CODE (pr_initial) != REG
5136 || REGNO (pr_initial) != (PR_REG))
5137 : regs_ever_live[PR_REG]);
5138 /* For SHcompact, if not optimizing, we end up with a memory reference
5139 using the return address pointer for __builtin_return_address even
5140 though there is no actual need to put the PR register on the stack. */
5141 pr_live |= regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM];
5143 /* Force PR to be live if the prologue has to call the SHmedia
5144 argument decoder or register saver. */
5145 if (TARGET_SHCOMPACT
5146 && ((current_function_args_info.call_cookie
5147 & ~ CALL_COOKIE_RET_TRAMP (1))
5148 || current_function_has_nonlocal_label))
5149 pr_live = 1;
5150 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
5151 for (count = 0, reg = FIRST_PSEUDO_REGISTER - 1; reg >= 0; reg--)
5153 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5154 ? pr_live
5155 : (interrupt_handler && ! pragma_trapa)
5156 ? (/* Need to save all the regs ever live. */
5157 (regs_ever_live[reg]
5158 || (call_used_regs[reg]
5159 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG)
5160 && has_call)
5161 || (has_call && REGISTER_NATURAL_MODE (reg) == SImode
5162 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5163 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5164 && reg != RETURN_ADDRESS_POINTER_REGNUM
5165 && reg != T_REG && reg != GBR_REG
5166 /* Push fpscr only on targets which have FPU */
5167 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5168 : (/* Only push those regs which are used and need to be saved. */
5169 (TARGET_SHCOMPACT
5170 && flag_pic
5171 && current_function_args_info.call_cookie
5172 && reg == (int) PIC_OFFSET_TABLE_REGNUM)
5173 || (regs_ever_live[reg] && ! call_used_regs[reg])
5174 || (current_function_calls_eh_return
5175 && (reg == (int) EH_RETURN_DATA_REGNO (0)
5176 || reg == (int) EH_RETURN_DATA_REGNO (1)
5177 || reg == (int) EH_RETURN_DATA_REGNO (2)
5178 || reg == (int) EH_RETURN_DATA_REGNO (3)))
5179 || ((reg == MACL_REG || reg == MACH_REG)
5180 && regs_ever_live[reg]
5181 && sh_cfun_attr_renesas_p ())
5184 SET_HARD_REG_BIT (*live_regs_mask, reg);
5185 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5187 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
5188 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
5190 if (FP_REGISTER_P (reg))
5192 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
5194 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5195 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
5198 else if (XD_REGISTER_P (reg))
5200 /* Must switch to double mode to access these registers. */
5201 target_flags &= ~FPU_SINGLE_BIT;
5206 /* If we have a target register optimization pass after prologue / epilogue
5207 threading, we need to assume all target registers will be live even if
5208 they aren't now. */
5209 if (flag_branch_target_load_optimize2
5210 && TARGET_SAVE_ALL_TARGET_REGS
5211 && shmedia_space_reserved_for_target_registers)
5212 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5213 if ((! call_used_regs[reg] || interrupt_handler)
5214 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5216 SET_HARD_REG_BIT (*live_regs_mask, reg);
5217 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5219 /* If this is an interrupt handler, we don't have any call-clobbered
5220 registers we can conveniently use for target register save/restore.
5221 Make sure we save at least one general purpose register when we need
5222 to save target registers. */
5223 if (interrupt_handler
5224 && hard_regs_intersect_p (live_regs_mask,
5225 &reg_class_contents[TARGET_REGS])
5226 && ! hard_regs_intersect_p (live_regs_mask,
5227 &reg_class_contents[GENERAL_REGS]))
5229 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
5230 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
5233 return count;
5236 /* Code to generate prologue and epilogue sequences */
5238 /* PUSHED is the number of bytes that are being pushed on the
5239 stack for register saves. Return the frame size, padded
5240 appropriately so that the stack stays properly aligned. */
5241 static HOST_WIDE_INT
5242 rounded_frame_size (int pushed)
5244 HOST_WIDE_INT size = get_frame_size ();
5245 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5247 return ((size + pushed + align - 1) & -align) - pushed;
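/* Editorial example with hypothetical numbers, assuming STACK_BOUNDARY
   is 64 so align = 8: pushed = 12 and get_frame_size () = 18 give
   ((18 + 12 + 8 - 1) & -8) - 12 = 32 - 12 = 20, so 12 + 20 = 32 bytes
   are allocated in total and the stack stays 8-byte aligned.  */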
5250 /* Choose a call-clobbered target-branch register that remains
5251 unchanged along the whole function. We set it up as the return
5252 value in the prologue. */
5254 sh_media_register_for_return (void)
5256 int regno;
5257 int tr0_used;
5259 if (! current_function_is_leaf)
5260 return -1;
5261 if (lookup_attribute ("interrupt_handler",
5262 DECL_ATTRIBUTES (current_function_decl)))
5263 return -1;
5265 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
5267 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
5268 if (call_used_regs[regno] && ! regs_ever_live[regno])
5269 return regno;
5271 return -1;
5274 /* The maximum registers we need to save are:
5275 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
5276 - 32 floating point registers (for each pair, we save none,
5277 one single precision value, or a double precision value).
5278 - 8 target registers
5279 - add 1 entry for a delimiter. */
5280 #define MAX_SAVED_REGS (62+32+8)
5282 typedef struct save_entry_s
5284 unsigned char reg;
5285 unsigned char mode;
5286 short offset;
5287 } save_entry;
5289 #define MAX_TEMPS 4
5291 /* There will be a delimiter entry with VOIDmode both at the start and the
5292 end of a filled in schedule. The end delimiter has the offset of the
5293 save with the smallest (i.e. most negative) offset. */
5294 typedef struct save_schedule_s
5296 save_entry entries[MAX_SAVED_REGS + 2];
5297 int temps[MAX_TEMPS+1];
5298 } save_schedule;
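/* Editorial note: MAX_SAVED_REGS is 62 + 32 + 8 = 102; entries[] reserves
   MAX_SAVED_REGS + 2 = 104 slots so that the leading and trailing VOIDmode
   delimiter entries described above always fit, and temps[] holds up to
   MAX_TEMPS scratch register numbers plus a terminating -1.  */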
5300 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
5301 use reverse order. Returns the last entry written to (not counting
5302 the delimiter). OFFSET_BASE is a number to be added to all offset
5303 entries. */
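/* Editorial sketch of a filled-in schedule, for a hypothetical live set
   of one call-saved general register (saved in DImode) and one
   call-saved single-precision FP register, with offset_base = 0:

       { reg = -1,  mode = VOIDmode, offset =   0 }   start delimiter
       { reg = rN,  mode = DImode,   offset =  -8 }   8-byte aligned pass
       { reg = frM, mode = SFmode,   offset = -12 }   unaligned pass
       { reg = -1,  mode = VOIDmode, offset = -12 }   end delimiter

   Offsets decrease from OFFSET_BASE, and the end delimiter records the
   most negative offset, as described above.  */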
5305 static save_entry *
5306 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
5307 int offset_base)
5309 int align, i;
5310 save_entry *entry = schedule->entries;
5311 int tmpx = 0;
5312 int offset;
5314 if (! current_function_interrupt)
5315 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
5316 if (call_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
5317 && ! FUNCTION_ARG_REGNO_P (i)
5318 && i != FIRST_RET_REG
5319 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
5320 && ! (current_function_calls_eh_return
5321 && (i == EH_RETURN_STACKADJ_REGNO
5322 || ((unsigned) i <= EH_RETURN_DATA_REGNO (0)
5323 && (unsigned) i >= EH_RETURN_DATA_REGNO (3)))))
5324 schedule->temps[tmpx++] = i;
5325 entry->reg = -1;
5326 entry->mode = VOIDmode;
5327 entry->offset = offset_base;
5328 entry++;
5329 /* We loop twice: first, we save the 8-byte-aligned registers at the
5330 higher addresses, which are known to be aligned. Then, we
5331 proceed to saving 32-bit registers that don't need 8-byte
5332 alignment.
5333 If this is an interrupt function, all registers that need saving
5334 need to be saved in full. Moreover, we need to postpone saving
5335 target registers till we have saved some general purpose registers
5336 we can then use as scratch registers. */
5337 offset = offset_base;
5338 for (align = 1; align >= 0; align--)
5340 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
5341 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5343 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5344 int reg = i;
5346 if (current_function_interrupt)
5348 if (TARGET_REGISTER_P (i))
5349 continue;
5350 if (GENERAL_REGISTER_P (i))
5351 mode = DImode;
5353 if (mode == SFmode && (i % 2) == 1
5354 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5355 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
5357 mode = DFmode;
5358 i--;
5359 reg--;
5362 /* If we're doing the aligned pass and this is not aligned,
5363 or we're doing the unaligned pass and this is aligned,
5364 skip it. */
5365 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
5366 != align)
5367 continue;
5369 if (current_function_interrupt
5370 && GENERAL_REGISTER_P (i)
5371 && tmpx < MAX_TEMPS)
5372 schedule->temps[tmpx++] = i;
5374 offset -= GET_MODE_SIZE (mode);
5375 entry->reg = i;
5376 entry->mode = mode;
5377 entry->offset = offset;
5378 entry++;
5380 if (align && current_function_interrupt)
5381 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
5382 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5384 offset -= GET_MODE_SIZE (DImode);
5385 entry->reg = i;
5386 entry->mode = DImode;
5387 entry->offset = offset;
5388 entry++;
5391 entry->reg = -1;
5392 entry->mode = VOIDmode;
5393 entry->offset = offset;
5394 schedule->temps[tmpx] = -1;
5395 return entry - 1;
5398 void
5399 sh_expand_prologue (void)
5401 HARD_REG_SET live_regs_mask;
5402 int d, i;
5403 int d_rounding = 0;
5404 int save_flags = target_flags;
5405 int pretend_args;
5407 current_function_interrupt = sh_cfun_interrupt_handler_p ();
5409 /* We have pretend args if we had an object sent partially in registers
5410 and partially on the stack, e.g. a large structure. */
5411 pretend_args = current_function_pretend_args_size;
5412 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
5413 && (NPARM_REGS(SImode)
5414 > current_function_args_info.arg_count[(int) SH_ARG_INT]))
5415 pretend_args = 0;
5416 output_stack_adjust (-pretend_args
5417 - current_function_args_info.stack_regs * 8,
5418 stack_pointer_rtx, 0, NULL);
5420 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
5421 /* We're going to use the PIC register to load the address of the
5422 incoming-argument decoder and/or of the return trampoline from
5423 the GOT, so make sure the PIC register is preserved and
5424 initialized. */
5425 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5427 if (TARGET_SHCOMPACT
5428 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5430 int reg;
5432 /* First, make all registers with incoming arguments that will
5433 be pushed onto the stack live, so that register renaming
5434 doesn't overwrite them. */
5435 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
5436 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
5437 >= NPARM_REGS (SImode) - reg)
5438 for (; reg < NPARM_REGS (SImode); reg++)
5439 emit_insn (gen_shcompact_preserve_incoming_args
5440 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5441 else if (CALL_COOKIE_INT_REG_GET
5442 (current_function_args_info.call_cookie, reg) == 1)
5443 emit_insn (gen_shcompact_preserve_incoming_args
5444 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5446 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
5447 stack_pointer_rtx);
5448 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
5449 GEN_INT (current_function_args_info.call_cookie));
5450 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
5451 gen_rtx_REG (SImode, R0_REG));
5453 else if (TARGET_SHMEDIA)
5455 int tr = sh_media_register_for_return ();
5457 if (tr >= 0)
5459 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
5460 gen_rtx_REG (DImode, PR_MEDIA_REG));
5462 /* ??? We should suppress saving pr when we don't need it, but this
5463 is tricky because of builtin_return_address. */
5465 /* If this function only exits with sibcalls, this copy
5466 will be flagged as dead. */
5467 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5468 const0_rtx,
5469 REG_NOTES (insn));
5473 /* Emit the code for SETUP_VARARGS. */
5474 if (current_function_stdarg)
5476 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
5478 /* Push arg regs as if they'd been provided by caller in stack. */
5479 for (i = 0; i < NPARM_REGS(SImode); i++)
5481 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
5482 rtx insn;
5484 if (i >= (NPARM_REGS(SImode)
5485 - current_function_args_info.arg_count[(int) SH_ARG_INT]
5487 break;
5488 insn = push (rn);
5489 RTX_FRAME_RELATED_P (insn) = 0;
5494 /* If we're supposed to switch stacks at function entry, do so now. */
5495 if (sp_switch)
5496 emit_insn (gen_sp_switch_1 ());
5498 d = calc_live_regs (&live_regs_mask);
5499 /* ??? Maybe we could save some switching if we can move a mode switch
5500 that already happens to be at the function start into the prologue. */
5501 if (target_flags != save_flags && ! current_function_interrupt)
5502 emit_insn (gen_toggle_sz ());
5504 if (TARGET_SH5)
5506 int offset_base, offset;
5507 rtx r0 = NULL_RTX;
5508 int offset_in_r0 = -1;
5509 int sp_in_r0 = 0;
5510 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5511 int total_size, save_size;
5512 save_schedule schedule;
5513 save_entry *entry;
5514 int *tmp_pnt;
5516 if (call_used_regs[R0_REG] && ! fixed_regs[R0_REG]
5517 && ! current_function_interrupt)
5518 r0 = gen_rtx_REG (Pmode, R0_REG);
5520 /* D is the actual number of bytes that we need for saving registers;
5521 however, in initial_elimination_offset we have committed to using
5522 an additional TREGS_SPACE bytes - in order to keep both
5523 addresses to arguments supplied by the caller and local variables
5524 valid, we must keep this gap. Place it between the incoming
5525 arguments and the actually saved registers in a bid to optimize
5526 locality of reference. */
5527 total_size = d + tregs_space;
5528 total_size += rounded_frame_size (total_size);
5529 save_size = total_size - rounded_frame_size (d);
5530 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
5531 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5532 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
5534 /* If adjusting the stack in a single step costs nothing extra, do so.
5535 I.e. either if a single addi is enough, or we need a movi anyway,
5536 and we don't exceed the maximum offset range (the test for the
5537 latter is conservative for simplicity). */
5538 if (TARGET_SHMEDIA
5539 && (CONST_OK_FOR_I10 (-total_size)
5540 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
5541 && total_size <= 2044)))
5542 d_rounding = total_size - save_size;
5544 offset_base = d + d_rounding;
5546 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
5547 0, NULL);
5549 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
5550 tmp_pnt = schedule.temps;
5551 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
5553 enum machine_mode mode = entry->mode;
5554 int reg = entry->reg;
5555 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
5557 offset = entry->offset;
5559 reg_rtx = gen_rtx_REG (mode, reg);
5561 mem_rtx = gen_rtx_MEM (mode,
5562 gen_rtx_PLUS (Pmode,
5563 stack_pointer_rtx,
5564 GEN_INT (offset)));
5566 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
5568 if (! r0)
5569 abort ();
5570 mem_rtx = NULL_RTX;
5572 try_pre_dec:
5574 if (HAVE_PRE_DECREMENT
5575 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
5576 || mem_rtx == NULL_RTX
5577 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5579 pre_dec = gen_rtx_MEM (mode,
5580 gen_rtx_PRE_DEC (Pmode, r0));
5582 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
5583 pre_dec_ok);
5585 pre_dec = NULL_RTX;
5587 break;
5589 pre_dec_ok:
5590 mem_rtx = NULL_RTX;
5591 offset += GET_MODE_SIZE (mode);
5593 while (0);
5595 if (mem_rtx != NULL_RTX)
5596 goto addr_ok;
5598 if (offset_in_r0 == -1)
5600 emit_move_insn (r0, GEN_INT (offset));
5601 offset_in_r0 = offset;
5603 else if (offset != offset_in_r0)
5605 emit_move_insn (r0,
5606 gen_rtx_PLUS
5607 (Pmode, r0,
5608 GEN_INT (offset - offset_in_r0)));
5609 offset_in_r0 += offset - offset_in_r0;
5612 if (pre_dec != NULL_RTX)
5614 if (! sp_in_r0)
5616 emit_move_insn (r0,
5617 gen_rtx_PLUS
5618 (Pmode, r0, stack_pointer_rtx));
5619 sp_in_r0 = 1;
5622 offset -= GET_MODE_SIZE (mode);
5623 offset_in_r0 -= GET_MODE_SIZE (mode);
5625 mem_rtx = pre_dec;
5627 else if (sp_in_r0)
5628 mem_rtx = gen_rtx_MEM (mode, r0);
5629 else
5630 mem_rtx = gen_rtx_MEM (mode,
5631 gen_rtx_PLUS (Pmode,
5632 stack_pointer_rtx,
5633 r0));
5635 /* We must not use an r0-based address for target-branch
5636 registers or for special registers without pre-dec
5637 memory addresses, since we store their values in r0
5638 first. */
5639 if (TARGET_REGISTER_P (reg)
5640 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5641 && mem_rtx != pre_dec))
5642 abort ();
5644 addr_ok:
5645 if (TARGET_REGISTER_P (reg)
5646 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5647 && mem_rtx != pre_dec))
5649 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
5651 emit_move_insn (tmp_reg, reg_rtx);
5653 if (REGNO (tmp_reg) == R0_REG)
5655 offset_in_r0 = -1;
5656 sp_in_r0 = 0;
5657 if (refers_to_regno_p (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0))
5658 abort ();
5661 if (*++tmp_pnt <= 0)
5662 tmp_pnt = schedule.temps;
5664 reg_rtx = tmp_reg;
5667 rtx insn;
5669 /* Mark as interesting for dwarf cfi generator */
5670 insn = emit_move_insn (mem_rtx, reg_rtx);
5671 RTX_FRAME_RELATED_P (insn) = 1;
5673 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
5675 rtx reg_rtx = gen_rtx_REG (mode, reg);
5676 rtx set, note_rtx;
5677 rtx mem_rtx = gen_rtx_MEM (mode,
5678 gen_rtx_PLUS (Pmode,
5679 stack_pointer_rtx,
5680 GEN_INT (offset)));
5682 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
5683 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
5684 REG_NOTES (insn));
5685 REG_NOTES (insn) = note_rtx;
5690 if (entry->offset != d_rounding)
5691 abort ();
5693 else
5694 push_regs (&live_regs_mask, current_function_interrupt);
5696 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
5698 rtx insn = get_last_insn ();
5699 rtx last = emit_insn (gen_GOTaddr2picreg ());
5701 /* Mark these insns as possibly dead. Sometimes, flow2 may
5702 delete all uses of the PIC register. In this case, let it
5703 delete the initialization too. */
5706 insn = NEXT_INSN (insn);
5708 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5709 const0_rtx,
5710 REG_NOTES (insn));
5712 while (insn != last);
5715 if (SHMEDIA_REGS_STACK_ADJUST ())
5717 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5718 function_symbol (TARGET_FPU_ANY
5719 ? "__GCC_push_shmedia_regs"
5720 : "__GCC_push_shmedia_regs_nofpu"));
5721 /* This must NOT go through the PLT, otherwise mach and macl
5722 may be clobbered. */
5723 emit_insn (gen_shmedia_save_restore_regs_compact
5724 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
5727 if (target_flags != save_flags && ! current_function_interrupt)
5729 rtx insn = emit_insn (gen_toggle_sz ());
5731 /* If we're lucky, a mode switch in the function body will
5732 overwrite fpscr, turning this insn dead. Tell flow this
5733 insn is ok to delete. */
5734 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5735 const0_rtx,
5736 REG_NOTES (insn));
5739 target_flags = save_flags;
5741 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
5742 stack_pointer_rtx, 0, NULL);
5744 if (frame_pointer_needed)
5745 frame_insn (GEN_MOV (frame_pointer_rtx, stack_pointer_rtx));
5747 if (TARGET_SHCOMPACT
5748 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5750 /* This must NOT go through the PLT, otherwise mach and macl
5751 may be clobbered. */
5752 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5753 function_symbol ("__GCC_shcompact_incoming_args"));
5754 emit_insn (gen_shcompact_incoming_args ());
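/* Editorial summary (not from the original sources) of the prologue
   code emitted above: the live registers are saved (using the
   SH5-specific schedule when applicable), the PIC register is
   initialized if needed, fpscr's mode is toggled when the saved target
   flags differ, the frame is allocated, the frame pointer is set, and
   SHcompact functions finally fix up their incoming arguments.  */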
5758 void
5759 sh_expand_epilogue (bool sibcall_p)
5761 HARD_REG_SET live_regs_mask;
5762 int d, i;
5763 int d_rounding = 0;
5765 int save_flags = target_flags;
5766 int frame_size, save_size;
5767 int fpscr_deferred = 0;
5768 int e = sibcall_p ? -1 : 1;
5770 d = calc_live_regs (&live_regs_mask);
5772 save_size = d;
5773 frame_size = rounded_frame_size (d);
5775 if (TARGET_SH5)
5777 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5778 int total_size;
5779 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
5780 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5781 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
5783 total_size = d + tregs_space;
5784 total_size += rounded_frame_size (total_size);
5785 save_size = total_size - frame_size;
5787 /* If adjusting the stack in a single step costs nothing extra, do so.
5788 I.e. either if a single addi is enough, or we need a movi anyway,
5789 and we don't exceed the maximum offset range (the test for the
5790 latter is conservative for simplicity). */
5791 if (TARGET_SHMEDIA
5792 && ! frame_pointer_needed
5793 && (CONST_OK_FOR_I10 (total_size)
5794 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
5795 && total_size <= 2044)))
5796 d_rounding = frame_size;
5798 frame_size -= d_rounding;
5801 if (frame_pointer_needed)
5803 output_stack_adjust (frame_size, frame_pointer_rtx, e, &live_regs_mask);
5805 /* We must avoid moving the stack pointer adjustment past code
5806 which reads from the local frame, else an interrupt could
5807 occur after the SP adjustment and clobber data in the local
5808 frame. */
5809 emit_insn (gen_blockage ());
5810 emit_insn (GEN_MOV (stack_pointer_rtx, frame_pointer_rtx));
5812 else if (frame_size)
5814 /* We must avoid moving the stack pointer adjustment past code
5815 which reads from the local frame, else an interrupt could
5816 occur after the SP adjustment and clobber data in the local
5817 frame. */
5818 emit_insn (gen_blockage ());
5819 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
5822 if (SHMEDIA_REGS_STACK_ADJUST ())
5824 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5825 function_symbol (TARGET_FPU_ANY
5826 ? "__GCC_pop_shmedia_regs"
5827 : "__GCC_pop_shmedia_regs_nofpu"));
5828 /* This must NOT go through the PLT, otherwise mach and macl
5829 may be clobbered. */
5830 emit_insn (gen_shmedia_save_restore_regs_compact
5831 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
5834 /* Pop all the registers. */
5836 if (target_flags != save_flags && ! current_function_interrupt)
5837 emit_insn (gen_toggle_sz ());
5838 if (TARGET_SH5)
5840 int offset_base, offset;
5841 int offset_in_r0 = -1;
5842 int sp_in_r0 = 0;
5843 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
5844 save_schedule schedule;
5845 save_entry *entry;
5846 int *tmp_pnt;
5848 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
5849 offset_base = -entry[1].offset + d_rounding;
5850 tmp_pnt = schedule.temps;
5851 for (; entry->mode != VOIDmode; entry--)
5853 enum machine_mode mode = entry->mode;
5854 int reg = entry->reg;
5855 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
5857 offset = offset_base + entry->offset;
5858 reg_rtx = gen_rtx_REG (mode, reg);
5860 mem_rtx = gen_rtx_MEM (mode,
5861 gen_rtx_PLUS (Pmode,
5862 stack_pointer_rtx,
5863 GEN_INT (offset)));
5865 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
5867 mem_rtx = NULL_RTX;
5869 try_post_inc:
5871 if (HAVE_POST_INCREMENT
5872 && (offset == offset_in_r0
5873 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
5874 && mem_rtx == NULL_RTX)
5875 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5877 post_inc = gen_rtx_MEM (mode,
5878 gen_rtx_POST_INC (Pmode, r0));
5880 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
5881 post_inc_ok);
5883 post_inc = NULL_RTX;
5885 break;
5887 post_inc_ok:
5888 mem_rtx = NULL_RTX;
5890 while (0);
5892 if (mem_rtx != NULL_RTX)
5893 goto addr_ok;
5895 if (offset_in_r0 == -1)
5897 emit_move_insn (r0, GEN_INT (offset));
5898 offset_in_r0 = offset;
5900 else if (offset != offset_in_r0)
5902 emit_move_insn (r0,
5903 gen_rtx_PLUS
5904 (Pmode, r0,
5905 GEN_INT (offset - offset_in_r0)));
5906 offset_in_r0 += offset - offset_in_r0;
5909 if (post_inc != NULL_RTX)
5911 if (! sp_in_r0)
5913 emit_move_insn (r0,
5914 gen_rtx_PLUS
5915 (Pmode, r0, stack_pointer_rtx));
5916 sp_in_r0 = 1;
5919 mem_rtx = post_inc;
5921 offset_in_r0 += GET_MODE_SIZE (mode);
5923 else if (sp_in_r0)
5924 mem_rtx = gen_rtx_MEM (mode, r0);
5925 else
5926 mem_rtx = gen_rtx_MEM (mode,
5927 gen_rtx_PLUS (Pmode,
5928 stack_pointer_rtx,
5929 r0));
5931 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5932 && mem_rtx != post_inc)
5933 abort ();
5935 addr_ok:
5936 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5937 && mem_rtx != post_inc)
5939 insn = emit_move_insn (r0, mem_rtx);
5940 mem_rtx = r0;
5942 else if (TARGET_REGISTER_P (reg))
5944 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
5946 /* Give the scheduler a bit of freedom by using up to
5947 MAX_TEMPS registers in a round-robin fashion. */
5948 insn = emit_move_insn (tmp_reg, mem_rtx);
5949 mem_rtx = tmp_reg;
5950 if (*++tmp_pnt < 0)
5951 tmp_pnt = schedule.temps;
5954 insn = emit_move_insn (reg_rtx, mem_rtx);
5955 if (reg == PR_MEDIA_REG && sh_media_register_for_return () >= 0)
5956 /* This is dead, unless we return with a sibcall. */
5957 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5958 const0_rtx,
5959 REG_NOTES (insn));
5962 if (entry->offset + offset_base != d + d_rounding)
5963 abort ();
5965 else /* ! TARGET_SH5 */
5967 save_size = 0;
5968 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
5969 pop (PR_REG);
5970 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5972 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
5974 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
5975 && hard_regs_intersect_p (&live_regs_mask,
5976 &reg_class_contents[DF_REGS]))
5977 fpscr_deferred = 1;
5978 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
5979 pop (j);
5980 if (j == FIRST_FP_REG && fpscr_deferred)
5981 pop (FPSCR_REG);
5985 if (target_flags != save_flags && ! current_function_interrupt)
5986 emit_insn (gen_toggle_sz ());
5987 target_flags = save_flags;
5989 output_stack_adjust (current_function_pretend_args_size
5990 + save_size + d_rounding
5991 + current_function_args_info.stack_regs * 8,
5992 stack_pointer_rtx, e, NULL);
5994 if (current_function_calls_eh_return)
5995 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
5996 EH_RETURN_STACKADJ_RTX));
5998 /* Switch back to the normal stack if necessary. */
5999 if (sp_switch)
6000 emit_insn (gen_sp_switch_2 ());
6002 /* Tell flow the insn that pops PR isn't dead. */
6003 /* PR_REG will never be live in SHmedia mode, and we don't need to
6004 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6005 by the return pattern. */
6006 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6007 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
6010 static int sh_need_epilogue_known = 0;
6013 sh_need_epilogue (void)
6015 if (! sh_need_epilogue_known)
6017 rtx epilogue;
6019 start_sequence ();
6020 sh_expand_epilogue (0);
6021 epilogue = get_insns ();
6022 end_sequence ();
6023 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
6025 return sh_need_epilogue_known > 0;
6028 /* Emit code to change the current function's return address to RA.
6029 TEMP is available as a scratch register, if needed. */
6031 void
6032 sh_set_return_address (rtx ra, rtx tmp)
6034 HARD_REG_SET live_regs_mask;
6035 int d;
6036 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6037 int pr_offset;
6039 d = calc_live_regs (&live_regs_mask);
6041 /* If pr_reg isn't live, we can set it (or the register given in
6042 sh_media_register_for_return) directly. */
6043 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6045 rtx rr;
6047 if (TARGET_SHMEDIA)
6049 int rr_regno = sh_media_register_for_return ();
6051 if (rr_regno < 0)
6052 rr_regno = pr_reg;
6054 rr = gen_rtx_REG (DImode, rr_regno);
6056 else
6057 rr = gen_rtx_REG (SImode, pr_reg);
6059 emit_insn (GEN_MOV (rr, ra));
6060 /* Tell flow the register for return isn't dead. */
6061 emit_insn (gen_rtx_USE (VOIDmode, rr));
6062 return;
6065 if (TARGET_SH5)
6067 int offset;
6068 save_schedule schedule;
6069 save_entry *entry;
6071 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
6072 offset = entry[1].offset;
6073 for (; entry->mode != VOIDmode; entry--)
6074 if (entry->reg == pr_reg)
6075 goto found;
6077 /* We can't find the PR register. */
6078 abort ();
6080 found:
6081 offset = entry->offset - offset;
6082 pr_offset = (rounded_frame_size (d) + offset
6083 + SHMEDIA_REGS_STACK_ADJUST ());
6085 else
6086 pr_offset = rounded_frame_size (d);
6088 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6089 emit_insn (GEN_ADD3 (tmp, tmp, frame_pointer_rtx));
6091 tmp = gen_rtx_MEM (Pmode, tmp);
6092 emit_insn (GEN_MOV (tmp, ra));
6095 /* Clear variables at function end. */
6097 static void
6098 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6099 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6101 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
6102 sh_need_epilogue_known = 0;
6103 sp_switch = NULL_RTX;
6106 static rtx
6107 sh_builtin_saveregs (void)
6109 /* First unnamed integer register. */
6110 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
6111 /* Number of integer registers we need to save. */
6112 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6113 /* First unnamed SFmode float reg */
6114 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6115 /* Number of SFmode float regs to save. */
6116 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6117 rtx regbuf, fpregs;
6118 int bufsize, regno;
6119 HOST_WIDE_INT alias_set;
6121 if (TARGET_SH5)
6123 if (n_intregs)
6125 int pushregs = n_intregs;
6127 while (pushregs < NPARM_REGS (SImode) - 1
6128 && (CALL_COOKIE_INT_REG_GET
6129 (current_function_args_info.call_cookie,
6130 NPARM_REGS (SImode) - pushregs)
6131 == 1))
6133 current_function_args_info.call_cookie
6134 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6135 - pushregs, 1);
6136 pushregs++;
6139 if (pushregs == NPARM_REGS (SImode))
6140 current_function_args_info.call_cookie
6141 |= (CALL_COOKIE_INT_REG (0, 1)
6142 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6143 else
6144 current_function_args_info.call_cookie
6145 |= CALL_COOKIE_STACKSEQ (pushregs);
6147 current_function_pretend_args_size += 8 * n_intregs;
6149 if (TARGET_SHCOMPACT)
6150 return const0_rtx;
6153 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
6155 error ("__builtin_saveregs not supported by this subtarget");
6156 return const0_rtx;
6159 if (TARGET_SHMEDIA)
6160 n_floatregs = 0;
6162 /* Allocate block of memory for the regs. */
6163 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6164 Or can assign_stack_local accept a 0 SIZE argument? */
6165 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
6167 if (TARGET_SHMEDIA)
6168 regbuf = gen_rtx_MEM (BLKmode,
6169 gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
6170 else if (n_floatregs & 1)
6172 rtx addr;
6174 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6175 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
6176 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6177 regbuf = change_address (regbuf, BLKmode, addr);
6179 else
6180 regbuf = assign_stack_local (BLKmode, bufsize, 0);
6181 alias_set = get_varargs_alias_set ();
6182 set_mem_alias_set (regbuf, alias_set);
6184 /* Save int args.
6185 This is optimized to only save the regs that are necessary. Explicitly
6186 named args need not be saved. */
6187 if (n_intregs > 0)
6188 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
6189 adjust_address (regbuf, BLKmode,
6190 n_floatregs * UNITS_PER_WORD),
6191 n_intregs);
6193 if (TARGET_SHMEDIA)
6194 /* Return the address of the regbuf. */
6195 return XEXP (regbuf, 0);
6197 /* Save float args.
6198 This is optimized to only save the regs that are necessary. Explicitly
6199 named args need not be saved.
6200 We explicitly build a pointer to the buffer because it halves the insn
6201 count when not optimizing (otherwise the pointer is built for each reg
6202 saved).
6203 We emit the moves in reverse order so that we can use predecrement. */
6205 fpregs = gen_reg_rtx (Pmode);
6206 emit_move_insn (fpregs, XEXP (regbuf, 0));
6207 emit_insn (gen_addsi3 (fpregs, fpregs,
6208 GEN_INT (n_floatregs * UNITS_PER_WORD)));
6209 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
6211 rtx mem;
6212 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
6214 emit_insn (gen_addsi3 (fpregs, fpregs,
6215 GEN_INT (-2 * UNITS_PER_WORD)));
6216 mem = gen_rtx_MEM (DFmode, fpregs);
6217 set_mem_alias_set (mem, alias_set);
6218 emit_move_insn (mem,
6219 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
6221 regno = first_floatreg;
6222 if (regno & 1)
6224 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6225 mem = gen_rtx_MEM (SFmode, fpregs);
6226 set_mem_alias_set (mem, alias_set);
6227 emit_move_insn (mem,
6228 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
6229 - (TARGET_LITTLE_ENDIAN != 0)));
6232 else
6233 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
6235 rtx mem;
6237 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6238 mem = gen_rtx_MEM (SFmode, fpregs);
6239 set_mem_alias_set (mem, alias_set);
6240 emit_move_insn (mem,
6241 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
6244 /* Return the address of the regbuf. */
6245 return XEXP (regbuf, 0);
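/* Editorial sketch (an assumption about the common SH4 case, not part
   of the original sources): the buffer laid out above places the
   floating-point argument registers in its low part and the unnamed
   integer argument registers (from the r4..r7 range) at offset
   n_floatregs * UNITS_PER_WORD, so sh_va_start below can point
   __va_next_fp at the start of REGBUF and __va_next_o at the integer
   block.  */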
6248 /* Define the `__builtin_va_list' type for the ABI. */
6250 static tree
6251 sh_build_builtin_va_list (void)
6253 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6254 tree record;
6256 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
6257 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6258 return ptr_type_node;
6260 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6262 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
6263 ptr_type_node);
6264 f_next_o_limit = build_decl (FIELD_DECL,
6265 get_identifier ("__va_next_o_limit"),
6266 ptr_type_node);
6267 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
6268 ptr_type_node);
6269 f_next_fp_limit = build_decl (FIELD_DECL,
6270 get_identifier ("__va_next_fp_limit"),
6271 ptr_type_node);
6272 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
6273 ptr_type_node);
6275 DECL_FIELD_CONTEXT (f_next_o) = record;
6276 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
6277 DECL_FIELD_CONTEXT (f_next_fp) = record;
6278 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
6279 DECL_FIELD_CONTEXT (f_next_stack) = record;
6281 TYPE_FIELDS (record) = f_next_o;
6282 TREE_CHAIN (f_next_o) = f_next_o_limit;
6283 TREE_CHAIN (f_next_o_limit) = f_next_fp;
6284 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
6285 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
6287 layout_type (record);
6289 return record;
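/* For illustration only (an editorial sketch, not from the original
   sources): the record built above corresponds to

     struct __va_list_tag
     {
       void *__va_next_o;         -- next unnamed integer argument
       void *__va_next_o_limit;   -- end of the integer register save area
       void *__va_next_fp;        -- next unnamed FP argument
       void *__va_next_fp_limit;  -- end of the FP register save area
       void *__va_next_stack;     -- next argument passed on the stack
     };

   sh_va_start and sh_gimplify_va_arg_expr below initialize and walk
   these fields.  */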
6292 /* Implement `va_start' for varargs and stdarg. */
6294 void
6295 sh_va_start (tree valist, rtx nextarg)
6297 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6298 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6299 tree t, u;
6300 int nfp, nint;
6302 if (TARGET_SH5)
6304 expand_builtin_saveregs ();
6305 std_expand_builtin_va_start (valist, nextarg);
6306 return;
6309 if ((! TARGET_SH2E && ! TARGET_SH4)
6310 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6312 std_expand_builtin_va_start (valist, nextarg);
6313 return;
6316 f_next_o = TYPE_FIELDS (va_list_type_node);
6317 f_next_o_limit = TREE_CHAIN (f_next_o);
6318 f_next_fp = TREE_CHAIN (f_next_o_limit);
6319 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6320 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6322 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6323 NULL_TREE);
6324 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6325 valist, f_next_o_limit, NULL_TREE);
6326 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
6327 NULL_TREE);
6328 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6329 valist, f_next_fp_limit, NULL_TREE);
6330 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6331 valist, f_next_stack, NULL_TREE);
6333 /* Call __builtin_saveregs. */
6334 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
6335 t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
6336 TREE_SIDE_EFFECTS (t) = 1;
6337 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6339 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
6340 if (nfp < 8)
6341 nfp = 8 - nfp;
6342 else
6343 nfp = 0;
6344 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6345 build_int_2 (UNITS_PER_WORD * nfp, 0)));
6346 t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
6347 TREE_SIDE_EFFECTS (t) = 1;
6348 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6350 t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
6351 TREE_SIDE_EFFECTS (t) = 1;
6352 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6354 nint = current_function_args_info.arg_count[SH_ARG_INT];
6355 if (nint < 4)
6356 nint = 4 - nint;
6357 else
6358 nint = 0;
6359 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6360 build_int_2 (UNITS_PER_WORD * nint, 0)));
6361 t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
6362 TREE_SIDE_EFFECTS (t) = 1;
6363 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6365 u = make_tree (ptr_type_node, nextarg);
6366 t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
6367 TREE_SIDE_EFFECTS (t) = 1;
6368 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
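/* Editorial sketch (not from the original sources) of what sh_va_start
   above effectively emits, written as plain C; NAMED_FLOAT_ARGS and
   NAMED_INT_ARGS stand for the named-argument counts recorded in
   current_function_args_info:

     ap.__va_next_fp       = __builtin_saveregs ();
     ap.__va_next_fp_limit = ap.__va_next_fp
                             + UNITS_PER_WORD * MAX (0, 8 - NAMED_FLOAT_ARGS);
     ap.__va_next_o        = ap.__va_next_fp_limit;
     ap.__va_next_o_limit  = ap.__va_next_o
                             + UNITS_PER_WORD * MAX (0, 4 - NAMED_INT_ARGS);
     ap.__va_next_stack    = nextarg;  */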
6371 /* Implement `va_arg'. */
6373 static tree
6374 sh_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p,
6375 tree *post_p ATTRIBUTE_UNUSED)
6377 HOST_WIDE_INT size, rsize;
6378 tree tmp, pptr_type_node;
6379 tree addr, lab_over, result = NULL;
6380 int pass_by_ref = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6382 if (pass_by_ref)
6383 type = build_pointer_type (type);
6385 size = int_size_in_bytes (type);
6386 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6387 pptr_type_node = build_pointer_type (ptr_type_node);
6389 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
6390 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
6392 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6393 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6394 int pass_as_float;
6395 tree lab_false;
6397 f_next_o = TYPE_FIELDS (va_list_type_node);
6398 f_next_o_limit = TREE_CHAIN (f_next_o);
6399 f_next_fp = TREE_CHAIN (f_next_o_limit);
6400 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6401 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6403 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6404 NULL_TREE);
6405 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6406 valist, f_next_o_limit, NULL_TREE);
6407 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
6408 valist, f_next_fp, NULL_TREE);
6409 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6410 valist, f_next_fp_limit, NULL_TREE);
6411 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6412 valist, f_next_stack, NULL_TREE);
6414 /* Structures with a single member with a distinct mode are passed
6415 like their member. This is relevant if the latter has a REAL_TYPE
6416 or COMPLEX_TYPE type. */
6417 if (TREE_CODE (type) == RECORD_TYPE
6418 && TYPE_FIELDS (type)
6419 && TREE_CODE (TYPE_FIELDS (type)) == FIELD_DECL
6420 && (TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == REAL_TYPE
6421 || TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == COMPLEX_TYPE)
6422 && TREE_CHAIN (TYPE_FIELDS (type)) == NULL_TREE)
6423 type = TREE_TYPE (TYPE_FIELDS (type));
6425 if (TARGET_SH4)
6427 pass_as_float = ((TREE_CODE (type) == REAL_TYPE && size <= 8)
6428 || (TREE_CODE (type) == COMPLEX_TYPE
6429 && TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
6430 && size <= 16));
6432 else
6434 pass_as_float = (TREE_CODE (type) == REAL_TYPE && size == 4);
6437 addr = create_tmp_var (pptr_type_node, NULL);
6438 lab_false = create_artificial_label ();
6439 lab_over = create_artificial_label ();
6441 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
6443 if (pass_as_float)
6445 int first_floatreg
6446 = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6447 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6449 tmp = build (GE_EXPR, boolean_type_node, next_fp, next_fp_limit);
6450 tmp = build (COND_EXPR, void_type_node, tmp,
6451 build (GOTO_EXPR, void_type_node, lab_false),
6452 NULL);
6453 gimplify_and_add (tmp, pre_p);
6455 if (TYPE_ALIGN (type) > BITS_PER_WORD
6456 || (((TREE_CODE (type) == REAL_TYPE && size == 8) || size == 16)
6457 && (n_floatregs & 1)))
6459 tmp = fold_convert (ptr_type_node, size_int (UNITS_PER_WORD));
6460 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp, tmp);
6461 tmp = build (PLUS_EXPR, ptr_type_node, next_fp, tmp);
6462 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp, tmp);
6463 gimplify_and_add (tmp, pre_p);
6466 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
6467 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6468 gimplify_and_add (tmp, pre_p);
6470 #ifdef FUNCTION_ARG_SCmode_WART
6471 if (TYPE_MODE (type) == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
6473 tree subtype = TREE_TYPE (type);
6474 tree real, imag;
6476 imag = std_gimplify_va_arg_expr (valist, subtype, pre_p, NULL);
6477 imag = get_initialized_tmp_var (imag, pre_p, NULL);
6479 real = std_gimplify_va_arg_expr (valist, subtype, pre_p, NULL);
6480 real = get_initialized_tmp_var (real, pre_p, NULL);
6482 result = build (COMPLEX_EXPR, type, real, imag);
6483 result = get_initialized_tmp_var (result, pre_p, NULL);
6485 #endif /* FUNCTION_ARG_SCmode_WART */
6487 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6488 gimplify_and_add (tmp, pre_p);
6490 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6491 gimplify_and_add (tmp, pre_p);
6493 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6494 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6495 gimplify_and_add (tmp, pre_p);
6497 else
6499 tmp = fold_convert (ptr_type_node, size_int (rsize));
6500 tmp = build (PLUS_EXPR, ptr_type_node, next_o, tmp);
6501 tmp = build (GT_EXPR, boolean_type_node, tmp, next_o_limit);
6502 tmp = build (COND_EXPR, void_type_node, tmp,
6503 build (GOTO_EXPR, void_type_node, lab_false),
6504 NULL);
6505 gimplify_and_add (tmp, pre_p);
6507 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
6508 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6509 gimplify_and_add (tmp, pre_p);
6511 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6512 gimplify_and_add (tmp, pre_p);
6514 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6515 gimplify_and_add (tmp, pre_p);
6517 if (size > 4 && ! TARGET_SH4)
6519 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
6520 gimplify_and_add (tmp, pre_p);
6523 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6524 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6525 gimplify_and_add (tmp, pre_p);
6528 if (!result)
6530 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6531 gimplify_and_add (tmp, pre_p);
6535 /* ??? In va-sh.h, there had been code to make values larger than
6536 size 8 indirect. This does not match the FUNCTION_ARG macros. */
6538 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
6539 if (result)
6541 tmp = build (MODIFY_EXPR, void_type_node, result, tmp);
6542 gimplify_and_add (tmp, pre_p);
6544 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6545 gimplify_and_add (tmp, pre_p);
6547 else
6548 result = tmp;
6550 if (pass_by_ref)
6551 result = build_fold_indirect_ref (result);
6553 return result;
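/* Editorial summary (not from the original sources): the GIMPLE built
   above amounts to

     if (passed in FP regs && next_fp < next_fp_limit)
       fetch the value through next_fp (the register save area);
     else if (passed in int regs && next_o + rsize <= next_o_limit)
       fetch the value through next_o;
     else
       fetch the value through next_stack;

   with std_gimplify_va_arg_expr doing the actual pointer increment.  */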
6556 bool
6557 sh_promote_prototypes (tree type)
6559 if (TARGET_HITACHI)
6560 return 0;
6561 if (! type)
6562 return 1;
6563 return ! sh_attr_renesas_p (type);
6566 /* Whether an argument must be passed by reference. On SHcompact, we
6567 pretend arguments wider than 32-bits that would have been passed in
6568 registers are passed by reference, so that an SHmedia trampoline
6569 loads them into the full 64-bits registers. */
6571 static int
6572 shcompact_byref (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6573 tree type, bool named)
6575 unsigned HOST_WIDE_INT size;
6577 if (type)
6578 size = int_size_in_bytes (type);
6579 else
6580 size = GET_MODE_SIZE (mode);
6582 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
6583 && (!named
6584 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
6585 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
6586 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
6587 && size > 4
6588 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
6589 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
6590 return size;
6591 else
6592 return 0;
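/* Editorial note (not from the original sources): for instance, an
   unnamed 8-byte argument that still has integer registers available
   makes shcompact_byref return 8, so sh_pass_by_reference below forces
   it to be passed by address and the SHmedia trampoline can load it
   into a full 64-bit register.  */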
6595 static bool
6596 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6597 tree type, bool named)
6599 if (targetm.calls.must_pass_in_stack (mode, type))
6600 return true;
6602 if (TARGET_SHCOMPACT)
6604 cum->byref = shcompact_byref (cum, mode, type, named);
6605 return cum->byref != 0;
6608 return false;
6611 /* Define where to put the arguments to a function.
6612 Value is zero to push the argument on the stack,
6613 or a hard register in which to store the argument.
6615 MODE is the argument's machine mode.
6616 TYPE is the data type of the argument (as a tree).
6617 This is null for libcalls where that information may
6618 not be available.
6619 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6620 the preceding args and about the function being called.
6621 NAMED is nonzero if this argument is a named parameter
6622 (otherwise it is an extra parameter matching an ellipsis).
6624 On SH the first args are normally in registers
6625 and the rest are pushed. Any arg that starts within the first
6626 NPARM_REGS words is at least partially passed in a register unless
6627 its data type forbids. */
6631 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
6632 tree type, int named)
6634 if (! TARGET_SH5 && mode == VOIDmode)
6635 return GEN_INT (ca->renesas_abi ? 1 : 0);
6637 if (! TARGET_SH5
6638 && PASS_IN_REG_P (*ca, mode, type)
6639 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
6641 int regno;
6643 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
6644 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
6646 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
6647 gen_rtx_REG (SFmode,
6648 BASE_ARG_REG (mode)
6649 + (ROUND_REG (*ca, mode) ^ 1)),
6650 const0_rtx);
6651 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
6652 gen_rtx_REG (SFmode,
6653 BASE_ARG_REG (mode)
6654 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
6655 GEN_INT (4));
6656 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
6659 /* If the alignment of a DF value causes an SF register to be
6660 skipped, we will use that skipped register for the next SF
6661 value. */
6662 if ((TARGET_HITACHI || ca->renesas_abi)
6663 && ca->free_single_fp_reg
6664 && mode == SFmode)
6665 return gen_rtx_REG (mode, ca->free_single_fp_reg);
6667 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
6668 ^ (mode == SFmode && TARGET_SH4
6669 && TARGET_LITTLE_ENDIAN != 0
6670 && ! TARGET_HITACHI && ! ca->renesas_abi);
6671 return gen_rtx_REG (mode, regno);
6675 if (TARGET_SH5)
6677 if (mode == VOIDmode && TARGET_SHCOMPACT)
6678 return GEN_INT (ca->call_cookie);
6680 /* The following test assumes unnamed arguments are promoted to
6681 DFmode. */
6682 if (mode == SFmode && ca->free_single_fp_reg)
6683 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
6685 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
6686 && (named || ! ca->prototype_p)
6687 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
6689 if (! ca->prototype_p && TARGET_SHMEDIA)
6690 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
6692 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
6693 FIRST_FP_PARM_REG
6694 + ca->arg_count[(int) SH_ARG_FLOAT]);
6697 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
6698 && (! TARGET_SHCOMPACT
6699 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
6700 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
6701 type, named))))
6703 return gen_rtx_REG (mode, (FIRST_PARM_REG
6704 + ca->arg_count[(int) SH_ARG_INT]));
6707 return 0;
6710 return 0;
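/* Editorial note (not from the original sources): on the plain SH ABI
   the code above hands out r4..r7 for integer arguments and, with an
   FPU, fr4..fr11 for floating-point ones; an argument that finds no
   register is passed on the stack, which the zero return indicates.  */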
6713 /* Update the data in CUM to advance over an argument
6714 of mode MODE and data type TYPE.
6715 (TYPE is null for libcalls where that information may not be
6716 available.) */
6718 void
6719 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
6720 tree type, int named)
6722 if (ca->force_mem)
6723 ca->force_mem = 0;
6724 else if (TARGET_SH5)
6726 tree type2 = (ca->byref && type
6727 ? TREE_TYPE (type)
6728 : type);
6729 enum machine_mode mode2 = (ca->byref && type
6730 ? TYPE_MODE (type2)
6731 : mode);
6732 int dwords = ((ca->byref
6733 ? ca->byref
6734 : mode2 == BLKmode
6735 ? int_size_in_bytes (type2)
6736 : GET_MODE_SIZE (mode2)) + 7) / 8;
6737 int numregs = MIN (dwords, NPARM_REGS (SImode)
6738 - ca->arg_count[(int) SH_ARG_INT]);
6740 if (numregs)
6742 ca->arg_count[(int) SH_ARG_INT] += numregs;
6743 if (TARGET_SHCOMPACT
6744 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
6746 ca->call_cookie
6747 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6748 - numregs, 1);
6749 /* N.B. We want this also for outgoing. */
6750 ca->stack_regs += numregs;
6752 else if (ca->byref)
6754 if (! ca->outgoing)
6755 ca->stack_regs += numregs;
6756 ca->byref_regs += numregs;
6757 ca->byref = 0;
6759 ca->call_cookie
6760 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6761 - numregs, 2);
6762 while (--numregs);
6763 ca->call_cookie
6764 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6765 - 1, 1);
6767 else if (dwords > numregs)
6769 int pushregs = numregs;
6771 if (TARGET_SHCOMPACT)
6772 ca->stack_regs += numregs;
6773 while (pushregs < NPARM_REGS (SImode) - 1
6774 && (CALL_COOKIE_INT_REG_GET
6775 (ca->call_cookie,
6776 NPARM_REGS (SImode) - pushregs)
6777 == 1))
6779 ca->call_cookie
6780 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6781 - pushregs, 1);
6782 pushregs++;
6784 if (numregs == NPARM_REGS (SImode))
6785 ca->call_cookie
6786 |= CALL_COOKIE_INT_REG (0, 1)
6787 | CALL_COOKIE_STACKSEQ (numregs - 1);
6788 else
6789 ca->call_cookie
6790 |= CALL_COOKIE_STACKSEQ (numregs);
6793 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
6794 && (named || ! ca->prototype_p))
6796 if (mode2 == SFmode && ca->free_single_fp_reg)
6797 ca->free_single_fp_reg = 0;
6798 else if (ca->arg_count[(int) SH_ARG_FLOAT]
6799 < NPARM_REGS (SFmode))
6801 int numfpregs
6802 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
6803 NPARM_REGS (SFmode)
6804 - ca->arg_count[(int) SH_ARG_FLOAT]);
6806 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
6808 if (TARGET_SHCOMPACT && ! ca->prototype_p)
6810 if (ca->outgoing && numregs > 0)
6813 ca->call_cookie
6814 |= (CALL_COOKIE_INT_REG
6815 (ca->arg_count[(int) SH_ARG_INT]
6816 - numregs + ((numfpregs - 2) / 2),
6817 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
6818 - numfpregs) / 2));
6820 while (numfpregs -= 2);
6822 else if (mode2 == SFmode && (named)
6823 && (ca->arg_count[(int) SH_ARG_FLOAT]
6824 < NPARM_REGS (SFmode)))
6825 ca->free_single_fp_reg
6826 = FIRST_FP_PARM_REG - numfpregs
6827 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
6830 return;
6833 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
6835 /* Note that we've used the skipped register. */
6836 if (mode == SFmode && ca->free_single_fp_reg)
6838 ca->free_single_fp_reg = 0;
6839 return;
6841 /* When we have a DF after an SF, there's an SF register that gets
6842 skipped in order to align the DF value. We note this skipped
6843 register, because the next SF value will use it, and not the
6844 SF that follows the DF. */
6845 if (mode == DFmode
6846 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
6848 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
6849 + BASE_ARG_REG (mode));
6853 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
6854 || PASS_IN_REG_P (*ca, mode, type))
6855 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
6856 = (ROUND_REG (*ca, mode)
6857 + (mode == BLKmode
6858 ? ROUND_ADVANCE (int_size_in_bytes (type))
6859 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
6862 /* The Renesas calling convention doesn't quite fit into this scheme since
6863 the address is passed like an invisible argument, but one that is always
6864 passed in memory. */
6865 static rtx
6866 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
6868 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
6869 return 0;
6870 return gen_rtx_REG (Pmode, 2);
6873 /* Worker function for TARGET_RETURN_IN_MEMORY. */
6875 static bool
6876 sh_return_in_memory (tree type, tree fndecl)
6878 if (TARGET_SH5)
6880 if (TYPE_MODE (type) == BLKmode)
6881 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
6882 else
6883 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
6885 else
6887 return (TYPE_MODE (type) == BLKmode
6888 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
6889 && TREE_CODE (type) == RECORD_TYPE));
6893 /* We actually emit the code in sh_expand_prologue. We used to use
6894 a static variable to flag that we need to emit this code, but that
6895 doesn't work when inlining, when functions are deferred and then emitted
6896 later. Fortunately, we already have two flags that are part of struct
6897 function that tell if a function uses varargs or stdarg. */
6898 static void
6899 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
6900 enum machine_mode mode,
6901 tree type,
6902 int *pretend_arg_size,
6903 int second_time ATTRIBUTE_UNUSED)
6905 if (! current_function_stdarg)
6906 abort ();
6907 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6909 int named_parm_regs, anon_parm_regs;
6911 named_parm_regs = (ROUND_REG (*ca, mode)
6912 + (mode == BLKmode
6913 ? ROUND_ADVANCE (int_size_in_bytes (type))
6914 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
6915 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
6916 if (anon_parm_regs > 0)
6917 *pretend_arg_size = anon_parm_regs * 4;
6921 static bool
6922 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
6924 return TARGET_SH5;
6927 static bool
6928 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
6930 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
6934 /* Define the offset between two registers, one to be eliminated, and
6935 the other its replacement, at the start of a routine. */
6938 initial_elimination_offset (int from, int to)
6940 int regs_saved;
6941 int regs_saved_rounding = 0;
6942 int total_saved_regs_space;
6943 int total_auto_space;
6944 int save_flags = target_flags;
6945 int copy_flags;
6946 HARD_REG_SET live_regs_mask;
6948 shmedia_space_reserved_for_target_registers = false;
6949 regs_saved = calc_live_regs (&live_regs_mask);
6950 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
6952 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
6954 shmedia_space_reserved_for_target_registers = true;
6955 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
6958 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
6959 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6960 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
6962 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
6963 copy_flags = target_flags;
6964 target_flags = save_flags;
6966 total_saved_regs_space = regs_saved + regs_saved_rounding;
6968 if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
6969 return total_saved_regs_space + total_auto_space
6970 + current_function_args_info.byref_regs * 8;
6972 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
6973 return total_saved_regs_space + total_auto_space
6974 + current_function_args_info.byref_regs * 8;
6976 /* Initial gap between fp and sp is 0. */
6977 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
6978 return 0;
6980 if (from == RETURN_ADDRESS_POINTER_REGNUM
6981 && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM))
6983 if (TARGET_SH5)
6985 int n = total_saved_regs_space;
6986 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6987 save_schedule schedule;
6988 save_entry *entry;
6990 n += total_auto_space;
6992 /* If it wasn't saved, there's not much we can do. */
6993 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6994 return n;
6996 target_flags = copy_flags;
6998 sh5_schedule_saves (&live_regs_mask, &schedule, n);
6999 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7000 if (entry->reg == pr_reg)
7002 target_flags = save_flags;
7003 return entry->offset;
7005 abort ();
7007 else
7008 return total_auto_space;
7011 abort ();
7014 /* Handle machine specific pragmas to be semi-compatible with Renesas
7015 compiler. */
7017 void
7018 sh_pr_interrupt (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7020 pragma_interrupt = 1;
7023 void
7024 sh_pr_trapa (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7026 pragma_interrupt = pragma_trapa = 1;
7029 void
7030 sh_pr_nosave_low_regs (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7032 pragma_nosave_low_regs = 1;
7035 /* Generate an 'interrupt_handler' attribute for decls. */
7037 static void
7038 sh_insert_attributes (tree node, tree *attributes)
7040 if (! pragma_interrupt
7041 || TREE_CODE (node) != FUNCTION_DECL)
7042 return;
7044 /* We are only interested in declarations. */
7045 if (TREE_CODE_CLASS (TREE_CODE (node)) != 'd')
7046 return;
7048 /* Add an 'interrupt_handler' attribute. */
7049 * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
7051 return;
7054 /* Supported attributes:
7056 interrupt_handler -- specifies this function is an interrupt handler.
7058 sp_switch -- specifies an alternate stack for an interrupt handler
7059 to run on.
7061 trap_exit -- use a trapa to exit an interrupt function instead of
7062 an rte instruction.
7064 renesas -- use Renesas calling/layout conventions (functions and
7065 structures).
7069 const struct attribute_spec sh_attribute_table[] =
7071 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
7072 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7073 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
7074 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
7075 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
7076 #ifdef SYMBIAN
7077 /* Symbian support adds two new attributes:
7078 dllexport - for exporting a function/variable that will live in a dll
7079 dllimport - for importing a function/variable from a dll
7081 Microsoft allows multiple declspecs in one __declspec, separating
7082 them with spaces. We do NOT support this. Instead, use __declspec
7083 multiple times. */
7084 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7085 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7086 #endif
7087 { NULL, 0, 0, false, false, false, NULL }
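/* Editorial example (not part of the original sources) of how the
   attributes above are typically used together with the pragmas
   handled earlier in this file:

     #pragma interrupt
     void isr (void)
        __attribute__ ((sp_switch ("alt_stack"), trap_exit (12)));

   where "alt_stack" is a hypothetical variable holding the alternate
   stack pointer and 12 is the trapa number used at exit.  */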
7090 /* Handle an "interrupt_handler" attribute; arguments as in
7091 struct attribute_spec.handler. */
7092 static tree
7093 sh_handle_interrupt_handler_attribute (tree *node, tree name,
7094 tree args ATTRIBUTE_UNUSED,
7095 int flags ATTRIBUTE_UNUSED,
7096 bool *no_add_attrs)
7098 if (TREE_CODE (*node) != FUNCTION_DECL)
7100 warning ("`%s' attribute only applies to functions",
7101 IDENTIFIER_POINTER (name));
7102 *no_add_attrs = true;
7104 else if (TARGET_SHCOMPACT)
7106 error ("attribute interrupt_handler is not compatible with -m5-compact");
7107 *no_add_attrs = true;
7110 return NULL_TREE;
7113 /* Handle an "sp_switch" attribute; arguments as in
7114 struct attribute_spec.handler. */
7115 static tree
7116 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
7117 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7119 if (TREE_CODE (*node) != FUNCTION_DECL)
7121 warning ("`%s' attribute only applies to functions",
7122 IDENTIFIER_POINTER (name));
7123 *no_add_attrs = true;
7125 else if (!pragma_interrupt)
7127 /* The sp_switch attribute only has meaning for interrupt functions. */
7128 warning ("`%s' attribute only applies to interrupt functions",
7129 IDENTIFIER_POINTER (name));
7130 *no_add_attrs = true;
7132 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
7134 /* The argument must be a constant string. */
7135 warning ("`%s' attribute argument not a string constant",
7136 IDENTIFIER_POINTER (name));
7137 *no_add_attrs = true;
7139 else
7141 sp_switch = gen_rtx_SYMBOL_REF (VOIDmode,
7142 TREE_STRING_POINTER (TREE_VALUE (args)));
7145 return NULL_TREE;
7148 /* Handle a "trap_exit" attribute; arguments as in
7149 struct attribute_spec.handler. */
7150 static tree
7151 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
7152 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7154 if (TREE_CODE (*node) != FUNCTION_DECL)
7156 warning ("`%s' attribute only applies to functions",
7157 IDENTIFIER_POINTER (name));
7158 *no_add_attrs = true;
7160 else if (!pragma_interrupt)
7162 /* The trap_exit attribute only has meaning for interrupt functions. */
7163 warning ("`%s' attribute only applies to interrupt functions",
7164 IDENTIFIER_POINTER (name));
7165 *no_add_attrs = true;
7167 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
7169 /* The argument must be a constant integer. */
7170 warning ("`%s' attribute argument not an integer constant",
7171 IDENTIFIER_POINTER (name));
7172 *no_add_attrs = true;
7174 else
7176 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
7179 return NULL_TREE;
7182 static tree
7183 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
7184 tree name ATTRIBUTE_UNUSED,
7185 tree args ATTRIBUTE_UNUSED,
7186 int flags ATTRIBUTE_UNUSED,
7187 bool *no_add_attrs ATTRIBUTE_UNUSED)
7189 return NULL_TREE;
7192 /* True if __attribute__((renesas)) or -mrenesas. */
7194 sh_attr_renesas_p (tree td)
7196 if (TARGET_HITACHI)
7197 return 1;
7198 if (td == 0)
7199 return 0;
7200 if (DECL_P (td))
7201 td = TREE_TYPE (td);
7202 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
7203 != NULL_TREE);
7206 /* True if __attribute__((renesas)) or -mrenesas, for the current
7207 function. */
7209 sh_cfun_attr_renesas_p (void)
7211 return sh_attr_renesas_p (current_function_decl);
7215 sh_cfun_interrupt_handler_p (void)
7217 return (lookup_attribute ("interrupt_handler",
7218 DECL_ATTRIBUTES (current_function_decl))
7219 != NULL_TREE);
7222 /* ??? target_switches in toplev.c is static, hence we have to duplicate it. */
7223 static const struct
7225 const char *const name;
7226 const int value;
7227 const char *const description;
7229 sh_target_switches[] = TARGET_SWITCHES;
7230 #define target_switches sh_target_switches
7232 /* Like default_pch_valid_p, but take flag_mask into account. */
7233 const char *
7234 sh_pch_valid_p (const void *data_p, size_t len)
7236 const char *data = (const char *)data_p;
7237 const char *flag_that_differs = NULL;
7238 size_t i;
7239 int old_flags;
7240 int flag_mask
7241 = (SH1_BIT | SH2_BIT | SH3_BIT | SH_E_BIT | HARD_SH4_BIT | FPU_SINGLE_BIT
7242 | SH4_BIT | HITACHI_BIT | LITTLE_ENDIAN_BIT);
7244 /* -fpic and -fpie also usually make a PCH invalid. */
7245 if (data[0] != flag_pic)
7246 return _("created and used with different settings of -fpic");
7247 if (data[1] != flag_pie)
7248 return _("created and used with different settings of -fpie");
7249 data += 2;
7251 /* Check target_flags. */
7252 memcpy (&old_flags, data, sizeof (target_flags));
7253 if (((old_flags ^ target_flags) & flag_mask) != 0)
7255 for (i = 0; i < ARRAY_SIZE (target_switches); i++)
7257 int bits;
7259 bits = target_switches[i].value;
7260 if (bits < 0)
7261 bits = -bits;
7262 bits &= flag_mask;
7263 if ((target_flags & bits) != (old_flags & bits))
7265 flag_that_differs = target_switches[i].name;
7266 goto make_message;
7269 abort ();
7271 data += sizeof (target_flags);
7272 len -= sizeof (target_flags);
7274 /* Check string options. */
7275 #ifdef TARGET_OPTIONS
7276 for (i = 0; i < ARRAY_SIZE (target_options); i++)
7278 const char *str = *target_options[i].variable;
7279 size_t l;
7280 if (! str)
7281 str = "";
7282 l = strlen (str) + 1;
7283 if (len < l || memcmp (data, str, l) != 0)
7285 flag_that_differs = target_options[i].prefix;
7286 goto make_message;
7288 data += l;
7289 len -= l;
7291 #endif
7293 return NULL;
7295 make_message:
7297 char *r;
7298 asprintf (&r, _("created and used with differing settings of `-m%s'"),
7299 flag_that_differs);
7300 if (r == NULL)
7301 return _("out of memory");
7302 return r;
7306 /* Predicates used by the templates. */
7308 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
7309 Used only in general_movsrc_operand. */
7312 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7314 switch (REGNO (op))
7316 case PR_REG:
7317 case MACL_REG:
7318 case MACH_REG:
7319 return 1;
7321 return 0;
7324 /* Returns 1 if OP can be source of a simple move operation.
7325 Same as general_operand, but a LABEL_REF is valid, PRE_DEC is
7326 invalid as are subregs of system registers. */
7329 general_movsrc_operand (rtx op, enum machine_mode mode)
7331 if (GET_CODE (op) == MEM)
7333 rtx inside = XEXP (op, 0);
7334 if (GET_CODE (inside) == CONST)
7335 inside = XEXP (inside, 0);
7337 if (GET_CODE (inside) == LABEL_REF)
7338 return 1;
7340 if (GET_CODE (inside) == PLUS
7341 && GET_CODE (XEXP (inside, 0)) == LABEL_REF
7342 && GET_CODE (XEXP (inside, 1)) == CONST_INT)
7343 return 1;
7345 /* Only post inc allowed. */
7346 if (GET_CODE (inside) == PRE_DEC)
7347 return 0;
7350 if ((mode == QImode || mode == HImode)
7351 && (GET_CODE (op) == SUBREG
7352 && GET_CODE (XEXP (op, 0)) == REG
7353 && system_reg_operand (XEXP (op, 0), mode)))
7354 return 0;
7356 return general_operand (op, mode);
7359 /* Returns 1 if OP can be a destination of a move.
7360 Same as general_operand, but no post-increment allowed. */
7363 general_movdst_operand (rtx op, enum machine_mode mode)
7365 /* Only pre dec allowed. */
7366 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC)
7367 return 0;
7369 return general_operand (op, mode);
7372 /* Returns 1 if OP is a normal arithmetic register. */
7375 arith_reg_operand (rtx op, enum machine_mode mode)
7377 if (register_operand (op, mode))
7379 int regno;
7381 if (GET_CODE (op) == REG)
7382 regno = REGNO (op);
7383 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7384 regno = REGNO (SUBREG_REG (op));
7385 else
7386 return 1;
7388 return (regno != T_REG && regno != PR_REG
7389 && ! TARGET_REGISTER_P (regno)
7390 && (regno != FPUL_REG || TARGET_SH4)
7391 && regno != MACH_REG && regno != MACL_REG);
7393 return 0;
7396 /* Like above, but for DImode destinations: forbid paradoxical DImode subregs,
7397 because this would lead to missing sign extensions when truncating from
7398 DImode to SImode. */
7400 arith_reg_dest (rtx op, enum machine_mode mode)
7402 if (mode == DImode && GET_CODE (op) == SUBREG
7403 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8)
7404 return 0;
7405 return arith_reg_operand (op, mode);
7409 int_gpr_dest (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7411 enum machine_mode op_mode = GET_MODE (op);
7413 if (GET_MODE_CLASS (op_mode) != MODE_INT
7414 || GET_MODE_SIZE (op_mode) >= UNITS_PER_WORD)
7415 return 0;
7416 if (! reload_completed)
7417 return 0;
7418 return true_regnum (op) <= LAST_GENERAL_REG;
7422 fp_arith_reg_operand (rtx op, enum machine_mode mode)
7424 if (register_operand (op, mode))
7426 int regno;
7428 if (GET_CODE (op) == REG)
7429 regno = REGNO (op);
7430 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7431 regno = REGNO (SUBREG_REG (op));
7432 else
7433 return 1;
7435 return (regno >= FIRST_PSEUDO_REGISTER
7436 || FP_REGISTER_P (regno));
7438 return 0;
7441 /* Returns 1 if OP is a valid source operand for an arithmetic insn. */
7444 arith_operand (rtx op, enum machine_mode mode)
7446 if (arith_reg_operand (op, mode))
7447 return 1;
7449 if (TARGET_SHMEDIA)
7451 /* FIXME: We should be checking whether the CONST_INT fits in a
7452 CONST_OK_FOR_I16 here, but this causes reload_cse to crash when
7453 attempting to transform a sequence of two 64-bit sets of the
7454 same register from literal constants into a set and an add,
7455 when the difference is too wide for an add. */
7456 if (GET_CODE (op) == CONST_INT
7457 || EXTRA_CONSTRAINT_C16 (op))
7458 return 1;
7459 else
7460 return 0;
7462 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I08 (INTVAL (op)))
7463 return 1;
7465 return 0;
7468 /* Returns 1 if OP is a valid source operand for a compare insn. */
7471 arith_reg_or_0_operand (rtx op, enum machine_mode mode)
7473 if (arith_reg_operand (op, mode))
7474 return 1;
7476 if (EXTRA_CONSTRAINT_Z (op))
7477 return 1;
7479 return 0;
7482 /* Return 1 if OP is a valid source operand for an SHmedia operation
7483 that takes either a register or a 6-bit immediate. */
7486 shmedia_6bit_operand (rtx op, enum machine_mode mode)
7488 return (arith_reg_operand (op, mode)
7489 || (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I06 (INTVAL (op))));
7492 /* Returns 1 if OP is a valid source operand for a logical operation. */
7495 logical_operand (rtx op, enum machine_mode mode)
7497 if (arith_reg_operand (op, mode))
7498 return 1;
7500 if (TARGET_SHMEDIA)
7502 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I10 (INTVAL (op)))
7503 return 1;
7504 else
7505 return 0;
7507 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_K08 (INTVAL (op)))
7508 return 1;
7510 return 0;
7514 and_operand (rtx op, enum machine_mode mode)
7516 if (logical_operand (op, mode))
7517 return 1;
7519 /* Check mshflo.l / mshflhi.l opportunities. */
7520 if (TARGET_SHMEDIA
7521 && mode == DImode
7522 && GET_CODE (op) == CONST_INT
7523 && CONST_OK_FOR_J16 (INTVAL (op)))
7524 return 1;
7526 return 0;
7529 /* Nonzero if OP is a floating point value with value 0.0. */
7532 fp_zero_operand (rtx op)
7534 REAL_VALUE_TYPE r;
7536 if (GET_MODE (op) != SFmode)
7537 return 0;
7539 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7540 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
7543 /* Nonzero if OP is a floating point value with value 1.0. */
7546 fp_one_operand (rtx op)
7548 REAL_VALUE_TYPE r;
7550 if (GET_MODE (op) != SFmode)
7551 return 0;
7553 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7554 return REAL_VALUES_EQUAL (r, dconst1);
7557 /* For -m4 and -m4-single-only, mode switching is used. If we are
7558 compiling without -mfmovd, movsf_ie isn't taken into account for
7559 mode switching. We could check in machine_dependent_reorg for
7560 cases where we know we are in single precision mode, but there is
7561 no interface to find that out during reload, so we must avoid
7562 choosing an fldi alternative during reload and thus failing to
7563 allocate a scratch register for the constant loading. */
7565 fldi_ok (void)
7567 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
7571 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7573 enum rtx_code code = GET_CODE (op);
7574 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
7578 fpscr_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7580 return (GET_CODE (op) == REG
7581 && (REGNO (op) == FPSCR_REG
7582 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
7583 && !(reload_in_progress || reload_completed)))
7584 && GET_MODE (op) == PSImode);
7588 fpul_operand (rtx op, enum machine_mode mode)
7590 if (TARGET_SHMEDIA)
7591 return fp_arith_reg_operand (op, mode);
7593 return (GET_CODE (op) == REG
7594 && (REGNO (op) == FPUL_REG || REGNO (op) >= FIRST_PSEUDO_REGISTER)
7595 && GET_MODE (op) == mode);
7599 symbol_ref_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7601 return (GET_CODE (op) == SYMBOL_REF);
7604 /* Return the TLS type for TLS symbols, 0 for otherwise. */
7606 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7608 if (GET_CODE (op) != SYMBOL_REF)
7609 return 0;
7610 return SYMBOL_REF_TLS_MODEL (op);
7614 commutative_float_operator (rtx op, enum machine_mode mode)
7616 if (GET_MODE (op) != mode)
7617 return 0;
7618 switch (GET_CODE (op))
7620 case PLUS:
7621 case MULT:
7622 return 1;
7623 default:
7624 break;
7626 return 0;
7630 noncommutative_float_operator (rtx op, enum machine_mode mode)
7632 if (GET_MODE (op) != mode)
7633 return 0;
7634 switch (GET_CODE (op))
7636 case MINUS:
7637 case DIV:
7638 return 1;
7639 default:
7640 break;
7642 return 0;
7646 unary_float_operator (rtx op, enum machine_mode mode)
7648 if (GET_MODE (op) != mode)
7649 return 0;
7650 switch (GET_CODE (op))
7652 case ABS:
7653 case NEG:
7654 case SQRT:
7655 return 1;
7656 default:
7657 break;
7659 return 0;
7663 binary_float_operator (rtx op, enum machine_mode mode)
7665 if (GET_MODE (op) != mode)
7666 return 0;
7667 switch (GET_CODE (op))
7669 case PLUS:
7670 case MINUS:
7671 case MULT:
7672 case DIV:
7673 return 1;
7674 default:
7675 break;
7677 return 0;
7681 binary_logical_operator (rtx op, enum machine_mode mode)
7683 if (GET_MODE (op) != mode)
7684 return 0;
7685 switch (GET_CODE (op))
7687 case IOR:
7688 case AND:
7689 case XOR:
7690 return 1;
7691 default:
7692 break;
7694 return 0;
7698 equality_comparison_operator (rtx op, enum machine_mode mode)
7700 return ((mode == VOIDmode || GET_MODE (op) == mode)
7701 && (GET_CODE (op) == EQ || GET_CODE (op) == NE));
7705 greater_comparison_operator (rtx op, enum machine_mode mode)
7707 if (mode != VOIDmode && GET_MODE (op) != mode)
7708 return 0;
7709 switch (GET_CODE (op))
7711 case GT:
7712 case GE:
7713 case GTU:
7714 case GEU:
7715 return 1;
7716 default:
7717 return 0;
7722 less_comparison_operator (rtx op, enum machine_mode mode)
7724 if (mode != VOIDmode && GET_MODE (op) != mode)
7725 return 0;
7726 switch (GET_CODE (op))
7728 case LT:
7729 case LE:
7730 case LTU:
7731 case LEU:
7732 return 1;
7733 default:
7734 return 0;
7738 /* Accept pseudos and branch target registers. */
7740 target_reg_operand (rtx op, enum machine_mode mode)
7742 if (mode != DImode
7743 || GET_MODE (op) != DImode)
7744 return 0;
7746 if (GET_CODE (op) == SUBREG)
7747 op = XEXP (op, 0);
7749 if (GET_CODE (op) != REG)
7750 return 0;
7752 /* We must protect ourselves from matching pseudos that are virtual
7753 registers, because they will eventually be replaced with hardware
7754 registers that aren't branch-target registers. */
7755 if (REGNO (op) > LAST_VIRTUAL_REGISTER
7756 || TARGET_REGISTER_P (REGNO (op)))
7757 return 1;
7759 return 0;
7762 /* Same as target_reg_operand, except that label_refs and symbol_refs
7763 are accepted before reload. */
7765 target_operand (rtx op, enum machine_mode mode)
7767 if (mode != DImode)
7768 return 0;
7770 if ((GET_MODE (op) == DImode || GET_MODE (op) == VOIDmode)
7771 && EXTRA_CONSTRAINT_Csy (op))
7772 return ! reload_completed;
7774 return target_reg_operand (op, mode);
7778 mextr_bit_offset (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7780 HOST_WIDE_INT i;
7782 if (GET_CODE (op) != CONST_INT)
7783 return 0;
7784 i = INTVAL (op);
7785 return i >= 1 * 8 && i <= 7 * 8 && (i & 7) == 0;
7789 extend_reg_operand (rtx op, enum machine_mode mode)
7791 return (GET_CODE (op) == TRUNCATE
7792 ? arith_operand
7793 : arith_reg_operand) (op, mode);
7797 trunc_hi_operand (rtx op, enum machine_mode mode)
7799 enum machine_mode op_mode = GET_MODE (op);
7801 if (op_mode != SImode && op_mode != DImode
7802 && op_mode != V4HImode && op_mode != V2SImode)
7803 return 0;
7804 return extend_reg_operand (op, mode);
7808 extend_reg_or_0_operand (rtx op, enum machine_mode mode)
7810 return (GET_CODE (op) == TRUNCATE
7811 ? arith_operand
7812 : arith_reg_or_0_operand) (op, mode);
7816 general_extend_operand (rtx op, enum machine_mode mode)
7818 return (GET_CODE (op) == TRUNCATE
7819 ? arith_operand
7820 : nonimmediate_operand) (op, mode);
7824 inqhi_operand (rtx op, enum machine_mode mode)
7826 if (GET_CODE (op) != TRUNCATE || mode != GET_MODE (op))
7827 return 0;
7828 op = XEXP (op, 0);
7829 /* Can't use true_regnum here because copy_cost wants to know about
7830 SECONDARY_INPUT_RELOAD_CLASS. */
7831 return GET_CODE (op) == REG && FP_REGISTER_P (REGNO (op));
7835 sh_rep_vec (rtx v, enum machine_mode mode)
7837 int i;
7838 rtx x, y;
7840 if ((GET_CODE (v) != CONST_VECTOR && GET_CODE (v) != PARALLEL)
7841 || (GET_MODE (v) != mode && mode != VOIDmode))
7842 return 0;
7843 i = XVECLEN (v, 0) - 2;
7844 x = XVECEXP (v, 0, i + 1);
7845 if (GET_MODE_UNIT_SIZE (mode) == 1)
7847 y = XVECEXP (v, 0, i);
7848 for (i -= 2; i >= 0; i -= 2)
7849 if (! rtx_equal_p (XVECEXP (v, 0, i + 1), x)
7850 || ! rtx_equal_p (XVECEXP (v, 0, i), y))
7851 return 0;
7853 else
7854 for (; i >= 0; i--)
7855 if (XVECEXP (v, 0, i) != x)
7856 return 0;
7857 return 1;
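/* Illustrative sketch of the check above: for unit sizes larger than one
   byte, a CONST_VECTOR is accepted only when every element equals the
   last one, e.g. (const_vector:V4HI [5 5 5 5]); for byte-sized units a
   two-element repeating pattern such as
   (const_vector:V8QI [1 2 1 2 1 2 1 2]) is also accepted.  */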
7860 /* Determine if V is a constant vector matching MODE with only one element
7861 that is not a sign extension. Two byte-sized elements count as one. */
7863 sh_1el_vec (rtx v, enum machine_mode mode)
7865 int unit_size;
7866 int i, last, least, sign_ix;
7867 rtx sign;
7869 if (GET_CODE (v) != CONST_VECTOR
7870 || (GET_MODE (v) != mode && mode != VOIDmode))
7871 return 0;
7872 /* Determine numbers of last and of least significant elements. */
7873 last = XVECLEN (v, 0) - 1;
7874 least = TARGET_LITTLE_ENDIAN ? 0 : last;
7875 if (GET_CODE (XVECEXP (v, 0, least)) != CONST_INT)
7876 return 0;
7877 sign_ix = least;
7878 if (GET_MODE_UNIT_SIZE (mode) == 1)
7879 sign_ix = TARGET_LITTLE_ENDIAN ? 1 : last - 1;
7880 if (GET_CODE (XVECEXP (v, 0, sign_ix)) != CONST_INT)
7881 return 0;
7882 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (v));
7883 sign = (INTVAL (XVECEXP (v, 0, sign_ix)) >> (unit_size * BITS_PER_UNIT - 1)
7884 ? constm1_rtx : const0_rtx);
7885 i = XVECLEN (v, 0) - 1;
7887 if (i != least && i != sign_ix && XVECEXP (v, 0, i) != sign)
7888 return 0;
7889 while (--i);
7890 return 1;
7894 sh_const_vec (rtx v, enum machine_mode mode)
7896 int i;
7898 if (GET_CODE (v) != CONST_VECTOR
7899 || (GET_MODE (v) != mode && mode != VOIDmode))
7900 return 0;
7901 i = XVECLEN (v, 0) - 1;
7902 for (; i >= 0; i--)
7903 if (GET_CODE (XVECEXP (v, 0, i)) != CONST_INT)
7904 return 0;
7905 return 1;
7908 /* Return the destination address of a branch. */
7910 static int
7911 branch_dest (rtx branch)
7913 rtx dest = SET_SRC (PATTERN (branch));
7914 int dest_uid;
7916 if (GET_CODE (dest) == IF_THEN_ELSE)
7917 dest = XEXP (dest, 1);
7918 dest = XEXP (dest, 0);
7919 dest_uid = INSN_UID (dest);
7920 return INSN_ADDRESSES (dest_uid);
7923 /* Return nonzero if REG is not used after INSN.
7924 We assume REG is a reload reg, and therefore does
7925 not live past labels. It may live past calls or jumps though. */
7927 reg_unused_after (rtx reg, rtx insn)
7929 enum rtx_code code;
7930 rtx set;
7932 /* If the reg is set by this instruction, then it is safe for our
7933 case. Disregard the case where this is a store to memory, since
7934 we are checking a register used in the store address. */
7935 set = single_set (insn);
7936 if (set && GET_CODE (SET_DEST (set)) != MEM
7937 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7938 return 1;
7940 while ((insn = NEXT_INSN (insn)))
7942 rtx set;
7943 if (!INSN_P (insn))
7944 continue;
7946 code = GET_CODE (insn);
7948 #if 0
7949 /* If this is a label that existed before reload, then the register
7950 is dead here. However, if this is a label added by reorg, then
7951 the register may still be live here. We can't tell the difference,
7952 so we just ignore labels completely. */
7953 if (code == CODE_LABEL)
7954 return 1;
7955 /* else */
7956 #endif
7958 if (code == JUMP_INSN)
7959 return 0;
7961 /* If this is a sequence, we must handle them all at once.
7962 We could have for instance a call that sets the target register,
7963 and an insn in a delay slot that uses the register. In this case,
7964 we must return 0. */
7965 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
7967 int i;
7968 int retval = 0;
7970 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
7972 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
7973 rtx set = single_set (this_insn);
7975 if (GET_CODE (this_insn) == CALL_INSN)
7976 code = CALL_INSN;
7977 else if (GET_CODE (this_insn) == JUMP_INSN)
7979 if (INSN_ANNULLED_BRANCH_P (this_insn))
7980 return 0;
7981 code = JUMP_INSN;
7984 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7985 return 0;
7986 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7988 if (GET_CODE (SET_DEST (set)) != MEM)
7989 retval = 1;
7990 else
7991 return 0;
7993 if (set == 0
7994 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
7995 return 0;
7997 if (retval == 1)
7998 return 1;
7999 else if (code == JUMP_INSN)
8000 return 0;
8003 set = single_set (insn);
8004 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8005 return 0;
8006 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8007 return GET_CODE (SET_DEST (set)) != MEM;
8008 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
8009 return 0;
8011 if (code == CALL_INSN && call_used_regs[REGNO (reg)])
8012 return 1;
8014 return 1;
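/* In short (an informal summary of the walk above): it returns 0 at the
   first jump, at any later use of REG, and for a SEQUENCE whose delay
   slot insn uses REG; it returns 1 early when REG is overwritten by a
   non-memory SET or when a call is reached and REG is call-clobbered,
   and 1 if the end of the insn stream is reached without a use.  */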
8017 #include "ggc.h"
8019 static GTY(()) rtx fpscr_rtx;
8021 get_fpscr_rtx (void)
8023 if (! fpscr_rtx)
8025 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
8026 REG_USERVAR_P (fpscr_rtx) = 1;
8027 mark_user_reg (fpscr_rtx);
8029 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
8030 mark_user_reg (fpscr_rtx);
8031 return fpscr_rtx;
8034 void
8035 emit_sf_insn (rtx pat)
8037 emit_insn (pat);
8040 void
8041 emit_df_insn (rtx pat)
8043 emit_insn (pat);
8046 void
8047 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8049 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8052 void
8053 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8055 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
8056 get_fpscr_rtx ()));
8059 void
8060 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8062 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8065 void
8066 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8068 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
8069 get_fpscr_rtx ()));
8072 /* ??? gcc does flow analysis strictly after common subexpression
8073 elimination. As a result, common subexpression elimination fails
8074 when there are some intervening statements setting the same register.
8075 If we did nothing about this, this would hurt the precision switching
8076 for SH4 badly. There is some cse after reload, but it is unable to
8077 undo the extra register pressure from the unused instructions, and
8078 it cannot remove auto-increment loads.
8080 A C code example that shows this flow/cse weakness for (at least) SH
8081 and sparc (as of gcc ss-970706) is this:
8083 double
8084 f(double a)
8086 double d;
8087 d = 0.1;
8088 a += d;
8089 d = 1.1;
8090 d = 0.1;
8091 a *= d;
8092 return a;
8095 So we add another pass before common subexpression elimination, to
8096 remove assignments that are dead due to a following assignment in the
8097 same basic block. */
8099 static void
8100 mark_use (rtx x, rtx *reg_set_block)
8102 enum rtx_code code;
8104 if (! x)
8105 return;
8106 code = GET_CODE (x);
8107 switch (code)
8109 case REG:
8111 int regno = REGNO (x);
8112 int nregs = (regno < FIRST_PSEUDO_REGISTER
8113 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
8114 : 1);
8117 reg_set_block[regno + nregs - 1] = 0;
8119 while (--nregs);
8120 break;
8122 case SET:
8124 rtx dest = SET_DEST (x);
8126 if (GET_CODE (dest) == SUBREG)
8127 dest = SUBREG_REG (dest);
8128 if (GET_CODE (dest) != REG)
8129 mark_use (dest, reg_set_block);
8130 mark_use (SET_SRC (x), reg_set_block);
8131 break;
8133 case CLOBBER:
8134 break;
8135 default:
8137 const char *fmt = GET_RTX_FORMAT (code);
8138 int i, j;
8139 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8141 if (fmt[i] == 'e')
8142 mark_use (XEXP (x, i), reg_set_block);
8143 else if (fmt[i] == 'E')
8144 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8145 mark_use (XVECEXP (x, i, j), reg_set_block);
8147 break;
8152 static rtx get_free_reg (HARD_REG_SET);
8154 /* This function returns a register to use for loading the address from
8155 which to load the fpscr. Currently it always returns r1 or r7, but when we are
8156 able to use pseudo registers after combine, or have a better mechanism
8157 for choosing a register, it should be done here. */
8158 /* REGS_LIVE is the liveness information for the point for which we
8159 need this allocation. In some bare-bones exit blocks, r1 is live at the
8160 start. We can even have all of r0..r3 being live:
8161 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8162 The INSN before which new insns are placed will clobber the register
8163 we return. If a basic block consists only of setting the return value
8164 register to a pseudo and using that register, the return value is not
8165 live before or after this block, yet we'll insert our insns right in
8166 the middle. */
8168 static rtx
8169 get_free_reg (HARD_REG_SET regs_live)
8171 if (! TEST_HARD_REG_BIT (regs_live, 1))
8172 return gen_rtx_REG (Pmode, 1);
8174 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8175 there shouldn't be anything but a jump before the function end. */
8176 if (! TEST_HARD_REG_BIT (regs_live, 7))
8177 return gen_rtx_REG (Pmode, 7);
8179 abort ();
8182 /* This function will set the fpscr from memory.
8183 MODE is the mode we are setting it to. */
8184 void
8185 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8187 enum attr_fp_mode fp_mode = mode;
8188 rtx addr_reg = get_free_reg (regs_live);
8190 if (fp_mode == (enum attr_fp_mode) ACTUAL_NORMAL_MODE (FP_MODE))
8191 emit_insn (gen_fpu_switch1 (addr_reg));
8192 else
8193 emit_insn (gen_fpu_switch0 (addr_reg));
8196 /* Is the given character a logical line separator for the assembler? */
8197 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8198 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
8199 #endif
8202 sh_insn_length_adjustment (rtx insn)
8204 /* Instructions with unfilled delay slots take up an extra two bytes for
8205 the nop in the delay slot. */
8206 if (((GET_CODE (insn) == INSN
8207 && GET_CODE (PATTERN (insn)) != USE
8208 && GET_CODE (PATTERN (insn)) != CLOBBER)
8209 || GET_CODE (insn) == CALL_INSN
8210 || (GET_CODE (insn) == JUMP_INSN
8211 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8212 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8213 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8214 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8215 return 2;
8217 /* SH2e has a bug that prevents the use of annulled branches, so if
8218 the delay slot is not filled, we'll have to put a NOP in it. */
8219 if (sh_cpu == CPU_SH2E
8220 && GET_CODE (insn) == JUMP_INSN
8221 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8222 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8223 && get_attr_type (insn) == TYPE_CBRANCH
8224 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8225 return 2;
8227 /* sh-dsp parallel processing insns take four bytes instead of two. */
8229 if (GET_CODE (insn) == INSN)
8231 int sum = 0;
8232 rtx body = PATTERN (insn);
8233 const char *template;
8234 char c;
8235 int maybe_label = 1;
8237 if (GET_CODE (body) == ASM_INPUT)
8238 template = XSTR (body, 0);
8239 else if (asm_noperands (body) >= 0)
8240 template
8241 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
8242 else
8243 return 0;
8246 int ppi_adjust = 0;
8249 c = *template++;
8250 while (c == ' ' || c == '\t');
8251 /* all sh-dsp parallel-processing insns start with p.
8252 The only non-ppi sh insn starting with p is pref.
8253 The only ppi starting with pr is prnd. */
8254 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
8255 ppi_adjust = 2;
8256 /* The repeat pseudo-insn expands to three insns, a total of
8257 six bytes in size. */
8258 else if ((c == 'r' || c == 'R')
8259 && ! strncasecmp ("epeat", template, 5))
8260 ppi_adjust = 4;
8261 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
8263 /* If this is a label, it is obviously not a ppi insn. */
8264 if (c == ':' && maybe_label)
8266 ppi_adjust = 0;
8267 break;
8269 else if (c == '\'' || c == '"')
8270 maybe_label = 0;
8271 c = *template++;
8273 sum += ppi_adjust;
8274 maybe_label = c != ':';
8276 while (c);
8277 return sum;
8279 return 0;
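/* A worked example of the asm scan above (the mnemonics are only
   illustrative): in an inline asm template containing one line that
   starts with "p" but not "pre" and one line that starts with "repeat",
   the loop adds 2 bytes for the first line (a parallel-processing insn
   occupies 4 bytes rather than the usual 2) and 4 bytes for the second
   (the repeat pseudo-insn expands to three insns, six bytes total),
   returning a length adjustment of 6.  */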
8282 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8283 isn't protected by a PIC unspec. */
8285 nonpic_symbol_mentioned_p (rtx x)
8287 register const char *fmt;
8288 register int i;
8290 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8291 || GET_CODE (x) == PC)
8292 return 1;
8294 /* We don't want to look into the possible MEM location of a
8295 CONST_DOUBLE, since we're not going to use it, in general. */
8296 if (GET_CODE (x) == CONST_DOUBLE)
8297 return 0;
8299 if (GET_CODE (x) == UNSPEC
8300 && (XINT (x, 1) == UNSPEC_PIC
8301 || XINT (x, 1) == UNSPEC_GOT
8302 || XINT (x, 1) == UNSPEC_GOTOFF
8303 || XINT (x, 1) == UNSPEC_GOTPLT
8304 || XINT (x, 1) == UNSPEC_GOTTPOFF
8305 || XINT (x, 1) == UNSPEC_DTPOFF
8306 || XINT (x, 1) == UNSPEC_PLT))
8307 return 0;
8309 fmt = GET_RTX_FORMAT (GET_CODE (x));
8310 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8312 if (fmt[i] == 'E')
8314 register int j;
8316 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8317 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8318 return 1;
8320 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8321 return 1;
8324 return 0;
8327 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8328 @GOTOFF in `reg'. */
8330 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8331 rtx reg)
8333 if (tls_symbolic_operand (orig, Pmode))
8334 return orig;
8336 if (GET_CODE (orig) == LABEL_REF
8337 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8339 if (reg == 0)
8340 reg = gen_reg_rtx (Pmode);
8342 emit_insn (gen_symGOTOFF2reg (reg, orig));
8343 return reg;
8345 else if (GET_CODE (orig) == SYMBOL_REF)
8347 if (reg == 0)
8348 reg = gen_reg_rtx (Pmode);
8350 emit_insn (gen_symGOT2reg (reg, orig));
8351 return reg;
8353 return orig;
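/* Summarizing the cases above (informal): a LABEL_REF or a local
   SYMBOL_REF is loaded into REG with a @GOTOFF sequence, any other
   SYMBOL_REF with a @GOT sequence, and TLS symbols as well as anything
   else are returned unchanged for the callers to handle.  */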
8356 /* Mark the use of a constant in the literal table. If the constant
8357 has multiple labels, make it unique. */
8358 static rtx
8359 mark_constant_pool_use (rtx x)
8361 rtx insn, lab, pattern;
8363 if (x == NULL)
8364 return x;
8366 switch (GET_CODE (x))
8368 case LABEL_REF:
8369 x = XEXP (x, 0);
8370 case CODE_LABEL:
8371 break;
8372 default:
8373 return x;
8376 /* Get the first label in the list of labels for the same constant
8377 and delete the other labels in the list. */
8378 lab = x;
8379 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8381 if (GET_CODE (insn) != CODE_LABEL
8382 || LABEL_REFS (insn) != NEXT_INSN (insn))
8383 break;
8384 lab = insn;
8387 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8388 INSN_DELETED_P (insn) = 1;
8390 /* Mark constants in a window. */
8391 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8393 if (GET_CODE (insn) != INSN)
8394 continue;
8396 pattern = PATTERN (insn);
8397 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8398 continue;
8400 switch (XINT (pattern, 1))
8402 case UNSPECV_CONST2:
8403 case UNSPECV_CONST4:
8404 case UNSPECV_CONST8:
8405 XVECEXP (pattern, 0, 1) = const1_rtx;
8406 break;
8407 case UNSPECV_WINDOW_END:
8408 if (XVECEXP (pattern, 0, 0) == x)
8409 return lab;
8410 break;
8411 case UNSPECV_CONST_END:
8412 return lab;
8413 default:
8414 break;
8418 return lab;
8421 /* Return true if it's possible to redirect BRANCH1 to the destination
8422 of an unconditional jump BRANCH2. We only want to do this if the
8423 resulting branch will have a short displacement. */
8424 int
8425 sh_can_redirect_branch (rtx branch1, rtx branch2)
8427 if (flag_expensive_optimizations && simplejump_p (branch2))
8429 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8430 rtx insn;
8431 int distance;
8433 for (distance = 0, insn = NEXT_INSN (branch1);
8434 insn && distance < 256;
8435 insn = PREV_INSN (insn))
8437 if (insn == dest)
8438 return 1;
8439 else
8440 distance += get_attr_length (insn);
8442 for (distance = 0, insn = NEXT_INSN (branch1);
8443 insn && distance < 256;
8444 insn = NEXT_INSN (insn))
8446 if (insn == dest)
8447 return 1;
8448 else
8449 distance += get_attr_length (insn);
8452 return 0;
8455 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8457 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8458 unsigned int new_reg)
8460 /* Interrupt functions can only use registers that have already been
8461 saved by the prologue, even if they would normally be
8462 call-clobbered. */
8464 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
8465 return 0;
8467 return 1;
8470 /* Function to update the integer COST
8471 based on the relationship between INSN that is dependent on
8472 DEP_INSN through the dependence LINK. The default is to make no
8473 adjustment to COST. This can be used for example to specify to
8474 the scheduler that an output- or anti-dependence does not incur
8475 the same cost as a data-dependence. The return value should be
8476 the new value for COST. */
8477 static int
8478 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8480 rtx reg, use_pat;
8482 if (TARGET_SHMEDIA)
8484 /* On SHmedia, if the dependence is an anti-dependence or
8485 output-dependence, there is no cost. */
8486 if (REG_NOTE_KIND (link) != 0)
8487 cost = 0;
8489 if (get_attr_is_mac_media (insn)
8490 && get_attr_is_mac_media (dep_insn))
8491 cost = 1;
8493 else if (REG_NOTE_KIND (link) == 0)
8495 enum attr_type dep_type, type;
8497 if (recog_memoized (insn) < 0
8498 || recog_memoized (dep_insn) < 0)
8499 return cost;
8501 dep_type = get_attr_type (dep_insn);
8502 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
8503 cost--;
8504 if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
8505 && (type = get_attr_type (insn)) != TYPE_CALL
8506 && type != TYPE_SFUNC)
8507 cost--;
8509 /* The only input for a call that is timing-critical is the
8510 function's address. */
8511 if (GET_CODE(insn) == CALL_INSN)
8513 rtx call = PATTERN (insn);
8515 if (GET_CODE (call) == PARALLEL)
8516 call = XVECEXP (call, 0 ,0);
8517 if (GET_CODE (call) == SET)
8518 call = SET_SRC (call);
8519 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
8520 && ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn))
8521 cost = 0;
8523 /* Likewise, the most timing-critical input for an sfunc call
8524 is the function address. However, sfuncs typically start
8525 using their arguments pretty quickly.
8526 Assume a four cycle delay before they are needed. */
8527 /* All sfunc calls are parallels with at least four components.
8528 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8529 else if (GET_CODE (PATTERN (insn)) == PARALLEL
8530 && XVECLEN (PATTERN (insn), 0) >= 4
8531 && (reg = sfunc_uses_reg (insn)))
8533 if (! reg_set_p (reg, dep_insn))
8534 cost -= 4;
8536 /* When the preceding instruction loads the shift amount of
8537 the following SHAD/SHLD, the latency of the load is increased
8538 by 1 cycle. */
8539 else if (TARGET_SH4
8540 && get_attr_type (insn) == TYPE_DYN_SHIFT
8541 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
8542 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
8543 XEXP (SET_SRC (single_set (insn)),
8544 1)))
8545 cost++;
8546 /* When an LS group instruction with a latency of less than
8547 3 cycles is followed by a double-precision floating-point
8548 instruction, FIPR, or FTRV, the latency of the first
8549 instruction is increased to 3 cycles. */
8550 else if (cost < 3
8551 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
8552 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
8553 cost = 3;
8554 /* The lsw register of a double-precision computation is ready one
8555 cycle earlier. */
8556 else if (reload_completed
8557 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
8558 && (use_pat = single_set (insn))
8559 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
8560 SET_SRC (use_pat)))
8561 cost -= 1;
8563 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
8564 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
8565 cost -= 1;
8567 /* An anti-dependence penalty of two applies if the first insn is a double
8568 precision fadd / fsub / fmul. */
8569 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8570 && recog_memoized (dep_insn) >= 0
8571 && get_attr_type (dep_insn) == TYPE_DFP_ARITH
8572 /* A lot of alleged anti-flow dependences are fake,
8573 so check this one is real. */
8574 && flow_dependent_p (dep_insn, insn))
8575 cost = 2;
8578 return cost;
8581 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
8582 if DEP_INSN is anti-flow dependent on INSN. */
8583 static int
8584 flow_dependent_p (rtx insn, rtx dep_insn)
8586 rtx tmp = PATTERN (insn);
8588 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
8589 return tmp == NULL_RTX;
8592 /* A helper function for flow_dependent_p called through note_stores. */
8593 static void
8594 flow_dependent_p_1 (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
8596 rtx * pinsn = (rtx *) data;
8598 if (*pinsn && reg_referenced_p (x, *pinsn))
8599 *pinsn = NULL_RTX;
8602 /* For use by ALLOCATE_INITIAL_VALUE. Note that sh.md contains some
8603 'special function' patterns (type sfunc) that clobber pr, but that
8604 do not look like function calls to leaf_function_p. Hence we must
8605 do this extra check. */
8607 sh_pr_n_sets (void)
8609 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
8612 /* This function returns 2 to indicate dual issue for superscalar (SH4)
8613 processors, and 1 otherwise. To be used by the DFA pipeline description. */
8614 static int
8615 sh_issue_rate (void)
8617 if (TARGET_SUPERSCALAR)
8618 return 2;
8619 else
8620 return 1;
8623 /* Functions for ready queue reordering for sched1. */
8626 /* Get the regmode weight of a SET or CLOBBER X for mode MODE. */
8626 static short
8627 find_set_regmode_weight (rtx x, enum machine_mode mode)
8629 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
8630 return 1;
8631 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
8633 if (GET_CODE (SET_DEST (x)) == REG)
8635 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
8636 return 1;
8637 else
8638 return 0;
8640 return 1;
8642 return 0;
8645 /* Get regmode weight for insn. */
8646 static short
8647 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
8649 short reg_weight = 0;
8650 rtx x;
8652 /* Increment weight for each register born here. */
8653 x = PATTERN (insn);
8654 reg_weight += find_set_regmode_weight (x, mode);
8655 if (GET_CODE (x) == PARALLEL)
8657 int j;
8658 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
8660 x = XVECEXP (PATTERN (insn), 0, j);
8661 reg_weight += find_set_regmode_weight (x, mode);
8664 /* Decrement weight for each register that dies here. */
8665 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
8667 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
8669 rtx note = XEXP (x, 0);
8670 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
8671 reg_weight--;
8674 return reg_weight;
8677 /* Calculate regmode weights for all insns of a basic block. */
8678 static void
8679 find_regmode_weight (int b, enum machine_mode mode)
8681 rtx insn, next_tail, head, tail;
8683 get_block_head_tail (b, &head, &tail);
8684 next_tail = NEXT_INSN (tail);
8686 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
8688 /* Handle register life information. */
8689 if (!INSN_P (insn))
8690 continue;
8692 if (mode == SFmode)
8693 INSN_REGMODE_WEIGHT (insn, mode) =
8694 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
8695 else if (mode == SImode)
8696 INSN_REGMODE_WEIGHT (insn, mode) =
8697 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
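/* A small worked example of the weighting above (illustrative only):
   since a DFmode value occupies two SFmode registers and a DImode value
   two SImode registers, DFmode and DImode births are counted twice.  An
   insn that sets one DFmode register while one SFmode register dies in
   it therefore gets INSN_REGMODE_WEIGHT (insn, SFmode) = -1 + 2 * 1 = 1.  */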
8701 /* Comparison function for ready queue sorting. */
8702 static int
8703 rank_for_reorder (const void *x, const void *y)
8705 rtx tmp = *(const rtx *) y;
8706 rtx tmp2 = *(const rtx *) x;
8708 /* The insn in a schedule group should be issued first. */
8709 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
8710 return SCHED_GROUP_P (tmp2) ? 1 : -1;
8712 /* If insns are equally good, sort by INSN_LUID (original insn order); this
8713 minimizes instruction movement, thus minimizing sched's effect on
8714 register pressure. */
8715 return INSN_LUID (tmp) - INSN_LUID (tmp2);
8718 /* Resort the array A of N elements, in which only the last element may be out of order. */
8719 static void
8720 swap_reorder (rtx *a, int n)
8722 rtx insn = a[n - 1];
8723 int i = n - 2;
8725 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
8727 a[i + 1] = a[i];
8728 i -= 1;
8730 a[i + 1] = insn;
8733 #define SCHED_REORDER(READY, N_READY) \
8734 do \
8736 if ((N_READY) == 2) \
8737 swap_reorder (READY, N_READY); \
8738 else if ((N_READY) > 2) \
8739 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
8741 while (0)
8743 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
8744 macro. */
8745 static void
8746 ready_reorder (rtx *ready, int nready)
8748 SCHED_REORDER (ready, nready);
8751 /* Calculate regmode weights for all insns of all basic blocks. */
8752 static void
8753 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
8754 int verbose ATTRIBUTE_UNUSED,
8755 int old_max_uid)
8757 basic_block b;
8759 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
8760 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
8762 FOR_EACH_BB_REVERSE (b)
8764 find_regmode_weight (b->index, SImode);
8765 find_regmode_weight (b->index, SFmode);
8768 CURR_REGMODE_PRESSURE (SImode) = 0;
8769 CURR_REGMODE_PRESSURE (SFmode) = 0;
8773 /* Cleanup. */
8774 static void
8775 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
8776 int verbose ATTRIBUTE_UNUSED)
8778 if (regmode_weight[0])
8780 free (regmode_weight[0]);
8781 regmode_weight[0] = NULL;
8783 if (regmode_weight[1])
8785 free (regmode_weight[1]);
8786 regmode_weight[1] = NULL;
8790 /* Cache the can_issue_more so that we can return it from reorder2. Also,
8791 keep count of register pressures on SImode and SFmode. */
8792 static int
8793 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
8794 int sched_verbose ATTRIBUTE_UNUSED,
8795 rtx insn,
8796 int can_issue_more)
8798 if (GET_CODE (PATTERN (insn)) != USE
8799 && GET_CODE (PATTERN (insn)) != CLOBBER)
8800 cached_can_issue_more = can_issue_more - 1;
8801 else
8802 cached_can_issue_more = can_issue_more;
8804 if (reload_completed)
8805 return cached_can_issue_more;
8807 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
8808 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
8810 return cached_can_issue_more;
8813 static void
8814 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
8815 int verbose ATTRIBUTE_UNUSED,
8816 int veclen ATTRIBUTE_UNUSED)
8818 CURR_REGMODE_PRESSURE (SImode) = 0;
8819 CURR_REGMODE_PRESSURE (SFmode) = 0;
8822 /* Some magic numbers. */
8823 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8824 functions that already have high pressure on r0. */
8825 #define R0_MAX_LIFE_REGIONS 2
8826 #define R0_MAX_LIVE_LENGTH 12
8827 /* Register Pressure thresholds for SImode and SFmode registers. */
8828 #define SIMODE_MAX_WEIGHT 5
8829 #define SFMODE_MAX_WEIGHT 10
8831 /* Return true if the pressure is high for MODE. */
8832 static short
8833 high_pressure (enum machine_mode mode)
8835 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8836 functions that already have high pressure on r0. */
8837 if ((REG_N_SETS (0) - REG_N_DEATHS (0)) >= R0_MAX_LIFE_REGIONS
8838 && REG_LIVE_LENGTH (0) >= R0_MAX_LIVE_LENGTH)
8839 return 1;
8841 if (mode == SFmode)
8842 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
8843 else
8844 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
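/* Roughly speaking (an informal reading of the thresholds above): sched1
   starts reordering or skipping cycles once the accumulated SFmode
   weight exceeds 10 or the SImode weight exceeds 5, and it always
   reports high pressure when r0 has at least two more sets than deaths
   and a live length of 12 or more.  */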
8847 /* Reorder ready queue if register pressure is high. */
8848 static int
8849 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
8850 int sched_verbose ATTRIBUTE_UNUSED,
8851 rtx *ready,
8852 int *n_readyp,
8853 int clock_var ATTRIBUTE_UNUSED)
8855 if (reload_completed)
8856 return sh_issue_rate ();
8858 if (high_pressure (SFmode) || high_pressure (SImode))
8860 ready_reorder (ready, *n_readyp);
8863 return sh_issue_rate ();
8866 /* Skip cycles if the current register pressure is high. */
8867 static int
8868 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
8869 int sched_verbose ATTRIBUTE_UNUSED,
8870 rtx *ready ATTRIBUTE_UNUSED,
8871 int *n_readyp ATTRIBUTE_UNUSED,
8872 int clock_var ATTRIBUTE_UNUSED)
8874 if (reload_completed)
8875 return cached_can_issue_more;
8877 if (high_pressure(SFmode) || high_pressure (SImode))
8878 skip_cycles = 1;
8880 return cached_can_issue_more;
8883 /* Skip cycles without sorting the ready queue. This will move insns from
8884 Q->R. If this is the last cycle we are skipping, allow sorting of the
8885 ready queue by sh_reorder. */
8887 /* Generally, skipping this many cycles is sufficient for all insns to move
8888 from Q -> R. */
8889 #define MAX_SKIPS 8
8891 static int
8892 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
8893 int sched_verbose ATTRIBUTE_UNUSED,
8894 rtx insn ATTRIBUTE_UNUSED,
8895 int last_clock_var,
8896 int clock_var,
8897 int *sort_p)
8899 if (reload_completed)
8900 return 0;
8902 if (skip_cycles)
8904 if ((clock_var - last_clock_var) < MAX_SKIPS)
8906 *sort_p = 0;
8907 return 1;
8909 /* If this is the last cycle we are skipping, allow reordering of R. */
8910 if ((clock_var - last_clock_var) == MAX_SKIPS)
8912 *sort_p = 1;
8913 return 1;
8917 skip_cycles = 0;
8919 return 0;
8922 /* SHmedia requires registers for branches, so we can't generate new
8923 branches past reload. */
8924 static bool
8925 sh_cannot_modify_jumps_p (void)
8927 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
8930 static int
8931 sh_target_reg_class (void)
8933 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
8936 static bool
8937 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
8939 return (shmedia_space_reserved_for_target_registers
8940 && (! after_prologue_epilogue_gen || TARGET_SAVE_ALL_TARGET_REGS));
8943 static bool
8944 sh_ms_bitfield_layout_p (tree record_type ATTRIBUTE_UNUSED)
8946 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
8950 On the SH1..SH4, the trampoline looks like
8951 2 0002 D202 mov.l l2,r2
8952 1 0000 D301 mov.l l1,r3
8953 3 0004 422B jmp @r2
8954 4 0006 0009 nop
8955 5 0008 00000000 l1: .long area
8956 6 000c 00000000 l2: .long function
8958 SH5 (compact) uses r1 instead of r3 for the static chain. */
8961 /* Emit RTL insns to initialize the variable parts of a trampoline.
8962 FNADDR is an RTX for the address of the function's pure code.
8963 CXT is an RTX for the static chain value for the function. */
8965 void
8966 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
8968 if (TARGET_SHMEDIA64)
8970 rtx tramp_templ;
8971 int fixed_len;
8973 rtx movi1 = GEN_INT (0xcc000010);
8974 rtx shori1 = GEN_INT (0xc8000010);
8975 rtx src, dst;
8977 /* The following trampoline works within a +- 128 KB range for cxt:
8978 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
8979 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
8980 gettr tr1,r1; blink tr0,r63 */
8981 /* Address rounding makes it hard to compute the exact bounds of the
8982 offset for this trampoline, but we have a rather generous offset
8983 range, so frame_offset should do fine as an upper bound. */
8984 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
8986 /* ??? could optimize this trampoline initialization
8987 by writing DImode words with two insns each. */
8988 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
8989 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
8990 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
8991 insn = gen_rtx_AND (DImode, insn, mask);
8992 /* Or in ptb/u .,tr1 pattern */
8993 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
8994 insn = force_operand (insn, NULL_RTX);
8995 insn = gen_lowpart (SImode, insn);
8996 emit_move_insn (gen_rtx_MEM (SImode, tramp), insn);
8997 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
8998 insn = gen_rtx_AND (DImode, insn, mask);
8999 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
9000 insn = gen_lowpart (SImode, insn);
9001 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)), insn);
9002 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
9003 insn = gen_rtx_AND (DImode, insn, mask);
9004 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9005 insn = gen_lowpart (SImode, insn);
9006 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)), insn);
9007 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
9008 insn = gen_rtx_AND (DImode, insn, mask);
9009 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9010 insn = gen_lowpart (SImode, insn);
9011 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
9012 insn);
9013 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
9014 insn = gen_rtx_AND (DImode, insn, mask);
9015 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9016 insn = gen_lowpart (SImode, insn);
9017 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 16)),
9018 insn);
9019 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 20)),
9020 GEN_INT (0x6bf10600));
9021 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 24)),
9022 GEN_INT (0x4415fc10));
9023 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 28)),
9024 GEN_INT (0x4401fff0));
9025 emit_insn (gen_ic_invalidate_line (tramp));
9026 return;
9028 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
9029 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
9031 tramp_templ = gen_datalabel_ref (tramp_templ);
9032 dst = gen_rtx_MEM (BLKmode, tramp);
9033 src = gen_rtx_MEM (BLKmode, tramp_templ);
9034 set_mem_align (dst, 256);
9035 set_mem_align (src, 64);
9036 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
9038 emit_move_insn (gen_rtx_MEM (Pmode, plus_constant (tramp, fixed_len)),
9039 fnaddr);
9040 emit_move_insn (gen_rtx_MEM (Pmode,
9041 plus_constant (tramp,
9042 fixed_len
9043 + GET_MODE_SIZE (Pmode))),
9044 cxt);
9045 emit_insn (gen_ic_invalidate_line (tramp));
9046 return;
9048 else if (TARGET_SHMEDIA)
9050 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
9051 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
9052 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
9053 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
9054 /* movi 0,r1: 0xcc000010 shori 0,r1: 0xc8000010 concatenated,
9055 rotated 10 right, with the higher 16 bits of every 32 selected. */
9056 rtx movishori
9057 = force_reg (V2HImode, (simplify_gen_subreg
9058 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
9059 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
9060 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
9062 tramp = force_reg (Pmode, tramp);
9063 fnaddr = force_reg (SImode, fnaddr);
9064 cxt = force_reg (SImode, cxt);
9065 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
9066 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
9067 movishori));
9068 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
9069 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9070 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
9071 emit_move_insn (gen_rtx_MEM (DImode, tramp), quad0);
9072 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
9073 gen_rtx_SUBREG (V2HImode, cxt, 0),
9074 movishori));
9075 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
9076 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9077 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
9078 if (TARGET_LITTLE_ENDIAN)
9080 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
9081 emit_insn (gen_mextr4 (quad2, cxtload, blink));
9083 else
9085 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
9086 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
9088 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 8)), quad1);
9089 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), quad2);
9090 emit_insn (gen_ic_invalidate_line (tramp));
9091 return;
9093 else if (TARGET_SHCOMPACT)
9095 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
9096 return;
9098 emit_move_insn (gen_rtx_MEM (SImode, tramp),
9099 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
9100 SImode));
9101 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
9102 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
9103 SImode));
9104 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
9105 cxt);
9106 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
9107 fnaddr);
9108 if (TARGET_HARVARD)
9110 if (TARGET_USERMODE)
9111 emit_library_call (function_symbol ("__ic_invalidate"),
9112 0, VOIDmode, 1, tramp, SImode);
9113 else
9114 emit_insn (gen_ic_invalidate_line (tramp));
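/* An illustrative memory image for the plain SH1..SH4 case above,
   assuming a little-endian target (a sketch; the per-halfword layout
   follows from the SImode stores):

       tramp+0:   0xd301d202    the two mov.l instructions
       tramp+4:   0x0009422b    jmp @r2 and its delay-slot nop
       tramp+8:   cxt           l1: the static chain, loaded into r3
       tramp+12:  fnaddr        l2: the function address, loaded into r2

   matching the layout shown in the comment before this function.  */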
9118 /* FIXME: This is overly conservative. A SHcompact function that
9119 receives arguments ``by reference'' will have them stored in its
9120 own stack frame, so it must not pass pointers or references to
9121 these arguments to other functions by means of sibling calls. */
9122 static bool
9123 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9125 return (decl
9126 && (! TARGET_SHCOMPACT
9127 || current_function_args_info.stack_regs == 0)
9128 && ! sh_cfun_interrupt_handler_p ());
9131 /* Machine specific built-in functions. */
9133 struct builtin_description
9135 const enum insn_code icode;
9136 const char *const name;
9137 int signature;
9140 /* Describe number and signedness of arguments; arg[0] == result
9141 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
9142 static const char signature_args[][4] =
9144 #define SH_BLTIN_V2SI2 0
9145 { 4, 4 },
9146 #define SH_BLTIN_V4HI2 1
9147 { 4, 4 },
9148 #define SH_BLTIN_V2SI3 2
9149 { 4, 4, 4 },
9150 #define SH_BLTIN_V4HI3 3
9151 { 4, 4, 4 },
9152 #define SH_BLTIN_V8QI3 4
9153 { 4, 4, 4 },
9154 #define SH_BLTIN_MAC_HISI 5
9155 { 1, 4, 4, 1 },
9156 #define SH_BLTIN_SH_HI 6
9157 { 4, 4, 1 },
9158 #define SH_BLTIN_SH_SI 7
9159 { 4, 4, 1 },
9160 #define SH_BLTIN_V4HI2V2SI 8
9161 { 4, 4, 4 },
9162 #define SH_BLTIN_V4HI2V8QI 9
9163 { 4, 4, 4 },
9164 #define SH_BLTIN_SISF 10
9165 { 4, 2 },
9166 #define SH_BLTIN_LDUA_L 11
9167 { 2, 8 },
9168 #define SH_BLTIN_LDUA_Q 12
9169 { 1, 8 },
9170 #define SH_BLTIN_STUA_L 13
9171 { 0, 8, 2 },
9172 #define SH_BLTIN_STUA_Q 14
9173 { 0, 8, 1 },
9174 #define SH_BLTIN_UDI 15
9175 { 0, 8, 1 },
9176 #define SH_BLTIN_NUM_SHARED_SIGNATURES 16
9177 #define SH_BLTIN_2 16
9178 #define SH_BLTIN_SU 16
9179 { 1, 2 },
9180 #define SH_BLTIN_3 17
9181 #define SH_BLTIN_SUS 17
9182 { 2, 2, 1 },
9183 #define SH_BLTIN_PSSV 18
9184 { 0, 8, 2, 2 },
9185 #define SH_BLTIN_XXUU 19
9186 #define SH_BLTIN_UUUU 19
9187 { 1, 1, 1, 1 },
9188 #define SH_BLTIN_PV 20
9189 { 0, 8 },
9191 /* mcmv: operands considered unsigned. */
9192 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9193 /* mperm: control value considered unsigned int. */
9194 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9195 /* mshards_q: returns signed short. */
9196 /* nsb: takes long long arg, returns unsigned char. */
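/* A decoding example for the table above (illustrative): SH_BLTIN_SH_HI
   is { 4, 4, 1 }, i.e. the result and first argument have "don't care"
   signedness while the second argument (the shift / control value) is
   unsigned; SH_BLTIN_LDUA_L is { 2, 8 }, a signed result produced from a
   single pointer argument.  */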
9197 static const struct builtin_description bdesc[] =
9199 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
9200 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
9201 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
9202 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
9203 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
9204 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
9205 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
9206 #if 0
9207 { CODE_FOR_alloco32, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9208 { CODE_FOR_alloco64, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9209 #endif
9210 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
9211 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
9212 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
9213 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
9214 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
9215 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
9216 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
9217 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
9218 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
9219 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
9220 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_UDI },
9221 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_UDI },
9222 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_UDI },
9223 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_UDI },
9224 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_UDI },
9225 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_UDI },
9226 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_UDI },
9227 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
9228 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
9229 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
9230 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
9231 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
9232 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
9233 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
9234 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
9235 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
9236 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
9237 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
9238 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
9239 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
9240 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
9241 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
9242 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
9243 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
9244 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
9245 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
9246 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
9247 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
9248 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
9249 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
9250 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
9251 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
9252 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
9253 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
9254 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
9255 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
9256 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
9257 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
9258 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
9259 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
9260 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
9261 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
9262 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
9263 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
9264 #if 0
9265 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9266 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9267 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9268 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9269 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9270 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9271 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9272 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9273 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9274 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9275 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9276 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9277 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9278 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9279 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9280 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9281 #endif
9282 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
9283 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
9284 #if 0
9285 { CODE_FOR_prefetch32,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
9286 { CODE_FOR_prefetch64,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV }
9287 #endif
9290 static void
9291 sh_media_init_builtins (void)
9293 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
9294 const struct builtin_description *d;
9296 memset (shared, 0, sizeof shared);
9297 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
9299 tree type, arg_type;
9300 int signature = d->signature;
9301 int i;
9303 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
9304 type = shared[signature];
9305 else
9307 int has_result = signature_args[signature][0] != 0;
9309 if (signature_args[signature][1] == 8
9310 && (insn_data[d->icode].operand[has_result].mode != Pmode))
9311 continue;
9312 if (! TARGET_FPU_ANY
9313 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
9314 continue;
9315 type = void_list_node;
9316 for (i = 3; ; i--)
9318 int arg = signature_args[signature][i];
9319 int opno = i - 1 + has_result;
9321 if (arg == 8)
9322 arg_type = ptr_type_node;
9323 else if (arg)
9324 arg_type = ((*lang_hooks.types.type_for_mode)
9325 (insn_data[d->icode].operand[opno].mode,
9326 (arg & 1)));
9327 else if (i)
9328 continue;
9329 else
9330 arg_type = void_type_node;
9331 if (i == 0)
9332 break;
9333 type = tree_cons (NULL_TREE, arg_type, type);
9335 type = build_function_type (arg_type, type);
9336 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
9337 shared[signature] = type;
9339 lang_hooks.builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
9340 NULL, NULL_TREE);
9344 static void
9345 sh_init_builtins (void)
9347 if (TARGET_SHMEDIA)
9348 sh_media_init_builtins ();
9351 /* Expand an expression EXP that calls a built-in function,
9352 with result going to TARGET if that's convenient
9353 (and in mode MODE if that's convenient).
9354 SUBTARGET may be used as the target for computing one of EXP's operands.
9355 IGNORE is nonzero if the value is to be ignored. */
9357 static rtx
9358 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9359 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
9361 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
9362 tree arglist = TREE_OPERAND (exp, 1);
9363 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9364 const struct builtin_description *d = &bdesc[fcode];
9365 enum insn_code icode = d->icode;
9366 int signature = d->signature;
9367 enum machine_mode tmode = VOIDmode;
9368 int nop = 0, i;
9369 rtx op[4];
9370 rtx pat;
9372 if (signature_args[signature][0])
9374 if (ignore)
9375 return 0;
9377 tmode = insn_data[icode].operand[0].mode;
9378 if (! target
9379 || GET_MODE (target) != tmode
9380 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9381 target = gen_reg_rtx (tmode);
9382 op[nop++] = target;
9384 else
9385 target = 0;
9387 for (i = 1; i <= 3; i++, nop++)
9389 tree arg;
9390 enum machine_mode opmode, argmode;
9392 if (! signature_args[signature][i])
9393 break;
9394 arg = TREE_VALUE (arglist);
9395 if (arg == error_mark_node)
9396 return const0_rtx;
9397 arglist = TREE_CHAIN (arglist);
9398 opmode = insn_data[icode].operand[nop].mode;
9399 argmode = TYPE_MODE (TREE_TYPE (arg));
9400 if (argmode != opmode)
9401 arg = build1 (NOP_EXPR,
9402 (*lang_hooks.types.type_for_mode) (opmode, 0), arg);
9403 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
9404 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
9405 op[nop] = copy_to_mode_reg (opmode, op[nop]);
9408 switch (nop)
9410 case 1:
9411 pat = (*insn_data[d->icode].genfun) (op[0]);
9412 break;
9413 case 2:
9414 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
9415 break;
9416 case 3:
9417 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
9418 break;
9419 case 4:
9420 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
9421 break;
9422 default:
9423 abort ();
9425 if (! pat)
9426 return 0;
9427 emit_insn (pat);
9428 return target;
9431 void
9432 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
9434 rtx sel0 = const0_rtx;
9435 rtx sel1 = const1_rtx;
9436 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
9437 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
9439 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
9440 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
9443 void
9444 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
9446 rtx sel0 = const0_rtx;
9447 rtx sel1 = const1_rtx;
9448 rtx (*fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
9449 = gen_binary_sf_op;
9450 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
9452 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
9453 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
9456 /* Return the class of registers for which a mode change from FROM to TO
9457 is invalid. */
9458 bool
9459 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9460 enum reg_class class)
9462 /* We want to enable the use of SUBREGs as a means to
9463 VEC_SELECT a single element of a vector. */
9464 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
9465 return (reg_classes_intersect_p (GENERAL_REGS, class));
9467 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
9469 if (TARGET_LITTLE_ENDIAN)
9471 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
9472 return reg_classes_intersect_p (DF_REGS, class);
9474 else
9476 if (GET_MODE_SIZE (from) < 8)
9477 return reg_classes_intersect_p (DF_HI_REGS, class);
9480 return 0;
9484 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
9485 that label is used. */
9487 void
9488 sh_mark_label (rtx address, int nuses)
9490 if (GOTOFF_P (address))
9492 /* Extract the label or symbol. */
9493 address = XEXP (address, 0);
9494 if (GET_CODE (address) == PLUS)
9495 address = XEXP (address, 0);
9496 address = XVECEXP (address, 0, 0);
9498 if (GET_CODE (address) == LABEL_REF
9499 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
9500 LABEL_NUSES (XEXP (address, 0)) += nuses;
9503 /* Compute extra cost of moving data between one register class
9504 and another. */
9506 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
9507 uses this information. Hence, the general register <-> floating point
9508 register information here is not used for SFmode. */
9511 sh_register_move_cost (enum machine_mode mode,
9512 enum reg_class srcclass, enum reg_class dstclass)
9514 if (dstclass == T_REGS || dstclass == PR_REGS)
9515 return 10;
9517 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
9518 return 4;
9520 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
9521 && REGCLASS_HAS_FP_REG (srcclass)
9522 && REGCLASS_HAS_FP_REG (dstclass))
9523 return 4;
9525 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
9526 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
9527 return 9;
9529 if ((REGCLASS_HAS_FP_REG (dstclass)
9530 && REGCLASS_HAS_GENERAL_REG (srcclass))
9531 || (REGCLASS_HAS_GENERAL_REG (dstclass)
9532 && REGCLASS_HAS_FP_REG (srcclass)))
9533 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
9534 * ((GET_MODE_SIZE (mode) + 7) / 8U));
9536 if ((dstclass == FPUL_REGS
9537 && REGCLASS_HAS_GENERAL_REG (srcclass))
9538 || (srcclass == FPUL_REGS
9539 && REGCLASS_HAS_GENERAL_REG (dstclass)))
9540 return 5;
9542 if ((dstclass == FPUL_REGS
9543 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
9544 || (srcclass == FPUL_REGS
9545 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
9546 return 7;
9548 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9549 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9550 return 20;
9552 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9553 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9554 return 4;
9556 if (TARGET_SHMEDIA
9557 || (TARGET_FMOVD
9558 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
9559 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
9560 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
9562 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
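/* Two worked cost examples (informal, following the cases above): on a
   non-SHMEDIA target without TARGET_FMOVD, moving a DFmode value between
   the general and floating-point register files costs
   12 * ((8 + 7) / 8) = 12, while an SImode move between two general
   registers falls through to the final return and costs
   2 * ((4 + 3) / 4) = 2.  */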
9565 /* Like register_operand, but take into account that SHMEDIA can use
9566 the constant zero like a general register. */
9568 sh_register_operand (rtx op, enum machine_mode mode)
9570 if (op == CONST0_RTX (mode) && TARGET_SHMEDIA)
9571 return 1;
9572 return register_operand (op, mode);
9576 cmpsi_operand (rtx op, enum machine_mode mode)
9578 if (GET_CODE (op) == REG && REGNO (op) == T_REG
9579 && GET_MODE (op) == SImode)
9580 return 1;
9581 return arith_operand (op, mode);
9584 static rtx emit_load_ptr (rtx, rtx);
9586 static rtx
9587 emit_load_ptr (rtx reg, rtx addr)
9589 rtx mem = gen_rtx_MEM (ptr_mode, addr);
9591 if (Pmode != ptr_mode)
9592 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
9593 return emit_move_insn (reg, mem);
9596 void
9597 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
9598 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
9599 tree function)
9601 CUMULATIVE_ARGS cum;
9602 int structure_value_byref = 0;
9603 rtx this, this_value, sibcall, insns, funexp;
9604 tree funtype = TREE_TYPE (function);
9605 int simple_add = CONST_OK_FOR_ADD (delta);
9606 int did_load = 0;
9607 rtx scratch0, scratch1, scratch2;
9609 reload_completed = 1;
9610 epilogue_completed = 1;
9611 no_new_pseudos = 1;
9612 current_function_uses_only_leaf_regs = 1;
9613 reset_block_changes ();
9615 emit_note (NOTE_INSN_PROLOGUE_END);
9617 /* Find the "this" pointer. We have such a wide range of ABIs for the
9618 SH that it's best to do this completely machine independently.
9619 "this" is passed as first argument, unless a structure return pointer
9620 comes first, in which case "this" comes second. */
9621 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
9622 #ifndef PCC_STATIC_STRUCT_RETURN
9623 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
9624 structure_value_byref = 1;
9625 #endif /* not PCC_STATIC_STRUCT_RETURN */
9626 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
9628 tree ptype = build_pointer_type (TREE_TYPE (funtype));
9630 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
9632 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
9634 /* For SHcompact, we only have r0 for a scratch register: r1 is the
9635 static chain pointer (even if you can't have nested virtual functions
9636 right now, someone might implement them sometime), and the rest of the
9637 registers are used for argument passing, are callee-saved, or reserved. */
9638 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
9639 if (! TARGET_SH5)
9641 scratch1 = gen_rtx_REG (ptr_mode, 1);
9642 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
9643 to the location where struct values are returned. */
9644 scratch2 = gen_rtx_REG (Pmode, 3);
9646 else if (TARGET_SHMEDIA)
9648 scratch1 = gen_rtx_REG (ptr_mode, 21);
9649 scratch2 = gen_rtx_REG (Pmode, TR0_REG);
9652 this_value = plus_constant (this, delta);
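  /* If THIS + DELTA is already a legitimate address and the loaded value
     cannot be clobbered by the DELTA adjustment below, load the vtable
     pointer now, before THIS is adjusted.  */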
  if (vcall_offset
      && (simple_add || scratch0 != scratch1)
      && strict_memory_address_p (ptr_mode, this_value))
    {
      emit_load_ptr (scratch0, this_value);
      did_load = 1;
    }

  if (!delta)
    ; /* Do nothing.  */
  else if (simple_add)
    emit_move_insn (this, this_value);
  else
    {
      emit_move_insn (scratch1, GEN_INT (delta));
      emit_insn (gen_add2_insn (this, scratch1));
    }

  if (vcall_offset)
    {
      rtx offset_addr;

      if (!did_load)
        emit_load_ptr (scratch0, this);

      offset_addr = plus_constant (scratch0, vcall_offset);
      if (strict_memory_address_p (ptr_mode, offset_addr))
        ; /* Do nothing.  */
      else if (! TARGET_SH5)
        {
          /* scratch0 != scratch1, and we have indexed loads.  Get a better
             schedule by loading the offset into r1 and using an indexed
             load - then the load of r1 can issue before the load from
             (this + delta) finishes.  */
          emit_move_insn (scratch1, GEN_INT (vcall_offset));
          offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
        }
      else if (CONST_OK_FOR_ADD (vcall_offset))
        {
          emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
          offset_addr = scratch0;
        }
      else if (scratch0 != scratch1)
        {
          emit_move_insn (scratch1, GEN_INT (vcall_offset));
          emit_insn (gen_add2_insn (scratch0, scratch1));
          offset_addr = scratch0;
        }
      else
        abort (); /* FIXME */
      emit_load_ptr (scratch0, offset_addr);

      if (Pmode != ptr_mode)
        scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
      emit_insn (gen_add2_insn (this, scratch0));
    }

  /* Generate a tail call to the target function.  */
  if (! TREE_USED (function))
    {
      assemble_external (function);
      TREE_USED (function) = 1;
    }
  funexp = XEXP (DECL_RTL (function), 0);
  emit_move_insn (scratch2, funexp);
  funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
  sibcall = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
  SIBLING_CALL_P (sibcall) = 1;
  use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
  emit_barrier ();

  /* Run just enough of rest_of_compilation to do scheduling and get
     the insns emitted.  Note that use_thunk calls
     assemble_start_function and assemble_end_function.  */

  insn_locators_initialize ();
  insns = get_insns ();

  if (optimize > 0 && flag_schedule_insns_after_reload)
    {
      find_basic_blocks (insns, max_reg_num (), dump_file);
      life_analysis (dump_file, PROP_FINAL);

      split_all_insns (1);

      schedule_insns (dump_file);
    }

  sh_reorg ();

  if (optimize > 0 && flag_delayed_branch)
    dbr_schedule (insns, dump_file);
  shorten_branches (insns);
  final_start_function (insns, file, 1);
  final (insns, file, 1, 0);
  final_end_function ();

  if (optimize > 0 && flag_schedule_insns_after_reload)
    {
      /* Release all memory allocated by flow.  */
      free_basic_block_vars ();

      /* Release all memory held by regsets now.  */
      regset_release_memory ();
    }

  reload_completed = 0;
  epilogue_completed = 0;
  no_new_pseudos = 0;
}
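
/* Return a SYMBOL_REF for NAME, marked as referring to a function.  */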
rtx
function_symbol (const char *name)
{
  rtx sym = gen_rtx_SYMBOL_REF (Pmode, name);
  SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
  return sym;
}

/* Find the number of the first general purpose register in S that is
   set, or -1 if none is.  */
static int
scavenge_reg (HARD_REG_SET *s)
{
  int r;
  for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
    if (TEST_HARD_REG_BIT (*s, r))
      return r;
  return -1;
}
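
/* Return an rtx for the value the return address register (PR) had on
   entry to the current function.  */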
rtx
sh_get_pr_initial_val (void)
{
  rtx val;

  /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
     PR register on SHcompact, because it might be clobbered by the prologue.
     We check first if that is known to be the case.  */
  if (TARGET_SHCOMPACT
      && ((current_function_args_info.call_cookie
           & ~ CALL_COOKIE_RET_TRAMP (1))
          || current_function_has_nonlocal_label))
    return gen_rtx_MEM (SImode, return_address_pointer_rtx);

  /* If we haven't finished rtl generation, there might be a nonlocal label
     that we haven't seen yet.
     ??? get_hard_reg_initial_val fails if it is called while no_new_pseudos
     is set, unless it has been called before for the same register.  And even
     then, we end up in trouble if we didn't use the register in the same
     basic block before.  So call get_hard_reg_initial_val now and wrap it
     in an unspec if we might need to replace it.  */
  /* ??? We also must do this for TARGET_SH1 in general, because otherwise
     combine can put the pseudo returned by get_hard_reg_initial_val into
     instructions that need a general purpose register, which will fail to
     be recognized when the pseudo becomes allocated to PR.  */
  val = get_hard_reg_initial_val (Pmode,
                                  TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
  if (TARGET_SH1)
    return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
  return val;
}
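
/* Expand an scc of CODE applied to the T register and a constant into
   TARGET.  Return 1 if the comparison was handled, 0 if the operands are
   not of that form.  */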
int
sh_expand_t_scc (enum rtx_code code, rtx target)
{
  rtx result = target;
  HOST_WIDE_INT val;

  if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
      || GET_CODE (sh_compare_op1) != CONST_INT)
    return 0;
  if (GET_CODE (result) != REG)
    result = gen_reg_rtx (SImode);
  val = INTVAL (sh_compare_op1);
  if ((code == EQ && val == 1) || (code == NE && val == 0))
    emit_insn (gen_movt (result));
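  /* RESULT must become !T.  The CLOBBER marks the old value of RESULT as
     unused; subc then computes RESULT - RESULT - T = -T, and adding 1
     turns 0/-1 into 1/0.  */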
  else if ((code == EQ && val == 0) || (code == NE && val == 1))
    {
      emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
      emit_insn (gen_subc (result, result, result));
      emit_insn (gen_addsi3 (result, result, const1_rtx));
    }
  else if (code == EQ || code == NE)
    emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
  else
    return 0;
  if (result != target)
    emit_move_insn (target, result);
  return 1;
}

/* INSN is an sfunc; return the rtx that describes the address used.  */
static rtx
extract_sfunc_addr (rtx insn)
{
  rtx pattern, part = NULL_RTX;
  int len, i;

  pattern = PATTERN (insn);
  len = XVECLEN (pattern, 0);
  for (i = 0; i < len; i++)
    {
      part = XVECEXP (pattern, 0, i);
      if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
          && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
        return XEXP (part, 0);
    }
  if (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE)
    return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
  abort ();
}

/* Verify that the register in use_sfunc_addr still agrees with the address
   used in the sfunc.  This prevents fill_slots_from_thread from changing
   use_sfunc_addr.
   INSN is the use_sfunc_addr instruction, and REG is the register it
   guards.  */
int
check_use_sfunc_addr (rtx insn, rtx reg)
{
  /* Search for the sfunc.  It should really come right after INSN.  */
  while ((insn = NEXT_INSN (insn)))
    {
      if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
        break;
      if (! INSN_P (insn))
        continue;

      if (GET_CODE (PATTERN (insn)) == SEQUENCE)
        insn = XVECEXP (PATTERN (insn), 0, 0);
      if (GET_CODE (PATTERN (insn)) != PARALLEL
          || get_attr_type (insn) != TYPE_SFUNC)
        continue;
      return rtx_equal_p (extract_sfunc_addr (insn), reg);
    }
  abort ();
}

/* Returns 1 if OP is a MEM that can be the source of a simple move
   operation.  */
int
unaligned_load_operand (rtx op, enum machine_mode mode)
{
  rtx inside;

  if (GET_CODE (op) != MEM || GET_MODE (op) != mode)
    return 0;

  inside = XEXP (op, 0);

  if (GET_CODE (inside) == POST_INC)
    inside = XEXP (inside, 0);

  if (GET_CODE (inside) == REG)
    return 1;

  return 0;
}

/* This function returns a constant rtx that represents 2**15 / pi in
   SFmode.  It's used to scale SFmode angles, in radians, to a
   fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
   maps to 0x10000.  */
static GTY(()) rtx sh_fsca_sf2int_rtx;

rtx
sh_fsca_sf2int (void)
{
  if (! sh_fsca_sf2int_rtx)
    {
      REAL_VALUE_TYPE rv;

      real_from_string (&rv, "10430.378350470453");
      sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
    }

  return sh_fsca_sf2int_rtx;
}

/* This function returns a constant rtx that represents 2**15 / pi in
   DFmode.  It's used to scale DFmode angles, in radians, to a
   fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
   maps to 0x10000.  */
static GTY(()) rtx sh_fsca_df2int_rtx;

rtx
sh_fsca_df2int (void)
{
  if (! sh_fsca_df2int_rtx)
    {
      REAL_VALUE_TYPE rv;

      real_from_string (&rv, "10430.378350470453");
      sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
    }

  return sh_fsca_df2int_rtx;
}

/* This function returns a constant rtx that represents pi / 2**15 in
   SFmode.  It's used to scale a fixed-point signed 16.16-bit fraction
   of a full circle back to an SFmode value, i.e., 0x10000 maps to
   2*pi.  */
static GTY(()) rtx sh_fsca_int2sf_rtx;

rtx
sh_fsca_int2sf (void)
{
  if (! sh_fsca_int2sf_rtx)
    {
      REAL_VALUE_TYPE rv;

      real_from_string (&rv, "9.587379924285257e-5");
      sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
    }

  return sh_fsca_int2sf_rtx;
}

#include "gt-sh.h"