2004-08-23 Eric Christopher <echristo@redhat.com>
[official-gcc.git] / gcc / config / sh / sh.c
blob1c539e880a3c3e27e76f7b6ce798fe09a273127d
1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1997, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "insn-config.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "flags.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "function.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "output.h"
38 #include "insn-attr.h"
39 #include "toplev.h"
40 #include "recog.h"
41 #include "c-pragma.h"
42 #include "integrate.h"
43 #include "tm_p.h"
44 #include "target.h"
45 #include "target-def.h"
46 #include "real.h"
47 #include "langhooks.h"
48 #include "basic-block.h"
49 #include "ra.h"
50 #include "cfglayout.h"
51 #include "intl.h"
52 #include "sched-int.h"
53 #include "ggc.h"
54 #include "tree-gimple.h"
57 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
59 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
60 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
62 /* These are some macros to abstract register modes. */
63 #define CONST_OK_FOR_ADD(size) \
64 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
65 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
66 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
67 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
69 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
70 int current_function_interrupt;
72 /* ??? The pragma interrupt support will not work for SH3. */
73 /* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
74 output code for the next function appropriate for an interrupt handler. */
75 int pragma_interrupt;
77 /* This is set by the trap_exit attribute for functions. It specifies
78 a trap number to be used in a trapa instruction at function exit
79 (instead of an rte instruction). */
80 int trap_exit;
82 /* This is used by the sp_switch attribute for functions. It specifies
83 a variable holding the address of the stack the interrupt function
84 should switch to/from at entry/exit. */
85 rtx sp_switch;
87 /* This is set by #pragma trapa, and is similar to the above, except that
88 the compiler doesn't emit code to preserve all registers. */
89 static int pragma_trapa;
91 /* This is set by #pragma nosave_low_regs. This is useful on the SH3,
92 which has a separate set of low regs for User and Supervisor modes.
93 This should only be used for the lowest level of interrupts. Higher levels
94 of interrupts must save the registers in case they themselves are
95 interrupted. */
96 int pragma_nosave_low_regs;
98 /* This is used for communication between TARGET_SETUP_INCOMING_VARARGS and
99 sh_expand_prologue. */
100 int current_function_anonymous_args;
102 /* Global variables for machine-dependent things. */
104 /* Which cpu are we scheduling for. */
105 enum processor_type sh_cpu;
107 /* Definitions used in ready queue reordering for first scheduling pass. */
109 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
110 static short *regmode_weight[2];
112 /* Total SFmode and SImode weights of scheduled insns. */
113 static int curr_regmode_pressure[2];
115 /* If true, skip cycles for Q -> R movement. */
116 static int skip_cycles = 0;
118 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
119 and returned from sh_reorder2. */
120 static short cached_can_issue_more;
122 /* Saved operands from the last compare to use when we generate an scc
123 or bcc insn. */
125 rtx sh_compare_op0;
126 rtx sh_compare_op1;
128 /* Provides the class number of the smallest class containing
129 reg number. */
131 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
133 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
137 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
138 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
139 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
140 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
141 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
142 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
143 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
144 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
145 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
146 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
147 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
148 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
149 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
153 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
154 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
155 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
156 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
157 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
158 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
159 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
160 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
161 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
162 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
163 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
164 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
165 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
166 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
167 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
168 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
169 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
170 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
171 GENERAL_REGS,
174 char sh_register_names[FIRST_PSEUDO_REGISTER] \
175 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
177 char sh_additional_register_names[ADDREGNAMES_SIZE] \
178 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
179 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
181 /* Provide reg_class from a letter such as appears in the machine
182 description. *: target independently reserved letter.
183 reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */
185 enum reg_class reg_class_from_letter[] =
187 /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
188 /* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
189 /* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
190 /* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
191 /* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
192 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
193 /* y */ FPUL_REGS, /* z */ R0_REGS
196 int assembler_dialect;
198 static bool shmedia_space_reserved_for_target_registers;
200 static void split_branches (rtx);
201 static int branch_dest (rtx);
202 static void force_into (rtx, rtx);
203 static void print_slot (rtx);
204 static rtx add_constant (rtx, enum machine_mode, rtx);
205 static void dump_table (rtx, rtx);
206 static int hi_const (rtx);
207 static int broken_move (rtx);
208 static int mova_p (rtx);
209 static rtx find_barrier (int, rtx, rtx);
210 static int noncall_uses_reg (rtx, rtx, rtx *);
211 static rtx gen_block_redirect (rtx, int, int);
212 static void sh_reorg (void);
213 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
214 static rtx frame_insn (rtx);
215 static rtx push (int);
216 static void pop (int);
217 static void push_regs (HARD_REG_SET *, int);
218 static int calc_live_regs (HARD_REG_SET *);
219 static void mark_use (rtx, rtx *);
220 static HOST_WIDE_INT rounded_frame_size (int);
221 static rtx mark_constant_pool_use (rtx);
222 const struct attribute_spec sh_attribute_table[];
223 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
224 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
225 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
226 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
227 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
228 static void sh_insert_attributes (tree, tree *);
229 static int sh_adjust_cost (rtx, rtx, rtx, int);
230 static int sh_issue_rate (void);
231 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
232 static short find_set_regmode_weight (rtx, enum machine_mode);
233 static short find_insn_regmode_weight (rtx, enum machine_mode);
234 static void find_regmode_weight (int, enum machine_mode);
235 static void sh_md_init_global (FILE *, int, int);
236 static void sh_md_finish_global (FILE *, int);
237 static int rank_for_reorder (const void *, const void *);
238 static void swap_reorder (rtx *, int);
239 static void ready_reorder (rtx *, int);
240 static short high_pressure (enum machine_mode);
241 static int sh_reorder (FILE *, int, rtx *, int *, int);
242 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
243 static void sh_md_init (FILE *, int, int);
244 static int sh_variable_issue (FILE *, int, rtx, int);
246 static bool sh_function_ok_for_sibcall (tree, tree);
248 static bool sh_cannot_modify_jumps_p (void);
249 static int sh_target_reg_class (void);
250 static bool sh_optimize_target_register_callee_saved (bool);
251 static bool sh_ms_bitfield_layout_p (tree);
253 static void sh_init_builtins (void);
254 static void sh_media_init_builtins (void);
255 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
256 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
257 static void sh_file_start (void);
258 static int flow_dependent_p (rtx, rtx);
259 static void flow_dependent_p_1 (rtx, rtx, void *);
260 static int shiftcosts (rtx);
261 static int andcosts (rtx);
262 static int addsubcosts (rtx);
263 static int multcosts (rtx);
264 static bool unspec_caller_rtx_p (rtx);
265 static bool sh_cannot_copy_insn_p (rtx);
266 static bool sh_rtx_costs (rtx, int, int, int *);
267 static int sh_address_cost (rtx);
268 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
269 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
270 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
271 static int scavenge_reg (HARD_REG_SET *s);
272 struct save_schedule_s;
273 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
274 struct save_schedule_s *, int);
276 static rtx sh_struct_value_rtx (tree, int);
277 static bool sh_return_in_memory (tree, tree);
278 static rtx sh_builtin_saveregs (void);
279 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
280 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
281 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
282 static tree sh_build_builtin_va_list (void);
283 static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
284 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
285 tree, bool);
288 /* Initialize the GCC target structure. */
289 #undef TARGET_ATTRIBUTE_TABLE
290 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
292 /* The next two are used for debug info when compiling with -gdwarf. */
293 #undef TARGET_ASM_UNALIGNED_HI_OP
294 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
295 #undef TARGET_ASM_UNALIGNED_SI_OP
296 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
298 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
299 #undef TARGET_ASM_UNALIGNED_DI_OP
300 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
301 #undef TARGET_ASM_ALIGNED_DI_OP
302 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
304 #undef TARGET_ASM_FUNCTION_EPILOGUE
305 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
307 #undef TARGET_ASM_OUTPUT_MI_THUNK
308 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
310 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
311 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
313 #undef TARGET_ASM_FILE_START
314 #define TARGET_ASM_FILE_START sh_file_start
315 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
316 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
318 #undef TARGET_INSERT_ATTRIBUTES
319 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
321 #undef TARGET_SCHED_ADJUST_COST
322 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
324 #undef TARGET_SCHED_ISSUE_RATE
325 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
327 /* The next 5 hooks have been implemented for reenabling sched1. With the
328 help of these macros we are limiting the movement of insns in sched1 to
329 reduce the register pressure. The overall idea is to keep count of SImode
330 and SFmode regs required by already scheduled insns. When these counts
331 cross some threshold values; give priority to insns that free registers.
332 The insn that frees registers is most likely to be the insn with lowest
333 LUID (original insn order); but such an insn might be there in the stalled
334 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
335 upto a max of 8 cycles so that such insns may move from Q -> R.
337 The description of the hooks are as below:
339 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
340 scheduler; it is called inside the sched_init function just after
341 find_insn_reg_weights function call. It is used to calculate the SImode
342 and SFmode weights of insns of basic blocks; much similar to what
343 find_insn_reg_weights does.
344 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
346 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
347 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
348 (Q)->(R).
350 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
351 high; reorder the ready queue so that the insn with lowest LUID will be
352 issued next.
354 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
355 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
357 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
358 can be returned from TARGET_SCHED_REORDER2.
360 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
362 #undef TARGET_SCHED_DFA_NEW_CYCLE
363 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
365 #undef TARGET_SCHED_INIT_GLOBAL
366 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
368 #undef TARGET_SCHED_FINISH_GLOBAL
369 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
371 #undef TARGET_SCHED_VARIABLE_ISSUE
372 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
374 #undef TARGET_SCHED_REORDER
375 #define TARGET_SCHED_REORDER sh_reorder
377 #undef TARGET_SCHED_REORDER2
378 #define TARGET_SCHED_REORDER2 sh_reorder2
380 #undef TARGET_SCHED_INIT
381 #define TARGET_SCHED_INIT sh_md_init
383 #undef TARGET_CANNOT_MODIFY_JUMPS_P
384 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
385 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
386 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
387 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
388 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
389 sh_optimize_target_register_callee_saved
391 #undef TARGET_MS_BITFIELD_LAYOUT_P
392 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
394 #undef TARGET_INIT_BUILTINS
395 #define TARGET_INIT_BUILTINS sh_init_builtins
396 #undef TARGET_EXPAND_BUILTIN
397 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
399 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
400 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
402 #undef TARGET_CANNOT_COPY_INSN_P
403 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
404 #undef TARGET_RTX_COSTS
405 #define TARGET_RTX_COSTS sh_rtx_costs
406 #undef TARGET_ADDRESS_COST
407 #define TARGET_ADDRESS_COST sh_address_cost
409 #undef TARGET_MACHINE_DEPENDENT_REORG
410 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
412 #ifdef HAVE_AS_TLS
413 #undef TARGET_HAVE_TLS
414 #define TARGET_HAVE_TLS true
415 #endif
417 #undef TARGET_PROMOTE_PROTOTYPES
418 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
419 #undef TARGET_PROMOTE_FUNCTION_ARGS
420 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
421 #undef TARGET_PROMOTE_FUNCTION_RETURN
422 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
424 #undef TARGET_STRUCT_VALUE_RTX
425 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
426 #undef TARGET_RETURN_IN_MEMORY
427 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
429 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
430 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
431 #undef TARGET_SETUP_INCOMING_VARARGS
432 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
433 #undef TARGET_STRICT_ARGUMENT_NAMING
434 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
435 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
436 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
437 #undef TARGET_MUST_PASS_IN_STACK
438 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
439 #undef TARGET_PASS_BY_REFERENCE
440 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
442 #undef TARGET_BUILD_BUILTIN_VA_LIST
443 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
444 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
445 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
447 #undef TARGET_VECTOR_MODE_SUPPORTED_P
448 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
450 #undef TARGET_PCH_VALID_P
451 #define TARGET_PCH_VALID_P sh_pch_valid_p
453 /* Return regmode weight for insn. */
454 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
456 /* Return current register pressure for regmode. */
457 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
459 #ifdef SYMBIAN
461 #undef TARGET_ENCODE_SECTION_INFO
462 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
463 #undef TARGET_STRIP_NAME_ENCODING
464 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
465 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
466 #define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class
468 #endif /* SYMBIAN */
470 struct gcc_target targetm = TARGET_INITIALIZER;
472 /* Print the operand address in x to the stream. */
474 void
475 print_operand_address (FILE *stream, rtx x)
477 switch (GET_CODE (x))
479 case REG:
480 case SUBREG:
481 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
482 break;
484 case PLUS:
486 rtx base = XEXP (x, 0);
487 rtx index = XEXP (x, 1);
489 switch (GET_CODE (index))
491 case CONST_INT:
492 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
493 reg_names[true_regnum (base)]);
494 break;
496 case REG:
497 case SUBREG:
499 int base_num = true_regnum (base);
500 int index_num = true_regnum (index);
502 fprintf (stream, "@(r0,%s)",
503 reg_names[MAX (base_num, index_num)]);
504 break;
507 default:
508 debug_rtx (x);
509 abort ();
512 break;
514 case PRE_DEC:
515 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
516 break;
518 case POST_INC:
519 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
520 break;
522 default:
523 x = mark_constant_pool_use (x);
524 output_addr_const (stream, x);
525 break;
529 /* Print operand x (an rtx) in assembler syntax to file stream
530 according to modifier code.
532 '.' print a .s if insn needs delay slot
533 ',' print LOCAL_LABEL_PREFIX
534 '@' print trap, rte or rts depending upon pragma interruptness
535 '#' output a nop if there is nothing to put in the delay slot
536 ''' print likelihood suffix (/u for unlikely).
537 'O' print a constant without the #
538 'R' print the LSW of a dp value - changes if in little endian
539 'S' print the MSW of a dp value - changes if in little endian
540 'T' print the next word of a dp value - same as 'R' in big endian mode.
541 'M' print an `x' if `m' will print `base,index'.
542 'N' print 'r63' if the operand is (const_int 0).
543 'd' print a V2SF reg as dN instead of fpN.
544 'm' print a pair `base,offset' or `base,index', for LD and ST.
545 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
546 'o' output an operator. */
548 void
549 print_operand (FILE *stream, rtx x, int code)
551 switch (code)
553 case '.':
554 if (final_sequence
555 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
556 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
557 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
558 break;
559 case ',':
560 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
561 break;
562 case '@':
563 if (trap_exit)
564 fprintf (stream, "trapa #%d", trap_exit);
565 else if (sh_cfun_interrupt_handler_p ())
566 fprintf (stream, "rte");
567 else
568 fprintf (stream, "rts");
569 break;
570 case '#':
571 /* Output a nop if there's nothing in the delay slot. */
572 if (dbr_sequence_length () == 0)
573 fprintf (stream, "\n\tnop");
574 break;
575 case '\'':
577 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
579 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
580 fputs ("/u", stream);
581 break;
583 case 'O':
584 x = mark_constant_pool_use (x);
585 output_addr_const (stream, x);
586 break;
587 case 'R':
588 fputs (reg_names[REGNO (x) + LSW], (stream));
589 break;
590 case 'S':
591 fputs (reg_names[REGNO (x) + MSW], (stream));
592 break;
593 case 'T':
594 /* Next word of a double. */
595 switch (GET_CODE (x))
597 case REG:
598 fputs (reg_names[REGNO (x) + 1], (stream));
599 break;
600 case MEM:
601 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
602 && GET_CODE (XEXP (x, 0)) != POST_INC)
603 x = adjust_address (x, SImode, 4);
604 print_operand_address (stream, XEXP (x, 0));
605 break;
606 default:
607 break;
609 break;
610 case 'o':
611 switch (GET_CODE (x))
613 case PLUS: fputs ("add", stream); break;
614 case MINUS: fputs ("sub", stream); break;
615 case MULT: fputs ("mul", stream); break;
616 case DIV: fputs ("div", stream); break;
617 case EQ: fputs ("eq", stream); break;
618 case NE: fputs ("ne", stream); break;
619 case GT: case LT: fputs ("gt", stream); break;
620 case GE: case LE: fputs ("ge", stream); break;
621 case GTU: case LTU: fputs ("gtu", stream); break;
622 case GEU: case LEU: fputs ("geu", stream); break;
623 default:
624 break;
626 break;
627 case 'M':
628 if (GET_CODE (x) == MEM
629 && GET_CODE (XEXP (x, 0)) == PLUS
630 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
631 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
632 fputc ('x', stream);
633 break;
635 case 'm':
636 if (GET_CODE (x) != MEM)
637 abort ();
638 x = XEXP (x, 0);
639 switch (GET_CODE (x))
641 case REG:
642 case SUBREG:
643 print_operand (stream, x, 0);
644 fputs (", 0", stream);
645 break;
647 case PLUS:
648 print_operand (stream, XEXP (x, 0), 0);
649 fputs (", ", stream);
650 print_operand (stream, XEXP (x, 1), 0);
651 break;
653 default:
654 abort ();
656 break;
658 case 'd':
659 if (GET_CODE (x) != REG || GET_MODE (x) != V2SFmode)
660 abort ();
662 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
663 break;
665 case 'N':
666 if (x == CONST0_RTX (GET_MODE (x)))
668 fprintf ((stream), "r63");
669 break;
671 goto default_output;
672 case 'u':
673 if (GET_CODE (x) == CONST_INT)
675 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
676 break;
678 /* Fall through. */
680 default_output:
681 default:
682 switch (GET_CODE (x))
684 /* FIXME: We need this on SHmedia32 because reload generates
685 some sign-extended HI or QI loads into DImode registers
686 but, because Pmode is SImode, the address ends up with a
687 subreg:SI of the DImode register. Maybe reload should be
688 fixed so as to apply alter_subreg to such loads? */
689 case SUBREG:
690 if (SUBREG_BYTE (x) != 0
691 || GET_CODE (SUBREG_REG (x)) != REG)
692 abort ();
694 x = SUBREG_REG (x);
695 /* Fall through. */
697 case REG:
698 if (FP_REGISTER_P (REGNO (x))
699 && GET_MODE (x) == V16SFmode)
700 fprintf ((stream), "mtrx%s", reg_names[REGNO (x)] + 2);
701 else if (FP_REGISTER_P (REGNO (x))
702 && GET_MODE (x) == V4SFmode)
703 fprintf ((stream), "fv%s", reg_names[REGNO (x)] + 2);
704 else if (GET_CODE (x) == REG
705 && GET_MODE (x) == V2SFmode)
706 fprintf ((stream), "fp%s", reg_names[REGNO (x)] + 2);
707 else if (FP_REGISTER_P (REGNO (x))
708 && GET_MODE_SIZE (GET_MODE (x)) > 4)
709 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
710 else
711 fputs (reg_names[REGNO (x)], (stream));
712 break;
714 case MEM:
715 output_address (XEXP (x, 0));
716 break;
718 case CONST:
719 if (TARGET_SHMEDIA
720 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
721 && GET_MODE (XEXP (x, 0)) == DImode
722 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
723 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
725 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
727 fputc ('(', stream);
728 if (GET_CODE (val) == ASHIFTRT)
730 fputc ('(', stream);
731 if (GET_CODE (XEXP (val, 0)) == CONST)
732 fputc ('(', stream);
733 output_addr_const (stream, XEXP (val, 0));
734 if (GET_CODE (XEXP (val, 0)) == CONST)
735 fputc (')', stream);
736 fputs (" >> ", stream);
737 output_addr_const (stream, XEXP (val, 1));
738 fputc (')', stream);
740 else
742 if (GET_CODE (val) == CONST)
743 fputc ('(', stream);
744 output_addr_const (stream, val);
745 if (GET_CODE (val) == CONST)
746 fputc (')', stream);
748 fputs (" & 65535)", stream);
749 break;
752 /* Fall through. */
753 default:
754 if (TARGET_SH1)
755 fputc ('#', stream);
756 output_addr_const (stream, x);
757 break;
759 break;
763 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
764 static void
765 force_into (rtx value, rtx target)
767 value = force_operand (value, target);
768 if (! rtx_equal_p (value, target))
769 emit_insn (gen_move_insn (target, value));
772 /* Emit code to perform a block move. Choose the best method.
774 OPERANDS[0] is the destination.
775 OPERANDS[1] is the source.
776 OPERANDS[2] is the size.
777 OPERANDS[3] is the alignment safe to use. */
780 expand_block_move (rtx *operands)
782 int align = INTVAL (operands[3]);
783 int constp = (GET_CODE (operands[2]) == CONST_INT);
784 int bytes = (constp ? INTVAL (operands[2]) : 0);
786 if (! constp)
787 return 0;
789 /* If we could use mov.l to move words and dest is word-aligned, we
790 can use movua.l for loads and still generate a relatively short
791 and efficient sequence. */
792 if (TARGET_SH4A_ARCH && align < 4
793 && MEM_ALIGN (operands[0]) >= 32
794 && can_move_by_pieces (bytes, 32))
796 rtx dest = copy_rtx (operands[0]);
797 rtx src = copy_rtx (operands[1]);
798 /* We could use different pseudos for each copied word, but
799 since movua can only load into r0, it's kind of
800 pointless. */
801 rtx temp = gen_reg_rtx (SImode);
802 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
803 int copied = 0;
805 while (copied + 4 <= bytes)
807 rtx to = adjust_address (dest, SImode, copied);
808 rtx from = adjust_automodify_address (src, SImode, src_addr, copied);
810 emit_insn (gen_movua (temp, from));
811 emit_move_insn (src_addr, plus_constant (src_addr, 4));
812 emit_move_insn (to, temp);
813 copied += 4;
816 if (copied < bytes)
817 move_by_pieces (adjust_address (dest, BLKmode, copied),
818 adjust_automodify_address (src, BLKmode,
819 src_addr, copied),
820 bytes - copied, align, 0);
822 return 1;
825 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
826 alignment, or if it isn't a multiple of 4 bytes, then fail. */
827 if (align < 4 || (bytes % 4 != 0))
828 return 0;
830 if (TARGET_HARD_SH4)
832 if (bytes < 12)
833 return 0;
834 else if (bytes == 12)
836 tree entry_name;
837 rtx sym;
838 rtx func_addr_rtx;
839 rtx r4 = gen_rtx_REG (SImode, 4);
840 rtx r5 = gen_rtx_REG (SImode, 5);
842 entry_name = get_identifier ("__movmemSI12_i4");
844 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
845 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
846 force_into (XEXP (operands[0], 0), r4);
847 force_into (XEXP (operands[1], 0), r5);
848 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
849 return 1;
851 else if (! TARGET_SMALLCODE)
853 tree entry_name;
854 rtx sym;
855 rtx func_addr_rtx;
856 int dwords;
857 rtx r4 = gen_rtx_REG (SImode, 4);
858 rtx r5 = gen_rtx_REG (SImode, 5);
859 rtx r6 = gen_rtx_REG (SImode, 6);
861 entry_name = get_identifier (bytes & 4
862 ? "__movmem_i4_odd"
863 : "__movmem_i4_even");
864 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
865 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
866 force_into (XEXP (operands[0], 0), r4);
867 force_into (XEXP (operands[1], 0), r5);
869 dwords = bytes >> 3;
870 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
871 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
872 return 1;
874 else
875 return 0;
877 if (bytes < 64)
879 char entry[30];
880 tree entry_name;
881 rtx sym;
882 rtx func_addr_rtx;
883 rtx r4 = gen_rtx_REG (SImode, 4);
884 rtx r5 = gen_rtx_REG (SImode, 5);
886 sprintf (entry, "__movmemSI%d", bytes);
887 entry_name = get_identifier (entry);
888 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
889 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
890 force_into (XEXP (operands[0], 0), r4);
891 force_into (XEXP (operands[1], 0), r5);
892 emit_insn (gen_block_move_real (func_addr_rtx));
893 return 1;
896 /* This is the same number of bytes as a memcpy call, but to a different
897 less common function name, so this will occasionally use more space. */
898 if (! TARGET_SMALLCODE)
900 tree entry_name;
901 rtx sym;
902 rtx func_addr_rtx;
903 int final_switch, while_loop;
904 rtx r4 = gen_rtx_REG (SImode, 4);
905 rtx r5 = gen_rtx_REG (SImode, 5);
906 rtx r6 = gen_rtx_REG (SImode, 6);
908 entry_name = get_identifier ("__movmem");
909 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
910 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
911 force_into (XEXP (operands[0], 0), r4);
912 force_into (XEXP (operands[1], 0), r5);
914 /* r6 controls the size of the move. 16 is decremented from it
915 for each 64 bytes moved. Then the negative bit left over is used
916 as an index into a list of move instructions. e.g., a 72 byte move
917 would be set up with size(r6) = 14, for one iteration through the
918 big while loop, and a switch of -2 for the last part. */
920 final_switch = 16 - ((bytes / 4) % 16);
921 while_loop = ((bytes / 4) / 16 - 1) * 16;
922 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
923 emit_insn (gen_block_lump_real (func_addr_rtx));
924 return 1;
927 return 0;
930 /* Prepare operands for a move define_expand; specifically, one of the
931 operands must be in a register. */
934 prepare_move_operands (rtx operands[], enum machine_mode mode)
936 if ((mode == SImode || mode == DImode)
937 && flag_pic
938 && ! ((mode == Pmode || mode == ptr_mode)
939 && tls_symbolic_operand (operands[1], Pmode) != 0))
941 rtx temp;
942 if (SYMBOLIC_CONST_P (operands[1]))
944 if (GET_CODE (operands[0]) == MEM)
945 operands[1] = force_reg (Pmode, operands[1]);
946 else if (TARGET_SHMEDIA
947 && GET_CODE (operands[1]) == LABEL_REF
948 && target_reg_operand (operands[0], mode))
949 /* It's ok. */;
950 else
952 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
953 operands[1] = legitimize_pic_address (operands[1], mode, temp);
956 else if (GET_CODE (operands[1]) == CONST
957 && GET_CODE (XEXP (operands[1], 0)) == PLUS
958 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
960 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
961 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
962 mode, temp);
963 operands[1] = expand_binop (mode, add_optab, temp,
964 XEXP (XEXP (operands[1], 0), 1),
965 no_new_pseudos ? temp
966 : gen_reg_rtx (Pmode),
967 0, OPTAB_LIB_WIDEN);
971 if (! reload_in_progress && ! reload_completed)
973 /* Copy the source to a register if both operands aren't registers. */
974 if (! register_operand (operands[0], mode)
975 && ! sh_register_operand (operands[1], mode))
976 operands[1] = copy_to_mode_reg (mode, operands[1]);
978 if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
980 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
981 except that we can't use that function because it is static. */
982 rtx new = change_address (operands[0], mode, 0);
983 MEM_COPY_ATTRIBUTES (new, operands[0]);
984 operands[0] = new;
987 /* This case can happen while generating code to move the result
988 of a library call to the target. Reject `st r0,@(rX,rY)' because
989 reload will fail to find a spill register for rX, since r0 is already
990 being used for the source. */
991 else if (refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
992 && GET_CODE (operands[0]) == MEM
993 && GET_CODE (XEXP (operands[0], 0)) == PLUS
994 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
995 operands[1] = copy_to_mode_reg (mode, operands[1]);
998 if (mode == Pmode || mode == ptr_mode)
1000 rtx op0, op1;
1001 enum tls_model tls_kind;
1003 op0 = operands[0];
1004 op1 = operands[1];
1005 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
1007 rtx tga_op1, tga_ret, tmp, tmp2;
1010 switch (tls_kind)
1012 case TLS_MODEL_GLOBAL_DYNAMIC:
1013 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1014 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1015 op1 = tga_ret;
1016 break;
1018 case TLS_MODEL_LOCAL_DYNAMIC:
1019 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1020 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1022 tmp = gen_reg_rtx (Pmode);
1023 emit_move_insn (tmp, tga_ret);
1025 if (register_operand (op0, Pmode))
1026 tmp2 = op0;
1027 else
1028 tmp2 = gen_reg_rtx (Pmode);
1030 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1031 op1 = tmp2;
1032 break;
1034 case TLS_MODEL_INITIAL_EXEC:
1035 if (! flag_pic)
1036 emit_insn (gen_GOTaddr2picreg ());
1037 tga_op1 = gen_reg_rtx (Pmode);
1038 tmp = gen_sym2GOTTPOFF (op1);
1039 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1040 op1 = tga_op1;
1041 break;
1043 case TLS_MODEL_LOCAL_EXEC:
1044 tmp2 = gen_reg_rtx (Pmode);
1045 emit_insn (gen_load_gbr (tmp2));
1046 tmp = gen_reg_rtx (Pmode);
1047 emit_insn (gen_symTPOFF2reg (tmp, op1));
1049 if (register_operand (op0, Pmode))
1050 op1 = op0;
1051 else
1052 op1 = gen_reg_rtx (Pmode);
1054 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1055 break;
1057 default:
1058 abort ();
1060 operands[1] = op1;
1064 return 0;
1067 /* Prepare the operands for an scc instruction; make sure that the
1068 compare has been done. */
1070 prepare_scc_operands (enum rtx_code code)
1072 rtx t_reg = gen_rtx_REG (SImode, T_REG);
1073 enum rtx_code oldcode = code;
1074 enum machine_mode mode;
1076 /* First need a compare insn. */
1077 switch (code)
1079 case NE:
1080 /* It isn't possible to handle this case. */
1081 abort ();
1082 case LT:
1083 code = GT;
1084 break;
1085 case LE:
1086 code = GE;
1087 break;
1088 case LTU:
1089 code = GTU;
1090 break;
1091 case LEU:
1092 code = GEU;
1093 break;
1094 default:
1095 break;
1097 if (code != oldcode)
1099 rtx tmp = sh_compare_op0;
1100 sh_compare_op0 = sh_compare_op1;
1101 sh_compare_op1 = tmp;
1104 mode = GET_MODE (sh_compare_op0);
1105 if (mode == VOIDmode)
1106 mode = GET_MODE (sh_compare_op1);
1108 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1109 if ((code != EQ && code != NE
1110 && (sh_compare_op1 != const0_rtx
1111 || code == GTU || code == GEU || code == LTU || code == LEU))
1112 || (mode == DImode && sh_compare_op1 != const0_rtx)
1113 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1114 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1116 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1117 (mode == SFmode ? emit_sf_insn : emit_df_insn)
1118 (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
1119 gen_rtx_SET (VOIDmode, t_reg,
1120 gen_rtx_fmt_ee (code, SImode,
1121 sh_compare_op0, sh_compare_op1)),
1122 gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
1123 else
1124 emit_insn (gen_rtx_SET (VOIDmode, t_reg,
1125 gen_rtx_fmt_ee (code, SImode,
1126 sh_compare_op0, sh_compare_op1)));
1128 return t_reg;
1131 /* Called from the md file, set up the operands of a compare instruction. */
1133 void
1134 from_compare (rtx *operands, int code)
1136 enum machine_mode mode = GET_MODE (sh_compare_op0);
1137 rtx insn;
1138 if (mode == VOIDmode)
1139 mode = GET_MODE (sh_compare_op1);
1140 if (code != EQ
1141 || mode == DImode
1142 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1144 /* Force args into regs, since we can't use constants here. */
1145 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1146 if (sh_compare_op1 != const0_rtx
1147 || code == GTU || code == GEU
1148 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1149 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1151 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
1153 from_compare (operands, GT);
1154 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
1156 else
1157 insn = gen_rtx_SET (VOIDmode,
1158 gen_rtx_REG (SImode, T_REG),
1159 gen_rtx_fmt_ee (code, SImode,
1160 sh_compare_op0, sh_compare_op1));
1161 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1163 insn = gen_rtx_PARALLEL (VOIDmode,
1164 gen_rtvec (2, insn,
1165 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
1166 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
1168 else
1169 emit_insn (insn);
1172 /* Functions to output assembly code. */
1174 /* Return a sequence of instructions to perform DI or DF move.
1176 Since the SH cannot move a DI or DF in one instruction, we have
1177 to take care when we see overlapping source and dest registers. */
1179 const char *
1180 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
1181 enum machine_mode mode)
1183 rtx dst = operands[0];
1184 rtx src = operands[1];
1186 if (GET_CODE (dst) == MEM
1187 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
1188 return "mov.l %T1,%0\n\tmov.l %1,%0";
1190 if (register_operand (dst, mode)
1191 && register_operand (src, mode))
1193 if (REGNO (src) == MACH_REG)
1194 return "sts mach,%S0\n\tsts macl,%R0";
1196 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1197 when mov.d r1,r0 do r1->r0 then r2->r1. */
1199 if (REGNO (src) + 1 == REGNO (dst))
1200 return "mov %T1,%T0\n\tmov %1,%0";
1201 else
1202 return "mov %1,%0\n\tmov %T1,%T0";
1204 else if (GET_CODE (src) == CONST_INT)
1206 if (INTVAL (src) < 0)
1207 output_asm_insn ("mov #-1,%S0", operands);
1208 else
1209 output_asm_insn ("mov #0,%S0", operands);
1211 return "mov %1,%R0";
1213 else if (GET_CODE (src) == MEM)
1215 int ptrreg = -1;
1216 int dreg = REGNO (dst);
1217 rtx inside = XEXP (src, 0);
1219 if (GET_CODE (inside) == REG)
1220 ptrreg = REGNO (inside);
1221 else if (GET_CODE (inside) == SUBREG)
1222 ptrreg = subreg_regno (inside);
1223 else if (GET_CODE (inside) == PLUS)
1225 ptrreg = REGNO (XEXP (inside, 0));
1226 /* ??? A r0+REG address shouldn't be possible here, because it isn't
1227 an offsettable address. Unfortunately, offsettable addresses use
1228 QImode to check the offset, and a QImode offsettable address
1229 requires r0 for the other operand, which is not currently
1230 supported, so we can't use the 'o' constraint.
1231 Thus we must check for and handle r0+REG addresses here.
1232 We punt for now, since this is likely very rare. */
1233 if (GET_CODE (XEXP (inside, 1)) == REG)
1234 abort ();
1236 else if (GET_CODE (inside) == LABEL_REF)
1237 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1238 else if (GET_CODE (inside) == POST_INC)
1239 return "mov.l %1,%0\n\tmov.l %1,%T0";
1240 else
1241 abort ();
1243 /* Work out the safe way to copy. Copy into the second half first. */
1244 if (dreg == ptrreg)
1245 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1248 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1251 /* Print an instruction which would have gone into a delay slot after
1252 another instruction, but couldn't because the other instruction expanded
1253 into a sequence where putting the slot insn at the end wouldn't work. */
1255 static void
1256 print_slot (rtx insn)
1258 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1, NULL);
1260 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
1263 const char *
1264 output_far_jump (rtx insn, rtx op)
1266 struct { rtx lab, reg, op; } this;
1267 rtx braf_base_lab = NULL_RTX;
1268 const char *jump;
1269 int far;
1270 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1271 rtx prev;
1273 this.lab = gen_label_rtx ();
1275 if (TARGET_SH2
1276 && offset >= -32764
1277 && offset - get_attr_length (insn) <= 32766)
1279 far = 0;
1280 jump = "mov.w %O0,%1; braf %1";
1282 else
1284 far = 1;
1285 if (flag_pic)
1287 if (TARGET_SH2)
1288 jump = "mov.l %O0,%1; braf %1";
1289 else
1290 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1292 else
1293 jump = "mov.l %O0,%1; jmp @%1";
1295 /* If we have a scratch register available, use it. */
1296 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1297 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1299 this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1300 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1301 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1302 output_asm_insn (jump, &this.lab);
1303 if (dbr_sequence_length ())
1304 print_slot (final_sequence);
1305 else
1306 output_asm_insn ("nop", 0);
1308 else
1310 /* Output the delay slot insn first if any. */
1311 if (dbr_sequence_length ())
1312 print_slot (final_sequence);
1314 this.reg = gen_rtx_REG (SImode, 13);
1315 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1316 Fortunately, MACL is fixed and call-clobbered, and we never
1317 need its value across jumps, so save r13 in it instead of in
1318 the stack. */
1319 if (TARGET_SH5)
1320 output_asm_insn ("lds r13, macl", 0);
1321 else
1322 output_asm_insn ("mov.l r13,@-r15", 0);
1323 output_asm_insn (jump, &this.lab);
1324 if (TARGET_SH5)
1325 output_asm_insn ("sts macl, r13", 0);
1326 else
1327 output_asm_insn ("mov.l @r15+,r13", 0);
1329 if (far && flag_pic && TARGET_SH2)
1331 braf_base_lab = gen_label_rtx ();
1332 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1333 CODE_LABEL_NUMBER (braf_base_lab));
1335 if (far)
1336 output_asm_insn (".align 2", 0);
1337 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1338 this.op = op;
1339 if (far && flag_pic)
1341 if (TARGET_SH2)
1342 this.lab = braf_base_lab;
1343 output_asm_insn (".long %O2-%O0", &this.lab);
1345 else
1346 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
1347 return "";
1350 /* Local label counter, used for constants in the pool and inside
1351 pattern branches. */
1353 static int lf = 100;
1355 /* Output code for ordinary branches. */
1357 const char *
1358 output_branch (int logic, rtx insn, rtx *operands)
1360 switch (get_attr_length (insn))
1362 case 6:
1363 /* This can happen if filling the delay slot has caused a forward
1364 branch to exceed its range (we could reverse it, but only
1365 when we know we won't overextend other branches; this should
1366 best be handled by relaxation).
1367 It can also happen when other condbranches hoist delay slot insn
1368 from their destination, thus leading to code size increase.
1369 But the branch will still be in the range -4092..+4098 bytes. */
1371 if (! TARGET_RELAX)
1373 int label = lf++;
1374 /* The call to print_slot will clobber the operands. */
1375 rtx op0 = operands[0];
1377 /* If the instruction in the delay slot is annulled (true), then
1378 there is no delay slot where we can put it now. The only safe
1379 place for it is after the label. final will do that by default. */
1381 if (final_sequence
1382 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
1384 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
1385 ASSEMBLER_DIALECT ? "/" : ".", label);
1386 print_slot (final_sequence);
1388 else
1389 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
1391 output_asm_insn ("bra\t%l0", &op0);
1392 fprintf (asm_out_file, "\tnop\n");
1393 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1395 return "";
1397 /* When relaxing, handle this like a short branch. The linker
1398 will fix it up if it still doesn't fit after relaxation. */
1399 case 2:
1400 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
1402 /* These are for SH2e, in which we have to account for the
1403 extra nop because of the hardware bug in annulled branches. */
1404 case 8:
1405 if (! TARGET_RELAX)
1407 int label = lf++;
1409 if (final_sequence
1410 && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
1411 abort ();
1412 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
1413 logic ? "f" : "t",
1414 ASSEMBLER_DIALECT ? "/" : ".", label);
1415 fprintf (asm_out_file, "\tnop\n");
1416 output_asm_insn ("bra\t%l0", operands);
1417 fprintf (asm_out_file, "\tnop\n");
1418 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1420 return "";
1422 /* When relaxing, fall through. */
1423 case 4:
1425 char buffer[10];
1427 sprintf (buffer, "b%s%ss\t%%l0",
1428 logic ? "t" : "f",
1429 ASSEMBLER_DIALECT ? "/" : ".");
1430 output_asm_insn (buffer, &operands[0]);
1431 return "nop";
1434 default:
1435 /* There should be no longer branches now - that would
1436 indicate that something has destroyed the branches set
1437 up in machine_dependent_reorg. */
1438 abort ();
1442 const char *
1443 output_branchy_insn (enum rtx_code code, const char *template,
1444 rtx insn, rtx *operands)
1446 rtx next_insn = NEXT_INSN (insn);
1448 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
1450 rtx src = SET_SRC (PATTERN (next_insn));
1451 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
1453 /* Following branch not taken */
1454 operands[9] = gen_label_rtx ();
1455 emit_label_after (operands[9], next_insn);
1456 INSN_ADDRESSES_NEW (operands[9],
1457 INSN_ADDRESSES (INSN_UID (next_insn))
1458 + get_attr_length (next_insn));
1459 return template;
1461 else
1463 int offset = (branch_dest (next_insn)
1464 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
1465 if (offset >= -252 && offset <= 258)
1467 if (GET_CODE (src) == IF_THEN_ELSE)
1468 /* branch_true */
1469 src = XEXP (src, 1);
1470 operands[9] = src;
1471 return template;
1475 operands[9] = gen_label_rtx ();
1476 emit_label_after (operands[9], insn);
1477 INSN_ADDRESSES_NEW (operands[9],
1478 INSN_ADDRESSES (INSN_UID (insn))
1479 + get_attr_length (insn));
1480 return template;
1483 const char *
1484 output_ieee_ccmpeq (rtx insn, rtx *operands)
1486 return output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);
1489 /* Output the start of the assembler file. */
1491 static void
1492 sh_file_start (void)
1494 default_file_start ();
1496 #ifdef SYMBIAN
1497 /* Declare the .directive section before it is used. */
1498 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
1499 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
1500 #endif
1502 if (TARGET_ELF)
1503 /* We need to show the text section with the proper
1504 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
1505 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
1506 will complain. We can teach GAS specifically about the
1507 default attributes for our choice of text section, but
1508 then we would have to change GAS again if/when we change
1509 the text section name. */
1510 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
1511 else
1512 /* Switch to the data section so that the coffsem symbol
1513 isn't in the text section. */
1514 data_section ();
1516 if (TARGET_LITTLE_ENDIAN)
1517 fputs ("\t.little\n", asm_out_file);
1519 if (!TARGET_ELF)
1521 if (TARGET_SHCOMPACT)
1522 fputs ("\t.mode\tSHcompact\n", asm_out_file);
1523 else if (TARGET_SHMEDIA)
1524 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1525 TARGET_SHMEDIA64 ? 64 : 32);
1529 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
1531 static bool
1532 unspec_caller_rtx_p (rtx pat)
1534 switch (GET_CODE (pat))
1536 case CONST:
1537 return unspec_caller_rtx_p (XEXP (pat, 0));
1538 case PLUS:
1539 case MINUS:
1540 if (unspec_caller_rtx_p (XEXP (pat, 0)))
1541 return true;
1542 return unspec_caller_rtx_p (XEXP (pat, 1));
1543 case UNSPEC:
1544 if (XINT (pat, 1) == UNSPEC_CALLER)
1545 return true;
1546 default:
1547 break;
1550 return false;
1553 /* Indicate that INSN cannot be duplicated. This is true for insn
1554 that generates an unique label. */
1556 static bool
1557 sh_cannot_copy_insn_p (rtx insn)
1559 rtx pat;
1561 if (!reload_completed || !flag_pic)
1562 return false;
1564 if (GET_CODE (insn) != INSN)
1565 return false;
1566 if (asm_noperands (insn) >= 0)
1567 return false;
1569 pat = PATTERN (insn);
1570 if (GET_CODE (pat) != SET)
1571 return false;
1572 pat = SET_SRC (pat);
1574 if (unspec_caller_rtx_p (pat))
1575 return true;
1577 return false;
1580 /* Actual number of instructions used to make a shift by N. */
1581 static const char ashiftrt_insns[] =
1582 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
1584 /* Left shift and logical right shift are the same. */
1585 static const char shift_insns[] =
1586 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1588 /* Individual shift amounts needed to get the above length sequences.
1589 One bit right shifts clobber the T bit, so when possible, put one bit
1590 shifts in the middle of the sequence, so the ends are eligible for
1591 branch delay slots. */
1592 static const short shift_amounts[32][5] = {
1593 {0}, {1}, {2}, {2, 1},
1594 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
1595 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1596 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
1597 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1598 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1599 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1600 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1602 /* Likewise, but for shift amounts < 16, up to three highmost bits
1603 might be clobbered. This is typically used when combined with some
1604 kind of sign or zero extension. */
1606 static const char ext_shift_insns[] =
1607 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1609 static const short ext_shift_amounts[32][4] = {
1610 {0}, {1}, {2}, {2, 1},
1611 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
1612 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1613 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
1614 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1615 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1616 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1617 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1619 /* Assuming we have a value that has been sign-extended by at least one bit,
1620 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
1621 to shift it by N without data loss, and quicker than by other means? */
1622 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
1624 /* This is used in length attributes in sh.md to help compute the length
1625 of arbitrary constant shift instructions. */
1628 shift_insns_rtx (rtx insn)
1630 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1631 int shift_count = INTVAL (XEXP (set_src, 1));
1632 enum rtx_code shift_code = GET_CODE (set_src);
1634 switch (shift_code)
1636 case ASHIFTRT:
1637 return ashiftrt_insns[shift_count];
1638 case LSHIFTRT:
1639 case ASHIFT:
1640 return shift_insns[shift_count];
1641 default:
1642 abort ();
1646 /* Return the cost of a shift. */
1648 static inline int
1649 shiftcosts (rtx x)
1651 int value;
1653 if (TARGET_SHMEDIA)
1654 return 1;
1656 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
1658 if (GET_MODE (x) == DImode
1659 && GET_CODE (XEXP (x, 1)) == CONST_INT
1660 && INTVAL (XEXP (x, 1)) == 1)
1661 return 2;
1663 /* Everything else is invalid, because there is no pattern for it. */
1664 return 10000;
1666 /* If shift by a non constant, then this will be expensive. */
1667 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1668 return SH_DYNAMIC_SHIFT_COST;
1670 value = INTVAL (XEXP (x, 1));
1672 /* Otherwise, return the true cost in instructions. */
1673 if (GET_CODE (x) == ASHIFTRT)
1675 int cost = ashiftrt_insns[value];
1676 /* If SH3, then we put the constant in a reg and use shad. */
1677 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1678 cost = 1 + SH_DYNAMIC_SHIFT_COST;
1679 return cost;
1681 else
1682 return shift_insns[value];
1685 /* Return the cost of an AND operation. */
1687 static inline int
1688 andcosts (rtx x)
1690 int i;
1692 /* Anding with a register is a single cycle and instruction. */
1693 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1694 return 1;
1696 i = INTVAL (XEXP (x, 1));
1698 if (TARGET_SHMEDIA)
1700 if ((GET_CODE (XEXP (x, 1)) == CONST_INT
1701 && CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1702 || EXTRA_CONSTRAINT_C16 (XEXP (x, 1)))
1703 return 1;
1704 else
1705 return 2;
1708 /* These constants are single cycle extu.[bw] instructions. */
1709 if (i == 0xff || i == 0xffff)
1710 return 1;
1711 /* Constants that can be used in an and immediate instruction in a single
1712 cycle, but this requires r0, so make it a little more expensive. */
1713 if (CONST_OK_FOR_K08 (i))
1714 return 2;
1715 /* Constants that can be loaded with a mov immediate and an and.
1716 This case is probably unnecessary. */
1717 if (CONST_OK_FOR_I08 (i))
1718 return 2;
1719 /* Any other constants requires a 2 cycle pc-relative load plus an and.
1720 This case is probably unnecessary. */
1721 return 3;
1724 /* Return the cost of an addition or a subtraction. */
1726 static inline int
1727 addsubcosts (rtx x)
1729 /* Adding a register is a single cycle insn. */
1730 if (GET_CODE (XEXP (x, 1)) == REG
1731 || GET_CODE (XEXP (x, 1)) == SUBREG)
1732 return 1;
1734 /* Likewise for small constants. */
1735 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1736 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
1737 return 1;
1739 if (TARGET_SHMEDIA)
1740 switch (GET_CODE (XEXP (x, 1)))
1742 case CONST:
1743 case LABEL_REF:
1744 case SYMBOL_REF:
1745 return TARGET_SHMEDIA64 ? 5 : 3;
1747 case CONST_INT:
1748 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1749 return 2;
1750 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
1751 return 3;
1752 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
1753 return 4;
1755 /* Fall through. */
1756 default:
1757 return 5;
1760 /* Any other constant requires a 2 cycle pc-relative load plus an
1761 addition. */
1762 return 3;
1765 /* Return the cost of a multiply. */
1766 static inline int
1767 multcosts (rtx x ATTRIBUTE_UNUSED)
1769 if (TARGET_SHMEDIA)
1770 return 3;
1772 if (TARGET_SH2)
1774 /* We have a mul insn, so we can never take more than the mul and the
1775 read of the mac reg, but count more because of the latency and extra
1776 reg usage. */
1777 if (TARGET_SMALLCODE)
1778 return 2;
1779 return 3;
1782 /* If we're aiming at small code, then just count the number of
1783 insns in a multiply call sequence. */
1784 if (TARGET_SMALLCODE)
1785 return 5;
1787 /* Otherwise count all the insns in the routine we'd be calling too. */
1788 return 20;
1791 /* Compute a (partial) cost for rtx X. Return true if the complete
1792 cost has been computed, and false if subexpressions should be
1793 scanned. In either case, *TOTAL contains the cost result. */
1795 static bool
1796 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
1798 switch (code)
1800 case CONST_INT:
1801 if (TARGET_SHMEDIA)
1803 if (INTVAL (x) == 0)
1804 *total = 0;
1805 else if (outer_code == AND && and_operand ((x), DImode))
1806 *total = 0;
1807 else if ((outer_code == IOR || outer_code == XOR
1808 || outer_code == PLUS)
1809 && CONST_OK_FOR_I10 (INTVAL (x)))
1810 *total = 0;
1811 else if (CONST_OK_FOR_I16 (INTVAL (x)))
1812 *total = COSTS_N_INSNS (outer_code != SET);
1813 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
1814 *total = COSTS_N_INSNS (2);
1815 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
1816 *total = COSTS_N_INSNS (3);
1817 else
1818 *total = COSTS_N_INSNS (4);
1819 return true;
1821 if (CONST_OK_FOR_I08 (INTVAL (x)))
1822 *total = 0;
1823 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
1824 && CONST_OK_FOR_K08 (INTVAL (x)))
1825 *total = 1;
1826 else
1827 *total = 8;
1828 return true;
1830 case CONST:
1831 case LABEL_REF:
1832 case SYMBOL_REF:
1833 if (TARGET_SHMEDIA64)
1834 *total = COSTS_N_INSNS (4);
1835 else if (TARGET_SHMEDIA32)
1836 *total = COSTS_N_INSNS (2);
1837 else
1838 *total = 5;
1839 return true;
1841 case CONST_DOUBLE:
1842 if (TARGET_SHMEDIA)
1843 *total = COSTS_N_INSNS (4);
1844 else
1845 *total = 10;
1846 return true;
1848 case PLUS:
1849 *total = COSTS_N_INSNS (addsubcosts (x));
1850 return true;
1852 case AND:
1853 *total = COSTS_N_INSNS (andcosts (x));
1854 return true;
1856 case MULT:
1857 *total = COSTS_N_INSNS (multcosts (x));
1858 return true;
1860 case ASHIFT:
1861 case ASHIFTRT:
1862 case LSHIFTRT:
1863 *total = COSTS_N_INSNS (shiftcosts (x));
1864 return true;
1866 case DIV:
1867 case UDIV:
1868 case MOD:
1869 case UMOD:
1870 *total = COSTS_N_INSNS (20);
1871 return true;
1873 case FLOAT:
1874 case FIX:
1875 *total = 100;
1876 return true;
1878 default:
1879 return false;
1883 /* Compute the cost of an address. For the SH, all valid addresses are
1884 the same cost. Use a slightly higher cost for reg + reg addressing,
1885 since it increases pressure on r0. */
1887 static int
1888 sh_address_cost (rtx X)
1890 return (GET_CODE (X) == PLUS
1891 && ! CONSTANT_P (XEXP (X, 1))
1892 && ! TARGET_SHMEDIA ? 1 : 0);
1895 /* Code to expand a shift. */
1897 void
1898 gen_ashift (int type, int n, rtx reg)
1900 /* Negative values here come from the shift_amounts array. */
1901 if (n < 0)
1903 if (type == ASHIFT)
1904 type = LSHIFTRT;
1905 else
1906 type = ASHIFT;
1907 n = -n;
1910 switch (type)
1912 case ASHIFTRT:
1913 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
1914 break;
1915 case LSHIFTRT:
1916 if (n == 1)
1917 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
1918 else
1919 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
1920 break;
1921 case ASHIFT:
1922 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
1923 break;
1927 /* Same for HImode */
1929 void
1930 gen_ashift_hi (int type, int n, rtx reg)
1932 /* Negative values here come from the shift_amounts array. */
1933 if (n < 0)
1935 if (type == ASHIFT)
1936 type = LSHIFTRT;
1937 else
1938 type = ASHIFT;
1939 n = -n;
1942 switch (type)
1944 case ASHIFTRT:
1945 case LSHIFTRT:
1946 /* We don't have HImode right shift operations because using the
1947 ordinary 32 bit shift instructions for that doesn't generate proper
1948 zero/sign extension.
1949 gen_ashift_hi is only called in contexts where we know that the
1950 sign extension works out correctly. */
1952 int offset = 0;
1953 if (GET_CODE (reg) == SUBREG)
1955 offset = SUBREG_BYTE (reg);
1956 reg = SUBREG_REG (reg);
1958 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
1959 break;
1961 case ASHIFT:
1962 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
1963 break;
1967 /* Output RTL to split a constant shift into its component SH constant
1968 shift instructions. */
1970 void
1971 gen_shifty_op (int code, rtx *operands)
1973 int value = INTVAL (operands[2]);
1974 int max, i;
1976 /* Truncate the shift count in case it is out of bounds. */
1977 value = value & 0x1f;
1979 if (value == 31)
1981 if (code == LSHIFTRT)
1983 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
1984 emit_insn (gen_movt (operands[0]));
1985 return;
1987 else if (code == ASHIFT)
1989 /* There is a two instruction sequence for 31 bit left shifts,
1990 but it requires r0. */
1991 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
1993 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
1994 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
1995 return;
1999 else if (value == 0)
2001 /* This can happen when not optimizing. We must output something here
2002 to prevent the compiler from aborting in final.c after the try_split
2003 call. */
2004 emit_insn (gen_nop ());
2005 return;
2008 max = shift_insns[value];
2009 for (i = 0; i < max; i++)
2010 gen_ashift (code, shift_amounts[value][i], operands[0]);
2013 /* Same as above, but optimized for values where the topmost bits don't
2014 matter. */
2016 void
2017 gen_shifty_hi_op (int code, rtx *operands)
2019 int value = INTVAL (operands[2]);
2020 int max, i;
2021 void (*gen_fun) (int, int, rtx);
2023 /* This operation is used by and_shl for SImode values with a few
2024 high bits known to be cleared. */
2025 value &= 31;
2026 if (value == 0)
2028 emit_insn (gen_nop ());
2029 return;
2032 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2033 if (code == ASHIFT)
2035 max = ext_shift_insns[value];
2036 for (i = 0; i < max; i++)
2037 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2039 else
2040 /* When shifting right, emit the shifts in reverse order, so that
2041 solitary negative values come first. */
2042 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2043 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2046 /* Output RTL for an arithmetic right shift. */
2048 /* ??? Rewrite to use super-optimizer sequences. */
2051 expand_ashiftrt (rtx *operands)
2053 rtx sym;
2054 rtx wrk;
2055 char func[18];
2056 tree func_name;
2057 int value;
2059 if (TARGET_SH3)
2061 if (GET_CODE (operands[2]) != CONST_INT)
2063 rtx count = copy_to_mode_reg (SImode, operands[2]);
2064 emit_insn (gen_negsi2 (count, count));
2065 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2066 return 1;
2068 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2069 > 1 + SH_DYNAMIC_SHIFT_COST)
2071 rtx count
2072 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2073 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2074 return 1;
2077 if (GET_CODE (operands[2]) != CONST_INT)
2078 return 0;
2080 value = INTVAL (operands[2]) & 31;
2082 if (value == 31)
2084 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2085 return 1;
2087 else if (value >= 16 && value <= 19)
2089 wrk = gen_reg_rtx (SImode);
2090 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2091 value -= 16;
2092 while (value--)
2093 gen_ashift (ASHIFTRT, 1, wrk);
2094 emit_move_insn (operands[0], wrk);
2095 return 1;
2097 /* Expand a short sequence inline, longer call a magic routine. */
2098 else if (value <= 5)
2100 wrk = gen_reg_rtx (SImode);
2101 emit_move_insn (wrk, operands[1]);
2102 while (value--)
2103 gen_ashift (ASHIFTRT, 1, wrk);
2104 emit_move_insn (operands[0], wrk);
2105 return 1;
2108 wrk = gen_reg_rtx (Pmode);
2110 /* Load the value into an arg reg and call a helper. */
2111 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2112 sprintf (func, "__ashiftrt_r4_%d", value);
2113 func_name = get_identifier (func);
2114 sym = function_symbol (IDENTIFIER_POINTER (func_name));
2115 emit_move_insn (wrk, sym);
2116 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2117 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
2118 return 1;
2122 sh_dynamicalize_shift_p (rtx count)
2124 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
2127 /* Try to find a good way to implement the combiner pattern
2128 [(set (match_operand:SI 0 "register_operand" "r")
2129 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2130 (match_operand:SI 2 "const_int_operand" "n"))
2131 (match_operand:SI 3 "const_int_operand" "n"))) .
2132 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2133 return 0 for simple right / left or left/right shift combination.
2134 return 1 for a combination of shifts with zero_extend.
2135 return 2 for a combination of shifts with an AND that needs r0.
2136 return 3 for a combination of shifts with an AND that needs an extra
2137 scratch register, when the three highmost bits of the AND mask are clear.
2138 return 4 for a combination of shifts with an AND that needs an extra
2139 scratch register, when any of the three highmost bits of the AND mask
2140 is set.
2141 If ATTRP is set, store an initial right shift width in ATTRP[0],
2142 and the instruction length in ATTRP[1] . These values are not valid
2143 when returning 0.
2144 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2145 shift_amounts for the last shift value that is to be used before the
2146 sign extend. */
2148 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2150 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2151 int left = INTVAL (left_rtx), right;
2152 int best = 0;
2153 int cost, best_cost = 10000;
2154 int best_right = 0, best_len = 0;
2155 int i;
2156 int can_ext;
2158 if (left < 0 || left > 31)
2159 return 0;
2160 if (GET_CODE (mask_rtx) == CONST_INT)
2161 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2162 else
2163 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2164 /* Can this be expressed as a right shift / left shift pair? */
2165 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2166 right = exact_log2 (lsb);
2167 mask2 = ~(mask + lsb - 1);
2168 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2169 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
2170 if (! mask2)
2171 best_cost = shift_insns[right] + shift_insns[right + left];
2172 /* mask has no trailing zeroes <==> ! right */
2173 else if (! right && mask2 == ~(lsb2 - 1))
2175 int late_right = exact_log2 (lsb2);
2176 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2178 /* Try to use zero extend. */
2179 if (mask2 == ~(lsb2 - 1))
2181 int width, first;
2183 for (width = 8; width <= 16; width += 8)
2185 /* Can we zero-extend right away? */
2186 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2188 cost
2189 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2190 if (cost < best_cost)
2192 best = 1;
2193 best_cost = cost;
2194 best_right = right;
2195 best_len = cost;
2196 if (attrp)
2197 attrp[2] = -1;
2199 continue;
2201 /* ??? Could try to put zero extend into initial right shift,
2202 or even shift a bit left before the right shift. */
2203 /* Determine value of first part of left shift, to get to the
2204 zero extend cut-off point. */
2205 first = width - exact_log2 (lsb2) + right;
2206 if (first >= 0 && right + left - first >= 0)
2208 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2209 + ext_shift_insns[right + left - first];
2210 if (cost < best_cost)
2212 best = 1;
2213 best_cost = cost;
2214 best_right = right;
2215 best_len = cost;
2216 if (attrp)
2217 attrp[2] = first;
2222 /* Try to use r0 AND pattern */
2223 for (i = 0; i <= 2; i++)
2225 if (i > right)
2226 break;
2227 if (! CONST_OK_FOR_K08 (mask >> i))
2228 continue;
2229 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2230 if (cost < best_cost)
2232 best = 2;
2233 best_cost = cost;
2234 best_right = i;
2235 best_len = cost - 1;
2238 /* Try to use a scratch register to hold the AND operand. */
2239 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2240 for (i = 0; i <= 2; i++)
2242 if (i > right)
2243 break;
2244 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2245 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2246 if (cost < best_cost)
2248 best = 4 - can_ext;
2249 best_cost = cost;
2250 best_right = i;
2251 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2255 if (attrp)
2257 attrp[0] = best_right;
2258 attrp[1] = best_len;
2260 return best;
2263 /* This is used in length attributes of the unnamed instructions
2264 corresponding to shl_and_kind return values of 1 and 2. */
2266 shl_and_length (rtx insn)
2268 rtx set_src, left_rtx, mask_rtx;
2269 int attributes[3];
2271 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2272 left_rtx = XEXP (XEXP (set_src, 0), 1);
2273 mask_rtx = XEXP (set_src, 1);
2274 shl_and_kind (left_rtx, mask_rtx, attributes);
2275 return attributes[1];
2278 /* This is used in length attribute of the and_shl_scratch instruction. */
2281 shl_and_scr_length (rtx insn)
2283 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2284 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2285 rtx op = XEXP (set_src, 0);
2286 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2287 op = XEXP (XEXP (op, 0), 0);
2288 return len + shift_insns[INTVAL (XEXP (op, 1))];
2291 /* Generate rtl for instructions for which shl_and_kind advised a particular
2292 method of generating them, i.e. returned zero. */
2295 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2297 int attributes[3];
2298 unsigned HOST_WIDE_INT mask;
2299 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2300 int right, total_shift;
2301 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2303 right = attributes[0];
2304 total_shift = INTVAL (left_rtx) + right;
2305 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2306 switch (kind)
2308 default:
2309 return -1;
2310 case 1:
2312 int first = attributes[2];
2313 rtx operands[3];
2315 if (first < 0)
2317 emit_insn ((mask << right) <= 0xff
2318 ? gen_zero_extendqisi2 (dest,
2319 gen_lowpart (QImode, source))
2320 : gen_zero_extendhisi2 (dest,
2321 gen_lowpart (HImode, source)));
2322 source = dest;
2324 if (source != dest)
2325 emit_insn (gen_movsi (dest, source));
2326 operands[0] = dest;
2327 if (right)
2329 operands[2] = GEN_INT (right);
2330 gen_shifty_hi_op (LSHIFTRT, operands);
2332 if (first > 0)
2334 operands[2] = GEN_INT (first);
2335 gen_shifty_hi_op (ASHIFT, operands);
2336 total_shift -= first;
2337 mask <<= first;
2339 if (first >= 0)
2340 emit_insn (mask <= 0xff
2341 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
2342 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2343 if (total_shift > 0)
2345 operands[2] = GEN_INT (total_shift);
2346 gen_shifty_hi_op (ASHIFT, operands);
2348 break;
2350 case 4:
2351 shift_gen_fun = gen_shifty_op;
2352 case 3:
2353 /* If the topmost bit that matters is set, set the topmost bits
2354 that don't matter. This way, we might be able to get a shorter
2355 signed constant. */
2356 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
2357 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
2358 case 2:
2359 /* Don't expand fine-grained when combining, because that will
2360 make the pattern fail. */
2361 if (currently_expanding_to_rtl
2362 || reload_in_progress || reload_completed)
2364 rtx operands[3];
2366 /* Cases 3 and 4 should be handled by this split
2367 only while combining */
2368 if (kind > 2)
2369 abort ();
2370 if (right)
2372 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2373 source = dest;
2375 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
2376 if (total_shift)
2378 operands[0] = dest;
2379 operands[1] = dest;
2380 operands[2] = GEN_INT (total_shift);
2381 shift_gen_fun (ASHIFT, operands);
2383 break;
2385 else
2387 int neg = 0;
2388 if (kind != 4 && total_shift < 16)
2390 neg = -ext_shift_amounts[total_shift][1];
2391 if (neg > 0)
2392 neg -= ext_shift_amounts[total_shift][2];
2393 else
2394 neg = 0;
2396 emit_insn (gen_and_shl_scratch (dest, source,
2397 GEN_INT (right),
2398 GEN_INT (mask),
2399 GEN_INT (total_shift + neg),
2400 GEN_INT (neg)));
2401 emit_insn (gen_movsi (dest, dest));
2402 break;
2405 return 0;
2408 /* Try to find a good way to implement the combiner pattern
2409 [(set (match_operand:SI 0 "register_operand" "=r")
2410 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2411 (match_operand:SI 2 "const_int_operand" "n")
2412 (match_operand:SI 3 "const_int_operand" "n")
2413 (const_int 0)))
2414 (clobber (reg:SI T_REG))]
2415 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2416 return 0 for simple left / right shift combination.
2417 return 1 for left shift / 8 bit sign extend / left shift.
2418 return 2 for left shift / 16 bit sign extend / left shift.
2419 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2420 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2421 return 5 for left shift / 16 bit sign extend / right shift
2422 return 6 for < 8 bit sign extend / left shift.
2423 return 7 for < 8 bit sign extend / left shift / single right shift.
2424 If COSTP is nonzero, assign the calculated cost to *COSTP. */
2427 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
2429 int left, size, insize, ext;
2430 int cost = 0, best_cost;
2431 int kind;
2433 left = INTVAL (left_rtx);
2434 size = INTVAL (size_rtx);
2435 insize = size - left;
2436 if (insize <= 0)
2437 abort ();
2438 /* Default to left / right shift. */
2439 kind = 0;
2440 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
2441 if (size <= 16)
2443 /* 16 bit shift / sign extend / 16 bit shift */
2444 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2445 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2446 below, by alternative 3 or something even better. */
2447 if (cost < best_cost)
2449 kind = 5;
2450 best_cost = cost;
2453 /* Try a plain sign extend between two shifts. */
2454 for (ext = 16; ext >= insize; ext -= 8)
2456 if (ext <= size)
2458 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2459 if (cost < best_cost)
2461 kind = ext / (unsigned) 8;
2462 best_cost = cost;
2465 /* Check if we can do a sloppy shift with a final signed shift
2466 restoring the sign. */
2467 if (EXT_SHIFT_SIGNED (size - ext))
2468 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2469 /* If not, maybe it's still cheaper to do the second shift sloppy,
2470 and do a final sign extend? */
2471 else if (size <= 16)
2472 cost = ext_shift_insns[ext - insize] + 1
2473 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2474 else
2475 continue;
2476 if (cost < best_cost)
2478 kind = ext / (unsigned) 8 + 2;
2479 best_cost = cost;
2482 /* Check if we can sign extend in r0 */
2483 if (insize < 8)
2485 cost = 3 + shift_insns[left];
2486 if (cost < best_cost)
2488 kind = 6;
2489 best_cost = cost;
2491 /* Try the same with a final signed shift. */
2492 if (left < 31)
2494 cost = 3 + ext_shift_insns[left + 1] + 1;
2495 if (cost < best_cost)
2497 kind = 7;
2498 best_cost = cost;
2502 if (TARGET_SH3)
2504 /* Try to use a dynamic shift. */
2505 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2506 if (cost < best_cost)
2508 kind = 0;
2509 best_cost = cost;
2512 if (costp)
2513 *costp = cost;
2514 return kind;
2517 /* Function to be used in the length attribute of the instructions
2518 implementing this pattern. */
2521 shl_sext_length (rtx insn)
2523 rtx set_src, left_rtx, size_rtx;
2524 int cost;
2526 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2527 left_rtx = XEXP (XEXP (set_src, 0), 1);
2528 size_rtx = XEXP (set_src, 1);
2529 shl_sext_kind (left_rtx, size_rtx, &cost);
2530 return cost;
2533 /* Generate rtl for this pattern */
2536 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
2538 int kind;
2539 int left, size, insize, cost;
2540 rtx operands[3];
2542 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2543 left = INTVAL (left_rtx);
2544 size = INTVAL (size_rtx);
2545 insize = size - left;
2546 switch (kind)
2548 case 1:
2549 case 2:
2550 case 3:
2551 case 4:
2553 int ext = kind & 1 ? 8 : 16;
2554 int shift2 = size - ext;
2556 /* Don't expand fine-grained when combining, because that will
2557 make the pattern fail. */
2558 if (! currently_expanding_to_rtl
2559 && ! reload_in_progress && ! reload_completed)
2561 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2562 emit_insn (gen_movsi (dest, source));
2563 break;
2565 if (dest != source)
2566 emit_insn (gen_movsi (dest, source));
2567 operands[0] = dest;
2568 if (ext - insize)
2570 operands[2] = GEN_INT (ext - insize);
2571 gen_shifty_hi_op (ASHIFT, operands);
2573 emit_insn (kind & 1
2574 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2575 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2576 if (kind <= 2)
2578 if (shift2)
2580 operands[2] = GEN_INT (shift2);
2581 gen_shifty_op (ASHIFT, operands);
2584 else
2586 if (shift2 > 0)
2588 if (EXT_SHIFT_SIGNED (shift2))
2590 operands[2] = GEN_INT (shift2 + 1);
2591 gen_shifty_op (ASHIFT, operands);
2592 operands[2] = const1_rtx;
2593 gen_shifty_op (ASHIFTRT, operands);
2594 break;
2596 operands[2] = GEN_INT (shift2);
2597 gen_shifty_hi_op (ASHIFT, operands);
2599 else if (shift2)
2601 operands[2] = GEN_INT (-shift2);
2602 gen_shifty_hi_op (LSHIFTRT, operands);
2604 emit_insn (size <= 8
2605 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2606 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2608 break;
2610 case 5:
2612 int i = 16 - size;
2613 if (! currently_expanding_to_rtl
2614 && ! reload_in_progress && ! reload_completed)
2615 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2616 else
2618 operands[0] = dest;
2619 operands[2] = GEN_INT (16 - insize);
2620 gen_shifty_hi_op (ASHIFT, operands);
2621 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2623 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2624 while (--i >= 0)
2625 gen_ashift (ASHIFTRT, 1, dest);
2626 break;
2628 case 6:
2629 case 7:
2630 /* Don't expand fine-grained when combining, because that will
2631 make the pattern fail. */
2632 if (! currently_expanding_to_rtl
2633 && ! reload_in_progress && ! reload_completed)
2635 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2636 emit_insn (gen_movsi (dest, source));
2637 break;
2639 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2640 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2641 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2642 operands[0] = dest;
2643 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2644 gen_shifty_op (ASHIFT, operands);
2645 if (kind == 7)
2646 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
2647 break;
2648 default:
2649 return -1;
2651 return 0;
2654 /* Prefix a symbol_ref name with "datalabel". */
2657 gen_datalabel_ref (rtx sym)
2659 if (GET_CODE (sym) == LABEL_REF)
2660 return gen_rtx_CONST (GET_MODE (sym),
2661 gen_rtx_UNSPEC (GET_MODE (sym),
2662 gen_rtvec (1, sym),
2663 UNSPEC_DATALABEL));
2665 if (GET_CODE (sym) != SYMBOL_REF)
2666 abort ();
2668 return sym;
2672 /* The SH cannot load a large constant into a register, constants have to
2673 come from a pc relative load. The reference of a pc relative load
2674 instruction must be less than 1k infront of the instruction. This
2675 means that we often have to dump a constant inside a function, and
2676 generate code to branch around it.
2678 It is important to minimize this, since the branches will slow things
2679 down and make things bigger.
2681 Worst case code looks like:
2683 mov.l L1,rn
2684 bra L2
2686 align
2687 L1: .long value
2691 mov.l L3,rn
2692 bra L4
2694 align
2695 L3: .long value
2699 We fix this by performing a scan before scheduling, which notices which
2700 instructions need to have their operands fetched from the constant table
2701 and builds the table.
2703 The algorithm is:
2705 scan, find an instruction which needs a pcrel move. Look forward, find the
2706 last barrier which is within MAX_COUNT bytes of the requirement.
2707 If there isn't one, make one. Process all the instructions between
2708 the find and the barrier.
2710 In the above example, we can tell that L3 is within 1k of L1, so
2711 the first move can be shrunk from the 3 insn+constant sequence into
2712 just 1 insn, and the constant moved to L3 to make:
2714 mov.l L1,rn
2716 mov.l L3,rn
2717 bra L4
2719 align
2720 L3:.long value
2721 L4:.long value
2723 Then the second move becomes the target for the shortening process. */
2725 typedef struct
2727 rtx value; /* Value in table. */
2728 rtx label; /* Label of value. */
2729 rtx wend; /* End of window. */
2730 enum machine_mode mode; /* Mode of value. */
2732 /* True if this constant is accessed as part of a post-increment
2733 sequence. Note that HImode constants are never accessed in this way. */
2734 bool part_of_sequence_p;
2735 } pool_node;
2737 /* The maximum number of constants that can fit into one pool, since
2738 the pc relative range is 0...1020 bytes and constants are at least 4
2739 bytes long. */
2741 #define MAX_POOL_SIZE (1020/4)
2742 static pool_node pool_vector[MAX_POOL_SIZE];
2743 static int pool_size;
2744 static rtx pool_window_label;
2745 static int pool_window_last;
2747 /* ??? If we need a constant in HImode which is the truncated value of a
2748 constant we need in SImode, we could combine the two entries thus saving
2749 two bytes. Is this common enough to be worth the effort of implementing
2750 it? */
2752 /* ??? This stuff should be done at the same time that we shorten branches.
2753 As it is now, we must assume that all branches are the maximum size, and
2754 this causes us to almost always output constant pools sooner than
2755 necessary. */
2757 /* Add a constant to the pool and return its label. */
2759 static rtx
2760 add_constant (rtx x, enum machine_mode mode, rtx last_value)
2762 int i;
2763 rtx lab, new, ref, newref;
2765 /* First see if we've already got it. */
2766 for (i = 0; i < pool_size; i++)
2768 if (x->code == pool_vector[i].value->code
2769 && mode == pool_vector[i].mode)
2771 if (x->code == CODE_LABEL)
2773 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
2774 continue;
2776 if (rtx_equal_p (x, pool_vector[i].value))
2778 lab = new = 0;
2779 if (! last_value
2780 || ! i
2781 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
2783 new = gen_label_rtx ();
2784 LABEL_REFS (new) = pool_vector[i].label;
2785 pool_vector[i].label = lab = new;
2787 if (lab && pool_window_label)
2789 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2790 ref = pool_vector[pool_window_last].wend;
2791 LABEL_NEXTREF (newref) = ref;
2792 pool_vector[pool_window_last].wend = newref;
2794 if (new)
2795 pool_window_label = new;
2796 pool_window_last = i;
2797 return lab;
2802 /* Need a new one. */
2803 pool_vector[pool_size].value = x;
2804 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
2806 lab = 0;
2807 pool_vector[pool_size - 1].part_of_sequence_p = true;
2809 else
2810 lab = gen_label_rtx ();
2811 pool_vector[pool_size].mode = mode;
2812 pool_vector[pool_size].label = lab;
2813 pool_vector[pool_size].wend = NULL_RTX;
2814 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
2815 if (lab && pool_window_label)
2817 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2818 ref = pool_vector[pool_window_last].wend;
2819 LABEL_NEXTREF (newref) = ref;
2820 pool_vector[pool_window_last].wend = newref;
2822 if (lab)
2823 pool_window_label = lab;
2824 pool_window_last = pool_size;
2825 pool_size++;
2826 return lab;
2829 /* Output the literal table. START, if nonzero, is the first instruction
2830 this table is needed for, and also indicates that there is at least one
2831 casesi_worker_2 instruction; We have to emit the operand3 labels from
2832 these insns at a 4-byte aligned position. BARRIER is the barrier
2833 after which we are to place the table. */
2835 static void
2836 dump_table (rtx start, rtx barrier)
2838 rtx scan = barrier;
2839 int i;
2840 int need_align = 1;
2841 rtx lab, ref;
2842 int have_df = 0;
2844 /* Do two passes, first time dump out the HI sized constants. */
2846 for (i = 0; i < pool_size; i++)
2848 pool_node *p = &pool_vector[i];
2850 if (p->mode == HImode)
2852 if (need_align)
2854 scan = emit_insn_after (gen_align_2 (), scan);
2855 need_align = 0;
2857 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2858 scan = emit_label_after (lab, scan);
2859 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
2860 scan);
2861 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2863 lab = XEXP (ref, 0);
2864 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2867 else if (p->mode == DFmode)
2868 have_df = 1;
2871 need_align = 1;
2873 if (start)
2875 scan = emit_insn_after (gen_align_4 (), scan);
2876 need_align = 0;
2877 for (; start != barrier; start = NEXT_INSN (start))
2878 if (GET_CODE (start) == INSN
2879 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
2881 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
2882 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
2884 scan = emit_label_after (lab, scan);
2887 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
2889 rtx align_insn = NULL_RTX;
2891 scan = emit_label_after (gen_label_rtx (), scan);
2892 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2893 need_align = 0;
2895 for (i = 0; i < pool_size; i++)
2897 pool_node *p = &pool_vector[i];
2899 switch (p->mode)
2901 case HImode:
2902 break;
2903 case SImode:
2904 case SFmode:
2905 if (align_insn && !p->part_of_sequence_p)
2907 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2908 emit_label_before (lab, align_insn);
2909 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
2910 align_insn);
2911 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2913 lab = XEXP (ref, 0);
2914 emit_insn_before (gen_consttable_window_end (lab),
2915 align_insn);
2917 delete_insn (align_insn);
2918 align_insn = NULL_RTX;
2919 continue;
2921 else
2923 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2924 scan = emit_label_after (lab, scan);
2925 scan = emit_insn_after (gen_consttable_4 (p->value,
2926 const0_rtx), scan);
2927 need_align = ! need_align;
2929 break;
2930 case DFmode:
2931 if (need_align)
2933 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2934 align_insn = scan;
2935 need_align = 0;
2937 case DImode:
2938 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2939 scan = emit_label_after (lab, scan);
2940 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2941 scan);
2942 break;
2943 default:
2944 abort ();
2945 break;
2948 if (p->mode != HImode)
2950 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2952 lab = XEXP (ref, 0);
2953 scan = emit_insn_after (gen_consttable_window_end (lab),
2954 scan);
2959 pool_size = 0;
2962 for (i = 0; i < pool_size; i++)
2964 pool_node *p = &pool_vector[i];
2966 switch (p->mode)
2968 case HImode:
2969 break;
2970 case SImode:
2971 case SFmode:
2972 if (need_align)
2974 need_align = 0;
2975 scan = emit_label_after (gen_label_rtx (), scan);
2976 scan = emit_insn_after (gen_align_4 (), scan);
2978 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2979 scan = emit_label_after (lab, scan);
2980 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
2981 scan);
2982 break;
2983 case DFmode:
2984 case DImode:
2985 if (need_align)
2987 need_align = 0;
2988 scan = emit_label_after (gen_label_rtx (), scan);
2989 scan = emit_insn_after (gen_align_4 (), scan);
2991 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2992 scan = emit_label_after (lab, scan);
2993 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2994 scan);
2995 break;
2996 default:
2997 abort ();
2998 break;
3001 if (p->mode != HImode)
3003 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3005 lab = XEXP (ref, 0);
3006 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3011 scan = emit_insn_after (gen_consttable_end (), scan);
3012 scan = emit_barrier_after (scan);
3013 pool_size = 0;
3014 pool_window_label = NULL_RTX;
3015 pool_window_last = 0;
3018 /* Return nonzero if constant would be an ok source for a
3019 mov.w instead of a mov.l. */
3021 static int
3022 hi_const (rtx src)
3024 return (GET_CODE (src) == CONST_INT
3025 && INTVAL (src) >= -32768
3026 && INTVAL (src) <= 32767);
3029 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3031 /* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the
3032 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
3033 need to fix it if the input value is CONST_OK_FOR_I08. */
3035 static int
3036 broken_move (rtx insn)
3038 if (GET_CODE (insn) == INSN)
3040 rtx pat = PATTERN (insn);
3041 if (GET_CODE (pat) == PARALLEL)
3042 pat = XVECEXP (pat, 0, 0);
3043 if (GET_CODE (pat) == SET
3044 /* We can load any 8 bit value if we don't care what the high
3045 order bits end up as. */
3046 && GET_MODE (SET_DEST (pat)) != QImode
3047 && (CONSTANT_P (SET_SRC (pat))
3048 /* Match mova_const. */
3049 || (GET_CODE (SET_SRC (pat)) == UNSPEC
3050 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3051 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
3052 && ! (TARGET_SH2E
3053 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3054 && (fp_zero_operand (SET_SRC (pat))
3055 || fp_one_operand (SET_SRC (pat)))
3056 /* ??? If this is a -m4 or -m4-single compilation, in general
3057 we don't know the current setting of fpscr, so disable fldi.
3058 There is an exception if this was a register-register move
3059 before reload - and hence it was ascertained that we have
3060 single precision setting - and in a post-reload optimization
3061 we changed this to do a constant load. In that case
3062 we don't have an r0 clobber, hence we must use fldi. */
3063 && (! TARGET_SH4 || TARGET_FMOVD
3064 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3065 == SCRATCH))
3066 && GET_CODE (SET_DEST (pat)) == REG
3067 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3068 && ! (TARGET_SH2A
3069 && GET_MODE (SET_DEST (pat)) == SImode
3070 && GET_CODE (SET_SRC (pat)) == CONST_INT
3071 && CONST_OK_FOR_I20 (INTVAL (SET_SRC (pat))))
3072 && (GET_CODE (SET_SRC (pat)) != CONST_INT
3073 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
3074 return 1;
3077 return 0;
3080 static int
3081 mova_p (rtx insn)
3083 return (GET_CODE (insn) == INSN
3084 && GET_CODE (PATTERN (insn)) == SET
3085 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3086 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3087 /* Don't match mova_const. */
3088 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
3091 /* Fix up a mova from a switch that went out of range. */
3092 static void
3093 fixup_mova (rtx mova)
3095 if (! flag_pic)
3097 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3098 INSN_CODE (mova) = -1;
3100 else
3102 rtx worker = mova;
3103 rtx lab = gen_label_rtx ();
3104 rtx wpat, wpat0, wpat1, wsrc, diff;
3108 worker = NEXT_INSN (worker);
3109 if (! worker
3110 || GET_CODE (worker) == CODE_LABEL
3111 || GET_CODE (worker) == JUMP_INSN)
3112 abort ();
3113 } while (recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3114 wpat = PATTERN (worker);
3115 wpat0 = XVECEXP (wpat, 0, 0);
3116 wpat1 = XVECEXP (wpat, 0, 1);
3117 wsrc = SET_SRC (wpat0);
3118 PATTERN (worker) = (gen_casesi_worker_2
3119 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3120 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3121 XEXP (wpat1, 0)));
3122 INSN_CODE (worker) = -1;
3123 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3124 gen_rtx_LABEL_REF (Pmode, lab));
3125 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3126 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3127 INSN_CODE (mova) = -1;
3131 /* Find the last barrier from insn FROM which is close enough to hold the
3132 constant pool. If we can't find one, then create one near the end of
3133 the range. */
3135 static rtx
3136 find_barrier (int num_mova, rtx mova, rtx from)
3138 int count_si = 0;
3139 int count_hi = 0;
3140 int found_hi = 0;
3141 int found_si = 0;
3142 int found_di = 0;
3143 int hi_align = 2;
3144 int si_align = 2;
3145 int leading_mova = num_mova;
3146 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3147 int si_limit;
3148 int hi_limit;
3150 /* For HImode: range is 510, add 4 because pc counts from address of
3151 second instruction after this one, subtract 2 for the jump instruction
3152 that we may need to emit before the table, subtract 2 for the instruction
3153 that fills the jump delay slot (in very rare cases, reorg will take an
3154 instruction from after the constant pool or will leave the delay slot
3155 empty). This gives 510.
3156 For SImode: range is 1020, add 4 because pc counts from address of
3157 second instruction after this one, subtract 2 in case pc is 2 byte
3158 aligned, subtract 2 for the jump instruction that we may need to emit
3159 before the table, subtract 2 for the instruction that fills the jump
3160 delay slot. This gives 1018. */
3162 /* The branch will always be shortened now that the reference address for
3163 forward branches is the successor address, thus we need no longer make
3164 adjustments to the [sh]i_limit for -O0. */
3166 si_limit = 1018;
3167 hi_limit = 510;
3169 while (from && count_si < si_limit && count_hi < hi_limit)
3171 int inc = get_attr_length (from);
3172 int new_align = 1;
3174 if (GET_CODE (from) == CODE_LABEL)
3176 if (optimize)
3177 new_align = 1 << label_to_alignment (from);
3178 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3179 new_align = 1 << barrier_align (from);
3180 else
3181 new_align = 1;
3182 inc = 0;
3185 if (GET_CODE (from) == BARRIER)
3188 found_barrier = from;
3190 /* If we are at the end of the function, or in front of an alignment
3191 instruction, we need not insert an extra alignment. We prefer
3192 this kind of barrier. */
3193 if (barrier_align (from) > 2)
3194 good_barrier = from;
3197 if (broken_move (from))
3199 rtx pat, src, dst;
3200 enum machine_mode mode;
3202 pat = PATTERN (from);
3203 if (GET_CODE (pat) == PARALLEL)
3204 pat = XVECEXP (pat, 0, 0);
3205 src = SET_SRC (pat);
3206 dst = SET_DEST (pat);
3207 mode = GET_MODE (dst);
3209 /* We must explicitly check the mode, because sometimes the
3210 front end will generate code to load unsigned constants into
3211 HImode targets without properly sign extending them. */
3212 if (mode == HImode
3213 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3215 found_hi += 2;
3216 /* We put the short constants before the long constants, so
3217 we must count the length of short constants in the range
3218 for the long constants. */
3219 /* ??? This isn't optimal, but is easy to do. */
3220 si_limit -= 2;
3222 else
3224 /* We dump DF/DI constants before SF/SI ones, because
3225 the limit is the same, but the alignment requirements
3226 are higher. We may waste up to 4 additional bytes
3227 for alignment, and the DF/DI constant may have
3228 another SF/SI constant placed before it. */
3229 if (TARGET_SHCOMPACT
3230 && ! found_di
3231 && (mode == DFmode || mode == DImode))
3233 found_di = 1;
3234 si_limit -= 8;
3236 while (si_align > 2 && found_si + si_align - 2 > count_si)
3237 si_align >>= 1;
3238 if (found_si > count_si)
3239 count_si = found_si;
3240 found_si += GET_MODE_SIZE (mode);
3241 if (num_mova)
3242 si_limit -= GET_MODE_SIZE (mode);
3245 /* See the code in machine_dependent_reorg, which has a similar if
3246 statement that generates a new mova insn in many cases. */
3247 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
3248 inc += 2;
3251 if (mova_p (from))
3253 if (! num_mova++)
3255 leading_mova = 0;
3256 mova = from;
3257 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
3259 if (found_si > count_si)
3260 count_si = found_si;
3262 else if (GET_CODE (from) == JUMP_INSN
3263 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3264 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3266 if (num_mova)
3267 num_mova--;
3268 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3270 /* We have just passed the barrier in front of the
3271 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3272 the ADDR_DIFF_VEC is accessed as data, just like our pool
3273 constants, this is a good opportunity to accommodate what
3274 we have gathered so far.
3275 If we waited any longer, we could end up at a barrier in
3276 front of code, which gives worse cache usage for separated
3277 instruction / data caches. */
3278 good_barrier = found_barrier;
3279 break;
3281 else
3283 rtx body = PATTERN (from);
3284 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
3287 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3288 else if (GET_CODE (from) == JUMP_INSN
3289 && ! TARGET_SH2
3290 && ! TARGET_SMALLCODE)
3291 new_align = 4;
3293 if (found_si)
3295 count_si += inc;
3296 if (new_align > si_align)
3298 si_limit -= (count_si - 1) & (new_align - si_align);
3299 si_align = new_align;
3301 count_si = (count_si + new_align - 1) & -new_align;
3303 if (found_hi)
3305 count_hi += inc;
3306 if (new_align > hi_align)
3308 hi_limit -= (count_hi - 1) & (new_align - hi_align);
3309 hi_align = new_align;
3311 count_hi = (count_hi + new_align - 1) & -new_align;
3313 from = NEXT_INSN (from);
3316 if (num_mova)
3318 if (leading_mova)
3320 /* Try as we might, the leading mova is out of range. Change
3321 it into a load (which will become a pcload) and retry. */
3322 fixup_mova (mova);
3323 return find_barrier (0, 0, mova);
3325 else
3327 /* Insert the constant pool table before the mova instruction,
3328 to prevent the mova label reference from going out of range. */
3329 from = mova;
3330 good_barrier = found_barrier = barrier_before_mova;
3334 if (found_barrier)
3336 if (good_barrier && next_real_insn (found_barrier))
3337 found_barrier = good_barrier;
3339 else
3341 /* We didn't find a barrier in time to dump our stuff,
3342 so we'll make one. */
3343 rtx label = gen_label_rtx ();
3345 /* If we exceeded the range, then we must back up over the last
3346 instruction we looked at. Otherwise, we just need to undo the
3347 NEXT_INSN at the end of the loop. */
3348 if (count_hi > hi_limit || count_si > si_limit)
3349 from = PREV_INSN (PREV_INSN (from));
3350 else
3351 from = PREV_INSN (from);
3353 /* Walk back to be just before any jump or label.
3354 Putting it before a label reduces the number of times the branch
3355 around the constant pool table will be hit. Putting it before
3356 a jump makes it more likely that the bra delay slot will be
3357 filled. */
3358 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
3359 || GET_CODE (from) == CODE_LABEL)
3360 from = PREV_INSN (from);
3362 from = emit_jump_insn_after (gen_jump (label), from);
3363 JUMP_LABEL (from) = label;
3364 LABEL_NUSES (label) = 1;
3365 found_barrier = emit_barrier_after (from);
3366 emit_label_after (label, found_barrier);
3369 return found_barrier;
3372 /* If the instruction INSN is implemented by a special function, and we can
3373 positively find the register that is used to call the sfunc, and this
3374 register is not used anywhere else in this instruction - except as the
3375 destination of a set, return this register; else, return 0. */
3377 sfunc_uses_reg (rtx insn)
3379 int i;
3380 rtx pattern, part, reg_part, reg;
3382 if (GET_CODE (insn) != INSN)
3383 return 0;
3384 pattern = PATTERN (insn);
3385 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
3386 return 0;
3388 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3390 part = XVECEXP (pattern, 0, i);
3391 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
3392 reg_part = part;
3394 if (! reg_part)
3395 return 0;
3396 reg = XEXP (reg_part, 0);
3397 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
3399 part = XVECEXP (pattern, 0, i);
3400 if (part == reg_part || GET_CODE (part) == CLOBBER)
3401 continue;
3402 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
3403 && GET_CODE (SET_DEST (part)) == REG)
3404 ? SET_SRC (part) : part)))
3405 return 0;
3407 return reg;
3410 /* See if the only way in which INSN uses REG is by calling it, or by
3411 setting it while calling it. Set *SET to a SET rtx if the register
3412 is set by INSN. */
3414 static int
3415 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
3417 rtx pattern, reg2;
3419 *set = NULL_RTX;
3421 reg2 = sfunc_uses_reg (insn);
3422 if (reg2 && REGNO (reg2) == REGNO (reg))
3424 pattern = single_set (insn);
3425 if (pattern
3426 && GET_CODE (SET_DEST (pattern)) == REG
3427 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3428 *set = pattern;
3429 return 0;
3431 if (GET_CODE (insn) != CALL_INSN)
3433 /* We don't use rtx_equal_p because we don't care if the mode is
3434 different. */
3435 pattern = single_set (insn);
3436 if (pattern
3437 && GET_CODE (SET_DEST (pattern)) == REG
3438 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3440 rtx par, part;
3441 int i;
3443 *set = pattern;
3444 par = PATTERN (insn);
3445 if (GET_CODE (par) == PARALLEL)
3446 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
3448 part = XVECEXP (par, 0, i);
3449 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
3450 return 1;
3452 return reg_mentioned_p (reg, SET_SRC (pattern));
3455 return 1;
3458 pattern = PATTERN (insn);
3460 if (GET_CODE (pattern) == PARALLEL)
3462 int i;
3464 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3465 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
3466 return 1;
3467 pattern = XVECEXP (pattern, 0, 0);
3470 if (GET_CODE (pattern) == SET)
3472 if (reg_mentioned_p (reg, SET_DEST (pattern)))
3474 /* We don't use rtx_equal_p, because we don't care if the
3475 mode is different. */
3476 if (GET_CODE (SET_DEST (pattern)) != REG
3477 || REGNO (reg) != REGNO (SET_DEST (pattern)))
3478 return 1;
3480 *set = pattern;
3483 pattern = SET_SRC (pattern);
3486 if (GET_CODE (pattern) != CALL
3487 || GET_CODE (XEXP (pattern, 0)) != MEM
3488 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
3489 return 1;
3491 return 0;
3494 /* Given a X, a pattern of an insn or a part of it, return a mask of used
3495 general registers. Bits 0..15 mean that the respective registers
3496 are used as inputs in the instruction. Bits 16..31 mean that the
3497 registers 0..15, respectively, are used as outputs, or are clobbered.
3498 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
3500 regs_used (rtx x, int is_dest)
3502 enum rtx_code code;
3503 const char *fmt;
3504 int i, used = 0;
3506 if (! x)
3507 return used;
3508 code = GET_CODE (x);
3509 switch (code)
3511 case REG:
3512 if (REGNO (x) < 16)
3513 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3514 << (REGNO (x) + is_dest));
3515 return 0;
3516 case SUBREG:
3518 rtx y = SUBREG_REG (x);
3520 if (GET_CODE (y) != REG)
3521 break;
3522 if (REGNO (y) < 16)
3523 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3524 << (REGNO (y) +
3525 subreg_regno_offset (REGNO (y),
3526 GET_MODE (y),
3527 SUBREG_BYTE (x),
3528 GET_MODE (x)) + is_dest));
3529 return 0;
3531 case SET:
3532 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
3533 case RETURN:
3534 /* If there was a return value, it must have been indicated with USE. */
3535 return 0x00ffff00;
3536 case CLOBBER:
3537 is_dest = 1;
3538 break;
3539 case MEM:
3540 is_dest = 0;
3541 break;
3542 case CALL:
3543 used |= 0x00ff00f0;
3544 break;
3545 default:
3546 break;
3549 fmt = GET_RTX_FORMAT (code);
3551 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3553 if (fmt[i] == 'E')
3555 register int j;
3556 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3557 used |= regs_used (XVECEXP (x, i, j), is_dest);
3559 else if (fmt[i] == 'e')
3560 used |= regs_used (XEXP (x, i), is_dest);
3562 return used;
3565 /* Create an instruction that prevents redirection of a conditional branch
3566 to the destination of the JUMP with address ADDR.
3567 If the branch needs to be implemented as an indirect jump, try to find
3568 a scratch register for it.
3569 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3570 If any preceding insn that doesn't fit into a delay slot is good enough,
3571 pass 1. Pass 2 if a definite blocking insn is needed.
3572 -1 is used internally to avoid deep recursion.
3573 If a blocking instruction is made or recognized, return it. */
3575 static rtx
3576 gen_block_redirect (rtx jump, int addr, int need_block)
3578 int dead = 0;
3579 rtx prev = prev_nonnote_insn (jump);
3580 rtx dest;
3582 /* First, check if we already have an instruction that satisfies our need. */
3583 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3585 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3586 return prev;
3587 if (GET_CODE (PATTERN (prev)) == USE
3588 || GET_CODE (PATTERN (prev)) == CLOBBER
3589 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3590 prev = jump;
3591 else if ((need_block &= ~1) < 0)
3592 return prev;
3593 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
3594 need_block = 0;
3596 if (GET_CODE (PATTERN (jump)) == RETURN)
3598 if (! need_block)
3599 return prev;
3600 /* Reorg even does nasty things with return insns that cause branches
3601 to go out of range - see find_end_label and callers. */
3602 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
3604 /* We can't use JUMP_LABEL here because it might be undefined
3605 when not optimizing. */
3606 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
3607 /* If the branch is out of range, try to find a scratch register for it. */
3608 if (optimize
3609 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3610 > 4092 + 4098))
3612 rtx scan;
3613 /* Don't look for the stack pointer as a scratch register,
3614 it would cause trouble if an interrupt occurred. */
3615 unsigned try = 0x7fff, used;
3616 int jump_left = flag_expensive_optimizations + 1;
3618 /* It is likely that the most recent eligible instruction is wanted for
3619 the delay slot. Therefore, find out which registers it uses, and
3620 try to avoid using them. */
3622 for (scan = jump; (scan = PREV_INSN (scan)); )
3624 enum rtx_code code;
3626 if (INSN_DELETED_P (scan))
3627 continue;
3628 code = GET_CODE (scan);
3629 if (code == CODE_LABEL || code == JUMP_INSN)
3630 break;
3631 if (code == INSN
3632 && GET_CODE (PATTERN (scan)) != USE
3633 && GET_CODE (PATTERN (scan)) != CLOBBER
3634 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
3636 try &= ~regs_used (PATTERN (scan), 0);
3637 break;
3640 for (used = dead = 0, scan = JUMP_LABEL (jump);
3641 (scan = NEXT_INSN (scan)); )
3643 enum rtx_code code;
3645 if (INSN_DELETED_P (scan))
3646 continue;
3647 code = GET_CODE (scan);
3648 if (INSN_P (scan))
3650 used |= regs_used (PATTERN (scan), 0);
3651 if (code == CALL_INSN)
3652 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
3653 dead |= (used >> 16) & ~used;
3654 if (dead & try)
3656 dead &= try;
3657 break;
3659 if (code == JUMP_INSN)
3661 if (jump_left-- && simplejump_p (scan))
3662 scan = JUMP_LABEL (scan);
3663 else
3664 break;
3668 /* Mask out the stack pointer again, in case it was
3669 the only 'free' register we have found. */
3670 dead &= 0x7fff;
3672 /* If the immediate destination is still in range, check for possible
3673 threading with a jump beyond the delay slot insn.
3674 Don't check if we are called recursively; the jump has been or will be
3675 checked in a different invocation then. */
3677 else if (optimize && need_block >= 0)
3679 rtx next = next_active_insn (next_active_insn (dest));
3680 if (next && GET_CODE (next) == JUMP_INSN
3681 && GET_CODE (PATTERN (next)) == SET
3682 && recog_memoized (next) == CODE_FOR_jump_compact)
3684 dest = JUMP_LABEL (next);
3685 if (dest
3686 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3687 > 4092 + 4098))
3688 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
3692 if (dead)
3694 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
3696 /* It would be nice if we could convert the jump into an indirect
3697 jump / far branch right now, and thus exposing all constituent
3698 instructions to further optimization. However, reorg uses
3699 simplejump_p to determine if there is an unconditional jump where
3700 it should try to schedule instructions from the target of the
3701 branch; simplejump_p fails for indirect jumps even if they have
3702 a JUMP_LABEL. */
3703 rtx insn = emit_insn_before (gen_indirect_jump_scratch
3704 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
3705 , jump);
3706 /* ??? We would like this to have the scope of the jump, but that
3707 scope will change when a delay slot insn of an inner scope is added.
3708 Hence, after delay slot scheduling, we'll have to expect
3709 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
3710 the jump. */
3712 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
3713 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
3714 return insn;
3716 else if (need_block)
3717 /* We can't use JUMP_LABEL here because it might be undefined
3718 when not optimizing. */
3719 return emit_insn_before (gen_block_branch_redirect
3720 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
3721 , jump);
3722 return prev;
3725 #define CONDJUMP_MIN -252
3726 #define CONDJUMP_MAX 262
3727 struct far_branch
3729 /* A label (to be placed) in front of the jump
3730 that jumps to our ultimate destination. */
3731 rtx near_label;
3732 /* Where we are going to insert it if we cannot move the jump any farther,
3733 or the jump itself if we have picked up an existing jump. */
3734 rtx insert_place;
3735 /* The ultimate destination. */
3736 rtx far_label;
3737 struct far_branch *prev;
3738 /* If the branch has already been created, its address;
3739 else the address of its first prospective user. */
3740 int address;
3743 static void gen_far_branch (struct far_branch *);
3744 enum mdep_reorg_phase_e mdep_reorg_phase;
3745 static void
3746 gen_far_branch (struct far_branch *bp)
3748 rtx insn = bp->insert_place;
3749 rtx jump;
3750 rtx label = gen_label_rtx ();
3752 emit_label_after (label, insn);
3753 if (bp->far_label)
3755 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
3756 LABEL_NUSES (bp->far_label)++;
3758 else
3759 jump = emit_jump_insn_after (gen_return (), insn);
3760 /* Emit a barrier so that reorg knows that any following instructions
3761 are not reachable via a fall-through path.
3762 But don't do this when not optimizing, since we wouldn't suppress the
3763 alignment for the barrier then, and could end up with out-of-range
3764 pc-relative loads. */
3765 if (optimize)
3766 emit_barrier_after (jump);
3767 emit_label_after (bp->near_label, insn);
3768 JUMP_LABEL (jump) = bp->far_label;
3769 if (! invert_jump (insn, label, 1))
3770 abort ();
3771 /* If we are branching around a jump (rather than a return), prevent
3772 reorg from using an insn from the jump target as the delay slot insn -
3773 when reorg did this, it pessimized code (we rather hide the delay slot)
3774 and it could cause branches to go out of range. */
3775 if (bp->far_label)
3776 (emit_insn_after
3777 (gen_stuff_delay_slot
3778 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
3779 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
3780 insn));
3781 /* Prevent reorg from undoing our splits. */
3782 gen_block_redirect (jump, bp->address += 2, 2);
3785 /* Fix up ADDR_DIFF_VECs. */
3786 void
3787 fixup_addr_diff_vecs (rtx first)
3789 rtx insn;
3791 for (insn = first; insn; insn = NEXT_INSN (insn))
3793 rtx vec_lab, pat, prev, prevpat, x, braf_label;
3795 if (GET_CODE (insn) != JUMP_INSN
3796 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
3797 continue;
3798 pat = PATTERN (insn);
3799 vec_lab = XEXP (XEXP (pat, 0), 0);
3801 /* Search the matching casesi_jump_2. */
3802 for (prev = vec_lab; ; prev = PREV_INSN (prev))
3804 if (GET_CODE (prev) != JUMP_INSN)
3805 continue;
3806 prevpat = PATTERN (prev);
3807 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
3808 continue;
3809 x = XVECEXP (prevpat, 0, 1);
3810 if (GET_CODE (x) != USE)
3811 continue;
3812 x = XEXP (x, 0);
3813 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
3814 break;
3816 /* FIXME: This is a bug in the optimizer, but it seems harmless
3817 to just avoid panicing. */
3818 if (!prev)
3819 continue;
3821 /* Emit the reference label of the braf where it belongs, right after
3822 the casesi_jump_2 (i.e. braf). */
3823 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
3824 emit_label_after (braf_label, prev);
3826 /* Fix up the ADDR_DIF_VEC to be relative
3827 to the reference address of the braf. */
3828 XEXP (XEXP (pat, 0), 0) = braf_label;
3832 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
3833 a barrier. Return the base 2 logarithm of the desired alignment. */
3835 barrier_align (rtx barrier_or_label)
3837 rtx next = next_real_insn (barrier_or_label), pat, prev;
3838 int slot, credit, jump_to_next = 0;
3840 if (! next)
3841 return 0;
3843 pat = PATTERN (next);
3845 if (GET_CODE (pat) == ADDR_DIFF_VEC)
3846 return 2;
3848 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
3849 /* This is a barrier in front of a constant table. */
3850 return 0;
3852 prev = prev_real_insn (barrier_or_label);
3853 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
3855 pat = PATTERN (prev);
3856 /* If this is a very small table, we want to keep the alignment after
3857 the table to the minimum for proper code alignment. */
3858 return ((TARGET_SMALLCODE
3859 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
3860 <= (unsigned) 1 << (CACHE_LOG - 2)))
3861 ? 1 << TARGET_SHMEDIA : align_jumps_log);
3864 if (TARGET_SMALLCODE)
3865 return 0;
3867 if (! TARGET_SH2 || ! optimize)
3868 return align_jumps_log;
3870 /* When fixing up pcloads, a constant table might be inserted just before
3871 the basic block that ends with the barrier. Thus, we can't trust the
3872 instruction lengths before that. */
3873 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
3875 /* Check if there is an immediately preceding branch to the insn beyond
3876 the barrier. We must weight the cost of discarding useful information
3877 from the current cache line when executing this branch and there is
3878 an alignment, against that of fetching unneeded insn in front of the
3879 branch target when there is no alignment. */
3881 /* There are two delay_slot cases to consider. One is the simple case
3882 where the preceding branch is to the insn beyond the barrier (simple
3883 delay slot filling), and the other is where the preceding branch has
3884 a delay slot that is a duplicate of the insn after the barrier
3885 (fill_eager_delay_slots) and the branch is to the insn after the insn
3886 after the barrier. */
3888 /* PREV is presumed to be the JUMP_INSN for the barrier under
3889 investigation. Skip to the insn before it. */
3890 prev = prev_real_insn (prev);
3892 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
3893 credit >= 0 && prev && GET_CODE (prev) == INSN;
3894 prev = prev_real_insn (prev))
3896 jump_to_next = 0;
3897 if (GET_CODE (PATTERN (prev)) == USE
3898 || GET_CODE (PATTERN (prev)) == CLOBBER)
3899 continue;
3900 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
3902 prev = XVECEXP (PATTERN (prev), 0, 1);
3903 if (INSN_UID (prev) == INSN_UID (next))
3905 /* Delay slot was filled with insn at jump target. */
3906 jump_to_next = 1;
3907 continue;
3911 if (slot &&
3912 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3913 slot = 0;
3914 credit -= get_attr_length (prev);
3916 if (prev
3917 && GET_CODE (prev) == JUMP_INSN
3918 && JUMP_LABEL (prev))
3920 rtx x;
3921 if (jump_to_next
3922 || next_real_insn (JUMP_LABEL (prev)) == next
3923 /* If relax_delay_slots() decides NEXT was redundant
3924 with some previous instruction, it will have
3925 redirected PREV's jump to the following insn. */
3926 || JUMP_LABEL (prev) == next_nonnote_insn (next)
3927 /* There is no upper bound on redundant instructions
3928 that might have been skipped, but we must not put an
3929 alignment where none had been before. */
3930 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
3931 (INSN_P (x)
3932 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
3933 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
3934 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
3936 rtx pat = PATTERN (prev);
3937 if (GET_CODE (pat) == PARALLEL)
3938 pat = XVECEXP (pat, 0, 0);
3939 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
3940 return 0;
3945 return align_jumps_log;
3948 /* If we are inside a phony loop, almost any kind of label can turn up as the
3949 first one in the loop. Aligning a braf label causes incorrect switch
3950 destination addresses; we can detect braf labels because they are
3951 followed by a BARRIER.
3952 Applying loop alignment to small constant or switch tables is a waste
3953 of space, so we suppress this too. */
3955 sh_loop_align (rtx label)
3957 rtx next = label;
3960 next = next_nonnote_insn (next);
3961 while (next && GET_CODE (next) == CODE_LABEL);
3963 if (! next
3964 || ! INSN_P (next)
3965 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
3966 || recog_memoized (next) == CODE_FOR_consttable_2)
3967 return 0;
3969 return align_loops_log;
3972 /* Do a final pass over the function, just before delayed branch
3973 scheduling. */
3975 static void
3976 sh_reorg (void)
3978 rtx first, insn, mova = NULL_RTX;
3979 int num_mova;
3980 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
3981 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
3983 first = get_insns ();
3985 /* We must split call insns before introducing `mova's. If we're
3986 optimizing, they'll have already been split. Otherwise, make
3987 sure we don't split them too late. */
3988 if (! optimize)
3989 split_all_insns_noflow ();
3991 if (TARGET_SHMEDIA)
3992 return;
3994 /* If relaxing, generate pseudo-ops to associate function calls with
3995 the symbols they call. It does no harm to not generate these
3996 pseudo-ops. However, when we can generate them, it enables to
3997 linker to potentially relax the jsr to a bsr, and eliminate the
3998 register load and, possibly, the constant pool entry. */
4000 mdep_reorg_phase = SH_INSERT_USES_LABELS;
4001 if (TARGET_RELAX)
4003 /* Remove all REG_LABEL notes. We want to use them for our own
4004 purposes. This works because none of the remaining passes
4005 need to look at them.
4007 ??? But it may break in the future. We should use a machine
4008 dependent REG_NOTE, or some other approach entirely. */
4009 for (insn = first; insn; insn = NEXT_INSN (insn))
4011 if (INSN_P (insn))
4013 rtx note;
4015 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
4016 remove_note (insn, note);
4020 for (insn = first; insn; insn = NEXT_INSN (insn))
4022 rtx pattern, reg, link, set, scan, dies, label;
4023 int rescan = 0, foundinsn = 0;
4025 if (GET_CODE (insn) == CALL_INSN)
4027 pattern = PATTERN (insn);
4029 if (GET_CODE (pattern) == PARALLEL)
4030 pattern = XVECEXP (pattern, 0, 0);
4031 if (GET_CODE (pattern) == SET)
4032 pattern = SET_SRC (pattern);
4034 if (GET_CODE (pattern) != CALL
4035 || GET_CODE (XEXP (pattern, 0)) != MEM)
4036 continue;
4038 reg = XEXP (XEXP (pattern, 0), 0);
4040 else
4042 reg = sfunc_uses_reg (insn);
4043 if (! reg)
4044 continue;
4047 if (GET_CODE (reg) != REG)
4048 continue;
4050 /* This is a function call via REG. If the only uses of REG
4051 between the time that it is set and the time that it dies
4052 are in function calls, then we can associate all the
4053 function calls with the setting of REG. */
4055 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
4057 if (REG_NOTE_KIND (link) != 0)
4058 continue;
4059 set = single_set (XEXP (link, 0));
4060 if (set && rtx_equal_p (reg, SET_DEST (set)))
4062 link = XEXP (link, 0);
4063 break;
4067 if (! link)
4069 /* ??? Sometimes global register allocation will have
4070 deleted the insn pointed to by LOG_LINKS. Try
4071 scanning backward to find where the register is set. */
4072 for (scan = PREV_INSN (insn);
4073 scan && GET_CODE (scan) != CODE_LABEL;
4074 scan = PREV_INSN (scan))
4076 if (! INSN_P (scan))
4077 continue;
4079 if (! reg_mentioned_p (reg, scan))
4080 continue;
4082 if (noncall_uses_reg (reg, scan, &set))
4083 break;
4085 if (set)
4087 link = scan;
4088 break;
4093 if (! link)
4094 continue;
4096 /* The register is set at LINK. */
4098 /* We can only optimize the function call if the register is
4099 being set to a symbol. In theory, we could sometimes
4100 optimize calls to a constant location, but the assembler
4101 and linker do not support that at present. */
4102 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4103 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4104 continue;
4106 /* Scan forward from LINK to the place where REG dies, and
4107 make sure that the only insns which use REG are
4108 themselves function calls. */
4110 /* ??? This doesn't work for call targets that were allocated
4111 by reload, since there may not be a REG_DEAD note for the
4112 register. */
4114 dies = NULL_RTX;
4115 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4117 rtx scanset;
4119 /* Don't try to trace forward past a CODE_LABEL if we haven't
4120 seen INSN yet. Ordinarily, we will only find the setting insn
4121 in LOG_LINKS if it is in the same basic block. However,
4122 cross-jumping can insert code labels in between the load and
4123 the call, and can result in situations where a single call
4124 insn may have two targets depending on where we came from. */
4126 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4127 break;
4129 if (! INSN_P (scan))
4130 continue;
4132 /* Don't try to trace forward past a JUMP. To optimize
4133 safely, we would have to check that all the
4134 instructions at the jump destination did not use REG. */
4136 if (GET_CODE (scan) == JUMP_INSN)
4137 break;
4139 if (! reg_mentioned_p (reg, scan))
4140 continue;
4142 if (noncall_uses_reg (reg, scan, &scanset))
4143 break;
4145 if (scan == insn)
4146 foundinsn = 1;
4148 if (scan != insn
4149 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4151 /* There is a function call to this register other
4152 than the one we are checking. If we optimize
4153 this call, we need to rescan again below. */
4154 rescan = 1;
4157 /* ??? We shouldn't have to worry about SCANSET here.
4158 We should just be able to check for a REG_DEAD note
4159 on a function call. However, the REG_DEAD notes are
4160 apparently not dependable around libcalls; c-torture
4161 execute/920501-2 is a test case. If SCANSET is set,
4162 then this insn sets the register, so it must have
4163 died earlier. Unfortunately, this will only handle
4164 the cases in which the register is, in fact, set in a
4165 later insn. */
4167 /* ??? We shouldn't have to use FOUNDINSN here.
4168 However, the LOG_LINKS fields are apparently not
4169 entirely reliable around libcalls;
4170 newlib/libm/math/e_pow.c is a test case. Sometimes
4171 an insn will appear in LOG_LINKS even though it is
4172 not the most recent insn which sets the register. */
4174 if (foundinsn
4175 && (scanset
4176 || find_reg_note (scan, REG_DEAD, reg)))
4178 dies = scan;
4179 break;
4183 if (! dies)
4185 /* Either there was a branch, or some insn used REG
4186 other than as a function call address. */
4187 continue;
4190 /* Create a code label, and put it in a REG_LABEL note on
4191 the insn which sets the register, and on each call insn
4192 which uses the register. In final_prescan_insn we look
4193 for the REG_LABEL notes, and output the appropriate label
4194 or pseudo-op. */
4196 label = gen_label_rtx ();
4197 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
4198 REG_NOTES (link));
4199 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
4200 REG_NOTES (insn));
4201 if (rescan)
4203 scan = link;
4206 rtx reg2;
4208 scan = NEXT_INSN (scan);
4209 if (scan != insn
4210 && ((GET_CODE (scan) == CALL_INSN
4211 && reg_mentioned_p (reg, scan))
4212 || ((reg2 = sfunc_uses_reg (scan))
4213 && REGNO (reg2) == REGNO (reg))))
4214 REG_NOTES (scan)
4215 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
4217 while (scan != dies);
4222 if (TARGET_SH2)
4223 fixup_addr_diff_vecs (first);
4225 if (optimize)
4227 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4228 shorten_branches (first);
4230 /* Scan the function looking for move instructions which have to be
4231 changed to pc-relative loads and insert the literal tables. */
4233 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4234 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4236 if (mova_p (insn))
4238 /* ??? basic block reordering can move a switch table dispatch
4239 below the switch table. Check if that has happened.
4240 We only have the addresses available when optimizing; but then,
4241 this check shouldn't be needed when not optimizing. */
4242 rtx label_ref = XVECEXP (SET_SRC (PATTERN (insn)), 0, 0);
4243 if (optimize
4244 && (INSN_ADDRESSES (INSN_UID (insn))
4245 > INSN_ADDRESSES (INSN_UID (XEXP (label_ref, 0)))))
4247 /* Change the mova into a load.
4248 broken_move will then return true for it. */
4249 fixup_mova (insn);
4251 else if (! num_mova++)
4252 mova = insn;
4254 else if (GET_CODE (insn) == JUMP_INSN
4255 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4256 && num_mova)
4258 rtx scan;
4259 int total;
4261 num_mova--;
4263 /* Some code might have been inserted between the mova and
4264 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4265 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4266 total += get_attr_length (scan);
4268 /* range of mova is 1020, add 4 because pc counts from address of
4269 second instruction after this one, subtract 2 in case pc is 2
4270 byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
4271 cancels out with alignment effects of the mova itself. */
4272 if (total > 1022)
4274 /* Change the mova into a load, and restart scanning
4275 there. broken_move will then return true for mova. */
4276 fixup_mova (mova);
4277 insn = mova;
4280 if (broken_move (insn)
4281 || (GET_CODE (insn) == INSN
4282 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
4284 rtx scan;
4285 /* Scan ahead looking for a barrier to stick the constant table
4286 behind. */
4287 rtx barrier = find_barrier (num_mova, mova, insn);
4288 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4289 int need_aligned_label = 0;
4291 if (num_mova && ! mova_p (mova))
4293 /* find_barrier had to change the first mova into a
4294 pcload; thus, we have to start with this new pcload. */
4295 insn = mova;
4296 num_mova = 0;
4298 /* Now find all the moves between the points and modify them. */
4299 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
4301 if (GET_CODE (scan) == CODE_LABEL)
4302 last_float = 0;
4303 if (GET_CODE (scan) == INSN
4304 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
4305 need_aligned_label = 1;
4306 if (broken_move (scan))
4308 rtx *patp = &PATTERN (scan), pat = *patp;
4309 rtx src, dst;
4310 rtx lab;
4311 rtx newsrc;
4312 enum machine_mode mode;
4314 if (GET_CODE (pat) == PARALLEL)
4315 patp = &XVECEXP (pat, 0, 0), pat = *patp;
4316 src = SET_SRC (pat);
4317 dst = SET_DEST (pat);
4318 mode = GET_MODE (dst);
4320 if (mode == SImode && hi_const (src)
4321 && REGNO (dst) != FPUL_REG)
4323 int offset = 0;
4325 mode = HImode;
4326 while (GET_CODE (dst) == SUBREG)
4328 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
4329 GET_MODE (SUBREG_REG (dst)),
4330 SUBREG_BYTE (dst),
4331 GET_MODE (dst));
4332 dst = SUBREG_REG (dst);
4334 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
4336 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
4338 /* This must be an insn that clobbers r0. */
4339 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
4340 XVECLEN (PATTERN (scan), 0)
4341 - 1);
4342 rtx clobber = *clobberp;
4344 if (GET_CODE (clobber) != CLOBBER
4345 || ! rtx_equal_p (XEXP (clobber, 0), r0_rtx))
4346 abort ();
4348 if (last_float
4349 && reg_set_between_p (r0_rtx, last_float_move, scan))
4350 last_float = 0;
4351 if (last_float
4352 && TARGET_SHCOMPACT
4353 && GET_MODE_SIZE (mode) != 4
4354 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
4355 last_float = 0;
4356 lab = add_constant (src, mode, last_float);
4357 if (lab)
4358 emit_insn_before (gen_mova (lab), scan);
4359 else
4361 /* There will be a REG_UNUSED note for r0 on
4362 LAST_FLOAT_MOVE; we have to change it to REG_INC,
4363 lest reorg:mark_target_live_regs will not
4364 consider r0 to be used, and we end up with delay
4365 slot insn in front of SCAN that clobbers r0. */
4366 rtx note
4367 = find_regno_note (last_float_move, REG_UNUSED, 0);
4369 /* If we are not optimizing, then there may not be
4370 a note. */
4371 if (note)
4372 PUT_MODE (note, REG_INC);
4374 *last_float_addr = r0_inc_rtx;
4376 last_float_move = scan;
4377 last_float = src;
4378 newsrc = gen_rtx_MEM (mode,
4379 (((TARGET_SH4 && ! TARGET_FMOVD)
4380 || REGNO (dst) == FPUL_REG)
4381 ? r0_inc_rtx
4382 : r0_rtx));
4383 last_float_addr = &XEXP (newsrc, 0);
4385 /* Remove the clobber of r0. */
4386 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
4387 gen_rtx_SCRATCH (Pmode));
4389 /* This is a mova needing a label. Create it. */
4390 else if (GET_CODE (src) == UNSPEC
4391 && XINT (src, 1) == UNSPEC_MOVA
4392 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
4394 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
4395 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4396 newsrc = gen_rtx_UNSPEC (SImode,
4397 gen_rtvec (1, newsrc),
4398 UNSPEC_MOVA);
4400 else
4402 lab = add_constant (src, mode, 0);
4403 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4404 newsrc = gen_const_mem (mode, newsrc);
4406 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
4407 INSN_CODE (scan) = -1;
4410 dump_table (need_aligned_label ? insn : 0, barrier);
4411 insn = barrier;
4415 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
4416 INSN_ADDRESSES_FREE ();
4417 split_branches (first);
4419 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4420 also has an effect on the register that holds the address of the sfunc.
4421 Insert an extra dummy insn in front of each sfunc that pretends to
4422 use this register. */
4423 if (flag_delayed_branch)
4425 for (insn = first; insn; insn = NEXT_INSN (insn))
4427 rtx reg = sfunc_uses_reg (insn);
4429 if (! reg)
4430 continue;
4431 emit_insn_before (gen_use_sfunc_addr (reg), insn);
4434 #if 0
4435 /* fpscr is not actually a user variable, but we pretend it is for the
4436 sake of the previous optimization passes, since we want it handled like
4437 one. However, we don't have any debugging information for it, so turn
4438 it into a non-user variable now. */
4439 if (TARGET_SH4)
4440 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4441 #endif
4442 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
4446 get_dest_uid (rtx label, int max_uid)
4448 rtx dest = next_real_insn (label);
4449 int dest_uid;
4450 if (! dest)
4451 /* This can happen for an undefined label. */
4452 return 0;
4453 dest_uid = INSN_UID (dest);
4454 /* If this is a newly created branch redirection blocking instruction,
4455 we cannot index the branch_uid or insn_addresses arrays with its
4456 uid. But then, we won't need to, because the actual destination is
4457 the following branch. */
4458 while (dest_uid >= max_uid)
4460 dest = NEXT_INSN (dest);
4461 dest_uid = INSN_UID (dest);
4463 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
4464 return 0;
4465 return dest_uid;
4468 /* Split condbranches that are out of range. Also add clobbers for
4469 scratch registers that are needed in far jumps.
4470 We do this before delay slot scheduling, so that it can take our
4471 newly created instructions into account. It also allows us to
4472 find branches with common targets more easily. */
4474 static void
4475 split_branches (rtx first)
4477 rtx insn;
4478 struct far_branch **uid_branch, *far_branch_list = 0;
4479 int max_uid = get_max_uid ();
4481 /* Find out which branches are out of range. */
4482 shorten_branches (first);
4484 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
4485 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
4487 for (insn = first; insn; insn = NEXT_INSN (insn))
4488 if (! INSN_P (insn))
4489 continue;
4490 else if (INSN_DELETED_P (insn))
4492 /* Shorten_branches would split this instruction again,
4493 so transform it into a note. */
4494 PUT_CODE (insn, NOTE);
4495 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4496 NOTE_SOURCE_FILE (insn) = 0;
4498 else if (GET_CODE (insn) == JUMP_INSN
4499 /* Don't mess with ADDR_DIFF_VEC */
4500 && (GET_CODE (PATTERN (insn)) == SET
4501 || GET_CODE (PATTERN (insn)) == RETURN))
4503 enum attr_type type = get_attr_type (insn);
4504 if (type == TYPE_CBRANCH)
4506 rtx next, beyond;
4508 if (get_attr_length (insn) > 4)
4510 rtx src = SET_SRC (PATTERN (insn));
4511 rtx olabel = XEXP (XEXP (src, 1), 0);
4512 int addr = INSN_ADDRESSES (INSN_UID (insn));
4513 rtx label = 0;
4514 int dest_uid = get_dest_uid (olabel, max_uid);
4515 struct far_branch *bp = uid_branch[dest_uid];
4517 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4518 the label if the LABEL_NUSES count drops to zero. There is
4519 always a jump_optimize pass that sets these values, but it
4520 proceeds to delete unreferenced code, and then if not
4521 optimizing, to un-delete the deleted instructions, thus
4522 leaving labels with too low uses counts. */
4523 if (! optimize)
4525 JUMP_LABEL (insn) = olabel;
4526 LABEL_NUSES (olabel)++;
4528 if (! bp)
4530 bp = (struct far_branch *) alloca (sizeof *bp);
4531 uid_branch[dest_uid] = bp;
4532 bp->prev = far_branch_list;
4533 far_branch_list = bp;
4534 bp->far_label
4535 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
4536 LABEL_NUSES (bp->far_label)++;
4538 else
4540 label = bp->near_label;
4541 if (! label && bp->address - addr >= CONDJUMP_MIN)
4543 rtx block = bp->insert_place;
4545 if (GET_CODE (PATTERN (block)) == RETURN)
4546 block = PREV_INSN (block);
4547 else
4548 block = gen_block_redirect (block,
4549 bp->address, 2);
4550 label = emit_label_after (gen_label_rtx (),
4551 PREV_INSN (block));
4552 bp->near_label = label;
4554 else if (label && ! NEXT_INSN (label))
4556 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4557 bp->insert_place = insn;
4558 else
4559 gen_far_branch (bp);
4562 if (! label
4563 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4565 bp->near_label = label = gen_label_rtx ();
4566 bp->insert_place = insn;
4567 bp->address = addr;
4569 if (! redirect_jump (insn, label, 1))
4570 abort ();
4572 else
4574 /* get_attr_length (insn) == 2 */
4575 /* Check if we have a pattern where reorg wants to redirect
4576 the branch to a label from an unconditional branch that
4577 is too far away. */
4578 /* We can't use JUMP_LABEL here because it might be undefined
4579 when not optimizing. */
4580 /* A syntax error might cause beyond to be NULL_RTX. */
4581 beyond
4582 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4583 0));
4585 if (beyond
4586 && (GET_CODE (beyond) == JUMP_INSN
4587 || ((beyond = next_active_insn (beyond))
4588 && GET_CODE (beyond) == JUMP_INSN))
4589 && GET_CODE (PATTERN (beyond)) == SET
4590 && recog_memoized (beyond) == CODE_FOR_jump_compact
4591 && ((INSN_ADDRESSES
4592 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4593 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4594 > 252 + 258 + 2))
4595 gen_block_redirect (beyond,
4596 INSN_ADDRESSES (INSN_UID (beyond)), 1);
4599 next = next_active_insn (insn);
4601 if ((GET_CODE (next) == JUMP_INSN
4602 || ((next = next_active_insn (next))
4603 && GET_CODE (next) == JUMP_INSN))
4604 && GET_CODE (PATTERN (next)) == SET
4605 && recog_memoized (next) == CODE_FOR_jump_compact
4606 && ((INSN_ADDRESSES
4607 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
4608 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4609 > 252 + 258 + 2))
4610 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
4612 else if (type == TYPE_JUMP || type == TYPE_RETURN)
4614 int addr = INSN_ADDRESSES (INSN_UID (insn));
4615 rtx far_label = 0;
4616 int dest_uid = 0;
4617 struct far_branch *bp;
4619 if (type == TYPE_JUMP)
4621 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
4622 dest_uid = get_dest_uid (far_label, max_uid);
4623 if (! dest_uid)
4625 /* Parse errors can lead to labels outside
4626 the insn stream. */
4627 if (! NEXT_INSN (far_label))
4628 continue;
4630 if (! optimize)
4632 JUMP_LABEL (insn) = far_label;
4633 LABEL_NUSES (far_label)++;
4635 redirect_jump (insn, NULL_RTX, 1);
4636 far_label = 0;
4639 bp = uid_branch[dest_uid];
4640 if (! bp)
4642 bp = (struct far_branch *) alloca (sizeof *bp);
4643 uid_branch[dest_uid] = bp;
4644 bp->prev = far_branch_list;
4645 far_branch_list = bp;
4646 bp->near_label = 0;
4647 bp->far_label = far_label;
4648 if (far_label)
4649 LABEL_NUSES (far_label)++;
4651 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
4652 if (addr - bp->address <= CONDJUMP_MAX)
4653 emit_label_after (bp->near_label, PREV_INSN (insn));
4654 else
4656 gen_far_branch (bp);
4657 bp->near_label = 0;
4659 else
4660 bp->near_label = 0;
4661 bp->address = addr;
4662 bp->insert_place = insn;
4663 if (! far_label)
4664 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
4665 else
4666 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
4669 /* Generate all pending far branches,
4670 and free our references to the far labels. */
4671 while (far_branch_list)
4673 if (far_branch_list->near_label
4674 && ! NEXT_INSN (far_branch_list->near_label))
4675 gen_far_branch (far_branch_list);
4676 if (optimize
4677 && far_branch_list->far_label
4678 && ! --LABEL_NUSES (far_branch_list->far_label))
4679 delete_insn (far_branch_list->far_label);
4680 far_branch_list = far_branch_list->prev;
4683 /* Instruction length information is no longer valid due to the new
4684 instructions that have been generated. */
4685 init_insn_lengths ();
4688 /* Dump out instruction addresses, which is useful for debugging the
4689 constant pool table stuff.
4691 If relaxing, output the label and pseudo-ops used to link together
4692 calls and the instruction which set the registers. */
4694 /* ??? The addresses printed by this routine for insns are nonsense for
4695 insns which are inside of a sequence where none of the inner insns have
4696 variable length. This is because the second pass of shorten_branches
4697 does not bother to update them. */
4699 void
4700 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
4701 int noperands ATTRIBUTE_UNUSED)
4703 if (TARGET_DUMPISIZE)
4704 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
4706 if (TARGET_RELAX)
4708 rtx note;
4710 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
4711 if (note)
4713 rtx pattern;
4715 pattern = PATTERN (insn);
4716 if (GET_CODE (pattern) == PARALLEL)
4717 pattern = XVECEXP (pattern, 0, 0);
4718 if (GET_CODE (pattern) == CALL
4719 || (GET_CODE (pattern) == SET
4720 && (GET_CODE (SET_SRC (pattern)) == CALL
4721 || get_attr_type (insn) == TYPE_SFUNC)))
4722 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
4723 CODE_LABEL_NUMBER (XEXP (note, 0)));
4724 else if (GET_CODE (pattern) == SET)
4725 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4726 CODE_LABEL_NUMBER (XEXP (note, 0)));
4727 else
4728 abort ();
4733 /* Dump out any constants accumulated in the final pass. These will
4734 only be labels. */
4736 const char *
4737 output_jump_label_table (void)
4739 int i;
4741 if (pool_size)
4743 fprintf (asm_out_file, "\t.align 2\n");
4744 for (i = 0; i < pool_size; i++)
4746 pool_node *p = &pool_vector[i];
4748 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4749 CODE_LABEL_NUMBER (p->label));
4750 output_asm_insn (".long %O0", &p->value);
4752 pool_size = 0;
4755 return "";
4758 /* A full frame looks like:
4760 arg-5
4761 arg-4
4762 [ if current_function_anonymous_args
4763 arg-3
4764 arg-2
4765 arg-1
4766 arg-0 ]
4767 saved-fp
4768 saved-r10
4769 saved-r11
4770 saved-r12
4771 saved-pr
4772 local-n
4774 local-1
4775 local-0 <- fp points here. */
4777 /* Number of bytes pushed for anonymous args, used to pass information
4778 between expand_prologue and expand_epilogue. */
4780 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
4781 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
4782 for an epilogue and a negative value means that it's for a sibcall
4783 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
4784 all the registers that are about to be restored, and hence dead. */
4786 static void
4787 output_stack_adjust (int size, rtx reg, int epilogue_p,
4788 HARD_REG_SET *live_regs_mask)
4790 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
4791 if (size)
4793 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
4795 /* This test is bogus, as output_stack_adjust is used to re-align the
4796 stack. */
4797 #if 0
4798 if (size % align)
4799 abort ();
4800 #endif
4802 if (CONST_OK_FOR_ADD (size))
4803 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
4804 /* Try to do it with two partial adjustments; however, we must make
4805 sure that the stack is properly aligned at all times, in case
4806 an interrupt occurs between the two partial adjustments. */
4807 else if (CONST_OK_FOR_ADD (size / 2 & -align)
4808 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
4810 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
4811 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
4813 else
4815 rtx const_reg;
4816 rtx insn;
4817 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
4818 int i;
4820 /* If TEMP is invalid, we could temporarily save a general
4821 register to MACL. However, there is currently no need
4822 to handle this case, so just abort when we see it. */
4823 if (epilogue_p < 0
4824 || current_function_interrupt
4825 || ! call_really_used_regs[temp] || fixed_regs[temp])
4826 temp = -1;
4827 if (temp < 0 && ! current_function_interrupt
4828 && (TARGET_SHMEDIA || epilogue_p >= 0))
4830 HARD_REG_SET temps;
4831 COPY_HARD_REG_SET (temps, call_used_reg_set);
4832 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
4833 if (epilogue_p > 0)
4835 int nreg = 0;
4836 if (current_function_return_rtx)
4838 enum machine_mode mode;
4839 mode = GET_MODE (current_function_return_rtx);
4840 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
4841 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
4843 for (i = 0; i < nreg; i++)
4844 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
4845 if (current_function_calls_eh_return)
4847 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
4848 for (i = 0; i <= 3; i++)
4849 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
4852 if (TARGET_SHMEDIA && epilogue_p < 0)
4853 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
4854 CLEAR_HARD_REG_BIT (temps, i);
4855 if (epilogue_p <= 0)
4857 for (i = FIRST_PARM_REG;
4858 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
4859 CLEAR_HARD_REG_BIT (temps, i);
4860 if (cfun->static_chain_decl != NULL)
4861 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
4863 temp = scavenge_reg (&temps);
4865 if (temp < 0 && live_regs_mask)
4866 temp = scavenge_reg (live_regs_mask);
4867 if (temp < 0)
4869 /* If we reached here, the most likely case is the (sibcall)
4870 epilogue for non SHmedia. Put a special push/pop sequence
4871 for such case as the last resort. This looks lengthy but
4872 would not be problem because it seems to be very rare. */
4873 if (! TARGET_SHMEDIA && epilogue_p)
4875 rtx adj_reg, tmp_reg, mem;
4877 /* ??? There is still the slight possibility that r4 or r5
4878 have been reserved as fixed registers or assigned as
4879 global registers, and they change during an interrupt.
4880 There are possible ways to handle this:
4881 - If we are adjusting the frame pointer (r14), we can do
4882 with a single temp register and an ordinary push / pop
4883 on the stack.
4884 - Grab any call-used or call-saved registers (i.e. not
4885 fixed or globals) for the temps we need. We might
4886 also grab r14 if we are adjusting the stack pointer.
4887 If we can't find enough available registers, issue
4888 a diagnostic and abort - the user must have reserved
4889 way too many registers.
4890 But since all this is rather unlikely to happen and
4891 would require extra testing, we just abort if r4 / r5
4892 are not available. */
4893 if (fixed_regs[4] || fixed_regs[5]
4894 || global_regs[4] || global_regs[5])
4895 abort ();
4897 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
4898 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
4899 emit_move_insn (gen_rtx_MEM (Pmode, reg), adj_reg);
4900 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
4901 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
4902 mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
4903 emit_move_insn (mem, tmp_reg);
4904 emit_move_insn (tmp_reg, gen_rtx_MEM (Pmode, reg));
4905 mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
4906 emit_move_insn (mem, tmp_reg);
4907 emit_move_insn (reg, adj_reg);
4908 mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
4909 emit_move_insn (adj_reg, mem);
4910 mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
4911 emit_move_insn (tmp_reg, mem);
4912 return;
4914 else
4915 abort ();
4917 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
4919 /* If SIZE is negative, subtract the positive value.
4920 This sometimes allows a constant pool entry to be shared
4921 between prologue and epilogue code. */
4922 if (size < 0)
4924 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
4925 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
4927 else
4929 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
4930 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
4932 if (! epilogue_p)
4933 REG_NOTES (insn)
4934 = (gen_rtx_EXPR_LIST
4935 (REG_FRAME_RELATED_EXPR,
4936 gen_rtx_SET (VOIDmode, reg,
4937 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
4938 REG_NOTES (insn)));
4943 static rtx
4944 frame_insn (rtx x)
4946 x = emit_insn (x);
4947 RTX_FRAME_RELATED_P (x) = 1;
4948 return x;
4951 /* Output RTL to push register RN onto the stack. */
4953 static rtx
4954 push (int rn)
4956 rtx x;
4957 if (rn == FPUL_REG)
4958 x = gen_push_fpul ();
4959 else if (rn == FPSCR_REG)
4960 x = gen_push_fpscr ();
4961 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4962 && FP_OR_XD_REGISTER_P (rn))
4964 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4965 return NULL_RTX;
4966 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
4968 else if (TARGET_SH2E && FP_REGISTER_P (rn))
4969 x = gen_push_e (gen_rtx_REG (SFmode, rn));
4970 else
4971 x = gen_push (gen_rtx_REG (SImode, rn));
4973 x = frame_insn (x);
4974 REG_NOTES (x)
4975 = gen_rtx_EXPR_LIST (REG_INC,
4976 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4977 return x;
4980 /* Output RTL to pop register RN from the stack. */
4982 static void
4983 pop (int rn)
4985 rtx x;
4986 if (rn == FPUL_REG)
4987 x = gen_pop_fpul ();
4988 else if (rn == FPSCR_REG)
4989 x = gen_pop_fpscr ();
4990 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4991 && FP_OR_XD_REGISTER_P (rn))
4993 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4994 return;
4995 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
4997 else if (TARGET_SH2E && FP_REGISTER_P (rn))
4998 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
4999 else
5000 x = gen_pop (gen_rtx_REG (SImode, rn));
5002 x = emit_insn (x);
5003 REG_NOTES (x)
5004 = gen_rtx_EXPR_LIST (REG_INC,
5005 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5008 /* Generate code to push the regs specified in the mask. */
5010 static void
5011 push_regs (HARD_REG_SET *mask, int interrupt_handler)
5013 int i;
5014 int skip_fpscr = 0;
5016 /* Push PR last; this gives better latencies after the prologue, and
5017 candidates for the return delay slot when there are no general
5018 registers pushed. */
5019 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5021 /* If this is an interrupt handler, and the SZ bit varies,
5022 and we have to push any floating point register, we need
5023 to switch to the correct precision first. */
5024 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
5025 && hard_regs_intersect_p (mask, &reg_class_contents[DF_REGS]))
5027 HARD_REG_SET unsaved;
5029 push (FPSCR_REG);
5030 COMPL_HARD_REG_SET (unsaved, *mask);
5031 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
5032 skip_fpscr = 1;
5034 if (i != PR_REG
5035 && (i != FPSCR_REG || ! skip_fpscr)
5036 && TEST_HARD_REG_BIT (*mask, i))
5037 push (i);
5039 if (TEST_HARD_REG_BIT (*mask, PR_REG))
5040 push (PR_REG);
5043 /* Calculate how much extra space is needed to save all callee-saved
5044 target registers.
5045 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5047 static int
5048 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
5050 int reg;
5051 int stack_space = 0;
5052 int interrupt_handler = sh_cfun_interrupt_handler_p ();
5054 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5055 if ((! call_really_used_regs[reg] || interrupt_handler)
5056 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5057 /* Leave space to save this target register on the stack,
5058 in case target register allocation wants to use it. */
5059 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5060 return stack_space;
5063 /* Decide whether we should reserve space for callee-save target registers,
5064 in case target register allocation wants to use them. REGS_SAVED is
5065 the space, in bytes, that is already required for register saves.
5066 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5068 static int
5069 shmedia_reserve_space_for_target_registers_p (int regs_saved,
5070 HARD_REG_SET *live_regs_mask)
5072 if (optimize_size)
5073 return 0;
5074 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
5077 /* Decide how much space to reserve for callee-save target registers
5078 in case target register allocation wants to use them.
5079 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5081 static int
5082 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5084 if (shmedia_space_reserved_for_target_registers)
5085 return shmedia_target_regs_stack_space (live_regs_mask);
5086 else
5087 return 0;
5090 /* Work out the registers which need to be saved, both as a mask and a
5091 count of saved words. Return the count.
5093 If doing a pragma interrupt function, then push all regs used by the
5094 function, and if we call another function (we can tell by looking at PR),
5095 make sure that all the regs it clobbers are safe too. */
5097 static int
5098 calc_live_regs (HARD_REG_SET *live_regs_mask)
5100 int reg;
5101 int count;
5102 int interrupt_handler;
5103 int pr_live, has_call;
5105 interrupt_handler = sh_cfun_interrupt_handler_p ();
5107 CLEAR_HARD_REG_SET (*live_regs_mask);
5108 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
5109 && regs_ever_live[FPSCR_REG])
5110 target_flags &= ~FPU_SINGLE_BIT;
5111 /* If we can save a lot of saves by switching to double mode, do that. */
5112 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
5113 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5114 if (regs_ever_live[reg] && regs_ever_live[reg+1]
5115 && (! call_really_used_regs[reg]
5116 || (interrupt_handler && ! pragma_trapa))
5117 && ++count > 2)
5119 target_flags &= ~FPU_SINGLE_BIT;
5120 break;
5122 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5123 knows how to use it. That means the pseudo originally allocated for
5124 the initial value can become the PR_MEDIA_REG hard register, as seen for
5125 execute/20010122-1.c:test9. */
5126 if (TARGET_SHMEDIA)
5127 /* ??? this function is called from initial_elimination_offset, hence we
5128 can't use the result of sh_media_register_for_return here. */
5129 pr_live = sh_pr_n_sets ();
5130 else
5132 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5133 pr_live = (pr_initial
5134 ? (GET_CODE (pr_initial) != REG
5135 || REGNO (pr_initial) != (PR_REG))
5136 : regs_ever_live[PR_REG]);
5137 /* For Shcompact, if not optimizing, we end up with a memory reference
5138 using the return address pointer for __builtin_return_address even
5139 though there is no actual need to put the PR register on the stack. */
5140 pr_live |= regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM];
5142 /* Force PR to be live if the prologue has to call the SHmedia
5143 argument decoder or register saver. */
5144 if (TARGET_SHCOMPACT
5145 && ((current_function_args_info.call_cookie
5146 & ~ CALL_COOKIE_RET_TRAMP (1))
5147 || current_function_has_nonlocal_label))
5148 pr_live = 1;
5149 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
5150 for (count = 0, reg = FIRST_PSEUDO_REGISTER - 1; reg >= 0; reg--)
5152 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5153 ? pr_live
5154 : (interrupt_handler && ! pragma_trapa)
5155 ? (/* Need to save all the regs ever live. */
5156 (regs_ever_live[reg]
5157 || (call_really_used_regs[reg]
5158 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
5159 || reg == PIC_OFFSET_TABLE_REGNUM)
5160 && has_call)
5161 || (has_call && REGISTER_NATURAL_MODE (reg) == SImode
5162 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5163 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5164 && reg != RETURN_ADDRESS_POINTER_REGNUM
5165 && reg != T_REG && reg != GBR_REG
5166 /* Push fpscr only on targets which have FPU */
5167 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5168 : (/* Only push those regs which are used and need to be saved. */
5169 (TARGET_SHCOMPACT
5170 && flag_pic
5171 && current_function_args_info.call_cookie
5172 && reg == (int) PIC_OFFSET_TABLE_REGNUM)
5173 || (regs_ever_live[reg] && ! call_really_used_regs[reg])
5174 || (current_function_calls_eh_return
5175 && (reg == (int) EH_RETURN_DATA_REGNO (0)
5176 || reg == (int) EH_RETURN_DATA_REGNO (1)
5177 || reg == (int) EH_RETURN_DATA_REGNO (2)
5178 || reg == (int) EH_RETURN_DATA_REGNO (3)))
5179 || ((reg == MACL_REG || reg == MACH_REG)
5180 && regs_ever_live[reg]
5181 && sh_cfun_attr_renesas_p ())
5184 SET_HARD_REG_BIT (*live_regs_mask, reg);
5185 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5187 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
5188 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
5190 if (FP_REGISTER_P (reg))
5192 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
5194 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5195 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
5198 else if (XD_REGISTER_P (reg))
5200 /* Must switch to double mode to access these registers. */
5201 target_flags &= ~FPU_SINGLE_BIT;
5206 /* If we have a target register optimization pass after prologue / epilogue
5207 threading, we need to assume all target registers will be live even if
5208 they aren't now. */
5209 if (flag_branch_target_load_optimize2
5210 && TARGET_SAVE_ALL_TARGET_REGS
5211 && shmedia_space_reserved_for_target_registers)
5212 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5213 if ((! call_really_used_regs[reg] || interrupt_handler)
5214 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5216 SET_HARD_REG_BIT (*live_regs_mask, reg);
5217 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5219 /* If this is an interrupt handler, we don't have any call-clobbered
5220 registers we can conveniently use for target register save/restore.
5221 Make sure we save at least one general purpose register when we need
5222 to save target registers. */
5223 if (interrupt_handler
5224 && hard_regs_intersect_p (live_regs_mask,
5225 &reg_class_contents[TARGET_REGS])
5226 && ! hard_regs_intersect_p (live_regs_mask,
5227 &reg_class_contents[GENERAL_REGS]))
5229 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
5230 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
5233 return count;
5236 /* Code to generate prologue and epilogue sequences */
5238 /* PUSHED is the number of bytes that are being pushed on the
5239 stack for register saves. Return the frame size, padded
5240 appropriately so that the stack stays properly aligned. */
5241 static HOST_WIDE_INT
5242 rounded_frame_size (int pushed)
5244 HOST_WIDE_INT size = get_frame_size ();
5245 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5247 return ((size + pushed + align - 1) & -align) - pushed;
5250 /* Choose a call-clobbered target-branch register that remains
5251 unchanged along the whole function. We set it up as the return
5252 value in the prologue. */
5254 sh_media_register_for_return (void)
5256 int regno;
5257 int tr0_used;
5259 if (! current_function_is_leaf)
5260 return -1;
5261 if (lookup_attribute ("interrupt_handler",
5262 DECL_ATTRIBUTES (current_function_decl)))
5263 return -1;
5265 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
5267 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
5268 if (call_really_used_regs[regno] && ! regs_ever_live[regno])
5269 return regno;
5271 return -1;
5274 /* The maximum registers we need to save are:
5275 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
5276 - 32 floating point registers (for each pair, we save none,
5277 one single precision value, or a double precision value).
5278 - 8 target registers
5279 - add 1 entry for a delimiter. */
5280 #define MAX_SAVED_REGS (62+32+8)
5282 typedef struct save_entry_s
5284 unsigned char reg;
5285 unsigned char mode;
5286 short offset;
5287 } save_entry;
5289 #define MAX_TEMPS 4
5291 /* There will be a delimiter entry with VOIDmode both at the start and the
5292 end of a filled in schedule. The end delimiter has the offset of the
5293 save with the smallest (i.e. most negative) offset. */
5294 typedef struct save_schedule_s
5296 save_entry entries[MAX_SAVED_REGS + 2];
5297 int temps[MAX_TEMPS+1];
5298 } save_schedule;
5300 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
5301 use reverse order. Returns the last entry written to (not counting
5302 the delimiter). OFFSET_BASE is a number to be added to all offset
5303 entries. */
5305 static save_entry *
5306 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
5307 int offset_base)
5309 int align, i;
5310 save_entry *entry = schedule->entries;
5311 int tmpx = 0;
5312 int offset;
5314 if (! current_function_interrupt)
5315 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
5316 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
5317 && ! FUNCTION_ARG_REGNO_P (i)
5318 && i != FIRST_RET_REG
5319 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
5320 && ! (current_function_calls_eh_return
5321 && (i == EH_RETURN_STACKADJ_REGNO
5322 || ((unsigned) i <= EH_RETURN_DATA_REGNO (0)
5323 && (unsigned) i >= EH_RETURN_DATA_REGNO (3)))))
5324 schedule->temps[tmpx++] = i;
5325 entry->reg = -1;
5326 entry->mode = VOIDmode;
5327 entry->offset = offset_base;
5328 entry++;
5329 /* We loop twice: first, we save 8-byte aligned registers in the
5330 higher addresses, that are known to be aligned. Then, we
5331 proceed to saving 32-bit registers that don't need 8-byte
5332 alignment.
5333 If this is an interrupt function, all registers that need saving
5334 need to be saved in full. moreover, we need to postpone saving
5335 target registers till we have saved some general purpose registers
5336 we can then use as scratch registers. */
5337 offset = offset_base;
5338 for (align = 1; align >= 0; align--)
5340 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
5341 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5343 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5344 int reg = i;
5346 if (current_function_interrupt)
5348 if (TARGET_REGISTER_P (i))
5349 continue;
5350 if (GENERAL_REGISTER_P (i))
5351 mode = DImode;
5353 if (mode == SFmode && (i % 2) == 1
5354 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5355 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
5357 mode = DFmode;
5358 i--;
5359 reg--;
5362 /* If we're doing the aligned pass and this is not aligned,
5363 or we're doing the unaligned pass and this is aligned,
5364 skip it. */
5365 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
5366 != align)
5367 continue;
5369 if (current_function_interrupt
5370 && GENERAL_REGISTER_P (i)
5371 && tmpx < MAX_TEMPS)
5372 schedule->temps[tmpx++] = i;
5374 offset -= GET_MODE_SIZE (mode);
5375 entry->reg = i;
5376 entry->mode = mode;
5377 entry->offset = offset;
5378 entry++;
5380 if (align && current_function_interrupt)
5381 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
5382 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5384 offset -= GET_MODE_SIZE (DImode);
5385 entry->reg = i;
5386 entry->mode = DImode;
5387 entry->offset = offset;
5388 entry++;
5391 entry->reg = -1;
5392 entry->mode = VOIDmode;
5393 entry->offset = offset;
5394 schedule->temps[tmpx] = -1;
5395 return entry - 1;
5398 void
5399 sh_expand_prologue (void)
5401 HARD_REG_SET live_regs_mask;
5402 int d, i;
5403 int d_rounding = 0;
5404 int save_flags = target_flags;
5405 int pretend_args;
5407 current_function_interrupt = sh_cfun_interrupt_handler_p ();
5409 /* We have pretend args if we had an object sent partially in registers
5410 and partially on the stack, e.g. a large structure. */
5411 pretend_args = current_function_pretend_args_size;
5412 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
5413 && (NPARM_REGS(SImode)
5414 > current_function_args_info.arg_count[(int) SH_ARG_INT]))
5415 pretend_args = 0;
5416 output_stack_adjust (-pretend_args
5417 - current_function_args_info.stack_regs * 8,
5418 stack_pointer_rtx, 0, NULL);
5420 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
5421 /* We're going to use the PIC register to load the address of the
5422 incoming-argument decoder and/or of the return trampoline from
5423 the GOT, so make sure the PIC register is preserved and
5424 initialized. */
5425 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5427 if (TARGET_SHCOMPACT
5428 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5430 int reg;
5432 /* First, make all registers with incoming arguments that will
5433 be pushed onto the stack live, so that register renaming
5434 doesn't overwrite them. */
5435 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
5436 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
5437 >= NPARM_REGS (SImode) - reg)
5438 for (; reg < NPARM_REGS (SImode); reg++)
5439 emit_insn (gen_shcompact_preserve_incoming_args
5440 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5441 else if (CALL_COOKIE_INT_REG_GET
5442 (current_function_args_info.call_cookie, reg) == 1)
5443 emit_insn (gen_shcompact_preserve_incoming_args
5444 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5446 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
5447 stack_pointer_rtx);
5448 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
5449 GEN_INT (current_function_args_info.call_cookie));
5450 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
5451 gen_rtx_REG (SImode, R0_REG));
5453 else if (TARGET_SHMEDIA)
5455 int tr = sh_media_register_for_return ();
5457 if (tr >= 0)
5459 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
5460 gen_rtx_REG (DImode, PR_MEDIA_REG));
5462 /* ??? We should suppress saving pr when we don't need it, but this
5463 is tricky because of builtin_return_address. */
5465 /* If this function only exits with sibcalls, this copy
5466 will be flagged as dead. */
5467 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5468 const0_rtx,
5469 REG_NOTES (insn));
5473 /* Emit the code for SETUP_VARARGS. */
5474 if (current_function_stdarg)
5476 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
5478 /* Push arg regs as if they'd been provided by caller in stack. */
5479 for (i = 0; i < NPARM_REGS(SImode); i++)
5481 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
5482 rtx insn;
5484 if (i >= (NPARM_REGS(SImode)
5485 - current_function_args_info.arg_count[(int) SH_ARG_INT]
5487 break;
5488 insn = push (rn);
5489 RTX_FRAME_RELATED_P (insn) = 0;
5494 /* If we're supposed to switch stacks at function entry, do so now. */
5495 if (sp_switch)
5496 emit_insn (gen_sp_switch_1 ());
5498 d = calc_live_regs (&live_regs_mask);
5499 /* ??? Maybe we could save some switching if we can move a mode switch
5500 that already happens to be at the function start into the prologue. */
5501 if (target_flags != save_flags && ! current_function_interrupt)
5502 emit_insn (gen_toggle_sz ());
5504 if (TARGET_SH5)
5506 int offset_base, offset;
5507 rtx r0 = NULL_RTX;
5508 int offset_in_r0 = -1;
5509 int sp_in_r0 = 0;
5510 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5511 int total_size, save_size;
5512 save_schedule schedule;
5513 save_entry *entry;
5514 int *tmp_pnt;
5516 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
5517 && ! current_function_interrupt)
5518 r0 = gen_rtx_REG (Pmode, R0_REG);
5520 /* D is the actual number of bytes that we need for saving registers,
5521 however, in initial_elimination_offset we have committed to using
5522 an additional TREGS_SPACE amount of bytes - in order to keep both
5523 addresses to arguments supplied by the caller and local variables
5524 valid, we must keep this gap. Place it between the incoming
5525 arguments and the actually saved registers in a bid to optimize
5526 locality of reference. */
5527 total_size = d + tregs_space;
5528 total_size += rounded_frame_size (total_size);
5529 save_size = total_size - rounded_frame_size (d);
5530 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
5531 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5532 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
5534 /* If adjusting the stack in a single step costs nothing extra, do so.
5535 I.e. either if a single addi is enough, or we need a movi anyway,
5536 and we don't exceed the maximum offset range (the test for the
5537 latter is conservative for simplicity). */
5538 if (TARGET_SHMEDIA
5539 && (CONST_OK_FOR_I10 (-total_size)
5540 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
5541 && total_size <= 2044)))
5542 d_rounding = total_size - save_size;
5544 offset_base = d + d_rounding;
5546 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
5547 0, NULL);
5549 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
5550 tmp_pnt = schedule.temps;
5551 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
5553 enum machine_mode mode = entry->mode;
5554 int reg = entry->reg;
5555 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
5557 offset = entry->offset;
5559 reg_rtx = gen_rtx_REG (mode, reg);
5561 mem_rtx = gen_rtx_MEM (mode,
5562 gen_rtx_PLUS (Pmode,
5563 stack_pointer_rtx,
5564 GEN_INT (offset)));
5566 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
5568 if (! r0)
5569 abort ();
5570 mem_rtx = NULL_RTX;
5572 try_pre_dec:
5574 if (HAVE_PRE_DECREMENT
5575 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
5576 || mem_rtx == NULL_RTX
5577 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5579 pre_dec = gen_rtx_MEM (mode,
5580 gen_rtx_PRE_DEC (Pmode, r0));
5582 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
5583 pre_dec_ok);
5585 pre_dec = NULL_RTX;
5587 break;
5589 pre_dec_ok:
5590 mem_rtx = NULL_RTX;
5591 offset += GET_MODE_SIZE (mode);
5593 while (0);
5595 if (mem_rtx != NULL_RTX)
5596 goto addr_ok;
5598 if (offset_in_r0 == -1)
5600 emit_move_insn (r0, GEN_INT (offset));
5601 offset_in_r0 = offset;
5603 else if (offset != offset_in_r0)
5605 emit_move_insn (r0,
5606 gen_rtx_PLUS
5607 (Pmode, r0,
5608 GEN_INT (offset - offset_in_r0)));
5609 offset_in_r0 += offset - offset_in_r0;
5612 if (pre_dec != NULL_RTX)
5614 if (! sp_in_r0)
5616 emit_move_insn (r0,
5617 gen_rtx_PLUS
5618 (Pmode, r0, stack_pointer_rtx));
5619 sp_in_r0 = 1;
5622 offset -= GET_MODE_SIZE (mode);
5623 offset_in_r0 -= GET_MODE_SIZE (mode);
5625 mem_rtx = pre_dec;
5627 else if (sp_in_r0)
5628 mem_rtx = gen_rtx_MEM (mode, r0);
5629 else
5630 mem_rtx = gen_rtx_MEM (mode,
5631 gen_rtx_PLUS (Pmode,
5632 stack_pointer_rtx,
5633 r0));
5635 /* We must not use an r0-based address for target-branch
5636 registers or for special registers without pre-dec
5637 memory addresses, since we store their values in r0
5638 first. */
5639 if (TARGET_REGISTER_P (reg)
5640 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5641 && mem_rtx != pre_dec))
5642 abort ();
5644 addr_ok:
5645 if (TARGET_REGISTER_P (reg)
5646 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5647 && mem_rtx != pre_dec))
5649 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
5651 emit_move_insn (tmp_reg, reg_rtx);
5653 if (REGNO (tmp_reg) == R0_REG)
5655 offset_in_r0 = -1;
5656 sp_in_r0 = 0;
5657 if (refers_to_regno_p (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0))
5658 abort ();
5661 if (*++tmp_pnt <= 0)
5662 tmp_pnt = schedule.temps;
5664 reg_rtx = tmp_reg;
5667 rtx insn;
5669 /* Mark as interesting for dwarf cfi generator */
5670 insn = emit_move_insn (mem_rtx, reg_rtx);
5671 RTX_FRAME_RELATED_P (insn) = 1;
5673 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
5675 rtx reg_rtx = gen_rtx_REG (mode, reg);
5676 rtx set, note_rtx;
5677 rtx mem_rtx = gen_rtx_MEM (mode,
5678 gen_rtx_PLUS (Pmode,
5679 stack_pointer_rtx,
5680 GEN_INT (offset)));
5682 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
5683 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
5684 REG_NOTES (insn));
5685 REG_NOTES (insn) = note_rtx;
5690 if (entry->offset != d_rounding)
5691 abort ();
5693 else
5694 push_regs (&live_regs_mask, current_function_interrupt);
5696 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
5698 rtx insn = get_last_insn ();
5699 rtx last = emit_insn (gen_GOTaddr2picreg ());
5701 /* Mark these insns as possibly dead. Sometimes, flow2 may
5702 delete all uses of the PIC register. In this case, let it
5703 delete the initialization too. */
5706 insn = NEXT_INSN (insn);
5708 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5709 const0_rtx,
5710 REG_NOTES (insn));
5712 while (insn != last);
5715 if (SHMEDIA_REGS_STACK_ADJUST ())
5717 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5718 function_symbol (TARGET_FPU_ANY
5719 ? "__GCC_push_shmedia_regs"
5720 : "__GCC_push_shmedia_regs_nofpu"));
5721 /* This must NOT go through the PLT, otherwise mach and macl
5722 may be clobbered. */
5723 emit_insn (gen_shmedia_save_restore_regs_compact
5724 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
5727 if (target_flags != save_flags && ! current_function_interrupt)
5729 rtx insn = emit_insn (gen_toggle_sz ());
5731 /* If we're lucky, a mode switch in the function body will
5732 overwrite fpscr, turning this insn dead. Tell flow this
5733 insn is ok to delete. */
5734 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5735 const0_rtx,
5736 REG_NOTES (insn));
5739 target_flags = save_flags;
5741 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
5742 stack_pointer_rtx, 0, NULL);
5744 if (frame_pointer_needed)
5745 frame_insn (GEN_MOV (frame_pointer_rtx, stack_pointer_rtx));
5747 if (TARGET_SHCOMPACT
5748 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5750 /* This must NOT go through the PLT, otherwise mach and macl
5751 may be clobbered. */
5752 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5753 function_symbol ("__GCC_shcompact_incoming_args"));
5754 emit_insn (gen_shcompact_incoming_args ());
5758 void
5759 sh_expand_epilogue (bool sibcall_p)
5761 HARD_REG_SET live_regs_mask;
5762 int d, i;
5763 int d_rounding = 0;
5765 int save_flags = target_flags;
5766 int frame_size, save_size;
5767 int fpscr_deferred = 0;
5768 int e = sibcall_p ? -1 : 1;
5770 d = calc_live_regs (&live_regs_mask);
5772 save_size = d;
5773 frame_size = rounded_frame_size (d);
5775 if (TARGET_SH5)
5777 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5778 int total_size;
5779 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
5780 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5781 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
5783 total_size = d + tregs_space;
5784 total_size += rounded_frame_size (total_size);
5785 save_size = total_size - frame_size;
5787 /* If adjusting the stack in a single step costs nothing extra, do so.
5788 I.e. either if a single addi is enough, or we need a movi anyway,
5789 and we don't exceed the maximum offset range (the test for the
5790 latter is conservative for simplicity). */
5791 if (TARGET_SHMEDIA
5792 && ! frame_pointer_needed
5793 && (CONST_OK_FOR_I10 (total_size)
5794 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
5795 && total_size <= 2044)))
5796 d_rounding = frame_size;
5798 frame_size -= d_rounding;
5801 if (frame_pointer_needed)
5803 output_stack_adjust (frame_size, frame_pointer_rtx, e, &live_regs_mask);
5805 /* We must avoid moving the stack pointer adjustment past code
5806 which reads from the local frame, else an interrupt could
5807 occur after the SP adjustment and clobber data in the local
5808 frame. */
5809 emit_insn (gen_blockage ());
5810 emit_insn (GEN_MOV (stack_pointer_rtx, frame_pointer_rtx));
5812 else if (frame_size)
5814 /* We must avoid moving the stack pointer adjustment past code
5815 which reads from the local frame, else an interrupt could
5816 occur after the SP adjustment and clobber data in the local
5817 frame. */
5818 emit_insn (gen_blockage ());
5819 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
5822 if (SHMEDIA_REGS_STACK_ADJUST ())
5824 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5825 function_symbol (TARGET_FPU_ANY
5826 ? "__GCC_pop_shmedia_regs"
5827 : "__GCC_pop_shmedia_regs_nofpu"));
5828 /* This must NOT go through the PLT, otherwise mach and macl
5829 may be clobbered. */
5830 emit_insn (gen_shmedia_save_restore_regs_compact
5831 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
5834 /* Pop all the registers. */
5836 if (target_flags != save_flags && ! current_function_interrupt)
5837 emit_insn (gen_toggle_sz ());
5838 if (TARGET_SH5)
5840 int offset_base, offset;
5841 int offset_in_r0 = -1;
5842 int sp_in_r0 = 0;
5843 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
5844 save_schedule schedule;
5845 save_entry *entry;
5846 int *tmp_pnt;
5848 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
5849 offset_base = -entry[1].offset + d_rounding;
5850 tmp_pnt = schedule.temps;
5851 for (; entry->mode != VOIDmode; entry--)
5853 enum machine_mode mode = entry->mode;
5854 int reg = entry->reg;
5855 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
5857 offset = offset_base + entry->offset;
5858 reg_rtx = gen_rtx_REG (mode, reg);
5860 mem_rtx = gen_rtx_MEM (mode,
5861 gen_rtx_PLUS (Pmode,
5862 stack_pointer_rtx,
5863 GEN_INT (offset)));
5865 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
5867 mem_rtx = NULL_RTX;
5869 try_post_inc:
5871 if (HAVE_POST_INCREMENT
5872 && (offset == offset_in_r0
5873 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
5874 && mem_rtx == NULL_RTX)
5875 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5877 post_inc = gen_rtx_MEM (mode,
5878 gen_rtx_POST_INC (Pmode, r0));
5880 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
5881 post_inc_ok);
5883 post_inc = NULL_RTX;
5885 break;
5887 post_inc_ok:
5888 mem_rtx = NULL_RTX;
5890 while (0);
5892 if (mem_rtx != NULL_RTX)
5893 goto addr_ok;
5895 if (offset_in_r0 == -1)
5897 emit_move_insn (r0, GEN_INT (offset));
5898 offset_in_r0 = offset;
5900 else if (offset != offset_in_r0)
5902 emit_move_insn (r0,
5903 gen_rtx_PLUS
5904 (Pmode, r0,
5905 GEN_INT (offset - offset_in_r0)));
5906 offset_in_r0 += offset - offset_in_r0;
5909 if (post_inc != NULL_RTX)
5911 if (! sp_in_r0)
5913 emit_move_insn (r0,
5914 gen_rtx_PLUS
5915 (Pmode, r0, stack_pointer_rtx));
5916 sp_in_r0 = 1;
5919 mem_rtx = post_inc;
5921 offset_in_r0 += GET_MODE_SIZE (mode);
5923 else if (sp_in_r0)
5924 mem_rtx = gen_rtx_MEM (mode, r0);
5925 else
5926 mem_rtx = gen_rtx_MEM (mode,
5927 gen_rtx_PLUS (Pmode,
5928 stack_pointer_rtx,
5929 r0));
5931 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5932 && mem_rtx != post_inc)
5933 abort ();
5935 addr_ok:
5936 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5937 && mem_rtx != post_inc)
5939 insn = emit_move_insn (r0, mem_rtx);
5940 mem_rtx = r0;
5942 else if (TARGET_REGISTER_P (reg))
5944 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
5946 /* Give the scheduler a bit of freedom by using up to
5947 MAX_TEMPS registers in a round-robin fashion. */
5948 insn = emit_move_insn (tmp_reg, mem_rtx);
5949 mem_rtx = tmp_reg;
5950 if (*++tmp_pnt < 0)
5951 tmp_pnt = schedule.temps;
5954 insn = emit_move_insn (reg_rtx, mem_rtx);
5955 if (reg == PR_MEDIA_REG && sh_media_register_for_return () >= 0)
5956 /* This is dead, unless we return with a sibcall. */
5957 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5958 const0_rtx,
5959 REG_NOTES (insn));
5962 if (entry->offset + offset_base != d + d_rounding)
5963 abort ();
5965 else /* ! TARGET_SH5 */
5967 save_size = 0;
5968 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
5969 pop (PR_REG);
5970 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5972 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
5974 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
5975 && hard_regs_intersect_p (&live_regs_mask,
5976 &reg_class_contents[DF_REGS]))
5977 fpscr_deferred = 1;
5978 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
5979 pop (j);
5980 if (j == FIRST_FP_REG && fpscr_deferred)
5981 pop (FPSCR_REG);
5985 if (target_flags != save_flags && ! current_function_interrupt)
5986 emit_insn (gen_toggle_sz ());
5987 target_flags = save_flags;
5989 output_stack_adjust (current_function_pretend_args_size
5990 + save_size + d_rounding
5991 + current_function_args_info.stack_regs * 8,
5992 stack_pointer_rtx, e, NULL);
5994 if (current_function_calls_eh_return)
5995 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
5996 EH_RETURN_STACKADJ_RTX));
5998 /* Switch back to the normal stack if necessary. */
5999 if (sp_switch)
6000 emit_insn (gen_sp_switch_2 ());
6002 /* Tell flow the insn that pops PR isn't dead. */
6003 /* PR_REG will never be live in SHmedia mode, and we don't need to
6004 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6005 by the return pattern. */
6006 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6007 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
6010 static int sh_need_epilogue_known = 0;
6013 sh_need_epilogue (void)
6015 if (! sh_need_epilogue_known)
6017 rtx epilogue;
6019 start_sequence ();
6020 sh_expand_epilogue (0);
6021 epilogue = get_insns ();
6022 end_sequence ();
6023 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
6025 return sh_need_epilogue_known > 0;
6028 /* Emit code to change the current function's return address to RA.
6029 TEMP is available as a scratch register, if needed. */
6031 void
6032 sh_set_return_address (rtx ra, rtx tmp)
6034 HARD_REG_SET live_regs_mask;
6035 int d;
6036 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6037 int pr_offset;
6039 d = calc_live_regs (&live_regs_mask);
6041 /* If pr_reg isn't life, we can set it (or the register given in
6042 sh_media_register_for_return) directly. */
6043 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6045 rtx rr;
6047 if (TARGET_SHMEDIA)
6049 int rr_regno = sh_media_register_for_return ();
6051 if (rr_regno < 0)
6052 rr_regno = pr_reg;
6054 rr = gen_rtx_REG (DImode, rr_regno);
6056 else
6057 rr = gen_rtx_REG (SImode, pr_reg);
6059 emit_insn (GEN_MOV (rr, ra));
6060 /* Tell flow the register for return isn't dead. */
6061 emit_insn (gen_rtx_USE (VOIDmode, rr));
6062 return;
6065 if (TARGET_SH5)
6067 int offset;
6068 save_schedule schedule;
6069 save_entry *entry;
6071 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
6072 offset = entry[1].offset;
6073 for (; entry->mode != VOIDmode; entry--)
6074 if (entry->reg == pr_reg)
6075 goto found;
6077 /* We can't find pr register. */
6078 abort ();
6080 found:
6081 offset = entry->offset - offset;
6082 pr_offset = (rounded_frame_size (d) + offset
6083 + SHMEDIA_REGS_STACK_ADJUST ());
6085 else
6086 pr_offset = rounded_frame_size (d);
6088 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6089 emit_insn (GEN_ADD3 (tmp, tmp, frame_pointer_rtx));
6091 tmp = gen_rtx_MEM (Pmode, tmp);
6092 emit_insn (GEN_MOV (tmp, ra));
6095 /* Clear variables at function end. */
6097 static void
6098 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6099 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6101 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
6102 sh_need_epilogue_known = 0;
6103 sp_switch = NULL_RTX;
6106 static rtx
6107 sh_builtin_saveregs (void)
6109 /* First unnamed integer register. */
6110 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
6111 /* Number of integer registers we need to save. */
6112 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6113 /* First unnamed SFmode float reg */
6114 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6115 /* Number of SFmode float regs to save. */
6116 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6117 rtx regbuf, fpregs;
6118 int bufsize, regno;
6119 HOST_WIDE_INT alias_set;
6121 if (TARGET_SH5)
6123 if (n_intregs)
6125 int pushregs = n_intregs;
6127 while (pushregs < NPARM_REGS (SImode) - 1
6128 && (CALL_COOKIE_INT_REG_GET
6129 (current_function_args_info.call_cookie,
6130 NPARM_REGS (SImode) - pushregs)
6131 == 1))
6133 current_function_args_info.call_cookie
6134 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6135 - pushregs, 1);
6136 pushregs++;
6139 if (pushregs == NPARM_REGS (SImode))
6140 current_function_args_info.call_cookie
6141 |= (CALL_COOKIE_INT_REG (0, 1)
6142 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6143 else
6144 current_function_args_info.call_cookie
6145 |= CALL_COOKIE_STACKSEQ (pushregs);
6147 current_function_pretend_args_size += 8 * n_intregs;
6149 if (TARGET_SHCOMPACT)
6150 return const0_rtx;
6153 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
6155 error ("__builtin_saveregs not supported by this subtarget");
6156 return const0_rtx;
6159 if (TARGET_SHMEDIA)
6160 n_floatregs = 0;
6162 /* Allocate block of memory for the regs. */
6163 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6164 Or can assign_stack_local accept a 0 SIZE argument? */
6165 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
6167 if (TARGET_SHMEDIA)
6168 regbuf = gen_rtx_MEM (BLKmode,
6169 gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
6170 else if (n_floatregs & 1)
6172 rtx addr;
6174 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6175 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
6176 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6177 regbuf = change_address (regbuf, BLKmode, addr);
6179 else
6180 regbuf = assign_stack_local (BLKmode, bufsize, 0);
6181 alias_set = get_varargs_alias_set ();
6182 set_mem_alias_set (regbuf, alias_set);
6184 /* Save int args.
6185 This is optimized to only save the regs that are necessary. Explicitly
6186 named args need not be saved. */
6187 if (n_intregs > 0)
6188 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
6189 adjust_address (regbuf, BLKmode,
6190 n_floatregs * UNITS_PER_WORD),
6191 n_intregs);
6193 if (TARGET_SHMEDIA)
6194 /* Return the address of the regbuf. */
6195 return XEXP (regbuf, 0);
6197 /* Save float args.
6198 This is optimized to only save the regs that are necessary. Explicitly
6199 named args need not be saved.
6200 We explicitly build a pointer to the buffer because it halves the insn
6201 count when not optimizing (otherwise the pointer is built for each reg
6202 saved).
6203 We emit the moves in reverse order so that we can use predecrement. */
6205 fpregs = gen_reg_rtx (Pmode);
6206 emit_move_insn (fpregs, XEXP (regbuf, 0));
6207 emit_insn (gen_addsi3 (fpregs, fpregs,
6208 GEN_INT (n_floatregs * UNITS_PER_WORD)));
6209 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
6211 rtx mem;
6212 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
6214 emit_insn (gen_addsi3 (fpregs, fpregs,
6215 GEN_INT (-2 * UNITS_PER_WORD)));
6216 mem = gen_rtx_MEM (DFmode, fpregs);
6217 set_mem_alias_set (mem, alias_set);
6218 emit_move_insn (mem,
6219 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
6221 regno = first_floatreg;
6222 if (regno & 1)
6224 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6225 mem = gen_rtx_MEM (SFmode, fpregs);
6226 set_mem_alias_set (mem, alias_set);
6227 emit_move_insn (mem,
6228 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
6229 - (TARGET_LITTLE_ENDIAN != 0)));
6232 else
6233 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
6235 rtx mem;
6237 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6238 mem = gen_rtx_MEM (SFmode, fpregs);
6239 set_mem_alias_set (mem, alias_set);
6240 emit_move_insn (mem,
6241 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
6244 /* Return the address of the regbuf. */
6245 return XEXP (regbuf, 0);
6248 /* Define the `__builtin_va_list' type for the ABI. */
6250 static tree
6251 sh_build_builtin_va_list (void)
6253 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6254 tree record;
6256 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
6257 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6258 return ptr_type_node;
6260 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6262 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
6263 ptr_type_node);
6264 f_next_o_limit = build_decl (FIELD_DECL,
6265 get_identifier ("__va_next_o_limit"),
6266 ptr_type_node);
6267 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
6268 ptr_type_node);
6269 f_next_fp_limit = build_decl (FIELD_DECL,
6270 get_identifier ("__va_next_fp_limit"),
6271 ptr_type_node);
6272 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
6273 ptr_type_node);
6275 DECL_FIELD_CONTEXT (f_next_o) = record;
6276 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
6277 DECL_FIELD_CONTEXT (f_next_fp) = record;
6278 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
6279 DECL_FIELD_CONTEXT (f_next_stack) = record;
6281 TYPE_FIELDS (record) = f_next_o;
6282 TREE_CHAIN (f_next_o) = f_next_o_limit;
6283 TREE_CHAIN (f_next_o_limit) = f_next_fp;
6284 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
6285 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
6287 layout_type (record);
6289 return record;
6292 /* Implement `va_start' for varargs and stdarg. */
6294 void
6295 sh_va_start (tree valist, rtx nextarg)
6297 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6298 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6299 tree t, u;
6300 int nfp, nint;
6302 if (TARGET_SH5)
6304 expand_builtin_saveregs ();
6305 std_expand_builtin_va_start (valist, nextarg);
6306 return;
6309 if ((! TARGET_SH2E && ! TARGET_SH4)
6310 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6312 std_expand_builtin_va_start (valist, nextarg);
6313 return;
6316 f_next_o = TYPE_FIELDS (va_list_type_node);
6317 f_next_o_limit = TREE_CHAIN (f_next_o);
6318 f_next_fp = TREE_CHAIN (f_next_o_limit);
6319 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6320 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6322 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6323 NULL_TREE);
6324 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6325 valist, f_next_o_limit, NULL_TREE);
6326 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
6327 NULL_TREE);
6328 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6329 valist, f_next_fp_limit, NULL_TREE);
6330 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6331 valist, f_next_stack, NULL_TREE);
6333 /* Call __builtin_saveregs. */
6334 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
6335 t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
6336 TREE_SIDE_EFFECTS (t) = 1;
6337 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6339 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
6340 if (nfp < 8)
6341 nfp = 8 - nfp;
6342 else
6343 nfp = 0;
6344 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6345 build_int_cst (NULL_TREE, UNITS_PER_WORD * nfp, 0)));
6346 t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
6347 TREE_SIDE_EFFECTS (t) = 1;
6348 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6350 t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
6351 TREE_SIDE_EFFECTS (t) = 1;
6352 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6354 nint = current_function_args_info.arg_count[SH_ARG_INT];
6355 if (nint < 4)
6356 nint = 4 - nint;
6357 else
6358 nint = 0;
6359 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6360 build_int_cst (NULL_TREE, UNITS_PER_WORD * nint, 0)));
6361 t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
6362 TREE_SIDE_EFFECTS (t) = 1;
6363 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6365 u = make_tree (ptr_type_node, nextarg);
6366 t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
6367 TREE_SIDE_EFFECTS (t) = 1;
6368 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6371 /* Implement `va_arg'. */
6373 static tree
6374 sh_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p,
6375 tree *post_p ATTRIBUTE_UNUSED)
6377 HOST_WIDE_INT size, rsize;
6378 tree tmp, pptr_type_node;
6379 tree addr, lab_over, result = NULL;
6380 int pass_by_ref = pass_by_reference (NULL, TYPE_MODE (type), type, false);
6382 if (pass_by_ref)
6383 type = build_pointer_type (type);
6385 size = int_size_in_bytes (type);
6386 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6387 pptr_type_node = build_pointer_type (ptr_type_node);
6389 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
6390 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
6392 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6393 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6394 int pass_as_float;
6395 tree lab_false;
6397 f_next_o = TYPE_FIELDS (va_list_type_node);
6398 f_next_o_limit = TREE_CHAIN (f_next_o);
6399 f_next_fp = TREE_CHAIN (f_next_o_limit);
6400 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6401 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6403 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6404 NULL_TREE);
6405 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6406 valist, f_next_o_limit, NULL_TREE);
6407 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
6408 valist, f_next_fp, NULL_TREE);
6409 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6410 valist, f_next_fp_limit, NULL_TREE);
6411 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6412 valist, f_next_stack, NULL_TREE);
6414 /* Structures with a single member with a distinct mode are passed
6415 like their member. This is relevant if the latter has a REAL_TYPE
6416 or COMPLEX_TYPE type. */
6417 if (TREE_CODE (type) == RECORD_TYPE
6418 && TYPE_FIELDS (type)
6419 && TREE_CODE (TYPE_FIELDS (type)) == FIELD_DECL
6420 && (TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == REAL_TYPE
6421 || TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == COMPLEX_TYPE)
6422 && TREE_CHAIN (TYPE_FIELDS (type)) == NULL_TREE)
6423 type = TREE_TYPE (TYPE_FIELDS (type));
6425 if (TARGET_SH4)
6427 pass_as_float = ((TREE_CODE (type) == REAL_TYPE && size <= 8)
6428 || (TREE_CODE (type) == COMPLEX_TYPE
6429 && TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
6430 && size <= 16));
6432 else
6434 pass_as_float = (TREE_CODE (type) == REAL_TYPE && size == 4);
6437 addr = create_tmp_var (pptr_type_node, NULL);
6438 lab_false = create_artificial_label ();
6439 lab_over = create_artificial_label ();
6441 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
6443 if (pass_as_float)
6445 int first_floatreg
6446 = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6447 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6449 tmp = build (GE_EXPR, boolean_type_node, next_fp, next_fp_limit);
6450 tmp = build (COND_EXPR, void_type_node, tmp,
6451 build (GOTO_EXPR, void_type_node, lab_false),
6452 NULL);
6453 gimplify_and_add (tmp, pre_p);
6455 if (TYPE_ALIGN (type) > BITS_PER_WORD
6456 || (((TREE_CODE (type) == REAL_TYPE && size == 8) || size == 16)
6457 && (n_floatregs & 1)))
6459 tmp = fold_convert (ptr_type_node, size_int (UNITS_PER_WORD));
6460 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp, tmp);
6461 tmp = build (PLUS_EXPR, ptr_type_node, next_fp, tmp);
6462 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp, tmp);
6463 gimplify_and_add (tmp, pre_p);
6466 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
6467 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6468 gimplify_and_add (tmp, pre_p);
6470 #ifdef FUNCTION_ARG_SCmode_WART
6471 if (TYPE_MODE (type) == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
6473 tree subtype = TREE_TYPE (type);
6474 tree real, imag;
6476 imag = std_gimplify_va_arg_expr (valist, subtype, pre_p, NULL);
6477 imag = get_initialized_tmp_var (imag, pre_p, NULL);
6479 real = std_gimplify_va_arg_expr (valist, subtype, pre_p, NULL);
6480 real = get_initialized_tmp_var (real, pre_p, NULL);
6482 result = build (COMPLEX_EXPR, type, real, imag);
6483 result = get_initialized_tmp_var (result, pre_p, NULL);
6485 #endif /* FUNCTION_ARG_SCmode_WART */
6487 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6488 gimplify_and_add (tmp, pre_p);
6490 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6491 gimplify_and_add (tmp, pre_p);
6493 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6494 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6495 gimplify_and_add (tmp, pre_p);
6497 else
6499 tmp = fold_convert (ptr_type_node, size_int (rsize));
6500 tmp = build (PLUS_EXPR, ptr_type_node, next_o, tmp);
6501 tmp = build (GT_EXPR, boolean_type_node, tmp, next_o_limit);
6502 tmp = build (COND_EXPR, void_type_node, tmp,
6503 build (GOTO_EXPR, void_type_node, lab_false),
6504 NULL);
6505 gimplify_and_add (tmp, pre_p);
6507 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
6508 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6509 gimplify_and_add (tmp, pre_p);
6511 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6512 gimplify_and_add (tmp, pre_p);
6514 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6515 gimplify_and_add (tmp, pre_p);
6517 if (size > 4 && ! TARGET_SH4)
6519 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
6520 gimplify_and_add (tmp, pre_p);
6523 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6524 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6525 gimplify_and_add (tmp, pre_p);
6528 if (!result)
6530 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6531 gimplify_and_add (tmp, pre_p);
6535 /* ??? In va-sh.h, there had been code to make values larger than
6536 size 8 indirect. This does not match the FUNCTION_ARG macros. */
6538 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
6539 if (result)
6541 tmp = build (MODIFY_EXPR, void_type_node, result, tmp);
6542 gimplify_and_add (tmp, pre_p);
6544 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6545 gimplify_and_add (tmp, pre_p);
6547 else
6548 result = tmp;
6550 if (pass_by_ref)
6551 result = build_fold_indirect_ref (result);
6553 return result;
6556 bool
6557 sh_promote_prototypes (tree type)
6559 if (TARGET_HITACHI)
6560 return 0;
6561 if (! type)
6562 return 1;
6563 return ! sh_attr_renesas_p (type);
6566 /* Whether an argument must be passed by reference. On SHcompact, we
6567 pretend arguments wider than 32-bits that would have been passed in
6568 registers are passed by reference, so that an SHmedia trampoline
6569 loads them into the full 64-bits registers. */
6571 static int
6572 shcompact_byref (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6573 tree type, bool named)
6575 unsigned HOST_WIDE_INT size;
6577 if (type)
6578 size = int_size_in_bytes (type);
6579 else
6580 size = GET_MODE_SIZE (mode);
6582 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
6583 && (!named
6584 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
6585 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
6586 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
6587 && size > 4
6588 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
6589 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
6590 return size;
6591 else
6592 return 0;
6595 static bool
6596 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6597 tree type, bool named)
6599 if (targetm.calls.must_pass_in_stack (mode, type))
6600 return true;
6602 if (TARGET_SHCOMPACT)
6604 cum->byref = shcompact_byref (cum, mode, type, named);
6605 return cum->byref != 0;
6608 return false;
6611 /* Define where to put the arguments to a function.
6612 Value is zero to push the argument on the stack,
6613 or a hard register in which to store the argument.
6615 MODE is the argument's machine mode.
6616 TYPE is the data type of the argument (as a tree).
6617 This is null for libcalls where that information may
6618 not be available.
6619 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6620 the preceding args and about the function being called.
6621 NAMED is nonzero if this argument is a named parameter
6622 (otherwise it is an extra parameter matching an ellipsis).
6624 On SH the first args are normally in registers
6625 and the rest are pushed. Any arg that starts within the first
6626 NPARM_REGS words is at least partially passed in a register unless
6627 its data type forbids. */
6631 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
6632 tree type, int named)
6634 if (! TARGET_SH5 && mode == VOIDmode)
6635 return GEN_INT (ca->renesas_abi ? 1 : 0);
6637 if (! TARGET_SH5
6638 && PASS_IN_REG_P (*ca, mode, type)
6639 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
6641 int regno;
6643 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
6644 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
6646 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
6647 gen_rtx_REG (SFmode,
6648 BASE_ARG_REG (mode)
6649 + (ROUND_REG (*ca, mode) ^ 1)),
6650 const0_rtx);
6651 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
6652 gen_rtx_REG (SFmode,
6653 BASE_ARG_REG (mode)
6654 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
6655 GEN_INT (4));
6656 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
6659 /* If the alignment of a DF value causes an SF register to be
6660 skipped, we will use that skipped register for the next SF
6661 value. */
6662 if ((TARGET_HITACHI || ca->renesas_abi)
6663 && ca->free_single_fp_reg
6664 && mode == SFmode)
6665 return gen_rtx_REG (mode, ca->free_single_fp_reg);
6667 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
6668 ^ (mode == SFmode && TARGET_SH4
6669 && TARGET_LITTLE_ENDIAN != 0
6670 && ! TARGET_HITACHI && ! ca->renesas_abi);
6671 return gen_rtx_REG (mode, regno);
6675 if (TARGET_SH5)
6677 if (mode == VOIDmode && TARGET_SHCOMPACT)
6678 return GEN_INT (ca->call_cookie);
6680 /* The following test assumes unnamed arguments are promoted to
6681 DFmode. */
6682 if (mode == SFmode && ca->free_single_fp_reg)
6683 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
6685 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
6686 && (named || ! ca->prototype_p)
6687 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
6689 if (! ca->prototype_p && TARGET_SHMEDIA)
6690 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
6692 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
6693 FIRST_FP_PARM_REG
6694 + ca->arg_count[(int) SH_ARG_FLOAT]);
6697 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
6698 && (! TARGET_SHCOMPACT
6699 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
6700 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
6701 type, named))))
6703 return gen_rtx_REG (mode, (FIRST_PARM_REG
6704 + ca->arg_count[(int) SH_ARG_INT]));
6707 return 0;
6710 return 0;
6713 /* Update the data in CUM to advance over an argument
6714 of mode MODE and data type TYPE.
6715 (TYPE is null for libcalls where that information may not be
6716 available.) */
6718 void
6719 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
6720 tree type, int named)
6722 if (ca->force_mem)
6723 ca->force_mem = 0;
6724 else if (TARGET_SH5)
6726 tree type2 = (ca->byref && type
6727 ? TREE_TYPE (type)
6728 : type);
6729 enum machine_mode mode2 = (ca->byref && type
6730 ? TYPE_MODE (type2)
6731 : mode);
6732 int dwords = ((ca->byref
6733 ? ca->byref
6734 : mode2 == BLKmode
6735 ? int_size_in_bytes (type2)
6736 : GET_MODE_SIZE (mode2)) + 7) / 8;
6737 int numregs = MIN (dwords, NPARM_REGS (SImode)
6738 - ca->arg_count[(int) SH_ARG_INT]);
6740 if (numregs)
6742 ca->arg_count[(int) SH_ARG_INT] += numregs;
6743 if (TARGET_SHCOMPACT
6744 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
6746 ca->call_cookie
6747 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6748 - numregs, 1);
6749 /* N.B. We want this also for outgoing. */
6750 ca->stack_regs += numregs;
6752 else if (ca->byref)
6754 if (! ca->outgoing)
6755 ca->stack_regs += numregs;
6756 ca->byref_regs += numregs;
6757 ca->byref = 0;
6759 ca->call_cookie
6760 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6761 - numregs, 2);
6762 while (--numregs);
6763 ca->call_cookie
6764 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6765 - 1, 1);
6767 else if (dwords > numregs)
6769 int pushregs = numregs;
6771 if (TARGET_SHCOMPACT)
6772 ca->stack_regs += numregs;
6773 while (pushregs < NPARM_REGS (SImode) - 1
6774 && (CALL_COOKIE_INT_REG_GET
6775 (ca->call_cookie,
6776 NPARM_REGS (SImode) - pushregs)
6777 == 1))
6779 ca->call_cookie
6780 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6781 - pushregs, 1);
6782 pushregs++;
6784 if (numregs == NPARM_REGS (SImode))
6785 ca->call_cookie
6786 |= CALL_COOKIE_INT_REG (0, 1)
6787 | CALL_COOKIE_STACKSEQ (numregs - 1);
6788 else
6789 ca->call_cookie
6790 |= CALL_COOKIE_STACKSEQ (numregs);
6793 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
6794 && (named || ! ca->prototype_p))
6796 if (mode2 == SFmode && ca->free_single_fp_reg)
6797 ca->free_single_fp_reg = 0;
6798 else if (ca->arg_count[(int) SH_ARG_FLOAT]
6799 < NPARM_REGS (SFmode))
6801 int numfpregs
6802 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
6803 NPARM_REGS (SFmode)
6804 - ca->arg_count[(int) SH_ARG_FLOAT]);
6806 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
6808 if (TARGET_SHCOMPACT && ! ca->prototype_p)
6810 if (ca->outgoing && numregs > 0)
6813 ca->call_cookie
6814 |= (CALL_COOKIE_INT_REG
6815 (ca->arg_count[(int) SH_ARG_INT]
6816 - numregs + ((numfpregs - 2) / 2),
6817 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
6818 - numfpregs) / 2));
6820 while (numfpregs -= 2);
6822 else if (mode2 == SFmode && (named)
6823 && (ca->arg_count[(int) SH_ARG_FLOAT]
6824 < NPARM_REGS (SFmode)))
6825 ca->free_single_fp_reg
6826 = FIRST_FP_PARM_REG - numfpregs
6827 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
6830 return;
6833 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
6835 /* Note that we've used the skipped register. */
6836 if (mode == SFmode && ca->free_single_fp_reg)
6838 ca->free_single_fp_reg = 0;
6839 return;
6841 /* When we have a DF after an SF, there's an SF register that get
6842 skipped in order to align the DF value. We note this skipped
6843 register, because the next SF value will use it, and not the
6844 SF that follows the DF. */
6845 if (mode == DFmode
6846 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
6848 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
6849 + BASE_ARG_REG (mode));
6853 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
6854 || PASS_IN_REG_P (*ca, mode, type))
6855 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
6856 = (ROUND_REG (*ca, mode)
6857 + (mode == BLKmode
6858 ? ROUND_ADVANCE (int_size_in_bytes (type))
6859 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
6862 /* The Renesas calling convention doesn't quite fit into this scheme since
6863 the address is passed like an invisible argument, but one that is always
6864 passed in memory. */
6865 static rtx
6866 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
6868 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
6869 return 0;
6870 return gen_rtx_REG (Pmode, 2);
6873 /* Worker function for TARGET_RETURN_IN_MEMORY. */
6875 static bool
6876 sh_return_in_memory (tree type, tree fndecl)
6878 if (TARGET_SH5)
6880 if (TYPE_MODE (type) == BLKmode)
6881 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
6882 else
6883 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
6885 else
6887 return (TYPE_MODE (type) == BLKmode
6888 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
6889 && TREE_CODE (type) == RECORD_TYPE));
6893 /* We actually emit the code in sh_expand_prologue. We used to use
6894 a static variable to flag that we need to emit this code, but that
6895 doesn't when inlining, when functions are deferred and then emitted
6896 later. Fortunately, we already have two flags that are part of struct
6897 function that tell if a function uses varargs or stdarg. */
6898 static void
6899 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
6900 enum machine_mode mode,
6901 tree type,
6902 int *pretend_arg_size,
6903 int second_time ATTRIBUTE_UNUSED)
6905 if (! current_function_stdarg)
6906 abort ();
6907 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6909 int named_parm_regs, anon_parm_regs;
6911 named_parm_regs = (ROUND_REG (*ca, mode)
6912 + (mode == BLKmode
6913 ? ROUND_ADVANCE (int_size_in_bytes (type))
6914 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
6915 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
6916 if (anon_parm_regs > 0)
6917 *pretend_arg_size = anon_parm_regs * 4;
6921 static bool
6922 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
6924 return TARGET_SH5;
6927 static bool
6928 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
6930 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
6934 /* Define the offset between two registers, one to be eliminated, and
6935 the other its replacement, at the start of a routine. */
6938 initial_elimination_offset (int from, int to)
6940 int regs_saved;
6941 int regs_saved_rounding = 0;
6942 int total_saved_regs_space;
6943 int total_auto_space;
6944 int save_flags = target_flags;
6945 int copy_flags;
6946 HARD_REG_SET live_regs_mask;
6948 shmedia_space_reserved_for_target_registers = false;
6949 regs_saved = calc_live_regs (&live_regs_mask);
6950 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
6952 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
6954 shmedia_space_reserved_for_target_registers = true;
6955 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
6958 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
6959 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6960 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
6962 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
6963 copy_flags = target_flags;
6964 target_flags = save_flags;
6966 total_saved_regs_space = regs_saved + regs_saved_rounding;
6968 if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
6969 return total_saved_regs_space + total_auto_space
6970 + current_function_args_info.byref_regs * 8;
6972 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
6973 return total_saved_regs_space + total_auto_space
6974 + current_function_args_info.byref_regs * 8;
6976 /* Initial gap between fp and sp is 0. */
6977 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
6978 return 0;
6980 if (from == RETURN_ADDRESS_POINTER_REGNUM
6981 && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM))
6983 if (TARGET_SH5)
6985 int n = total_saved_regs_space;
6986 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6987 save_schedule schedule;
6988 save_entry *entry;
6990 n += total_auto_space;
6992 /* If it wasn't saved, there's not much we can do. */
6993 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6994 return n;
6996 target_flags = copy_flags;
6998 sh5_schedule_saves (&live_regs_mask, &schedule, n);
6999 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7000 if (entry->reg == pr_reg)
7002 target_flags = save_flags;
7003 return entry->offset;
7005 abort ();
7007 else
7008 return total_auto_space;
7011 abort ();
7014 /* Handle machine specific pragmas to be semi-compatible with Renesas
7015 compiler. */
7017 void
7018 sh_pr_interrupt (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7020 pragma_interrupt = 1;
7023 void
7024 sh_pr_trapa (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7026 pragma_interrupt = pragma_trapa = 1;
7029 void
7030 sh_pr_nosave_low_regs (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7032 pragma_nosave_low_regs = 1;
7035 /* Generate 'handle_interrupt' attribute for decls */
7037 static void
7038 sh_insert_attributes (tree node, tree *attributes)
7040 if (! pragma_interrupt
7041 || TREE_CODE (node) != FUNCTION_DECL)
7042 return;
7044 /* We are only interested in fields. */
7045 if (TREE_CODE_CLASS (TREE_CODE (node)) != 'd')
7046 return;
7048 /* Add a 'handle_interrupt' attribute. */
7049 * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
7051 return;
7054 /* Supported attributes:
7056 interrupt_handler -- specifies this function is an interrupt handler.
7058 sp_switch -- specifies an alternate stack for an interrupt handler
7059 to run on.
7061 trap_exit -- use a trapa to exit an interrupt function instead of
7062 an rte instruction.
7064 renesas -- use Renesas calling/layout conventions (functions and
7065 structures).
7069 const struct attribute_spec sh_attribute_table[] =
7071 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
7072 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7073 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
7074 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
7075 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
7076 #ifdef SYMBIAN
7077 /* Symbian support adds three new attributes:
7078 dllexport - for exporting a function/variable that will live in a dll
7079 dllimport - for importing a function/variable from a dll
7081 Microsoft allows multiple declspecs in one __declspec, separating
7082 them with spaces. We do NOT support this. Instead, use __declspec
7083 multiple times. */
7084 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7085 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7086 #endif
7087 { NULL, 0, 0, false, false, false, NULL }
7090 /* Handle an "interrupt_handler" attribute; arguments as in
7091 struct attribute_spec.handler. */
7092 static tree
7093 sh_handle_interrupt_handler_attribute (tree *node, tree name,
7094 tree args ATTRIBUTE_UNUSED,
7095 int flags ATTRIBUTE_UNUSED,
7096 bool *no_add_attrs)
7098 if (TREE_CODE (*node) != FUNCTION_DECL)
7100 warning ("`%s' attribute only applies to functions",
7101 IDENTIFIER_POINTER (name));
7102 *no_add_attrs = true;
7104 else if (TARGET_SHCOMPACT)
7106 error ("attribute interrupt_handler is not compatible with -m5-compact");
7107 *no_add_attrs = true;
7110 return NULL_TREE;
7113 /* Handle an "sp_switch" attribute; arguments as in
7114 struct attribute_spec.handler. */
7115 static tree
7116 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
7117 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7119 if (TREE_CODE (*node) != FUNCTION_DECL)
7121 warning ("`%s' attribute only applies to functions",
7122 IDENTIFIER_POINTER (name));
7123 *no_add_attrs = true;
7125 else if (!pragma_interrupt)
7127 /* The sp_switch attribute only has meaning for interrupt functions. */
7128 warning ("`%s' attribute only applies to interrupt functions",
7129 IDENTIFIER_POINTER (name));
7130 *no_add_attrs = true;
7132 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
7134 /* The argument must be a constant string. */
7135 warning ("`%s' attribute argument not a string constant",
7136 IDENTIFIER_POINTER (name));
7137 *no_add_attrs = true;
7139 else
7141 sp_switch = gen_rtx_SYMBOL_REF (VOIDmode,
7142 TREE_STRING_POINTER (TREE_VALUE (args)));
7145 return NULL_TREE;
7148 /* Handle an "trap_exit" attribute; arguments as in
7149 struct attribute_spec.handler. */
7150 static tree
7151 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
7152 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7154 if (TREE_CODE (*node) != FUNCTION_DECL)
7156 warning ("`%s' attribute only applies to functions",
7157 IDENTIFIER_POINTER (name));
7158 *no_add_attrs = true;
7160 else if (!pragma_interrupt)
7162 /* The trap_exit attribute only has meaning for interrupt functions. */
7163 warning ("`%s' attribute only applies to interrupt functions",
7164 IDENTIFIER_POINTER (name));
7165 *no_add_attrs = true;
7167 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
7169 /* The argument must be a constant integer. */
7170 warning ("`%s' attribute argument not an integer constant",
7171 IDENTIFIER_POINTER (name));
7172 *no_add_attrs = true;
7174 else
7176 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
7179 return NULL_TREE;
7182 static tree
7183 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
7184 tree name ATTRIBUTE_UNUSED,
7185 tree args ATTRIBUTE_UNUSED,
7186 int flags ATTRIBUTE_UNUSED,
7187 bool *no_add_attrs ATTRIBUTE_UNUSED)
7189 return NULL_TREE;
7192 /* True if __attribute__((renesas)) or -mrenesas. */
7194 sh_attr_renesas_p (tree td)
7196 if (TARGET_HITACHI)
7197 return 1;
7198 if (td == 0)
7199 return 0;
7200 if (DECL_P (td))
7201 td = TREE_TYPE (td);
7202 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
7203 != NULL_TREE);
7206 /* True if __attribute__((renesas)) or -mrenesas, for the current
7207 function. */
7209 sh_cfun_attr_renesas_p (void)
7211 return sh_attr_renesas_p (current_function_decl);
7215 sh_cfun_interrupt_handler_p (void)
7217 return (lookup_attribute ("interrupt_handler",
7218 DECL_ATTRIBUTES (current_function_decl))
7219 != NULL_TREE);
7222 /* ??? target_switches in toplev.c is static, hence we have to duplicate it. */
7223 static const struct
7225 const char *const name;
7226 const int value;
7227 const char *const description;
7229 sh_target_switches[] = TARGET_SWITCHES;
7230 #define target_switches sh_target_switches
7232 /* Like default_pch_valid_p, but take flag_mask into account. */
7233 const char *
7234 sh_pch_valid_p (const void *data_p, size_t len)
7236 const char *data = (const char *)data_p;
7237 const char *flag_that_differs = NULL;
7238 size_t i;
7239 int old_flags;
7240 int flag_mask
7241 = (SH1_BIT | SH2_BIT | SH3_BIT | SH_E_BIT | HARD_SH4_BIT | FPU_SINGLE_BIT
7242 | SH4_BIT | HITACHI_BIT | LITTLE_ENDIAN_BIT);
7244 /* -fpic and -fpie also usually make a PCH invalid. */
7245 if (data[0] != flag_pic)
7246 return _("created and used with different settings of -fpic");
7247 if (data[1] != flag_pie)
7248 return _("created and used with different settings of -fpie");
7249 data += 2;
7251 /* Check target_flags. */
7252 memcpy (&old_flags, data, sizeof (target_flags));
7253 if (((old_flags ^ target_flags) & flag_mask) != 0)
7255 for (i = 0; i < ARRAY_SIZE (target_switches); i++)
7257 int bits;
7259 bits = target_switches[i].value;
7260 if (bits < 0)
7261 bits = -bits;
7262 bits &= flag_mask;
7263 if ((target_flags & bits) != (old_flags & bits))
7265 flag_that_differs = target_switches[i].name;
7266 goto make_message;
7269 abort ();
7271 data += sizeof (target_flags);
7272 len -= sizeof (target_flags);
7274 /* Check string options. */
7275 #ifdef TARGET_OPTIONS
7276 for (i = 0; i < ARRAY_SIZE (target_options); i++)
7278 const char *str = *target_options[i].variable;
7279 size_t l;
7280 if (! str)
7281 str = "";
7282 l = strlen (str) + 1;
7283 if (len < l || memcmp (data, str, l) != 0)
7285 flag_that_differs = target_options[i].prefix;
7286 goto make_message;
7288 data += l;
7289 len -= l;
7291 #endif
7293 return NULL;
7295 make_message:
7297 char *r;
7298 asprintf (&r, _("created and used with differing settings of `-m%s'"),
7299 flag_that_differs);
7300 if (r == NULL)
7301 return _("out of memory");
7302 return r;
7306 /* Predicates used by the templates. */
7308 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
7309 Used only in general_movsrc_operand. */
7312 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7314 switch (REGNO (op))
7316 case PR_REG:
7317 case MACL_REG:
7318 case MACH_REG:
7319 return 1;
7321 return 0;
7324 /* Returns 1 if OP can be source of a simple move operation.
7325 Same as general_operand, but a LABEL_REF is valid, PRE_DEC is
7326 invalid as are subregs of system registers. */
7329 general_movsrc_operand (rtx op, enum machine_mode mode)
7331 if (GET_CODE (op) == MEM)
7333 rtx inside = XEXP (op, 0);
7334 if (GET_CODE (inside) == CONST)
7335 inside = XEXP (inside, 0);
7337 if (GET_CODE (inside) == LABEL_REF)
7338 return 1;
7340 if (GET_CODE (inside) == PLUS
7341 && GET_CODE (XEXP (inside, 0)) == LABEL_REF
7342 && GET_CODE (XEXP (inside, 1)) == CONST_INT)
7343 return 1;
7345 /* Only post inc allowed. */
7346 if (GET_CODE (inside) == PRE_DEC)
7347 return 0;
7350 if ((mode == QImode || mode == HImode)
7351 && (GET_CODE (op) == SUBREG
7352 && GET_CODE (XEXP (op, 0)) == REG
7353 && system_reg_operand (XEXP (op, 0), mode)))
7354 return 0;
7356 return general_operand (op, mode);
7359 /* Returns 1 if OP can be a destination of a move.
7360 Same as general_operand, but no preinc allowed. */
7363 general_movdst_operand (rtx op, enum machine_mode mode)
7365 /* Only pre dec allowed. */
7366 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC)
7367 return 0;
7369 return general_operand (op, mode);
7372 /* Returns 1 if OP is a normal arithmetic register. */
7375 arith_reg_operand (rtx op, enum machine_mode mode)
7377 if (register_operand (op, mode))
7379 int regno;
7381 if (GET_CODE (op) == REG)
7382 regno = REGNO (op);
7383 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7384 regno = REGNO (SUBREG_REG (op));
7385 else
7386 return 1;
7388 return (regno != T_REG && regno != PR_REG
7389 && ! TARGET_REGISTER_P (regno)
7390 && (regno != FPUL_REG || TARGET_SH4)
7391 && regno != MACH_REG && regno != MACL_REG);
7393 return 0;
7396 /* Like above, but for DImode destinations: forbid paradoxical DImode subregs,
7397 because this would lead to missing sign extensions when truncating from
7398 DImode to SImode. */
7400 arith_reg_dest (rtx op, enum machine_mode mode)
7402 if (mode == DImode && GET_CODE (op) == SUBREG
7403 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8)
7404 return 0;
7405 return arith_reg_operand (op, mode);
7409 int_gpr_dest (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7411 enum machine_mode op_mode = GET_MODE (op);
7413 if (GET_MODE_CLASS (op_mode) != MODE_INT
7414 || GET_MODE_SIZE (op_mode) >= UNITS_PER_WORD)
7415 return 0;
7416 if (! reload_completed)
7417 return 0;
7418 return true_regnum (op) <= LAST_GENERAL_REG;
7422 fp_arith_reg_operand (rtx op, enum machine_mode mode)
7424 if (register_operand (op, mode))
7426 int regno;
7428 if (GET_CODE (op) == REG)
7429 regno = REGNO (op);
7430 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7431 regno = REGNO (SUBREG_REG (op));
7432 else
7433 return 1;
7435 return (regno >= FIRST_PSEUDO_REGISTER
7436 || FP_REGISTER_P (regno));
7438 return 0;
7441 /* Returns 1 if OP is a valid source operand for an arithmetic insn. */
7444 arith_operand (rtx op, enum machine_mode mode)
7446 if (arith_reg_operand (op, mode))
7447 return 1;
7449 if (TARGET_SHMEDIA)
7451 /* FIXME: We should be checking whether the CONST_INT fits in a
7452 CONST_OK_FOR_I16 here, but this causes reload_cse to crash when
7453 attempting to transform a sequence of two 64-bit sets of the
7454 same register from literal constants into a set and an add,
7455 when the difference is too wide for an add. */
7456 if (GET_CODE (op) == CONST_INT
7457 || EXTRA_CONSTRAINT_C16 (op))
7458 return 1;
7459 else
7460 return 0;
7462 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I08 (INTVAL (op)))
7463 return 1;
7465 return 0;
7468 /* Returns 1 if OP is a valid source operand for a compare insn. */
7471 arith_reg_or_0_operand (rtx op, enum machine_mode mode)
7473 if (arith_reg_operand (op, mode))
7474 return 1;
7476 if (EXTRA_CONSTRAINT_Z (op))
7477 return 1;
7479 return 0;
7482 /* Return 1 if OP is a valid source operand for an SHmedia operation
7483 that takes either a register or a 6-bit immediate. */
7486 shmedia_6bit_operand (rtx op, enum machine_mode mode)
7488 return (arith_reg_operand (op, mode)
7489 || (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I06 (INTVAL (op))));
7492 /* Returns 1 if OP is a valid source operand for a logical operation. */
7495 logical_operand (rtx op, enum machine_mode mode)
7497 if (arith_reg_operand (op, mode))
7498 return 1;
7500 if (TARGET_SHMEDIA)
7502 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I10 (INTVAL (op)))
7503 return 1;
7504 else
7505 return 0;
7507 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_K08 (INTVAL (op)))
7508 return 1;
7510 return 0;
7514 and_operand (rtx op, enum machine_mode mode)
7516 if (logical_operand (op, mode))
7517 return 1;
7519 /* Check mshflo.l / mshflhi.l opportunities. */
7520 if (TARGET_SHMEDIA
7521 && mode == DImode
7522 && GET_CODE (op) == CONST_INT
7523 && CONST_OK_FOR_J16 (INTVAL (op)))
7524 return 1;
7526 return 0;
7529 /* Nonzero if OP is a floating point value with value 0.0. */
7532 fp_zero_operand (rtx op)
7534 REAL_VALUE_TYPE r;
7536 if (GET_MODE (op) != SFmode)
7537 return 0;
7539 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7540 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
7543 /* Nonzero if OP is a floating point value with value 1.0. */
7546 fp_one_operand (rtx op)
7548 REAL_VALUE_TYPE r;
7550 if (GET_MODE (op) != SFmode)
7551 return 0;
7553 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7554 return REAL_VALUES_EQUAL (r, dconst1);
7557 /* For -m4 and -m4-single-only, mode switching is used. If we are
7558 compiling without -mfmovd, movsf_ie isn't taken into account for
7559 mode switching. We could check in machine_dependent_reorg for
7560 cases where we know we are in single precision mode, but there is
7561 interface to find that out during reload, so we must avoid
7562 choosing an fldi alternative during reload and thus failing to
7563 allocate a scratch register for the constant loading. */
7565 fldi_ok (void)
7567 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
7571 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7573 enum rtx_code code = GET_CODE (op);
7574 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
7578 fpscr_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7580 return (GET_CODE (op) == REG
7581 && (REGNO (op) == FPSCR_REG
7582 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
7583 && !(reload_in_progress || reload_completed)))
7584 && GET_MODE (op) == PSImode);
7588 fpul_operand (rtx op, enum machine_mode mode)
7590 if (TARGET_SHMEDIA)
7591 return fp_arith_reg_operand (op, mode);
7593 return (GET_CODE (op) == REG
7594 && (REGNO (op) == FPUL_REG || REGNO (op) >= FIRST_PSEUDO_REGISTER)
7595 && GET_MODE (op) == mode);
7599 symbol_ref_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7601 return (GET_CODE (op) == SYMBOL_REF);
7604 /* Return the TLS type for TLS symbols, 0 for otherwise. */
7606 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7608 if (GET_CODE (op) != SYMBOL_REF)
7609 return 0;
7610 return SYMBOL_REF_TLS_MODEL (op);
7614 commutative_float_operator (rtx op, enum machine_mode mode)
7616 if (GET_MODE (op) != mode)
7617 return 0;
7618 switch (GET_CODE (op))
7620 case PLUS:
7621 case MULT:
7622 return 1;
7623 default:
7624 break;
7626 return 0;
7630 noncommutative_float_operator (rtx op, enum machine_mode mode)
7632 if (GET_MODE (op) != mode)
7633 return 0;
7634 switch (GET_CODE (op))
7636 case MINUS:
7637 case DIV:
7638 return 1;
7639 default:
7640 break;
7642 return 0;
7646 unary_float_operator (rtx op, enum machine_mode mode)
7648 if (GET_MODE (op) != mode)
7649 return 0;
7650 switch (GET_CODE (op))
7652 case ABS:
7653 case NEG:
7654 case SQRT:
7655 return 1;
7656 default:
7657 break;
7659 return 0;
7663 binary_float_operator (rtx op, enum machine_mode mode)
7665 if (GET_MODE (op) != mode)
7666 return 0;
7667 switch (GET_CODE (op))
7669 case PLUS:
7670 case MINUS:
7671 case MULT:
7672 case DIV:
7673 return 1;
7674 default:
7675 break;
7677 return 0;
7681 binary_logical_operator (rtx op, enum machine_mode mode)
7683 if (GET_MODE (op) != mode)
7684 return 0;
7685 switch (GET_CODE (op))
7687 case IOR:
7688 case AND:
7689 case XOR:
7690 return 1;
7691 default:
7692 break;
7694 return 0;
7698 equality_comparison_operator (rtx op, enum machine_mode mode)
7700 return ((mode == VOIDmode || GET_MODE (op) == mode)
7701 && (GET_CODE (op) == EQ || GET_CODE (op) == NE));
7705 greater_comparison_operator (rtx op, enum machine_mode mode)
7707 if (mode != VOIDmode && GET_MODE (op) == mode)
7708 return 0;
7709 switch (GET_CODE (op))
7711 case GT:
7712 case GE:
7713 case GTU:
7714 case GEU:
7715 return 1;
7716 default:
7717 return 0;
7722 less_comparison_operator (rtx op, enum machine_mode mode)
7724 if (mode != VOIDmode && GET_MODE (op) == mode)
7725 return 0;
7726 switch (GET_CODE (op))
7728 case LT:
7729 case LE:
7730 case LTU:
7731 case LEU:
7732 return 1;
7733 default:
7734 return 0;
7738 /* Accept pseudos and branch target registers. */
7740 target_reg_operand (rtx op, enum machine_mode mode)
7742 if (mode != DImode
7743 || GET_MODE (op) != DImode)
7744 return 0;
7746 if (GET_CODE (op) == SUBREG)
7747 op = XEXP (op, 0);
7749 if (GET_CODE (op) != REG)
7750 return 0;
7752 /* We must protect ourselves from matching pseudos that are virtual
7753 register, because they will eventually be replaced with hardware
7754 registers that aren't branch-target registers. */
7755 if (REGNO (op) > LAST_VIRTUAL_REGISTER
7756 || TARGET_REGISTER_P (REGNO (op)))
7757 return 1;
7759 return 0;
7762 /* Same as target_reg_operand, except that label_refs and symbol_refs
7763 are accepted before reload. */
7765 target_operand (rtx op, enum machine_mode mode)
7767 if (mode != DImode)
7768 return 0;
7770 if ((GET_MODE (op) == DImode || GET_MODE (op) == VOIDmode)
7771 && EXTRA_CONSTRAINT_Csy (op))
7772 return ! reload_completed;
7774 return target_reg_operand (op, mode);
7778 mextr_bit_offset (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7780 HOST_WIDE_INT i;
7782 if (GET_CODE (op) != CONST_INT)
7783 return 0;
7784 i = INTVAL (op);
7785 return i >= 1 * 8 && i <= 7 * 8 && (i & 7) == 0;
7789 extend_reg_operand (rtx op, enum machine_mode mode)
7791 return (GET_CODE (op) == TRUNCATE
7792 ? arith_operand
7793 : arith_reg_operand) (op, mode);
7797 trunc_hi_operand (rtx op, enum machine_mode mode)
7799 enum machine_mode op_mode = GET_MODE (op);
7801 if (op_mode != SImode && op_mode != DImode
7802 && op_mode != V4HImode && op_mode != V2SImode)
7803 return 0;
7804 return extend_reg_operand (op, mode);
7808 extend_reg_or_0_operand (rtx op, enum machine_mode mode)
7810 return (GET_CODE (op) == TRUNCATE
7811 ? arith_operand
7812 : arith_reg_or_0_operand) (op, mode);
7816 general_extend_operand (rtx op, enum machine_mode mode)
7818 return (GET_CODE (op) == TRUNCATE
7819 ? arith_operand
7820 : nonimmediate_operand) (op, mode);
7824 inqhi_operand (rtx op, enum machine_mode mode)
7826 if (GET_CODE (op) != TRUNCATE || mode != GET_MODE (op))
7827 return 0;
7828 op = XEXP (op, 0);
7829 /* Can't use true_regnum here because copy_cost wants to know about
7830 SECONDARY_INPUT_RELOAD_CLASS. */
7831 return GET_CODE (op) == REG && FP_REGISTER_P (REGNO (op));
7835 sh_rep_vec (rtx v, enum machine_mode mode)
7837 int i;
7838 rtx x, y;
7840 if ((GET_CODE (v) != CONST_VECTOR && GET_CODE (v) != PARALLEL)
7841 || (GET_MODE (v) != mode && mode != VOIDmode))
7842 return 0;
7843 i = XVECLEN (v, 0) - 2;
7844 x = XVECEXP (v, 0, i + 1);
7845 if (GET_MODE_UNIT_SIZE (mode) == 1)
7847 y = XVECEXP (v, 0, i);
7848 for (i -= 2; i >= 0; i -= 2)
7849 if (! rtx_equal_p (XVECEXP (v, 0, i + 1), x)
7850 || ! rtx_equal_p (XVECEXP (v, 0, i), y))
7851 return 0;
7853 else
7854 for (; i >= 0; i--)
7855 if (XVECEXP (v, 0, i) != x)
7856 return 0;
7857 return 1;
7860 /* Determine if V is a constant vector matching MODE with only one element
7861 that is not a sign extension. Two byte-sized elements count as one. */
7863 sh_1el_vec (rtx v, enum machine_mode mode)
7865 int unit_size;
7866 int i, last, least, sign_ix;
7867 rtx sign;
7869 if (GET_CODE (v) != CONST_VECTOR
7870 || (GET_MODE (v) != mode && mode != VOIDmode))
7871 return 0;
7872 /* Determine numbers of last and of least significant elements. */
7873 last = XVECLEN (v, 0) - 1;
7874 least = TARGET_LITTLE_ENDIAN ? 0 : last;
7875 if (GET_CODE (XVECEXP (v, 0, least)) != CONST_INT)
7876 return 0;
7877 sign_ix = least;
7878 if (GET_MODE_UNIT_SIZE (mode) == 1)
7879 sign_ix = TARGET_LITTLE_ENDIAN ? 1 : last - 1;
7880 if (GET_CODE (XVECEXP (v, 0, sign_ix)) != CONST_INT)
7881 return 0;
7882 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (v));
7883 sign = (INTVAL (XVECEXP (v, 0, sign_ix)) >> (unit_size * BITS_PER_UNIT - 1)
7884 ? constm1_rtx : const0_rtx);
7885 i = XVECLEN (v, 0) - 1;
7887 if (i != least && i != sign_ix && XVECEXP (v, 0, i) != sign)
7888 return 0;
7889 while (--i);
7890 return 1;
7894 sh_const_vec (rtx v, enum machine_mode mode)
7896 int i;
7898 if (GET_CODE (v) != CONST_VECTOR
7899 || (GET_MODE (v) != mode && mode != VOIDmode))
7900 return 0;
7901 i = XVECLEN (v, 0) - 1;
7902 for (; i >= 0; i--)
7903 if (GET_CODE (XVECEXP (v, 0, i)) != CONST_INT)
7904 return 0;
7905 return 1;
7908 /* Return the destination address of a branch. */
7910 static int
7911 branch_dest (rtx branch)
7913 rtx dest = SET_SRC (PATTERN (branch));
7914 int dest_uid;
7916 if (GET_CODE (dest) == IF_THEN_ELSE)
7917 dest = XEXP (dest, 1);
7918 dest = XEXP (dest, 0);
7919 dest_uid = INSN_UID (dest);
7920 return INSN_ADDRESSES (dest_uid);
7923 /* Return nonzero if REG is not used after INSN.
7924 We assume REG is a reload reg, and therefore does
7925 not live past labels. It may live past calls or jumps though. */
7927 reg_unused_after (rtx reg, rtx insn)
7929 enum rtx_code code;
7930 rtx set;
7932 /* If the reg is set by this instruction, then it is safe for our
7933 case. Disregard the case where this is a store to memory, since
7934 we are checking a register used in the store address. */
7935 set = single_set (insn);
7936 if (set && GET_CODE (SET_DEST (set)) != MEM
7937 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7938 return 1;
7940 while ((insn = NEXT_INSN (insn)))
7942 rtx set;
7943 if (!INSN_P (insn))
7944 continue;
7946 code = GET_CODE (insn);
7948 #if 0
7949 /* If this is a label that existed before reload, then the register
7950 if dead here. However, if this is a label added by reorg, then
7951 the register may still be live here. We can't tell the difference,
7952 so we just ignore labels completely. */
7953 if (code == CODE_LABEL)
7954 return 1;
7955 /* else */
7956 #endif
7958 if (code == JUMP_INSN)
7959 return 0;
7961 /* If this is a sequence, we must handle them all at once.
7962 We could have for instance a call that sets the target register,
7963 and an insn in a delay slot that uses the register. In this case,
7964 we must return 0. */
7965 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
7967 int i;
7968 int retval = 0;
7970 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
7972 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
7973 rtx set = single_set (this_insn);
7975 if (GET_CODE (this_insn) == CALL_INSN)
7976 code = CALL_INSN;
7977 else if (GET_CODE (this_insn) == JUMP_INSN)
7979 if (INSN_ANNULLED_BRANCH_P (this_insn))
7980 return 0;
7981 code = JUMP_INSN;
7984 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7985 return 0;
7986 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7988 if (GET_CODE (SET_DEST (set)) != MEM)
7989 retval = 1;
7990 else
7991 return 0;
7993 if (set == 0
7994 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
7995 return 0;
7997 if (retval == 1)
7998 return 1;
7999 else if (code == JUMP_INSN)
8000 return 0;
8003 set = single_set (insn);
8004 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8005 return 0;
8006 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8007 return GET_CODE (SET_DEST (set)) != MEM;
8008 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
8009 return 0;
8011 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
8012 return 1;
8014 return 1;
8017 #include "ggc.h"
8019 static GTY(()) rtx fpscr_rtx;
8021 get_fpscr_rtx (void)
8023 if (! fpscr_rtx)
8025 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
8026 REG_USERVAR_P (fpscr_rtx) = 1;
8027 mark_user_reg (fpscr_rtx);
8029 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
8030 mark_user_reg (fpscr_rtx);
8031 return fpscr_rtx;
8034 void
8035 emit_sf_insn (rtx pat)
8037 emit_insn (pat);
8040 void
8041 emit_df_insn (rtx pat)
8043 emit_insn (pat);
8046 void
8047 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8049 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8052 void
8053 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8055 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
8056 get_fpscr_rtx ()));
8059 void
8060 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8062 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8065 void
8066 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8068 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
8069 get_fpscr_rtx ()));
8072 /* ??? gcc does flow analysis strictly after common subexpression
8073 elimination. As a result, common subexpression elimination fails
8074 when there are some intervening statements setting the same register.
8075 If we did nothing about this, this would hurt the precision switching
8076 for SH4 badly. There is some cse after reload, but it is unable to
8077 undo the extra register pressure from the unused instructions, and
8078 it cannot remove auto-increment loads.
8080 A C code example that shows this flow/cse weakness for (at least) SH
8081 and sparc (as of gcc ss-970706) is this:
8083 double
8084 f(double a)
8086 double d;
8087 d = 0.1;
8088 a += d;
8089 d = 1.1;
8090 d = 0.1;
8091 a *= d;
8092 return a;
8095 So we add another pass before common subexpression elimination, to
8096 remove assignments that are dead due to a following assignment in the
8097 same basic block. */
8099 static void
8100 mark_use (rtx x, rtx *reg_set_block)
8102 enum rtx_code code;
8104 if (! x)
8105 return;
8106 code = GET_CODE (x);
8107 switch (code)
8109 case REG:
8111 int regno = REGNO (x);
8112 int nregs = (regno < FIRST_PSEUDO_REGISTER
8113 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
8114 : 1);
8117 reg_set_block[regno + nregs - 1] = 0;
8119 while (--nregs);
8120 break;
8122 case SET:
8124 rtx dest = SET_DEST (x);
8126 if (GET_CODE (dest) == SUBREG)
8127 dest = SUBREG_REG (dest);
8128 if (GET_CODE (dest) != REG)
8129 mark_use (dest, reg_set_block);
8130 mark_use (SET_SRC (x), reg_set_block);
8131 break;
8133 case CLOBBER:
8134 break;
8135 default:
8137 const char *fmt = GET_RTX_FORMAT (code);
8138 int i, j;
8139 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8141 if (fmt[i] == 'e')
8142 mark_use (XEXP (x, i), reg_set_block);
8143 else if (fmt[i] == 'E')
8144 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8145 mark_use (XVECEXP (x, i, j), reg_set_block);
8147 break;
8152 static rtx get_free_reg (HARD_REG_SET);
8154 /* This function returns a register to use to load the address to load
8155 the fpscr from. Currently it always returns r1 or r7, but when we are
8156 able to use pseudo registers after combine, or have a better mechanism
8157 for choosing a register, it should be done here. */
8158 /* REGS_LIVE is the liveness information for the point for which we
8159 need this allocation. In some bare-bones exit blocks, r1 is live at the
8160 start. We can even have all of r0..r3 being live:
8161 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8162 INSN before which new insns are placed with will clobber the register
8163 we return. If a basic block consists only of setting the return value
8164 register to a pseudo and using that register, the return value is not
8165 live before or after this block, yet we we'll insert our insns right in
8166 the middle. */
8168 static rtx
8169 get_free_reg (HARD_REG_SET regs_live)
8171 if (! TEST_HARD_REG_BIT (regs_live, 1))
8172 return gen_rtx_REG (Pmode, 1);
8174 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8175 there shouldn't be anything but a jump before the function end. */
8176 if (! TEST_HARD_REG_BIT (regs_live, 7))
8177 return gen_rtx_REG (Pmode, 7);
8179 abort ();
8182 /* This function will set the fpscr from memory.
8183 MODE is the mode we are setting it to. */
8184 void
8185 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8187 enum attr_fp_mode fp_mode = mode;
8188 rtx addr_reg = get_free_reg (regs_live);
8190 if (fp_mode == (enum attr_fp_mode) ACTUAL_NORMAL_MODE (FP_MODE))
8191 emit_insn (gen_fpu_switch1 (addr_reg));
8192 else
8193 emit_insn (gen_fpu_switch0 (addr_reg));
8196 /* Is the given character a logical line separator for the assembler? */
8197 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8198 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
8199 #endif
8202 sh_insn_length_adjustment (rtx insn)
8204 /* Instructions with unfilled delay slots take up an extra two bytes for
8205 the nop in the delay slot. */
8206 if (((GET_CODE (insn) == INSN
8207 && GET_CODE (PATTERN (insn)) != USE
8208 && GET_CODE (PATTERN (insn)) != CLOBBER)
8209 || GET_CODE (insn) == CALL_INSN
8210 || (GET_CODE (insn) == JUMP_INSN
8211 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8212 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8213 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8214 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8215 return 2;
8217 /* SH2e has a bug that prevents the use of annulled branches, so if
8218 the delay slot is not filled, we'll have to put a NOP in it. */
8219 if (sh_cpu == CPU_SH2E
8220 && GET_CODE (insn) == JUMP_INSN
8221 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8222 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8223 && get_attr_type (insn) == TYPE_CBRANCH
8224 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8225 return 2;
8227 /* sh-dsp parallel processing insn take four bytes instead of two. */
8229 if (GET_CODE (insn) == INSN)
8231 int sum = 0;
8232 rtx body = PATTERN (insn);
8233 const char *template;
8234 char c;
8235 int maybe_label = 1;
8237 if (GET_CODE (body) == ASM_INPUT)
8238 template = XSTR (body, 0);
8239 else if (asm_noperands (body) >= 0)
8240 template
8241 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
8242 else
8243 return 0;
8246 int ppi_adjust = 0;
8249 c = *template++;
8250 while (c == ' ' || c == '\t');
8251 /* all sh-dsp parallel-processing insns start with p.
8252 The only non-ppi sh insn starting with p is pref.
8253 The only ppi starting with pr is prnd. */
8254 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
8255 ppi_adjust = 2;
8256 /* The repeat pseudo-insn expands two three insns, a total of
8257 six bytes in size. */
8258 else if ((c == 'r' || c == 'R')
8259 && ! strncasecmp ("epeat", template, 5))
8260 ppi_adjust = 4;
8261 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
8263 /* If this is a label, it is obviously not a ppi insn. */
8264 if (c == ':' && maybe_label)
8266 ppi_adjust = 0;
8267 break;
8269 else if (c == '\'' || c == '"')
8270 maybe_label = 0;
8271 c = *template++;
8273 sum += ppi_adjust;
8274 maybe_label = c != ':';
8276 while (c);
8277 return sum;
8279 return 0;
8282 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8283 isn't protected by a PIC unspec. */
8285 nonpic_symbol_mentioned_p (rtx x)
8287 register const char *fmt;
8288 register int i;
8290 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8291 || GET_CODE (x) == PC)
8292 return 1;
8294 /* We don't want to look into the possible MEM location of a
8295 CONST_DOUBLE, since we're not going to use it, in general. */
8296 if (GET_CODE (x) == CONST_DOUBLE)
8297 return 0;
8299 if (GET_CODE (x) == UNSPEC
8300 && (XINT (x, 1) == UNSPEC_PIC
8301 || XINT (x, 1) == UNSPEC_GOT
8302 || XINT (x, 1) == UNSPEC_GOTOFF
8303 || XINT (x, 1) == UNSPEC_GOTPLT
8304 || XINT (x, 1) == UNSPEC_GOTTPOFF
8305 || XINT (x, 1) == UNSPEC_DTPOFF
8306 || XINT (x, 1) == UNSPEC_PLT))
8307 return 0;
8309 fmt = GET_RTX_FORMAT (GET_CODE (x));
8310 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8312 if (fmt[i] == 'E')
8314 register int j;
8316 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8317 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8318 return 1;
8320 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8321 return 1;
8324 return 0;
8327 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8328 @GOTOFF in `reg'. */
8330 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8331 rtx reg)
8333 if (tls_symbolic_operand (orig, Pmode))
8334 return orig;
8336 if (GET_CODE (orig) == LABEL_REF
8337 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8339 if (reg == 0)
8340 reg = gen_reg_rtx (Pmode);
8342 emit_insn (gen_symGOTOFF2reg (reg, orig));
8343 return reg;
8345 else if (GET_CODE (orig) == SYMBOL_REF)
8347 if (reg == 0)
8348 reg = gen_reg_rtx (Pmode);
8350 emit_insn (gen_symGOT2reg (reg, orig));
8351 return reg;
8353 return orig;
8356 /* Mark the use of a constant in the literal table. If the constant
8357 has multiple labels, make it unique. */
8358 static rtx
8359 mark_constant_pool_use (rtx x)
8361 rtx insn, lab, pattern;
8363 if (x == NULL)
8364 return x;
8366 switch (GET_CODE (x))
8368 case LABEL_REF:
8369 x = XEXP (x, 0);
8370 case CODE_LABEL:
8371 break;
8372 default:
8373 return x;
8376 /* Get the first label in the list of labels for the same constant
8377 and delete another labels in the list. */
8378 lab = x;
8379 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8381 if (GET_CODE (insn) != CODE_LABEL
8382 || LABEL_REFS (insn) != NEXT_INSN (insn))
8383 break;
8384 lab = insn;
8387 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8388 INSN_DELETED_P (insn) = 1;
8390 /* Mark constants in a window. */
8391 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8393 if (GET_CODE (insn) != INSN)
8394 continue;
8396 pattern = PATTERN (insn);
8397 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8398 continue;
8400 switch (XINT (pattern, 1))
8402 case UNSPECV_CONST2:
8403 case UNSPECV_CONST4:
8404 case UNSPECV_CONST8:
8405 XVECEXP (pattern, 0, 1) = const1_rtx;
8406 break;
8407 case UNSPECV_WINDOW_END:
8408 if (XVECEXP (pattern, 0, 0) == x)
8409 return lab;
8410 break;
8411 case UNSPECV_CONST_END:
8412 return lab;
8413 default:
8414 break;
8418 return lab;
8421 /* Return true if it's possible to redirect BRANCH1 to the destination
8422 of an unconditional jump BRANCH2. We only want to do this if the
8423 resulting branch will have a short displacement. */
8425 sh_can_redirect_branch (rtx branch1, rtx branch2)
8427 if (flag_expensive_optimizations && simplejump_p (branch2))
8429 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8430 rtx insn;
8431 int distance;
8433 for (distance = 0, insn = NEXT_INSN (branch1);
8434 insn && distance < 256;
8435 insn = PREV_INSN (insn))
8437 if (insn == dest)
8438 return 1;
8439 else
8440 distance += get_attr_length (insn);
8442 for (distance = 0, insn = NEXT_INSN (branch1);
8443 insn && distance < 256;
8444 insn = NEXT_INSN (insn))
8446 if (insn == dest)
8447 return 1;
8448 else
8449 distance += get_attr_length (insn);
8452 return 0;
8455 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8457 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8458 unsigned int new_reg)
8460 /* Interrupt functions can only use registers that have already been
8461 saved by the prologue, even if they would normally be
8462 call-clobbered. */
8464 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
8465 return 0;
8467 return 1;
8470 /* Function to update the integer COST
8471 based on the relationship between INSN that is dependent on
8472 DEP_INSN through the dependence LINK. The default is to make no
8473 adjustment to COST. This can be used for example to specify to
8474 the scheduler that an output- or anti-dependence does not incur
8475 the same cost as a data-dependence. The return value should be
8476 the new value for COST. */
8477 static int
8478 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8480 rtx reg, use_pat;
8482 if (TARGET_SHMEDIA)
8484 /* On SHmedia, if the dependence is an anti-dependence or
8485 output-dependence, there is no cost. */
8486 if (REG_NOTE_KIND (link) != 0)
8487 cost = 0;
8489 if (get_attr_is_mac_media (insn)
8490 && get_attr_is_mac_media (dep_insn))
8491 cost = 1;
8493 else if (REG_NOTE_KIND (link) == 0)
8495 enum attr_type dep_type, type;
8497 if (recog_memoized (insn) < 0
8498 || recog_memoized (dep_insn) < 0)
8499 return cost;
8501 dep_type = get_attr_type (dep_insn);
8502 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
8503 cost--;
8504 if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
8505 && (type = get_attr_type (insn)) != TYPE_CALL
8506 && type != TYPE_SFUNC)
8507 cost--;
8509 /* The only input for a call that is timing-critical is the
8510 function's address. */
8511 if (GET_CODE(insn) == CALL_INSN)
8513 rtx call = PATTERN (insn);
8515 if (GET_CODE (call) == PARALLEL)
8516 call = XVECEXP (call, 0 ,0);
8517 if (GET_CODE (call) == SET)
8518 call = SET_SRC (call);
8519 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
8520 && ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn))
8521 cost = 0;
8523 /* Likewise, the most timing critical input for an sfuncs call
8524 is the function address. However, sfuncs typically start
8525 using their arguments pretty quickly.
8526 Assume a four cycle delay before they are needed. */
8527 /* All sfunc calls are parallels with at least four components.
8528 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8529 else if (GET_CODE (PATTERN (insn)) == PARALLEL
8530 && XVECLEN (PATTERN (insn), 0) >= 4
8531 && (reg = sfunc_uses_reg (insn)))
8533 if (! reg_set_p (reg, dep_insn))
8534 cost -= 4;
8536 /* When the preceding instruction loads the shift amount of
8537 the following SHAD/SHLD, the latency of the load is increased
8538 by 1 cycle. */
8539 else if (TARGET_SH4
8540 && get_attr_type (insn) == TYPE_DYN_SHIFT
8541 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
8542 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
8543 XEXP (SET_SRC (single_set (insn)),
8544 1)))
8545 cost++;
8546 /* When an LS group instruction with a latency of less than
8547 3 cycles is followed by a double-precision floating-point
8548 instruction, FIPR, or FTRV, the latency of the first
8549 instruction is increased to 3 cycles. */
8550 else if (cost < 3
8551 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
8552 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
8553 cost = 3;
8554 /* The lsw register of a double-precision computation is ready one
8555 cycle earlier. */
8556 else if (reload_completed
8557 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
8558 && (use_pat = single_set (insn))
8559 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
8560 SET_SRC (use_pat)))
8561 cost -= 1;
8563 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
8564 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
8565 cost -= 1;
8567 /* An anti-dependence penalty of two applies if the first insn is a double
8568 precision fadd / fsub / fmul. */
8569 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8570 && recog_memoized (dep_insn) >= 0
8571 && get_attr_type (dep_insn) == TYPE_DFP_ARITH
8572 /* A lot of alleged anti-flow dependences are fake,
8573 so check this one is real. */
8574 && flow_dependent_p (dep_insn, insn))
8575 cost = 2;
8578 return cost;
8581 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
8582 if DEP_INSN is anti-flow dependent on INSN. */
8583 static int
8584 flow_dependent_p (rtx insn, rtx dep_insn)
8586 rtx tmp = PATTERN (insn);
8588 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
8589 return tmp == NULL_RTX;
8592 /* A helper function for flow_dependent_p called through note_stores. */
8593 static void
8594 flow_dependent_p_1 (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
8596 rtx * pinsn = (rtx *) data;
8598 if (*pinsn && reg_referenced_p (x, *pinsn))
8599 *pinsn = NULL_RTX;
8602 /* For use by ALLOCATE_INITIAL_VALUE. Note that sh.md contains some
8603 'special function' patterns (type sfunc) that clobber pr, but that
8604 do not look like function calls to leaf_function_p. Hence we must
8605 do this extra check. */
8607 sh_pr_n_sets (void)
8609 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
8612 /* This function returns "2" to indicate dual issue for the SH4
8613 processor. To be used by the DFA pipeline description. */
8614 static int
8615 sh_issue_rate (void)
8617 if (TARGET_SUPERSCALAR)
8618 return 2;
8619 else
8620 return 1;
8623 /* Functions for ready queue reordering for sched1. */
8625 /* Get weight for mode for a set x. */
8626 static short
8627 find_set_regmode_weight (rtx x, enum machine_mode mode)
8629 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
8630 return 1;
8631 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
8633 if (GET_CODE (SET_DEST (x)) == REG)
8635 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
8636 return 1;
8637 else
8638 return 0;
8640 return 1;
8642 return 0;
8645 /* Get regmode weight for insn. */
8646 static short
8647 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
8649 short reg_weight = 0;
8650 rtx x;
8652 /* Increment weight for each register born here. */
8653 x = PATTERN (insn);
8654 reg_weight += find_set_regmode_weight (x, mode);
8655 if (GET_CODE (x) == PARALLEL)
8657 int j;
8658 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
8660 x = XVECEXP (PATTERN (insn), 0, j);
8661 reg_weight += find_set_regmode_weight (x, mode);
8664 /* Decrement weight for each register that dies here. */
8665 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
8667 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
8669 rtx note = XEXP (x, 0);
8670 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
8671 reg_weight--;
8674 return reg_weight;
8677 /* Calculate regmode weights for all insns of a basic block. */
8678 static void
8679 find_regmode_weight (int b, enum machine_mode mode)
8681 rtx insn, next_tail, head, tail;
8683 get_block_head_tail (b, &head, &tail);
8684 next_tail = NEXT_INSN (tail);
8686 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
8688 /* Handle register life information. */
8689 if (!INSN_P (insn))
8690 continue;
8692 if (mode == SFmode)
8693 INSN_REGMODE_WEIGHT (insn, mode) =
8694 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
8695 else if (mode == SImode)
8696 INSN_REGMODE_WEIGHT (insn, mode) =
8697 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
8701 /* Comparison function for ready queue sorting. */
8702 static int
8703 rank_for_reorder (const void *x, const void *y)
8705 rtx tmp = *(const rtx *) y;
8706 rtx tmp2 = *(const rtx *) x;
8708 /* The insn in a schedule group should be issued the first. */
8709 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
8710 return SCHED_GROUP_P (tmp2) ? 1 : -1;
8712 /* If insns are equally good, sort by INSN_LUID (original insn order), This
8713 minimizes instruction movement, thus minimizing sched's effect on
8714 register pressure. */
8715 return INSN_LUID (tmp) - INSN_LUID (tmp2);
8718 /* Resort the array A in which only element at index N may be out of order. */
8719 static void
8720 swap_reorder (rtx *a, int n)
8722 rtx insn = a[n - 1];
8723 int i = n - 2;
8725 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
8727 a[i + 1] = a[i];
8728 i -= 1;
8730 a[i + 1] = insn;
8733 #define SCHED_REORDER(READY, N_READY) \
8734 do \
8736 if ((N_READY) == 2) \
8737 swap_reorder (READY, N_READY); \
8738 else if ((N_READY) > 2) \
8739 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
8741 while (0)
8743 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
8744 macro. */
8745 static void
8746 ready_reorder (rtx *ready, int nready)
8748 SCHED_REORDER (ready, nready);
8751 /* Calculate regmode weights for all insns of all basic block. */
8752 static void
8753 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
8754 int verbose ATTRIBUTE_UNUSED,
8755 int old_max_uid)
8757 basic_block b;
8759 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
8760 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
8762 FOR_EACH_BB_REVERSE (b)
8764 find_regmode_weight (b->index, SImode);
8765 find_regmode_weight (b->index, SFmode);
8768 CURR_REGMODE_PRESSURE (SImode) = 0;
8769 CURR_REGMODE_PRESSURE (SFmode) = 0;
8773 /* Cleanup. */
8774 static void
8775 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
8776 int verbose ATTRIBUTE_UNUSED)
8778 if (regmode_weight[0])
8780 free (regmode_weight[0]);
8781 regmode_weight[0] = NULL;
8783 if (regmode_weight[1])
8785 free (regmode_weight[1]);
8786 regmode_weight[1] = NULL;
8790 /* Cache the can_issue_more so that we can return it from reorder2. Also,
8791 keep count of register pressures on SImode and SFmode. */
8792 static int
8793 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
8794 int sched_verbose ATTRIBUTE_UNUSED,
8795 rtx insn,
8796 int can_issue_more)
8798 if (GET_CODE (PATTERN (insn)) != USE
8799 && GET_CODE (PATTERN (insn)) != CLOBBER)
8800 cached_can_issue_more = can_issue_more - 1;
8801 else
8802 cached_can_issue_more = can_issue_more;
8804 if (reload_completed)
8805 return cached_can_issue_more;
8807 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
8808 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
8810 return cached_can_issue_more;
8813 static void
8814 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
8815 int verbose ATTRIBUTE_UNUSED,
8816 int veclen ATTRIBUTE_UNUSED)
8818 CURR_REGMODE_PRESSURE (SImode) = 0;
8819 CURR_REGMODE_PRESSURE (SFmode) = 0;
8822 /* Some magic numbers. */
8823 /* Pressure on register r0 can lead to spill failures. so avoid sched1 for
8824 functions that already have high pressure on r0. */
8825 #define R0_MAX_LIFE_REGIONS 2
8826 #define R0_MAX_LIVE_LENGTH 12
8827 /* Register Pressure thresholds for SImode and SFmode registers. */
8828 #define SIMODE_MAX_WEIGHT 5
8829 #define SFMODE_MAX_WEIGHT 10
8831 /* Return true if the pressure is high for MODE. */
8832 static short
8833 high_pressure (enum machine_mode mode)
8835 /* Pressure on register r0 can lead to spill failures. so avoid sched1 for
8836 functions that already have high pressure on r0. */
8837 if ((REG_N_SETS (0) - REG_N_DEATHS (0)) >= R0_MAX_LIFE_REGIONS
8838 && REG_LIVE_LENGTH (0) >= R0_MAX_LIVE_LENGTH)
8839 return 1;
8841 if (mode == SFmode)
8842 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
8843 else
8844 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
8847 /* Reorder ready queue if register pressure is high. */
8848 static int
8849 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
8850 int sched_verbose ATTRIBUTE_UNUSED,
8851 rtx *ready,
8852 int *n_readyp,
8853 int clock_var ATTRIBUTE_UNUSED)
8855 if (reload_completed)
8856 return sh_issue_rate ();
8858 if (high_pressure (SFmode) || high_pressure (SImode))
8860 ready_reorder (ready, *n_readyp);
8863 return sh_issue_rate ();
8866 /* Skip cycles if the current register pressure is high. */
8867 static int
8868 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
8869 int sched_verbose ATTRIBUTE_UNUSED,
8870 rtx *ready ATTRIBUTE_UNUSED,
8871 int *n_readyp ATTRIBUTE_UNUSED,
8872 int clock_var ATTRIBUTE_UNUSED)
8874 if (reload_completed)
8875 return cached_can_issue_more;
8877 if (high_pressure(SFmode) || high_pressure (SImode))
8878 skip_cycles = 1;
8880 return cached_can_issue_more;
8883 /* Skip cycles without sorting the ready queue. This will move insn from
8884 Q->R. If this is the last cycle we are skipping; allow sorting of ready
8885 queue by sh_reorder. */
8887 /* Generally, skipping these many cycles are sufficient for all insns to move
8888 from Q -> R. */
8889 #define MAX_SKIPS 8
8891 static int
8892 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
8893 int sched_verbose ATTRIBUTE_UNUSED,
8894 rtx insn ATTRIBUTE_UNUSED,
8895 int last_clock_var,
8896 int clock_var,
8897 int *sort_p)
8899 if (reload_completed)
8900 return 0;
8902 if (skip_cycles)
8904 if ((clock_var - last_clock_var) < MAX_SKIPS)
8906 *sort_p = 0;
8907 return 1;
8909 /* If this is the last cycle we are skipping, allow reordering of R. */
8910 if ((clock_var - last_clock_var) == MAX_SKIPS)
8912 *sort_p = 1;
8913 return 1;
8917 skip_cycles = 0;
8919 return 0;
8922 /* SHmedia requires registers for branches, so we can't generate new
8923 branches past reload. */
8924 static bool
8925 sh_cannot_modify_jumps_p (void)
8927 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
8930 static int
8931 sh_target_reg_class (void)
8933 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
8936 static bool
8937 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
8939 return (shmedia_space_reserved_for_target_registers
8940 && (! after_prologue_epilogue_gen || TARGET_SAVE_ALL_TARGET_REGS));
8943 static bool
8944 sh_ms_bitfield_layout_p (tree record_type ATTRIBUTE_UNUSED)
8946 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
8950 On the SH1..SH4, the trampoline looks like
8951 2 0002 D202 mov.l l2,r2
8952 1 0000 D301 mov.l l1,r3
8953 3 0004 422B jmp @r2
8954 4 0006 0009 nop
8955 5 0008 00000000 l1: .long area
8956 6 000c 00000000 l2: .long function
8958 SH5 (compact) uses r1 instead of r3 for the static chain. */
8961 /* Emit RTL insns to initialize the variable parts of a trampoline.
8962 FNADDR is an RTX for the address of the function's pure code.
8963 CXT is an RTX for the static chain value for the function. */
8965 void
8966 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
8968 if (TARGET_SHMEDIA64)
8970 rtx tramp_templ;
8971 int fixed_len;
8973 rtx movi1 = GEN_INT (0xcc000010);
8974 rtx shori1 = GEN_INT (0xc8000010);
8975 rtx src, dst;
8977 /* The following trampoline works within a +- 128 KB range for cxt:
8978 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
8979 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
8980 gettr tr1,r1; blink tr0,r63 */
8981 /* Address rounding makes it hard to compute the exact bounds of the
8982 offset for this trampoline, but we have a rather generous offset
8983 range, so frame_offset should do fine as an upper bound. */
8984 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
8986 /* ??? could optimize this trampoline initialization
8987 by writing DImode words with two insns each. */
8988 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
8989 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
8990 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
8991 insn = gen_rtx_AND (DImode, insn, mask);
8992 /* Or in ptb/u .,tr1 pattern */
8993 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
8994 insn = force_operand (insn, NULL_RTX);
8995 insn = gen_lowpart (SImode, insn);
8996 emit_move_insn (gen_rtx_MEM (SImode, tramp), insn);
8997 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
8998 insn = gen_rtx_AND (DImode, insn, mask);
8999 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
9000 insn = gen_lowpart (SImode, insn);
9001 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)), insn);
9002 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
9003 insn = gen_rtx_AND (DImode, insn, mask);
9004 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9005 insn = gen_lowpart (SImode, insn);
9006 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)), insn);
9007 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
9008 insn = gen_rtx_AND (DImode, insn, mask);
9009 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9010 insn = gen_lowpart (SImode, insn);
9011 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
9012 insn);
9013 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
9014 insn = gen_rtx_AND (DImode, insn, mask);
9015 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9016 insn = gen_lowpart (SImode, insn);
9017 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 16)),
9018 insn);
9019 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 20)),
9020 GEN_INT (0x6bf10600));
9021 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 24)),
9022 GEN_INT (0x4415fc10));
9023 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 28)),
9024 GEN_INT (0x4401fff0));
9025 emit_insn (gen_ic_invalidate_line (tramp));
9026 return;
9028 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
9029 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
9031 tramp_templ = gen_datalabel_ref (tramp_templ);
9032 dst = gen_rtx_MEM (BLKmode, tramp);
9033 src = gen_rtx_MEM (BLKmode, tramp_templ);
9034 set_mem_align (dst, 256);
9035 set_mem_align (src, 64);
9036 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
9038 emit_move_insn (gen_rtx_MEM (Pmode, plus_constant (tramp, fixed_len)),
9039 fnaddr);
9040 emit_move_insn (gen_rtx_MEM (Pmode,
9041 plus_constant (tramp,
9042 fixed_len
9043 + GET_MODE_SIZE (Pmode))),
9044 cxt);
9045 emit_insn (gen_ic_invalidate_line (tramp));
9046 return;
9048 else if (TARGET_SHMEDIA)
9050 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
9051 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
9052 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
9053 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
9054 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
9055 rotated 10 right, and higher 16 bit of every 32 selected. */
9056 rtx movishori
9057 = force_reg (V2HImode, (simplify_gen_subreg
9058 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
9059 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
9060 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
9062 tramp = force_reg (Pmode, tramp);
9063 fnaddr = force_reg (SImode, fnaddr);
9064 cxt = force_reg (SImode, cxt);
9065 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
9066 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
9067 movishori));
9068 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
9069 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9070 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
9071 emit_move_insn (gen_rtx_MEM (DImode, tramp), quad0);
9072 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
9073 gen_rtx_SUBREG (V2HImode, cxt, 0),
9074 movishori));
9075 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
9076 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9077 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
9078 if (TARGET_LITTLE_ENDIAN)
9080 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
9081 emit_insn (gen_mextr4 (quad2, cxtload, blink));
9083 else
9085 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
9086 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
9088 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 8)), quad1);
9089 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), quad2);
9090 emit_insn (gen_ic_invalidate_line (tramp));
9091 return;
9093 else if (TARGET_SHCOMPACT)
9095 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
9096 return;
9098 emit_move_insn (gen_rtx_MEM (SImode, tramp),
9099 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
9100 SImode));
9101 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
9102 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
9103 SImode));
9104 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
9105 cxt);
9106 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
9107 fnaddr);
9108 if (TARGET_HARVARD)
9110 if (TARGET_USERMODE)
9111 emit_library_call (function_symbol ("__ic_invalidate"),
9112 0, VOIDmode, 1, tramp, SImode);
9113 else
9114 emit_insn (gen_ic_invalidate_line (tramp));
9118 /* FIXME: This is overly conservative. A SHcompact function that
9119 receives arguments ``by reference'' will have them stored in its
9120 own stack frame, so it must not pass pointers or references to
9121 these arguments to other functions by means of sibling calls. */
9122 static bool
9123 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9125 return (decl
9126 && (! TARGET_SHCOMPACT
9127 || current_function_args_info.stack_regs == 0)
9128 && ! sh_cfun_interrupt_handler_p ());
9131 /* Machine specific built-in functions. */
9133 struct builtin_description
9135 const enum insn_code icode;
9136 const char *const name;
9137 int signature;
9140 /* describe number and signedness of arguments; arg[0] == result
9141 (1: unsigned, 2: signed, 4: don't care, 8: pointer 0: no argument */
9142 static const char signature_args[][4] =
9144 #define SH_BLTIN_V2SI2 0
9145 { 4, 4 },
9146 #define SH_BLTIN_V4HI2 1
9147 { 4, 4 },
9148 #define SH_BLTIN_V2SI3 2
9149 { 4, 4, 4 },
9150 #define SH_BLTIN_V4HI3 3
9151 { 4, 4, 4 },
9152 #define SH_BLTIN_V8QI3 4
9153 { 4, 4, 4 },
9154 #define SH_BLTIN_MAC_HISI 5
9155 { 1, 4, 4, 1 },
9156 #define SH_BLTIN_SH_HI 6
9157 { 4, 4, 1 },
9158 #define SH_BLTIN_SH_SI 7
9159 { 4, 4, 1 },
9160 #define SH_BLTIN_V4HI2V2SI 8
9161 { 4, 4, 4 },
9162 #define SH_BLTIN_V4HI2V8QI 9
9163 { 4, 4, 4 },
9164 #define SH_BLTIN_SISF 10
9165 { 4, 2 },
9166 #define SH_BLTIN_LDUA_L 11
9167 { 2, 8 },
9168 #define SH_BLTIN_LDUA_Q 12
9169 { 1, 8 },
9170 #define SH_BLTIN_STUA_L 13
9171 { 0, 8, 2 },
9172 #define SH_BLTIN_STUA_Q 14
9173 { 0, 8, 1 },
9174 #define SH_BLTIN_UDI 15
9175 { 0, 8, 1 },
9176 #define SH_BLTIN_NUM_SHARED_SIGNATURES 16
9177 #define SH_BLTIN_2 16
9178 #define SH_BLTIN_SU 16
9179 { 1, 2 },
9180 #define SH_BLTIN_3 17
9181 #define SH_BLTIN_SUS 17
9182 { 2, 2, 1 },
9183 #define SH_BLTIN_PSSV 18
9184 { 0, 8, 2, 2 },
9185 #define SH_BLTIN_XXUU 19
9186 #define SH_BLTIN_UUUU 19
9187 { 1, 1, 1, 1 },
9188 #define SH_BLTIN_PV 20
9189 { 0, 8 },
9191 /* mcmv: operands considered unsigned. */
9192 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9193 /* mperm: control value considered unsigned int. */
9194 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9195 /* mshards_q: returns signed short. */
9196 /* nsb: takes long long arg, returns unsigned char. */
9197 static const struct builtin_description bdesc[] =
9199 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
9200 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
9201 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
9202 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
9203 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
9204 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
9205 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
9206 #if 0
9207 { CODE_FOR_alloco32, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9208 { CODE_FOR_alloco64, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9209 #endif
9210 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
9211 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
9212 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
9213 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
9214 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
9215 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
9216 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
9217 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
9218 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
9219 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
9220 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_UDI },
9221 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_UDI },
9222 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_UDI },
9223 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_UDI },
9224 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_UDI },
9225 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_UDI },
9226 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_UDI },
9227 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
9228 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
9229 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
9230 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
9231 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
9232 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
9233 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
9234 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
9235 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
9236 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
9237 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
9238 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
9239 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
9240 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
9241 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
9242 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
9243 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
9244 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
9245 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
9246 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
9247 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
9248 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
9249 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
9250 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
9251 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
9252 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
9253 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
9254 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
9255 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
9256 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
9257 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
9258 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
9259 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
9260 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
9261 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
9262 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
9263 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
9264 #if 0
9265 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9266 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9267 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9268 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9269 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9270 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9271 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9272 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9273 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9274 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9275 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9276 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9277 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9278 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9279 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9280 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9281 #endif
9282 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
9283 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
9284 #if 0
9285 { CODE_FOR_prefetch32,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
9286 { CODE_FOR_prefetch64,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV }
9287 #endif
9290 static void
9291 sh_media_init_builtins (void)
9293 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
9294 const struct builtin_description *d;
9296 memset (shared, 0, sizeof shared);
9297 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
9299 tree type, arg_type;
9300 int signature = d->signature;
9301 int i;
9303 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
9304 type = shared[signature];
9305 else
9307 int has_result = signature_args[signature][0] != 0;
9309 if (signature_args[signature][1] == 8
9310 && (insn_data[d->icode].operand[has_result].mode != Pmode))
9311 continue;
9312 if (! TARGET_FPU_ANY
9313 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
9314 continue;
9315 type = void_list_node;
9316 for (i = 3; ; i--)
9318 int arg = signature_args[signature][i];
9319 int opno = i - 1 + has_result;
9321 if (arg == 8)
9322 arg_type = ptr_type_node;
9323 else if (arg)
9324 arg_type = ((*lang_hooks.types.type_for_mode)
9325 (insn_data[d->icode].operand[opno].mode,
9326 (arg & 1)));
9327 else if (i)
9328 continue;
9329 else
9330 arg_type = void_type_node;
9331 if (i == 0)
9332 break;
9333 type = tree_cons (NULL_TREE, arg_type, type);
9335 type = build_function_type (arg_type, type);
9336 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
9337 shared[signature] = type;
9339 lang_hooks.builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
9340 NULL, NULL_TREE);
9344 /* Implements target hook vector_mode_supported_p. */
9345 bool
9346 sh_vector_mode_supported_p (enum machine_mode mode)
9348 if (TARGET_FPU_ANY
9349 && ((mode == V2SFmode)
9350 || (mode == V4SFmode)
9351 || (mode == V16SFmode)))
9352 return true;
9354 else if (TARGET_SHMEDIA
9355 && ((mode == V8QImode)
9356 || (mode == V2HImode)
9357 || (mode == V4HImode)
9358 || (mode == V2SImode)))
9359 return true;
9361 return false;
9364 static void
9365 sh_init_builtins (void)
9367 if (TARGET_SHMEDIA)
9368 sh_media_init_builtins ();
9371 /* Expand an expression EXP that calls a built-in function,
9372 with result going to TARGET if that's convenient
9373 (and in mode MODE if that's convenient).
9374 SUBTARGET may be used as the target for computing one of EXP's operands.
9375 IGNORE is nonzero if the value is to be ignored. */
9377 static rtx
9378 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9379 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
9381 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
9382 tree arglist = TREE_OPERAND (exp, 1);
9383 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9384 const struct builtin_description *d = &bdesc[fcode];
9385 enum insn_code icode = d->icode;
9386 int signature = d->signature;
9387 enum machine_mode tmode = VOIDmode;
9388 int nop = 0, i;
9389 rtx op[4];
9390 rtx pat;
9392 if (signature_args[signature][0])
9394 if (ignore)
9395 return 0;
9397 tmode = insn_data[icode].operand[0].mode;
9398 if (! target
9399 || GET_MODE (target) != tmode
9400 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9401 target = gen_reg_rtx (tmode);
9402 op[nop++] = target;
9404 else
9405 target = 0;
9407 for (i = 1; i <= 3; i++, nop++)
9409 tree arg;
9410 enum machine_mode opmode, argmode;
9412 if (! signature_args[signature][i])
9413 break;
9414 arg = TREE_VALUE (arglist);
9415 if (arg == error_mark_node)
9416 return const0_rtx;
9417 arglist = TREE_CHAIN (arglist);
9418 opmode = insn_data[icode].operand[nop].mode;
9419 argmode = TYPE_MODE (TREE_TYPE (arg));
9420 if (argmode != opmode)
9421 arg = build1 (NOP_EXPR,
9422 (*lang_hooks.types.type_for_mode) (opmode, 0), arg);
9423 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
9424 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
9425 op[nop] = copy_to_mode_reg (opmode, op[nop]);
9428 switch (nop)
9430 case 1:
9431 pat = (*insn_data[d->icode].genfun) (op[0]);
9432 break;
9433 case 2:
9434 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
9435 break;
9436 case 3:
9437 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
9438 break;
9439 case 4:
9440 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
9441 break;
9442 default:
9443 abort ();
9445 if (! pat)
9446 return 0;
9447 emit_insn (pat);
9448 return target;
9451 void
9452 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
9454 rtx sel0 = const0_rtx;
9455 rtx sel1 = const1_rtx;
9456 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
9457 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
9459 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
9460 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
9463 void
9464 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
9466 rtx sel0 = const0_rtx;
9467 rtx sel1 = const1_rtx;
9468 rtx (*fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
9469 = gen_binary_sf_op;
9470 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
9472 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
9473 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
9476 /* Return the class of registers for which a mode change from FROM to TO
9477 is invalid. */
9478 bool
9479 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9480 enum reg_class class)
9482 /* We want to enable the use of SUBREGs as a means to
9483 VEC_SELECT a single element of a vector. */
9484 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
9485 return (reg_classes_intersect_p (GENERAL_REGS, class));
9487 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
9489 if (TARGET_LITTLE_ENDIAN)
9491 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
9492 return reg_classes_intersect_p (DF_REGS, class);
9494 else
9496 if (GET_MODE_SIZE (from) < 8)
9497 return reg_classes_intersect_p (DF_HI_REGS, class);
9500 return 0;
9504 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
9505 that label is used. */
9507 void
9508 sh_mark_label (rtx address, int nuses)
9510 if (GOTOFF_P (address))
9512 /* Extract the label or symbol. */
9513 address = XEXP (address, 0);
9514 if (GET_CODE (address) == PLUS)
9515 address = XEXP (address, 0);
9516 address = XVECEXP (address, 0, 0);
9518 if (GET_CODE (address) == LABEL_REF
9519 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
9520 LABEL_NUSES (XEXP (address, 0)) += nuses;
9523 /* Compute extra cost of moving data between one register class
9524 and another. */
9526 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
9527 uses this information. Hence, the general register <-> floating point
9528 register information here is not used for SFmode. */
9531 sh_register_move_cost (enum machine_mode mode,
9532 enum reg_class srcclass, enum reg_class dstclass)
9534 if (dstclass == T_REGS || dstclass == PR_REGS)
9535 return 10;
9537 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
9538 return 4;
9540 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
9541 && REGCLASS_HAS_FP_REG (srcclass)
9542 && REGCLASS_HAS_FP_REG (dstclass))
9543 return 4;
9545 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
9546 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
9547 return 9;
9549 if ((REGCLASS_HAS_FP_REG (dstclass)
9550 && REGCLASS_HAS_GENERAL_REG (srcclass))
9551 || (REGCLASS_HAS_GENERAL_REG (dstclass)
9552 && REGCLASS_HAS_FP_REG (srcclass)))
9553 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
9554 * ((GET_MODE_SIZE (mode) + 7) / 8U));
9556 if ((dstclass == FPUL_REGS
9557 && REGCLASS_HAS_GENERAL_REG (srcclass))
9558 || (srcclass == FPUL_REGS
9559 && REGCLASS_HAS_GENERAL_REG (dstclass)))
9560 return 5;
9562 if ((dstclass == FPUL_REGS
9563 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
9564 || (srcclass == FPUL_REGS
9565 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
9566 return 7;
9568 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9569 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9570 return 20;
9572 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9573 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9574 return 4;
9576 if (TARGET_SHMEDIA
9577 || (TARGET_FMOVD
9578 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
9579 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
9580 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
9582 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
9585 /* Like register_operand, but take into account that SHMEDIA can use
9586 the constant zero like a general register. */
9588 sh_register_operand (rtx op, enum machine_mode mode)
9590 if (op == CONST0_RTX (mode) && TARGET_SHMEDIA)
9591 return 1;
9592 return register_operand (op, mode);
9596 cmpsi_operand (rtx op, enum machine_mode mode)
9598 if (GET_CODE (op) == REG && REGNO (op) == T_REG
9599 && GET_MODE (op) == SImode)
9600 return 1;
9601 return arith_operand (op, mode);
9604 static rtx emit_load_ptr (rtx, rtx);
9606 static rtx
9607 emit_load_ptr (rtx reg, rtx addr)
9609 rtx mem = gen_rtx_MEM (ptr_mode, addr);
9611 if (Pmode != ptr_mode)
9612 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
9613 return emit_move_insn (reg, mem);
9616 void
9617 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
9618 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
9619 tree function)
9621 CUMULATIVE_ARGS cum;
9622 int structure_value_byref = 0;
9623 rtx this, this_value, sibcall, insns, funexp;
9624 tree funtype = TREE_TYPE (function);
9625 int simple_add = CONST_OK_FOR_ADD (delta);
9626 int did_load = 0;
9627 rtx scratch0, scratch1, scratch2;
9629 reload_completed = 1;
9630 epilogue_completed = 1;
9631 no_new_pseudos = 1;
9632 current_function_uses_only_leaf_regs = 1;
9633 reset_block_changes ();
9635 emit_note (NOTE_INSN_PROLOGUE_END);
9637 /* Find the "this" pointer. We have such a wide range of ABIs for the
9638 SH that it's best to do this completely machine independently.
9639 "this" is passed as first argument, unless a structure return pointer
9640 comes first, in which case "this" comes second. */
9641 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
9642 #ifndef PCC_STATIC_STRUCT_RETURN
9643 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
9644 structure_value_byref = 1;
9645 #endif /* not PCC_STATIC_STRUCT_RETURN */
9646 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
9648 tree ptype = build_pointer_type (TREE_TYPE (funtype));
9650 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
9652 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
9654 /* For SHcompact, we only have r0 for a scratch register: r1 is the
9655 static chain pointer (even if you can't have nested virtual functions
9656 right now, someone might implement them sometime), and the rest of the
9657 registers are used for argument passing, are callee-saved, or reserved. */
9658 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
9659 if (! TARGET_SH5)
9661 scratch1 = gen_rtx_REG (ptr_mode, 1);
9662 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
9663 pointing where to return struct values. */
9664 scratch2 = gen_rtx_REG (Pmode, 3);
9666 else if (TARGET_SHMEDIA)
9668 scratch1 = gen_rtx_REG (ptr_mode, 21);
9669 scratch2 = gen_rtx_REG (Pmode, TR0_REG);
9672 this_value = plus_constant (this, delta);
9673 if (vcall_offset
9674 && (simple_add || scratch0 != scratch1)
9675 && strict_memory_address_p (ptr_mode, this_value))
9677 emit_load_ptr (scratch0, this_value);
9678 did_load = 1;
9681 if (!delta)
9682 ; /* Do nothing. */
9683 else if (simple_add)
9684 emit_move_insn (this, this_value);
9685 else
9687 emit_move_insn (scratch1, GEN_INT (delta));
9688 emit_insn (gen_add2_insn (this, scratch1));
9691 if (vcall_offset)
9693 rtx offset_addr;
9695 if (!did_load)
9696 emit_load_ptr (scratch0, this);
9698 offset_addr = plus_constant (scratch0, vcall_offset);
9699 if (strict_memory_address_p (ptr_mode, offset_addr))
9700 ; /* Do nothing. */
9701 else if (! TARGET_SH5)
9703 /* scratch0 != scratch1, and we have indexed loads. Get better
9704 schedule by loading the offset into r1 and using an indexed
9705 load - then the load of r1 can issue before the load from
9706 (this + delta) finishes. */
9707 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9708 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
9710 else if (CONST_OK_FOR_ADD (vcall_offset))
9712 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
9713 offset_addr = scratch0;
9715 else if (scratch0 != scratch1)
9717 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9718 emit_insn (gen_add2_insn (scratch0, scratch1));
9719 offset_addr = scratch0;
9721 else
9722 abort (); /* FIXME */
9723 emit_load_ptr (scratch0, offset_addr);
9725 if (Pmode != ptr_mode)
9726 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
9727 emit_insn (gen_add2_insn (this, scratch0));
9730 /* Generate a tail call to the target function. */
9731 if (! TREE_USED (function))
9733 assemble_external (function);
9734 TREE_USED (function) = 1;
9736 funexp = XEXP (DECL_RTL (function), 0);
9737 emit_move_insn (scratch2, funexp);
9738 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
9739 sibcall = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
9740 SIBLING_CALL_P (sibcall) = 1;
9741 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
9742 emit_barrier ();
9744 /* Run just enough of rest_of_compilation to do scheduling and get
9745 the insns emitted. Note that use_thunk calls
9746 assemble_start_function and assemble_end_function. */
9748 insn_locators_initialize ();
9749 insns = get_insns ();
9751 if (optimize > 0 && flag_schedule_insns_after_reload)
9753 if (! basic_block_info)
9754 init_flow ();
9755 rtl_register_cfg_hooks ();
9756 find_basic_blocks (insns, max_reg_num (), dump_file);
9757 life_analysis (dump_file, PROP_FINAL);
9759 split_all_insns (1);
9761 schedule_insns (dump_file);
9764 sh_reorg ();
9766 if (optimize > 0 && flag_delayed_branch)
9767 dbr_schedule (insns, dump_file);
9768 shorten_branches (insns);
9769 final_start_function (insns, file, 1);
9770 final (insns, file, 1, 0);
9771 final_end_function ();
9773 if (optimize > 0 && flag_schedule_insns_after_reload)
9775 /* Release all memory allocated by flow. */
9776 free_basic_block_vars ();
9778 /* Release all memory held by regsets now. */
9779 regset_release_memory ();
9782 reload_completed = 0;
9783 epilogue_completed = 0;
9784 no_new_pseudos = 0;
9788 function_symbol (const char *name)
9790 rtx sym = gen_rtx_SYMBOL_REF (Pmode, name);
9791 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
9792 return sym;
9795 /* Find the number of a general purpose register in S. */
9796 static int
9797 scavenge_reg (HARD_REG_SET *s)
9799 int r;
9800 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
9801 if (TEST_HARD_REG_BIT (*s, r))
9802 return r;
9803 return -1;
9807 sh_get_pr_initial_val (void)
9809 rtx val;
9811 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
9812 PR register on SHcompact, because it might be clobbered by the prologue.
9813 We check first if that is known to be the case. */
9814 if (TARGET_SHCOMPACT
9815 && ((current_function_args_info.call_cookie
9816 & ~ CALL_COOKIE_RET_TRAMP (1))
9817 || current_function_has_nonlocal_label))
9818 return gen_rtx_MEM (SImode, return_address_pointer_rtx);
9820 /* If we haven't finished rtl generation, there might be a nonlocal label
9821 that we haven't seen yet.
9822 ??? get_hard_reg_initial_val fails if it is called while no_new_pseudos
9823 is set, unless it has been called before for the same register. And even
9824 then, we end in trouble if we didn't use the register in the same
9825 basic block before. So call get_hard_reg_initial_val now and wrap it
9826 in an unspec if we might need to replace it. */
9827 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
9828 combine can put the pseudo returned by get_hard_reg_initial_val into
9829 instructions that need a general purpose registers, which will fail to
9830 be recognized when the pseudo becomes allocated to PR. */
9832 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
9833 if (TARGET_SH1)
9834 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
9835 return val;
9839 sh_expand_t_scc (enum rtx_code code, rtx target)
9841 rtx result = target;
9842 HOST_WIDE_INT val;
9844 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
9845 || GET_CODE (sh_compare_op1) != CONST_INT)
9846 return 0;
9847 if (GET_CODE (result) != REG)
9848 result = gen_reg_rtx (SImode);
9849 val = INTVAL (sh_compare_op1);
9850 if ((code == EQ && val == 1) || (code == NE && val == 0))
9851 emit_insn (gen_movt (result));
9852 else if ((code == EQ && val == 0) || (code == NE && val == 1))
9854 emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
9855 emit_insn (gen_subc (result, result, result));
9856 emit_insn (gen_addsi3 (result, result, const1_rtx));
9858 else if (code == EQ || code == NE)
9859 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
9860 else
9861 return 0;
9862 if (result != target)
9863 emit_move_insn (target, result);
9864 return 1;
9867 /* INSN is an sfunc; return the rtx that describes the address used. */
9868 static rtx
9869 extract_sfunc_addr (rtx insn)
9871 rtx pattern, part = NULL_RTX;
9872 int len, i;
9874 pattern = PATTERN (insn);
9875 len = XVECLEN (pattern, 0);
9876 for (i = 0; i < len; i++)
9878 part = XVECEXP (pattern, 0, i);
9879 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
9880 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
9881 return XEXP (part, 0);
9883 if (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE)
9884 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
9885 abort ();
9888 /* Verify that the register in use_sfunc_addr still agrees with the address
9889 used in the sfunc. This prevents fill_slots_from_thread from changing
9890 use_sfunc_addr.
9891 INSN is the use_sfunc_addr instruction, and REG is the register it
9892 guards. */
9894 check_use_sfunc_addr (rtx insn, rtx reg)
9896 /* Search for the sfunc. It should really come right after INSN. */
9897 while ((insn = NEXT_INSN (insn)))
9899 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
9900 break;
9901 if (! INSN_P (insn))
9902 continue;
9904 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
9905 insn = XVECEXP (PATTERN (insn), 0, 0);
9906 if (GET_CODE (PATTERN (insn)) != PARALLEL
9907 || get_attr_type (insn) != TYPE_SFUNC)
9908 continue;
9909 return rtx_equal_p (extract_sfunc_addr (insn), reg);
9911 abort ();
9914 /* Returns 1 if OP is a MEM that can be source of a simple move operation. */
9917 unaligned_load_operand (rtx op, enum machine_mode mode)
9919 rtx inside;
9921 if (GET_CODE (op) != MEM || GET_MODE (op) != mode)
9922 return 0;
9924 inside = XEXP (op, 0);
9926 if (GET_CODE (inside) == POST_INC)
9927 inside = XEXP (inside, 0);
9929 if (GET_CODE (inside) == REG)
9930 return 1;
9932 return 0;
9935 /* This function returns a constant rtx that represents pi / 2**15 in
9936 SFmode. it's used to scale SFmode angles, in radians, to a
9937 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
9938 maps to 0x10000). */
9940 static GTY(()) rtx sh_fsca_sf2int_rtx;
9943 sh_fsca_sf2int (void)
9945 if (! sh_fsca_sf2int_rtx)
9947 REAL_VALUE_TYPE rv;
9949 real_from_string (&rv, "10430.378350470453");
9950 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
9953 return sh_fsca_sf2int_rtx;
9956 /* This function returns a constant rtx that represents pi / 2**15 in
9957 DFmode. it's used to scale DFmode angles, in radians, to a
9958 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
9959 maps to 0x10000). */
9961 static GTY(()) rtx sh_fsca_df2int_rtx;
9964 sh_fsca_df2int (void)
9966 if (! sh_fsca_df2int_rtx)
9968 REAL_VALUE_TYPE rv;
9970 real_from_string (&rv, "10430.378350470453");
9971 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
9974 return sh_fsca_df2int_rtx;
9977 /* This function returns a constant rtx that represents 2**15 / pi in
9978 SFmode. it's used to scale a fixed-point signed 16.16-bit fraction
9979 of a full circle back to a SFmode value, i.e., 0x10000 maps to
9980 2*pi). */
9982 static GTY(()) rtx sh_fsca_int2sf_rtx;
9985 sh_fsca_int2sf (void)
9987 if (! sh_fsca_int2sf_rtx)
9989 REAL_VALUE_TYPE rv;
9991 real_from_string (&rv, "9.587379924285257e-5");
9992 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
9995 return sh_fsca_int2sf_rtx;
9997 #include "gt-sh.h"