1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 59 Temple Place - Suite 330,
22 Boston, MA 02111-1307, USA. */
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "insn-config.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "flags.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "function.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "output.h"
38 #include "insn-attr.h"
39 #include "toplev.h"
40 #include "recog.h"
41 #include "c-pragma.h"
42 #include "integrate.h"
43 #include "tm_p.h"
44 #include "target.h"
45 #include "target-def.h"
46 #include "real.h"
47 #include "langhooks.h"
48 #include "basic-block.h"
49 #include "ra.h"
50 #include "cfglayout.h"
51 #include "intl.h"
52 #include "sched-int.h"
53 #include "ggc.h"
54 #include "tree-gimple.h"
57 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
59 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
60 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
62 /* These are some macros to abstract register modes. */
63 #define CONST_OK_FOR_ADD(size) \
64 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
65 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
66 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
67 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
69 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
70 int current_function_interrupt;
72 /* ??? The pragma interrupt support will not work for SH3. */
73 /* This is set by #pragma interrupt and #pragma trapa, and causes gcc to
74 output code for the next function appropriate for an interrupt handler. */
75 int pragma_interrupt;
77 /* This is set by the trap_exit attribute for functions. It specifies
78 a trap number to be used in a trapa instruction at function exit
79 (instead of an rte instruction). */
80 int trap_exit;
82 /* This is used by the sp_switch attribute for functions. It specifies
83 a variable holding the address of the stack the interrupt function
84 should switch to/from at entry/exit. */
85 rtx sp_switch;
87 /* This is set by #pragma trapa, and is similar to the above, except that
88 the compiler doesn't emit code to preserve all registers. */
89 static int pragma_trapa;
91 /* This is set by #pragma nosave_low_regs. This is useful on the SH3,
92 which has a separate set of low regs for User and Supervisor modes.
93 This should only be used for the lowest level of interrupts. Higher levels
94 of interrupts must save the registers in case they themselves are
95 interrupted. */
96 int pragma_nosave_low_regs;
98 /* This is used for communication between TARGET_SETUP_INCOMING_VARARGS and
99 sh_expand_prologue. */
100 int current_function_anonymous_args;
102 /* Global variables for machine-dependent things. */
104 /* Which CPU we are scheduling for. */
105 enum processor_type sh_cpu;
107 /* Definitions used in ready queue reordering for first scheduling pass. */
109 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
110 static short *regmode_weight[2];
112 /* Total SFmode and SImode weights of scheduled insns. */
113 static int curr_regmode_pressure[2];
115 /* If true, skip cycles for Q -> R movement. */
116 static int skip_cycles = 0;
118 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
119 and returned from sh_reorder2. */
120 static short cached_can_issue_more;
122 /* Saved operands from the last compare to use when we generate an scc
123 or bcc insn. */
125 rtx sh_compare_op0;
126 rtx sh_compare_op1;
128 /* Provides the class number of the smallest class containing
129 reg number. */
131 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
133 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
137 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
138 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
139 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
140 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
141 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
142 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
143 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
144 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
145 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
146 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
147 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
148 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
149 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
153 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
154 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
155 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
156 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
157 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
158 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
159 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
160 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
161 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
162 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
163 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
164 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
165 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
166 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
167 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
168 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
169 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
170 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
171 GENERAL_REGS,
174 char sh_register_names[FIRST_PSEUDO_REGISTER] \
175 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
177 char sh_additional_register_names[ADDREGNAMES_SIZE] \
178 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
179 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
181 /* Provide reg_class from a letter such as appears in the machine
182 description. *: target independently reserved letter.
183 reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */
185 enum reg_class reg_class_from_letter[] =
187 /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
188 /* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
189 /* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
190 /* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
191 /* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
192 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
193 /* y */ FPUL_REGS, /* z */ R0_REGS
196 int assembler_dialect;
198 static bool shmedia_space_reserved_for_target_registers;
200 static void split_branches (rtx);
201 static int branch_dest (rtx);
202 static void force_into (rtx, rtx);
203 static void print_slot (rtx);
204 static rtx add_constant (rtx, enum machine_mode, rtx);
205 static void dump_table (rtx, rtx);
206 static int hi_const (rtx);
207 static int broken_move (rtx);
208 static int mova_p (rtx);
209 static rtx find_barrier (int, rtx, rtx);
210 static int noncall_uses_reg (rtx, rtx, rtx *);
211 static rtx gen_block_redirect (rtx, int, int);
212 static void sh_reorg (void);
213 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
214 static rtx frame_insn (rtx);
215 static rtx push (int);
216 static void pop (int);
217 static void push_regs (HARD_REG_SET *, int);
218 static int calc_live_regs (HARD_REG_SET *);
219 static void mark_use (rtx, rtx *);
220 static HOST_WIDE_INT rounded_frame_size (int);
221 static rtx mark_constant_pool_use (rtx);
222 const struct attribute_spec sh_attribute_table[];
223 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
224 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
225 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
226 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
227 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
228 static void sh_insert_attributes (tree, tree *);
229 static int sh_adjust_cost (rtx, rtx, rtx, int);
230 static int sh_issue_rate (void);
231 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
232 static short find_set_regmode_weight (rtx, enum machine_mode);
233 static short find_insn_regmode_weight (rtx, enum machine_mode);
234 static void find_regmode_weight (int, enum machine_mode);
235 static void sh_md_init_global (FILE *, int, int);
236 static void sh_md_finish_global (FILE *, int);
237 static int rank_for_reorder (const void *, const void *);
238 static void swap_reorder (rtx *, int);
239 static void ready_reorder (rtx *, int);
240 static short high_pressure (enum machine_mode);
241 static int sh_reorder (FILE *, int, rtx *, int *, int);
242 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
243 static void sh_md_init (FILE *, int, int);
244 static int sh_variable_issue (FILE *, int, rtx, int);
246 static bool sh_function_ok_for_sibcall (tree, tree);
248 static bool sh_cannot_modify_jumps_p (void);
249 static int sh_target_reg_class (void);
250 static bool sh_optimize_target_register_callee_saved (bool);
251 static bool sh_ms_bitfield_layout_p (tree);
253 static void sh_init_builtins (void);
254 static void sh_media_init_builtins (void);
255 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
256 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
257 static void sh_file_start (void);
258 static int flow_dependent_p (rtx, rtx);
259 static void flow_dependent_p_1 (rtx, rtx, void *);
260 static int shiftcosts (rtx);
261 static int andcosts (rtx);
262 static int addsubcosts (rtx);
263 static int multcosts (rtx);
264 static bool unspec_caller_rtx_p (rtx);
265 static bool sh_cannot_copy_insn_p (rtx);
266 static bool sh_rtx_costs (rtx, int, int, int *);
267 static int sh_address_cost (rtx);
268 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
269 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
270 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
271 static int scavenge_reg (HARD_REG_SET *s);
272 struct save_schedule_s;
273 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
274 struct save_schedule_s *, int);
276 static rtx sh_struct_value_rtx (tree, int);
277 static bool sh_return_in_memory (tree, tree);
278 static rtx sh_builtin_saveregs (void);
279 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
280 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
281 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
282 static tree sh_build_builtin_va_list (void);
283 static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
284 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
285 tree, bool);
286 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
287 tree, bool);
290 /* Initialize the GCC target structure. */
291 #undef TARGET_ATTRIBUTE_TABLE
292 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
294 /* The next two are used for debug info when compiling with -gdwarf. */
295 #undef TARGET_ASM_UNALIGNED_HI_OP
296 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
297 #undef TARGET_ASM_UNALIGNED_SI_OP
298 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
300 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
301 #undef TARGET_ASM_UNALIGNED_DI_OP
302 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
303 #undef TARGET_ASM_ALIGNED_DI_OP
304 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
306 #undef TARGET_ASM_FUNCTION_EPILOGUE
307 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
309 #undef TARGET_ASM_OUTPUT_MI_THUNK
310 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
312 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
313 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
315 #undef TARGET_ASM_FILE_START
316 #define TARGET_ASM_FILE_START sh_file_start
317 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
318 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
320 #undef TARGET_INSERT_ATTRIBUTES
321 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
323 #undef TARGET_SCHED_ADJUST_COST
324 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
326 #undef TARGET_SCHED_ISSUE_RATE
327 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
329 /* The next 5 hooks have been implemented for reenabling sched1. With the
330 help of these macros we are limiting the movement of insns in sched1 to
331 reduce the register pressure. The overall idea is to keep count of SImode
332 and SFmode regs required by already scheduled insns. When these counts
333 cross certain threshold values, we give priority to insns that free registers.
334 The insn that frees registers is most likely to be the insn with lowest
335 LUID (original insn order); but such an insn might be there in the stalled
336 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
337 up to a maximum of 8 cycles so that such insns may move from Q -> R.
339 The descriptions of the hooks are as follows:
341 TARGET_SCHED_INIT_GLOBAL: Added a new target hook in the generic
342 scheduler; it is called inside the sched_init function just after
343 find_insn_reg_weights function call. It is used to calculate the SImode
344 and SFmode weights of insns of basic blocks, much like what
345 find_insn_reg_weights does.
346 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
348 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
349 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
350 (Q)->(R).
352 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
353 high, reorder the ready queue so that the insn with the lowest LUID will be
354 issued next.
356 TARGET_SCHED_REORDER2: If the register pressure is high, tell
357 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
359 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
360 can be returned from TARGET_SCHED_REORDER2.
362 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
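/* Editorial sketch, not part of the original port: the bookkeeping
   described above amounts to something like

     CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
     if (CURR_REGMODE_PRESSURE (SImode) > SIMODE_THRESHOLD
         || CURR_REGMODE_PRESSURE (SFmode) > SFMODE_THRESHOLD)
       give_priority_to_register_freeing_insns ();

   where SIMODE_THRESHOLD, SFMODE_THRESHOLD and
   give_priority_to_register_freeing_insns are hypothetical placeholder
   names; the actual thresholds and the reordering itself live in
   high_pressure, sh_reorder and sh_reorder2 below.  */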
364 #undef TARGET_SCHED_DFA_NEW_CYCLE
365 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
367 #undef TARGET_SCHED_INIT_GLOBAL
368 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
370 #undef TARGET_SCHED_FINISH_GLOBAL
371 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
373 #undef TARGET_SCHED_VARIABLE_ISSUE
374 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
376 #undef TARGET_SCHED_REORDER
377 #define TARGET_SCHED_REORDER sh_reorder
379 #undef TARGET_SCHED_REORDER2
380 #define TARGET_SCHED_REORDER2 sh_reorder2
382 #undef TARGET_SCHED_INIT
383 #define TARGET_SCHED_INIT sh_md_init
385 #undef TARGET_CANNOT_MODIFY_JUMPS_P
386 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
387 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
388 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
389 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
390 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
391 sh_optimize_target_register_callee_saved
393 #undef TARGET_MS_BITFIELD_LAYOUT_P
394 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
396 #undef TARGET_INIT_BUILTINS
397 #define TARGET_INIT_BUILTINS sh_init_builtins
398 #undef TARGET_EXPAND_BUILTIN
399 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
401 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
402 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
404 #undef TARGET_CANNOT_COPY_INSN_P
405 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
406 #undef TARGET_RTX_COSTS
407 #define TARGET_RTX_COSTS sh_rtx_costs
408 #undef TARGET_ADDRESS_COST
409 #define TARGET_ADDRESS_COST sh_address_cost
411 #undef TARGET_MACHINE_DEPENDENT_REORG
412 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
414 #ifdef HAVE_AS_TLS
415 #undef TARGET_HAVE_TLS
416 #define TARGET_HAVE_TLS true
417 #endif
419 #undef TARGET_PROMOTE_PROTOTYPES
420 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
421 #undef TARGET_PROMOTE_FUNCTION_ARGS
422 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
423 #undef TARGET_PROMOTE_FUNCTION_RETURN
424 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
426 #undef TARGET_STRUCT_VALUE_RTX
427 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
428 #undef TARGET_RETURN_IN_MEMORY
429 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
431 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
432 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
433 #undef TARGET_SETUP_INCOMING_VARARGS
434 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
435 #undef TARGET_STRICT_ARGUMENT_NAMING
436 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
437 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
438 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
439 #undef TARGET_MUST_PASS_IN_STACK
440 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
441 #undef TARGET_PASS_BY_REFERENCE
442 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
443 #undef TARGET_CALLEE_COPIES
444 #define TARGET_CALLEE_COPIES sh_callee_copies
446 #undef TARGET_BUILD_BUILTIN_VA_LIST
447 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
448 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
449 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
451 #undef TARGET_VECTOR_MODE_SUPPORTED_P
452 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
454 #undef TARGET_PCH_VALID_P
455 #define TARGET_PCH_VALID_P sh_pch_valid_p
457 /* Return regmode weight for insn. */
458 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
460 /* Return current register pressure for regmode. */
461 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
463 #ifdef SYMBIAN
465 #undef TARGET_ENCODE_SECTION_INFO
466 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
467 #undef TARGET_STRIP_NAME_ENCODING
468 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
469 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
470 #define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class
472 #endif /* SYMBIAN */
474 struct gcc_target targetm = TARGET_INITIALIZER;
476 /* Print the operand address in x to the stream. */
478 void
479 print_operand_address (FILE *stream, rtx x)
481 switch (GET_CODE (x))
483 case REG:
484 case SUBREG:
485 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
486 break;
488 case PLUS:
490 rtx base = XEXP (x, 0);
491 rtx index = XEXP (x, 1);
493 switch (GET_CODE (index))
495 case CONST_INT:
496 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
497 reg_names[true_regnum (base)]);
498 break;
500 case REG:
501 case SUBREG:
503 int base_num = true_regnum (base);
504 int index_num = true_regnum (index);
506 fprintf (stream, "@(r0,%s)",
507 reg_names[MAX (base_num, index_num)]);
508 break;
511 default:
512 debug_rtx (x);
513 abort ();
516 break;
518 case PRE_DEC:
519 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
520 break;
522 case POST_INC:
523 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
524 break;
526 default:
527 x = mark_constant_pool_use (x);
528 output_addr_const (stream, x);
529 break;
533 /* Print operand x (an rtx) in assembler syntax to file stream
534 according to modifier code.
536 '.' print a .s if insn needs delay slot
537 ',' print LOCAL_LABEL_PREFIX
538 '@' print trap, rte or rts depending upon pragma interruptness
539 '#' output a nop if there is nothing to put in the delay slot
540 ''' print likelihood suffix (/u for unlikely).
541 'O' print a constant without the #
542 'R' print the LSW of a dp value - changes if in little endian
543 'S' print the MSW of a dp value - changes if in little endian
544 'T' print the next word of a dp value - same as 'R' in big endian mode.
545 'M' print an `x' if `m' will print `base,index'.
546 'N' print 'r63' if the operand is (const_int 0).
547 'd' print a V2SF reg as dN instead of fpN.
548 'm' print a pair `base,offset' or `base,index', for LD and ST.
549 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
550 'o' output an operator. */
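/* Editorial example (not from the original sources): on a big-endian
   target MSW is 0 and LSW is 1, so for a DImode value held in the
   register pair r1/r2 the template "mov %S1,%S0\n\tmov %R1,%R0" prints
   the most significant word register first ("mov r1,...") and the
   least significant one second ("mov r2,...").  On a little-endian
   target the roles of %S and %R are swapped.  */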
552 void
553 print_operand (FILE *stream, rtx x, int code)
555 switch (code)
557 case '.':
558 if (final_sequence
559 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
560 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
561 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
562 break;
563 case ',':
564 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
565 break;
566 case '@':
567 if (trap_exit)
568 fprintf (stream, "trapa #%d", trap_exit);
569 else if (sh_cfun_interrupt_handler_p ())
570 fprintf (stream, "rte");
571 else
572 fprintf (stream, "rts");
573 break;
574 case '#':
575 /* Output a nop if there's nothing in the delay slot. */
576 if (dbr_sequence_length () == 0)
577 fprintf (stream, "\n\tnop");
578 break;
579 case '\'':
581 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
583 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
584 fputs ("/u", stream);
585 break;
587 case 'O':
588 x = mark_constant_pool_use (x);
589 output_addr_const (stream, x);
590 break;
591 case 'R':
592 fputs (reg_names[REGNO (x) + LSW], (stream));
593 break;
594 case 'S':
595 fputs (reg_names[REGNO (x) + MSW], (stream));
596 break;
597 case 'T':
598 /* Next word of a double. */
599 switch (GET_CODE (x))
601 case REG:
602 fputs (reg_names[REGNO (x) + 1], (stream));
603 break;
604 case MEM:
605 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
606 && GET_CODE (XEXP (x, 0)) != POST_INC)
607 x = adjust_address (x, SImode, 4);
608 print_operand_address (stream, XEXP (x, 0));
609 break;
610 default:
611 break;
613 break;
614 case 'o':
615 switch (GET_CODE (x))
617 case PLUS: fputs ("add", stream); break;
618 case MINUS: fputs ("sub", stream); break;
619 case MULT: fputs ("mul", stream); break;
620 case DIV: fputs ("div", stream); break;
621 case EQ: fputs ("eq", stream); break;
622 case NE: fputs ("ne", stream); break;
623 case GT: case LT: fputs ("gt", stream); break;
624 case GE: case LE: fputs ("ge", stream); break;
625 case GTU: case LTU: fputs ("gtu", stream); break;
626 case GEU: case LEU: fputs ("geu", stream); break;
627 default:
628 break;
630 break;
631 case 'M':
632 if (GET_CODE (x) == MEM
633 && GET_CODE (XEXP (x, 0)) == PLUS
634 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
635 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
636 fputc ('x', stream);
637 break;
639 case 'm':
640 if (GET_CODE (x) != MEM)
641 abort ();
642 x = XEXP (x, 0);
643 switch (GET_CODE (x))
645 case REG:
646 case SUBREG:
647 print_operand (stream, x, 0);
648 fputs (", 0", stream);
649 break;
651 case PLUS:
652 print_operand (stream, XEXP (x, 0), 0);
653 fputs (", ", stream);
654 print_operand (stream, XEXP (x, 1), 0);
655 break;
657 default:
658 abort ();
660 break;
662 case 'd':
663 if (GET_CODE (x) != REG || GET_MODE (x) != V2SFmode)
664 abort ();
666 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
667 break;
669 case 'N':
670 if (x == CONST0_RTX (GET_MODE (x)))
672 fprintf ((stream), "r63");
673 break;
675 goto default_output;
676 case 'u':
677 if (GET_CODE (x) == CONST_INT)
679 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
680 break;
682 /* Fall through. */
684 default_output:
685 default:
686 switch (GET_CODE (x))
688 /* FIXME: We need this on SHmedia32 because reload generates
689 some sign-extended HI or QI loads into DImode registers
690 but, because Pmode is SImode, the address ends up with a
691 subreg:SI of the DImode register. Maybe reload should be
692 fixed so as to apply alter_subreg to such loads? */
693 case SUBREG:
694 if (SUBREG_BYTE (x) != 0
695 || GET_CODE (SUBREG_REG (x)) != REG)
696 abort ();
698 x = SUBREG_REG (x);
699 /* Fall through. */
701 case REG:
702 if (FP_REGISTER_P (REGNO (x))
703 && GET_MODE (x) == V16SFmode)
704 fprintf ((stream), "mtrx%s", reg_names[REGNO (x)] + 2);
705 else if (FP_REGISTER_P (REGNO (x))
706 && GET_MODE (x) == V4SFmode)
707 fprintf ((stream), "fv%s", reg_names[REGNO (x)] + 2);
708 else if (GET_CODE (x) == REG
709 && GET_MODE (x) == V2SFmode)
710 fprintf ((stream), "fp%s", reg_names[REGNO (x)] + 2);
711 else if (FP_REGISTER_P (REGNO (x))
712 && GET_MODE_SIZE (GET_MODE (x)) > 4)
713 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
714 else
715 fputs (reg_names[REGNO (x)], (stream));
716 break;
718 case MEM:
719 output_address (XEXP (x, 0));
720 break;
722 case CONST:
723 if (TARGET_SHMEDIA
724 && GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
725 && GET_MODE (XEXP (x, 0)) == DImode
726 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
727 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
729 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
731 fputc ('(', stream);
732 if (GET_CODE (val) == ASHIFTRT)
734 fputc ('(', stream);
735 if (GET_CODE (XEXP (val, 0)) == CONST)
736 fputc ('(', stream);
737 output_addr_const (stream, XEXP (val, 0));
738 if (GET_CODE (XEXP (val, 0)) == CONST)
739 fputc (')', stream);
740 fputs (" >> ", stream);
741 output_addr_const (stream, XEXP (val, 1));
742 fputc (')', stream);
744 else
746 if (GET_CODE (val) == CONST)
747 fputc ('(', stream);
748 output_addr_const (stream, val);
749 if (GET_CODE (val) == CONST)
750 fputc (')', stream);
752 fputs (" & 65535)", stream);
753 break;
756 /* Fall through. */
757 default:
758 if (TARGET_SH1)
759 fputc ('#', stream);
760 output_addr_const (stream, x);
761 break;
763 break;
767 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
768 static void
769 force_into (rtx value, rtx target)
771 value = force_operand (value, target);
772 if (! rtx_equal_p (value, target))
773 emit_insn (gen_move_insn (target, value));
776 /* Emit code to perform a block move. Choose the best method.
778 OPERANDS[0] is the destination.
779 OPERANDS[1] is the source.
780 OPERANDS[2] is the size.
781 OPERANDS[3] is the alignment safe to use. */
783 int
784 expand_block_move (rtx *operands)
786 int align = INTVAL (operands[3]);
787 int constp = (GET_CODE (operands[2]) == CONST_INT);
788 int bytes = (constp ? INTVAL (operands[2]) : 0);
790 if (! constp)
791 return 0;
793 /* If we could use mov.l to move words and dest is word-aligned, we
794 can use movua.l for loads and still generate a relatively short
795 and efficient sequence. */
796 if (TARGET_SH4A_ARCH && align < 4
797 && MEM_ALIGN (operands[0]) >= 32
798 && can_move_by_pieces (bytes, 32))
800 rtx dest = copy_rtx (operands[0]);
801 rtx src = copy_rtx (operands[1]);
802 /* We could use different pseudos for each copied word, but
803 since movua can only load into r0, it's kind of
804 pointless. */
805 rtx temp = gen_reg_rtx (SImode);
806 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
807 int copied = 0;
809 while (copied + 4 <= bytes)
811 rtx to = adjust_address (dest, SImode, copied);
812 rtx from = adjust_automodify_address (src, SImode, src_addr, copied);
814 emit_insn (gen_movua (temp, from));
815 emit_move_insn (src_addr, plus_constant (src_addr, 4));
816 emit_move_insn (to, temp);
817 copied += 4;
820 if (copied < bytes)
821 move_by_pieces (adjust_address (dest, BLKmode, copied),
822 adjust_automodify_address (src, BLKmode,
823 src_addr, copied),
824 bytes - copied, align, 0);
826 return 1;
829 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
830 alignment, or if it isn't a multiple of 4 bytes, then fail. */
831 if (align < 4 || (bytes % 4 != 0))
832 return 0;
834 if (TARGET_HARD_SH4)
836 if (bytes < 12)
837 return 0;
838 else if (bytes == 12)
840 tree entry_name;
841 rtx sym;
842 rtx func_addr_rtx;
843 rtx r4 = gen_rtx_REG (SImode, 4);
844 rtx r5 = gen_rtx_REG (SImode, 5);
846 entry_name = get_identifier ("__movmemSI12_i4");
848 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
849 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
850 force_into (XEXP (operands[0], 0), r4);
851 force_into (XEXP (operands[1], 0), r5);
852 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
853 return 1;
855 else if (! TARGET_SMALLCODE)
857 tree entry_name;
858 rtx sym;
859 rtx func_addr_rtx;
860 int dwords;
861 rtx r4 = gen_rtx_REG (SImode, 4);
862 rtx r5 = gen_rtx_REG (SImode, 5);
863 rtx r6 = gen_rtx_REG (SImode, 6);
865 entry_name = get_identifier (bytes & 4
866 ? "__movmem_i4_odd"
867 : "__movmem_i4_even");
868 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
869 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
870 force_into (XEXP (operands[0], 0), r4);
871 force_into (XEXP (operands[1], 0), r5);
873 dwords = bytes >> 3;
874 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
875 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
876 return 1;
878 else
879 return 0;
881 if (bytes < 64)
883 char entry[30];
884 tree entry_name;
885 rtx sym;
886 rtx func_addr_rtx;
887 rtx r4 = gen_rtx_REG (SImode, 4);
888 rtx r5 = gen_rtx_REG (SImode, 5);
890 sprintf (entry, "__movmemSI%d", bytes);
891 entry_name = get_identifier (entry);
892 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
893 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
894 force_into (XEXP (operands[0], 0), r4);
895 force_into (XEXP (operands[1], 0), r5);
896 emit_insn (gen_block_move_real (func_addr_rtx));
897 return 1;
900 /* This is the same number of bytes as a memcpy call, but to a different,
901 less common function name, so this will occasionally use more space. */
902 if (! TARGET_SMALLCODE)
904 tree entry_name;
905 rtx sym;
906 rtx func_addr_rtx;
907 int final_switch, while_loop;
908 rtx r4 = gen_rtx_REG (SImode, 4);
909 rtx r5 = gen_rtx_REG (SImode, 5);
910 rtx r6 = gen_rtx_REG (SImode, 6);
912 entry_name = get_identifier ("__movmem");
913 sym = function_symbol (IDENTIFIER_POINTER (entry_name));
914 func_addr_rtx = copy_to_mode_reg (Pmode, sym);
915 force_into (XEXP (operands[0], 0), r4);
916 force_into (XEXP (operands[1], 0), r5);
918 /* r6 controls the size of the move. 16 is decremented from it
919 for each 64 bytes moved. Then the negative bit left over is used
920 as an index into a list of move instructions. e.g., a 72 byte move
921 would be set up with size(r6) = 14, for one iteration through the
922 big while loop, and a switch of -2 for the last part. */
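/* Worked example (editorial): for a 72 byte move, bytes / 4 == 18, so
   final_switch == 16 - (18 % 16) == 14 and
   while_loop == (18 / 16 - 1) * 16 == 0, giving r6 == 14 as described
   in the comment above.  */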
924 final_switch = 16 - ((bytes / 4) % 16);
925 while_loop = ((bytes / 4) / 16 - 1) * 16;
926 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
927 emit_insn (gen_block_lump_real (func_addr_rtx));
928 return 1;
931 return 0;
934 /* Prepare operands for a move define_expand; specifically, one of the
935 operands must be in a register. */
937 int
938 prepare_move_operands (rtx operands[], enum machine_mode mode)
940 if ((mode == SImode || mode == DImode)
941 && flag_pic
942 && ! ((mode == Pmode || mode == ptr_mode)
943 && tls_symbolic_operand (operands[1], Pmode) != 0))
945 rtx temp;
946 if (SYMBOLIC_CONST_P (operands[1]))
948 if (GET_CODE (operands[0]) == MEM)
949 operands[1] = force_reg (Pmode, operands[1]);
950 else if (TARGET_SHMEDIA
951 && GET_CODE (operands[1]) == LABEL_REF
952 && target_reg_operand (operands[0], mode))
953 /* It's ok. */;
954 else
956 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
957 operands[1] = legitimize_pic_address (operands[1], mode, temp);
960 else if (GET_CODE (operands[1]) == CONST
961 && GET_CODE (XEXP (operands[1], 0)) == PLUS
962 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
964 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
965 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
966 mode, temp);
967 operands[1] = expand_binop (mode, add_optab, temp,
968 XEXP (XEXP (operands[1], 0), 1),
969 no_new_pseudos ? temp
970 : gen_reg_rtx (Pmode),
971 0, OPTAB_LIB_WIDEN);
975 if (! reload_in_progress && ! reload_completed)
977 /* Copy the source to a register if both operands aren't registers. */
978 if (! register_operand (operands[0], mode)
979 && ! sh_register_operand (operands[1], mode))
980 operands[1] = copy_to_mode_reg (mode, operands[1]);
982 if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
984 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
985 except that we can't use that function because it is static. */
986 rtx new = change_address (operands[0], mode, 0);
987 MEM_COPY_ATTRIBUTES (new, operands[0]);
988 operands[0] = new;
991 /* This case can happen while generating code to move the result
992 of a library call to the target. Reject `st r0,@(rX,rY)' because
993 reload will fail to find a spill register for rX, since r0 is already
994 being used for the source. */
995 else if (refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
996 && GET_CODE (operands[0]) == MEM
997 && GET_CODE (XEXP (operands[0], 0)) == PLUS
998 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
999 operands[1] = copy_to_mode_reg (mode, operands[1]);
1002 if (mode == Pmode || mode == ptr_mode)
1004 rtx op0, op1;
1005 enum tls_model tls_kind;
1007 op0 = operands[0];
1008 op1 = operands[1];
1009 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
1011 rtx tga_op1, tga_ret, tmp, tmp2;
1014 switch (tls_kind)
1016 case TLS_MODEL_GLOBAL_DYNAMIC:
1017 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1018 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1019 op1 = tga_ret;
1020 break;
1022 case TLS_MODEL_LOCAL_DYNAMIC:
1023 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1024 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1026 tmp = gen_reg_rtx (Pmode);
1027 emit_move_insn (tmp, tga_ret);
1029 if (register_operand (op0, Pmode))
1030 tmp2 = op0;
1031 else
1032 tmp2 = gen_reg_rtx (Pmode);
1034 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1035 op1 = tmp2;
1036 break;
1038 case TLS_MODEL_INITIAL_EXEC:
1039 if (! flag_pic)
1040 emit_insn (gen_GOTaddr2picreg ());
1041 tga_op1 = gen_reg_rtx (Pmode);
1042 tmp = gen_sym2GOTTPOFF (op1);
1043 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1044 op1 = tga_op1;
1045 break;
1047 case TLS_MODEL_LOCAL_EXEC:
1048 tmp2 = gen_reg_rtx (Pmode);
1049 emit_insn (gen_load_gbr (tmp2));
1050 tmp = gen_reg_rtx (Pmode);
1051 emit_insn (gen_symTPOFF2reg (tmp, op1));
1053 if (register_operand (op0, Pmode))
1054 op1 = op0;
1055 else
1056 op1 = gen_reg_rtx (Pmode);
1058 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1059 break;
1061 default:
1062 abort ();
1064 operands[1] = op1;
1068 return 0;
1071 /* Prepare the operands for an scc instruction; make sure that the
1072 compare has been done. */
1073 rtx
1074 prepare_scc_operands (enum rtx_code code)
1076 rtx t_reg = gen_rtx_REG (SImode, T_REG);
1077 enum rtx_code oldcode = code;
1078 enum machine_mode mode;
1080 /* First need a compare insn. */
1081 switch (code)
1083 case NE:
1084 /* It isn't possible to handle this case. */
1085 abort ();
1086 case LT:
1087 code = GT;
1088 break;
1089 case LE:
1090 code = GE;
1091 break;
1092 case LTU:
1093 code = GTU;
1094 break;
1095 case LEU:
1096 code = GEU;
1097 break;
1098 default:
1099 break;
1101 if (code != oldcode)
1103 rtx tmp = sh_compare_op0;
1104 sh_compare_op0 = sh_compare_op1;
1105 sh_compare_op1 = tmp;
1108 mode = GET_MODE (sh_compare_op0);
1109 if (mode == VOIDmode)
1110 mode = GET_MODE (sh_compare_op1);
1112 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1113 if ((code != EQ && code != NE
1114 && (sh_compare_op1 != const0_rtx
1115 || code == GTU || code == GEU || code == LTU || code == LEU))
1116 || (mode == DImode && sh_compare_op1 != const0_rtx)
1117 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1118 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1120 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1121 (mode == SFmode ? emit_sf_insn : emit_df_insn)
1122 (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
1123 gen_rtx_SET (VOIDmode, t_reg,
1124 gen_rtx_fmt_ee (code, SImode,
1125 sh_compare_op0, sh_compare_op1)),
1126 gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
1127 else
1128 emit_insn (gen_rtx_SET (VOIDmode, t_reg,
1129 gen_rtx_fmt_ee (code, SImode,
1130 sh_compare_op0, sh_compare_op1)));
1132 return t_reg;
1135 /* Called from the md file, set up the operands of a compare instruction. */
1137 void
1138 from_compare (rtx *operands, int code)
1140 enum machine_mode mode = GET_MODE (sh_compare_op0);
1141 rtx insn;
1142 if (mode == VOIDmode)
1143 mode = GET_MODE (sh_compare_op1);
1144 if (code != EQ
1145 || mode == DImode
1146 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1148 /* Force args into regs, since we can't use constants here. */
1149 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1150 if (sh_compare_op1 != const0_rtx
1151 || code == GTU || code == GEU
1152 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1153 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1155 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
1157 from_compare (operands, GT);
1158 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
1160 else
1161 insn = gen_rtx_SET (VOIDmode,
1162 gen_rtx_REG (SImode, T_REG),
1163 gen_rtx_fmt_ee (code, SImode,
1164 sh_compare_op0, sh_compare_op1));
1165 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1167 insn = gen_rtx_PARALLEL (VOIDmode,
1168 gen_rtvec (2, insn,
1169 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
1170 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
1172 else
1173 emit_insn (insn);
1176 /* Functions to output assembly code. */
1178 /* Return a sequence of instructions to perform DI or DF move.
1180 Since the SH cannot move a DI or DF in one instruction, we have
1181 to take care when we see overlapping source and dest registers. */
1183 const char *
1184 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
1185 enum machine_mode mode)
1187 rtx dst = operands[0];
1188 rtx src = operands[1];
1190 if (GET_CODE (dst) == MEM
1191 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
1192 return "mov.l %T1,%0\n\tmov.l %1,%0";
1194 if (register_operand (dst, mode)
1195 && register_operand (src, mode))
1197 if (REGNO (src) == MACH_REG)
1198 return "sts mach,%S0\n\tsts macl,%R0";
1200 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1201 when mov.d r1,r0 do r1->r0 then r2->r1. */
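/* Editorial note: e.g. for mov.d r1,r2 the destination pair r2/r3
   overlaps the source pair r1/r2, so r2 must be read (r2->r3) before
   it is overwritten (r1->r2); for the opposite overlap, e.g.
   mov.d r2,r1, the other order is the safe one.  */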
1203 if (REGNO (src) + 1 == REGNO (dst))
1204 return "mov %T1,%T0\n\tmov %1,%0";
1205 else
1206 return "mov %1,%0\n\tmov %T1,%T0";
1208 else if (GET_CODE (src) == CONST_INT)
1210 if (INTVAL (src) < 0)
1211 output_asm_insn ("mov #-1,%S0", operands);
1212 else
1213 output_asm_insn ("mov #0,%S0", operands);
1215 return "mov %1,%R0";
1217 else if (GET_CODE (src) == MEM)
1219 int ptrreg = -1;
1220 int dreg = REGNO (dst);
1221 rtx inside = XEXP (src, 0);
1223 if (GET_CODE (inside) == REG)
1224 ptrreg = REGNO (inside);
1225 else if (GET_CODE (inside) == SUBREG)
1226 ptrreg = subreg_regno (inside);
1227 else if (GET_CODE (inside) == PLUS)
1229 ptrreg = REGNO (XEXP (inside, 0));
1230 /* ??? A r0+REG address shouldn't be possible here, because it isn't
1231 an offsettable address. Unfortunately, offsettable addresses use
1232 QImode to check the offset, and a QImode offsettable address
1233 requires r0 for the other operand, which is not currently
1234 supported, so we can't use the 'o' constraint.
1235 Thus we must check for and handle r0+REG addresses here.
1236 We punt for now, since this is likely very rare. */
1237 if (GET_CODE (XEXP (inside, 1)) == REG)
1238 abort ();
1240 else if (GET_CODE (inside) == LABEL_REF)
1241 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1242 else if (GET_CODE (inside) == POST_INC)
1243 return "mov.l %1,%0\n\tmov.l %1,%T0";
1244 else
1245 abort ();
1247 /* Work out the safe way to copy. Copy into the second half first. */
1248 if (dreg == ptrreg)
1249 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1252 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1255 /* Print an instruction which would have gone into a delay slot after
1256 another instruction, but couldn't because the other instruction expanded
1257 into a sequence where putting the slot insn at the end wouldn't work. */
1259 static void
1260 print_slot (rtx insn)
1262 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 0, 1, NULL);
1264 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
1267 const char *
1268 output_far_jump (rtx insn, rtx op)
1270 struct { rtx lab, reg, op; } this;
1271 rtx braf_base_lab = NULL_RTX;
1272 const char *jump;
1273 int far;
1274 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1275 rtx prev;
1277 this.lab = gen_label_rtx ();
1279 if (TARGET_SH2
1280 && offset >= -32764
1281 && offset - get_attr_length (insn) <= 32766)
1283 far = 0;
1284 jump = "mov.w %O0,%1; braf %1";
1286 else
1288 far = 1;
1289 if (flag_pic)
1291 if (TARGET_SH2)
1292 jump = "mov.l %O0,%1; braf %1";
1293 else
1294 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1296 else
1297 jump = "mov.l %O0,%1; jmp @%1";
1299 /* If we have a scratch register available, use it. */
1300 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1301 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1303 this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1304 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1305 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1306 output_asm_insn (jump, &this.lab);
1307 if (dbr_sequence_length ())
1308 print_slot (final_sequence);
1309 else
1310 output_asm_insn ("nop", 0);
1312 else
1314 /* Output the delay slot insn first if any. */
1315 if (dbr_sequence_length ())
1316 print_slot (final_sequence);
1318 this.reg = gen_rtx_REG (SImode, 13);
1319 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1320 Fortunately, MACL is fixed and call-clobbered, and we never
1321 need its value across jumps, so save r13 in it instead of in
1322 the stack. */
1323 if (TARGET_SH5)
1324 output_asm_insn ("lds r13, macl", 0);
1325 else
1326 output_asm_insn ("mov.l r13,@-r15", 0);
1327 output_asm_insn (jump, &this.lab);
1328 if (TARGET_SH5)
1329 output_asm_insn ("sts macl, r13", 0);
1330 else
1331 output_asm_insn ("mov.l @r15+,r13", 0);
1333 if (far && flag_pic && TARGET_SH2)
1335 braf_base_lab = gen_label_rtx ();
1336 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1337 CODE_LABEL_NUMBER (braf_base_lab));
1339 if (far)
1340 output_asm_insn (".align 2", 0);
1341 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1342 this.op = op;
1343 if (far && flag_pic)
1345 if (TARGET_SH2)
1346 this.lab = braf_base_lab;
1347 output_asm_insn (".long %O2-%O0", &this.lab);
1349 else
1350 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
1351 return "";
1354 /* Local label counter, used for constants in the pool and inside
1355 pattern branches. */
1357 static int lf = 100;
1359 /* Output code for ordinary branches. */
1361 const char *
1362 output_branch (int logic, rtx insn, rtx *operands)
1364 switch (get_attr_length (insn))
1366 case 6:
1367 /* This can happen if filling the delay slot has caused a forward
1368 branch to exceed its range (we could reverse it, but only
1369 when we know we won't overextend other branches; this should
1370 best be handled by relaxation).
1371 It can also happen when other condbranches hoist delay slot insn
1372 from their destination, thus leading to code size increase.
1373 But the branch will still be in the range -4092..+4098 bytes. */
1375 if (! TARGET_RELAX)
1377 int label = lf++;
1378 /* The call to print_slot will clobber the operands. */
1379 rtx op0 = operands[0];
1381 /* If the instruction in the delay slot is annulled (true), then
1382 there is no delay slot where we can put it now. The only safe
1383 place for it is after the label. final will do that by default. */
1385 if (final_sequence
1386 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1387 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1389 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
1390 ASSEMBLER_DIALECT ? "/" : ".", label);
1391 print_slot (final_sequence);
1393 else
1394 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
1396 output_asm_insn ("bra\t%l0", &op0);
1397 fprintf (asm_out_file, "\tnop\n");
1398 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1400 return "";
1402 /* When relaxing, handle this like a short branch. The linker
1403 will fix it up if it still doesn't fit after relaxation. */
1404 case 2:
1405 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
1407 /* These are for SH2e, in which we have to account for the
1408 extra nop because of the hardware bug in annulled branches. */
1409 case 8:
1410 if (! TARGET_RELAX)
1412 int label = lf++;
1414 if (final_sequence
1415 && INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0)))
1416 abort ();
1417 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
1418 logic ? "f" : "t",
1419 ASSEMBLER_DIALECT ? "/" : ".", label);
1420 fprintf (asm_out_file, "\tnop\n");
1421 output_asm_insn ("bra\t%l0", operands);
1422 fprintf (asm_out_file, "\tnop\n");
1423 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1425 return "";
1427 /* When relaxing, fall through. */
1428 case 4:
1430 char buffer[10];
1432 sprintf (buffer, "b%s%ss\t%%l0",
1433 logic ? "t" : "f",
1434 ASSEMBLER_DIALECT ? "/" : ".");
1435 output_asm_insn (buffer, &operands[0]);
1436 return "nop";
1439 default:
1440 /* There should be no longer branches now - that would
1441 indicate that something has destroyed the branches set
1442 up in machine_dependent_reorg. */
1443 abort ();
1447 const char *
1448 output_branchy_insn (enum rtx_code code, const char *template,
1449 rtx insn, rtx *operands)
1451 rtx next_insn = NEXT_INSN (insn);
1453 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
1455 rtx src = SET_SRC (PATTERN (next_insn));
1456 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
1458 /* Following branch not taken */
1459 operands[9] = gen_label_rtx ();
1460 emit_label_after (operands[9], next_insn);
1461 INSN_ADDRESSES_NEW (operands[9],
1462 INSN_ADDRESSES (INSN_UID (next_insn))
1463 + get_attr_length (next_insn));
1464 return template;
1466 else
1468 int offset = (branch_dest (next_insn)
1469 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
1470 if (offset >= -252 && offset <= 258)
1472 if (GET_CODE (src) == IF_THEN_ELSE)
1473 /* branch_true */
1474 src = XEXP (src, 1);
1475 operands[9] = src;
1476 return template;
1480 operands[9] = gen_label_rtx ();
1481 emit_label_after (operands[9], insn);
1482 INSN_ADDRESSES_NEW (operands[9],
1483 INSN_ADDRESSES (INSN_UID (insn))
1484 + get_attr_length (insn));
1485 return template;
1488 const char *
1489 output_ieee_ccmpeq (rtx insn, rtx *operands)
1491 return output_branchy_insn (NE, "bt\t%l9\\;fcmp/eq\t%1,%0", insn, operands);
1494 /* Output the start of the assembler file. */
1496 static void
1497 sh_file_start (void)
1499 default_file_start ();
1501 #ifdef SYMBIAN
1502 /* Declare the .directive section before it is used. */
1503 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
1504 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
1505 #endif
1507 if (TARGET_ELF)
1508 /* We need to show the text section with the proper
1509 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
1510 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
1511 will complain. We can teach GAS specifically about the
1512 default attributes for our choice of text section, but
1513 then we would have to change GAS again if/when we change
1514 the text section name. */
1515 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
1516 else
1517 /* Switch to the data section so that the coffsem symbol
1518 isn't in the text section. */
1519 data_section ();
1521 if (TARGET_LITTLE_ENDIAN)
1522 fputs ("\t.little\n", asm_out_file);
1524 if (!TARGET_ELF)
1526 if (TARGET_SHCOMPACT)
1527 fputs ("\t.mode\tSHcompact\n", asm_out_file);
1528 else if (TARGET_SHMEDIA)
1529 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1530 TARGET_SHMEDIA64 ? 64 : 32);
1534 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
1536 static bool
1537 unspec_caller_rtx_p (rtx pat)
1539 switch (GET_CODE (pat))
1541 case CONST:
1542 return unspec_caller_rtx_p (XEXP (pat, 0));
1543 case PLUS:
1544 case MINUS:
1545 if (unspec_caller_rtx_p (XEXP (pat, 0)))
1546 return true;
1547 return unspec_caller_rtx_p (XEXP (pat, 1));
1548 case UNSPEC:
1549 if (XINT (pat, 1) == UNSPEC_CALLER)
1550 return true;
1551 default:
1552 break;
1555 return false;
1558 /* Indicate that INSN cannot be duplicated. This is true for insns
1559 that generate a unique label. */
1561 static bool
1562 sh_cannot_copy_insn_p (rtx insn)
1564 rtx pat;
1566 if (!reload_completed || !flag_pic)
1567 return false;
1569 if (GET_CODE (insn) != INSN)
1570 return false;
1571 if (asm_noperands (insn) >= 0)
1572 return false;
1574 pat = PATTERN (insn);
1575 if (GET_CODE (pat) != SET)
1576 return false;
1577 pat = SET_SRC (pat);
1579 if (unspec_caller_rtx_p (pat))
1580 return true;
1582 return false;
1585 /* Actual number of instructions used to make a shift by N. */
1586 static const char ashiftrt_insns[] =
1587 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
1589 /* Left shift and logical right shift are the same. */
1590 static const char shift_insns[] =
1591 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1593 /* Individual shift amounts needed to get the above length sequences.
1594 One bit right shifts clobber the T bit, so when possible, put one bit
1595 shifts in the middle of the sequence, so the ends are eligible for
1596 branch delay slots. */
1597 static const short shift_amounts[32][5] = {
1598 {0}, {1}, {2}, {2, 1},
1599 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
1600 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1601 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
1602 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1603 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1604 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1605 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
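/* Worked example (editorial): a shift by 7 takes shift_insns[7] == 4
   instructions, using shift_amounts[7] == {2, 2, 1, 2}; the single-bit
   shift sits in the middle of the sequence so that, per the comment
   above, the first and last instructions remain eligible for branch
   delay slots.  */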
1607 /* Likewise, but for shift amounts < 16, up to three highmost bits
1608 might be clobbered. This is typically used when combined with some
1609 kind of sign or zero extension. */
1611 static const char ext_shift_insns[] =
1612 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1614 static const short ext_shift_amounts[32][4] = {
1615 {0}, {1}, {2}, {2, 1},
1616 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
1617 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1618 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
1619 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1620 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1621 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1622 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
1624 /* Assuming we have a value that has been sign-extended by at least one bit,
1625 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
1626 to shift it by N without data loss, and quicker than by other means? */
1627 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
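/* Editorial note: (((n) | 8) == 15) holds exactly for n == 7 and n == 15,
   i.e. the cases where the low three bits are all set and no bit above
   bit 3 is set.  */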
1629 /* This is used in length attributes in sh.md to help compute the length
1630 of arbitrary constant shift instructions. */
1632 int
1633 shift_insns_rtx (rtx insn)
1635 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1636 int shift_count = INTVAL (XEXP (set_src, 1));
1637 enum rtx_code shift_code = GET_CODE (set_src);
1639 switch (shift_code)
1641 case ASHIFTRT:
1642 return ashiftrt_insns[shift_count];
1643 case LSHIFTRT:
1644 case ASHIFT:
1645 return shift_insns[shift_count];
1646 default:
1647 abort ();
1651 /* Return the cost of a shift. */
1653 static inline int
1654 shiftcosts (rtx x)
1656 int value;
1658 if (TARGET_SHMEDIA)
1659 return 1;
1661 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
1663 if (GET_MODE (x) == DImode
1664 && GET_CODE (XEXP (x, 1)) == CONST_INT
1665 && INTVAL (XEXP (x, 1)) == 1)
1666 return 2;
1668 /* Everything else is invalid, because there is no pattern for it. */
1669 return 10000;
1671 /* If shifting by a non-constant, then this will be expensive. */
1672 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1673 return SH_DYNAMIC_SHIFT_COST;
1675 value = INTVAL (XEXP (x, 1));
1677 /* Otherwise, return the true cost in instructions. */
1678 if (GET_CODE (x) == ASHIFTRT)
1680 int cost = ashiftrt_insns[value];
1681 /* If SH3, then we put the constant in a reg and use shad. */
1682 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1683 cost = 1 + SH_DYNAMIC_SHIFT_COST;
1684 return cost;
1686 else
1687 return shift_insns[value];
1690 /* Return the cost of an AND operation. */
1692 static inline int
1693 andcosts (rtx x)
1695 int i;
1697 /* ANDing with a register is a single-cycle `and' instruction. */
1698 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1699 return 1;
1701 i = INTVAL (XEXP (x, 1));
1703 if (TARGET_SHMEDIA)
1705 if ((GET_CODE (XEXP (x, 1)) == CONST_INT
1706 && CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1707 || EXTRA_CONSTRAINT_C16 (XEXP (x, 1)))
1708 return 1;
1709 else
1710 return 2;
1713 /* These constants can be handled with single-cycle extu.[bw] instructions. */
1714 if (i == 0xff || i == 0xffff)
1715 return 1;
1716 /* Constants that can be used in an and immediate instruction in a single
1717 cycle, but this requires r0, so make it a little more expensive. */
1718 if (CONST_OK_FOR_K08 (i))
1719 return 2;
1720 /* Constants that can be loaded with a mov immediate and an and.
1721 This case is probably unnecessary. */
1722 if (CONST_OK_FOR_I08 (i))
1723 return 2;
1724 /* Any other constant requires a 2 cycle pc-relative load plus an and.
1725 This case is probably unnecessary. */
1726 return 3;
1729 /* Return the cost of an addition or a subtraction. */
1731 static inline int
1732 addsubcosts (rtx x)
1734 /* Adding a register is a single cycle insn. */
1735 if (GET_CODE (XEXP (x, 1)) == REG
1736 || GET_CODE (XEXP (x, 1)) == SUBREG)
1737 return 1;
1739 /* Likewise for small constants. */
1740 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1741 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
1742 return 1;
1744 if (TARGET_SHMEDIA)
1745 switch (GET_CODE (XEXP (x, 1)))
1747 case CONST:
1748 case LABEL_REF:
1749 case SYMBOL_REF:
1750 return TARGET_SHMEDIA64 ? 5 : 3;
1752 case CONST_INT:
1753 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
1754 return 2;
1755 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
1756 return 3;
1757 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
1758 return 4;
1760 /* Fall through. */
1761 default:
1762 return 5;
1765 /* Any other constant requires a 2 cycle pc-relative load plus an
1766 addition. */
1767 return 3;
1770 /* Return the cost of a multiply. */
1771 static inline int
1772 multcosts (rtx x ATTRIBUTE_UNUSED)
1774 if (TARGET_SHMEDIA)
1775 return 3;
1777 if (TARGET_SH2)
1779 /* We have a mul insn, so we can never take more than the mul and the
1780 read of the mac reg, but count more because of the latency and extra
1781 reg usage. */
1782 if (TARGET_SMALLCODE)
1783 return 2;
1784 return 3;
1787 /* If we're aiming at small code, then just count the number of
1788 insns in a multiply call sequence. */
1789 if (TARGET_SMALLCODE)
1790 return 5;
1792 /* Otherwise count all the insns in the routine we'd be calling too. */
1793 return 20;
1796 /* Compute a (partial) cost for rtx X. Return true if the complete
1797 cost has been computed, and false if subexpressions should be
1798 scanned. In either case, *TOTAL contains the cost result. */
1800 static bool
1801 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
1803 switch (code)
1805 case CONST_INT:
1806 if (TARGET_SHMEDIA)
1808 if (INTVAL (x) == 0)
1809 *total = 0;
1810 else if (outer_code == AND && and_operand ((x), DImode))
1811 *total = 0;
1812 else if ((outer_code == IOR || outer_code == XOR
1813 || outer_code == PLUS)
1814 && CONST_OK_FOR_I10 (INTVAL (x)))
1815 *total = 0;
1816 else if (CONST_OK_FOR_I16 (INTVAL (x)))
1817 *total = COSTS_N_INSNS (outer_code != SET);
1818 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
1819 *total = COSTS_N_INSNS (2);
1820 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
1821 *total = COSTS_N_INSNS (3);
1822 else
1823 *total = COSTS_N_INSNS (4);
1824 return true;
1826 if (CONST_OK_FOR_I08 (INTVAL (x)))
1827 *total = 0;
1828 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
1829 && CONST_OK_FOR_K08 (INTVAL (x)))
1830 *total = 1;
1831 else
1832 *total = 8;
1833 return true;
1835 case CONST:
1836 case LABEL_REF:
1837 case SYMBOL_REF:
1838 if (TARGET_SHMEDIA64)
1839 *total = COSTS_N_INSNS (4);
1840 else if (TARGET_SHMEDIA32)
1841 *total = COSTS_N_INSNS (2);
1842 else
1843 *total = 5;
1844 return true;
1846 case CONST_DOUBLE:
1847 if (TARGET_SHMEDIA)
1848 *total = COSTS_N_INSNS (4);
1849 else
1850 *total = 10;
1851 return true;
1853 case PLUS:
1854 *total = COSTS_N_INSNS (addsubcosts (x));
1855 return true;
1857 case AND:
1858 *total = COSTS_N_INSNS (andcosts (x));
1859 return true;
1861 case MULT:
1862 *total = COSTS_N_INSNS (multcosts (x));
1863 return true;
1865 case ASHIFT:
1866 case ASHIFTRT:
1867 case LSHIFTRT:
1868 *total = COSTS_N_INSNS (shiftcosts (x));
1869 return true;
1871 case DIV:
1872 case UDIV:
1873 case MOD:
1874 case UMOD:
1875 *total = COSTS_N_INSNS (20);
1876 return true;
1878 case FLOAT:
1879 case FIX:
1880 *total = 100;
1881 return true;
1883 default:
1884 return false;
1888 /* Compute the cost of an address. For the SH, all valid addresses are
1889 the same cost. Use a slightly higher cost for reg + reg addressing,
1890 since it increases pressure on r0. */
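/* For example, on SH1..SH4 an @(r0,rn) address (reg + reg) is costed
   at 1 here, while @(disp,rn) and plain register addresses cost 0.  */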
1892 static int
1893 sh_address_cost (rtx X)
1895 return (GET_CODE (X) == PLUS
1896 && ! CONSTANT_P (XEXP (X, 1))
1897 && ! TARGET_SHMEDIA ? 1 : 0);
1900 /* Code to expand a shift. */
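/* A negative count flips the direction; e.g. gen_ashift (ASHIFT, -2, reg)
   emits a logical right shift by two (shlr2).  */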
1902 void
1903 gen_ashift (int type, int n, rtx reg)
1905 /* Negative values here come from the shift_amounts array. */
1906 if (n < 0)
1908 if (type == ASHIFT)
1909 type = LSHIFTRT;
1910 else
1911 type = ASHIFT;
1912 n = -n;
1915 switch (type)
1917 case ASHIFTRT:
1918 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
1919 break;
1920 case LSHIFTRT:
1921 if (n == 1)
1922 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
1923 else
1924 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
1925 break;
1926 case ASHIFT:
1927 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
1928 break;
1932 /* Same for HImode */
1934 void
1935 gen_ashift_hi (int type, int n, rtx reg)
1937 /* Negative values here come from the shift_amounts array. */
1938 if (n < 0)
1940 if (type == ASHIFT)
1941 type = LSHIFTRT;
1942 else
1943 type = ASHIFT;
1944 n = -n;
1947 switch (type)
1949 case ASHIFTRT:
1950 case LSHIFTRT:
1951 /* We don't have HImode right shift operations because using the
1952 ordinary 32 bit shift instructions for that doesn't generate proper
1953 zero/sign extension.
1954 gen_ashift_hi is only called in contexts where we know that the
1955 sign extension works out correctly. */
1957 int offset = 0;
1958 if (GET_CODE (reg) == SUBREG)
1960 offset = SUBREG_BYTE (reg);
1961 reg = SUBREG_REG (reg);
1963 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
1964 break;
1966 case ASHIFT:
1967 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
1968 break;
1972 /* Output RTL to split a constant shift into its component SH constant
1973 shift instructions. */
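/* For instance, a constant left shift by 10 has no single insn, so it is
   split into the two available shifts shll8 and shll2, as recorded in
   shift_insns and shift_amounts.  */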
1975 void
1976 gen_shifty_op (int code, rtx *operands)
1978 int value = INTVAL (operands[2]);
1979 int max, i;
1981 /* Truncate the shift count in case it is out of bounds. */
1982 value = value & 0x1f;
1984 if (value == 31)
1986 if (code == LSHIFTRT)
1988 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
1989 emit_insn (gen_movt (operands[0]));
1990 return;
1992 else if (code == ASHIFT)
1994 /* There is a two instruction sequence for 31 bit left shifts,
1995 but it requires r0. */
1996 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
1998 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
1999 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
2000 return;
2004 else if (value == 0)
2006 /* This can happen when not optimizing. We must output something here
2007 to prevent the compiler from aborting in final.c after the try_split
2008 call. */
2009 emit_insn (gen_nop ());
2010 return;
2013 max = shift_insns[value];
2014 for (i = 0; i < max; i++)
2015 gen_ashift (code, shift_amounts[value][i], operands[0]);
2018 /* Same as above, but optimized for values where the topmost bits don't
2019 matter. */
2021 void
2022 gen_shifty_hi_op (int code, rtx *operands)
2024 int value = INTVAL (operands[2]);
2025 int max, i;
2026 void (*gen_fun) (int, int, rtx);
2028 /* This operation is used by and_shl for SImode values with a few
2029 high bits known to be cleared. */
2030 value &= 31;
2031 if (value == 0)
2033 emit_insn (gen_nop ());
2034 return;
2037 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2038 if (code == ASHIFT)
2040 max = ext_shift_insns[value];
2041 for (i = 0; i < max; i++)
2042 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2044 else
2045 /* When shifting right, emit the shifts in reverse order, so that
2046 solitary negative values come first. */
2047 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2048 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2051 /* Output RTL for an arithmetic right shift. */
2053 /* ??? Rewrite to use super-optimizer sequences. */
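/* For instance, a shift right by 2 is expanded inline as two shar insns,
   while a shift by 24 typically ends up as a call to the __ashiftrt_r4_24
   helper with the value passed in r4, as set up below.  */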
2055 int
2056 expand_ashiftrt (rtx *operands)
2058 rtx sym;
2059 rtx wrk;
2060 char func[18];
2061 tree func_name;
2062 int value;
2064 if (TARGET_SH3)
2066 if (GET_CODE (operands[2]) != CONST_INT)
2068 rtx count = copy_to_mode_reg (SImode, operands[2]);
2069 emit_insn (gen_negsi2 (count, count));
2070 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2071 return 1;
2073 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2074 > 1 + SH_DYNAMIC_SHIFT_COST)
2076 rtx count
2077 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2078 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2079 return 1;
2082 if (GET_CODE (operands[2]) != CONST_INT)
2083 return 0;
2085 value = INTVAL (operands[2]) & 31;
2087 if (value == 31)
2089 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2090 return 1;
2092 else if (value >= 16 && value <= 19)
2094 wrk = gen_reg_rtx (SImode);
2095 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2096 value -= 16;
2097 while (value--)
2098 gen_ashift (ASHIFTRT, 1, wrk);
2099 emit_move_insn (operands[0], wrk);
2100 return 1;
2102 /* Expand a short sequence inline; for longer ones, call a magic routine. */
2103 else if (value <= 5)
2105 wrk = gen_reg_rtx (SImode);
2106 emit_move_insn (wrk, operands[1]);
2107 while (value--)
2108 gen_ashift (ASHIFTRT, 1, wrk);
2109 emit_move_insn (operands[0], wrk);
2110 return 1;
2113 wrk = gen_reg_rtx (Pmode);
2115 /* Load the value into an arg reg and call a helper. */
2116 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2117 sprintf (func, "__ashiftrt_r4_%d", value);
2118 func_name = get_identifier (func);
2119 sym = function_symbol (IDENTIFIER_POINTER (func_name));
2120 emit_move_insn (wrk, sym);
2121 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2122 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
2123 return 1;
2126 int
2127 sh_dynamicalize_shift_p (rtx count)
2129 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
2132 /* Try to find a good way to implement the combiner pattern
2133 [(set (match_operand:SI 0 "register_operand" "r")
2134 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2135 (match_operand:SI 2 "const_int_operand" "n"))
2136 (match_operand:SI 3 "const_int_operand" "n"))) .
2137 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2138 return 0 for simple right / left or left/right shift combination.
2139 return 1 for a combination of shifts with zero_extend.
2140 return 2 for a combination of shifts with an AND that needs r0.
2141 return 3 for a combination of shifts with an AND that needs an extra
2142 scratch register, when the three highmost bits of the AND mask are clear.
2143 return 4 for a combination of shifts with an AND that needs an extra
2144 scratch register, when any of the three highmost bits of the AND mask
2145 is set.
2146 If ATTRP is set, store an initial right shift width in ATTRP[0],
2147 and the instruction length in ATTRP[1] . These values are not valid
2148 when returning 0.
2149 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2150 shift_amounts for the last shift value that is to be used before the
2151 sign extend. */
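/* As a concrete illustration, (x << 3) & 0xf8 does not need the literal
   mask at all: shifting left by three and zero extending the low byte
   (e.g. shll2; shll; extu.b) gives the same result, because the bits that
   extu.b clears below bit 3 are already zero; this is the kind 1 case.  */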
2152 int
2153 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2155 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2156 int left = INTVAL (left_rtx), right;
2157 int best = 0;
2158 int cost, best_cost = 10000;
2159 int best_right = 0, best_len = 0;
2160 int i;
2161 int can_ext;
2163 if (left < 0 || left > 31)
2164 return 0;
2165 if (GET_CODE (mask_rtx) == CONST_INT)
2166 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2167 else
2168 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2169 /* Can this be expressed as a right shift / left shift pair? */
2170 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2171 right = exact_log2 (lsb);
2172 mask2 = ~(mask + lsb - 1);
2173 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2174 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
2175 if (! mask2)
2176 best_cost = shift_insns[right] + shift_insns[right + left];
2177 /* mask has no trailing zeroes <==> ! right */
2178 else if (! right && mask2 == ~(lsb2 - 1))
2180 int late_right = exact_log2 (lsb2);
2181 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2183 /* Try to use zero extend. */
2184 if (mask2 == ~(lsb2 - 1))
2186 int width, first;
2188 for (width = 8; width <= 16; width += 8)
2190 /* Can we zero-extend right away? */
2191 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2193 cost
2194 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2195 if (cost < best_cost)
2197 best = 1;
2198 best_cost = cost;
2199 best_right = right;
2200 best_len = cost;
2201 if (attrp)
2202 attrp[2] = -1;
2204 continue;
2206 /* ??? Could try to put zero extend into initial right shift,
2207 or even shift a bit left before the right shift. */
2208 /* Determine value of first part of left shift, to get to the
2209 zero extend cut-off point. */
2210 first = width - exact_log2 (lsb2) + right;
2211 if (first >= 0 && right + left - first >= 0)
2213 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2214 + ext_shift_insns[right + left - first];
2215 if (cost < best_cost)
2217 best = 1;
2218 best_cost = cost;
2219 best_right = right;
2220 best_len = cost;
2221 if (attrp)
2222 attrp[2] = first;
2227 /* Try to use r0 AND pattern */
2228 for (i = 0; i <= 2; i++)
2230 if (i > right)
2231 break;
2232 if (! CONST_OK_FOR_K08 (mask >> i))
2233 continue;
2234 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2235 if (cost < best_cost)
2237 best = 2;
2238 best_cost = cost;
2239 best_right = i;
2240 best_len = cost - 1;
2243 /* Try to use a scratch register to hold the AND operand. */
2244 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2245 for (i = 0; i <= 2; i++)
2247 if (i > right)
2248 break;
2249 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2250 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2251 if (cost < best_cost)
2253 best = 4 - can_ext;
2254 best_cost = cost;
2255 best_right = i;
2256 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2260 if (attrp)
2262 attrp[0] = best_right;
2263 attrp[1] = best_len;
2265 return best;
2268 /* This is used in length attributes of the unnamed instructions
2269 corresponding to shl_and_kind return values of 1 and 2. */
2270 int
2271 shl_and_length (rtx insn)
2273 rtx set_src, left_rtx, mask_rtx;
2274 int attributes[3];
2276 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2277 left_rtx = XEXP (XEXP (set_src, 0), 1);
2278 mask_rtx = XEXP (set_src, 1);
2279 shl_and_kind (left_rtx, mask_rtx, attributes);
2280 return attributes[1];
2283 /* This is used in length attribute of the and_shl_scratch instruction. */
2285 int
2286 shl_and_scr_length (rtx insn)
2288 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2289 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2290 rtx op = XEXP (set_src, 0);
2291 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2292 op = XEXP (XEXP (op, 0), 0);
2293 return len + shift_insns[INTVAL (XEXP (op, 1))];
2296 /* Generate rtl for instructions for which shl_and_kind advised a particular
2297 method of generating them, i.e. returned a nonzero kind. */
2299 int
2300 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2302 int attributes[3];
2303 unsigned HOST_WIDE_INT mask;
2304 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2305 int right, total_shift;
2306 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2308 right = attributes[0];
2309 total_shift = INTVAL (left_rtx) + right;
2310 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2311 switch (kind)
2313 default:
2314 return -1;
2315 case 1:
2317 int first = attributes[2];
2318 rtx operands[3];
2320 if (first < 0)
2322 emit_insn ((mask << right) <= 0xff
2323 ? gen_zero_extendqisi2 (dest,
2324 gen_lowpart (QImode, source))
2325 : gen_zero_extendhisi2 (dest,
2326 gen_lowpart (HImode, source)));
2327 source = dest;
2329 if (source != dest)
2330 emit_insn (gen_movsi (dest, source));
2331 operands[0] = dest;
2332 if (right)
2334 operands[2] = GEN_INT (right);
2335 gen_shifty_hi_op (LSHIFTRT, operands);
2337 if (first > 0)
2339 operands[2] = GEN_INT (first);
2340 gen_shifty_hi_op (ASHIFT, operands);
2341 total_shift -= first;
2342 mask <<= first;
2344 if (first >= 0)
2345 emit_insn (mask <= 0xff
2346 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
2347 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2348 if (total_shift > 0)
2350 operands[2] = GEN_INT (total_shift);
2351 gen_shifty_hi_op (ASHIFT, operands);
2353 break;
2355 case 4:
2356 shift_gen_fun = gen_shifty_op;
2357 case 3:
2358 /* If the topmost bit that matters is set, set the topmost bits
2359 that don't matter. This way, we might be able to get a shorter
2360 signed constant. */
2361 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
2362 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
2363 case 2:
2364 /* Don't expand fine-grained when combining, because that will
2365 make the pattern fail. */
2366 if (currently_expanding_to_rtl
2367 || reload_in_progress || reload_completed)
2369 rtx operands[3];
2371 /* Cases 3 and 4 should be handled by this split
2372 only while combining */
2373 if (kind > 2)
2374 abort ();
2375 if (right)
2377 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2378 source = dest;
2380 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
2381 if (total_shift)
2383 operands[0] = dest;
2384 operands[1] = dest;
2385 operands[2] = GEN_INT (total_shift);
2386 shift_gen_fun (ASHIFT, operands);
2388 break;
2390 else
2392 int neg = 0;
2393 if (kind != 4 && total_shift < 16)
2395 neg = -ext_shift_amounts[total_shift][1];
2396 if (neg > 0)
2397 neg -= ext_shift_amounts[total_shift][2];
2398 else
2399 neg = 0;
2401 emit_insn (gen_and_shl_scratch (dest, source,
2402 GEN_INT (right),
2403 GEN_INT (mask),
2404 GEN_INT (total_shift + neg),
2405 GEN_INT (neg)));
2406 emit_insn (gen_movsi (dest, dest));
2407 break;
2410 return 0;
2413 /* Try to find a good way to implement the combiner pattern
2414 [(set (match_operand:SI 0 "register_operand" "=r")
2415 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2416 (match_operand:SI 2 "const_int_operand" "n")
2417 (match_operand:SI 3 "const_int_operand" "n")
2418 (const_int 0)))
2419 (clobber (reg:SI T_REG))]
2420 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2421 return 0 for simple left / right shift combination.
2422 return 1 for left shift / 8 bit sign extend / left shift.
2423 return 2 for left shift / 16 bit sign extend / left shift.
2424 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2425 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2426 return 5 for left shift / 16 bit sign extend / right shift
2427 return 6 for < 8 bit sign extend / left shift.
2428 return 7 for < 8 bit sign extend / left shift / single right shift.
2429 If COSTP is nonzero, assign the calculated cost to *COSTP. */
2431 int
2432 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
2434 int left, size, insize, ext;
2435 int cost = 0, best_cost;
2436 int kind;
2438 left = INTVAL (left_rtx);
2439 size = INTVAL (size_rtx);
2440 insize = size - left;
2441 if (insize <= 0)
2442 abort ();
2443 /* Default to left / right shift. */
2444 kind = 0;
2445 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
2446 if (size <= 16)
2448 /* 16 bit shift / sign extend / 16 bit shift */
2449 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2450 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2451 below, by alternative 3 or something even better. */
2452 if (cost < best_cost)
2454 kind = 5;
2455 best_cost = cost;
2458 /* Try a plain sign extend between two shifts. */
2459 for (ext = 16; ext >= insize; ext -= 8)
2461 if (ext <= size)
2463 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2464 if (cost < best_cost)
2466 kind = ext / (unsigned) 8;
2467 best_cost = cost;
2470 /* Check if we can do a sloppy shift with a final signed shift
2471 restoring the sign. */
2472 if (EXT_SHIFT_SIGNED (size - ext))
2473 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2474 /* If not, maybe it's still cheaper to do the second shift sloppy,
2475 and do a final sign extend? */
2476 else if (size <= 16)
2477 cost = ext_shift_insns[ext - insize] + 1
2478 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2479 else
2480 continue;
2481 if (cost < best_cost)
2483 kind = ext / (unsigned) 8 + 2;
2484 best_cost = cost;
2487 /* Check if we can sign extend in r0 */
2488 if (insize < 8)
2490 cost = 3 + shift_insns[left];
2491 if (cost < best_cost)
2493 kind = 6;
2494 best_cost = cost;
2496 /* Try the same with a final signed shift. */
2497 if (left < 31)
2499 cost = 3 + ext_shift_insns[left + 1] + 1;
2500 if (cost < best_cost)
2502 kind = 7;
2503 best_cost = cost;
2507 if (TARGET_SH3)
2509 /* Try to use a dynamic shift. */
2510 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2511 if (cost < best_cost)
2513 kind = 0;
2514 best_cost = cost;
2517 if (costp)
2518 *costp = cost;
2519 return kind;
2522 /* Function to be used in the length attribute of the instructions
2523 implementing this pattern. */
2525 int
2526 shl_sext_length (rtx insn)
2528 rtx set_src, left_rtx, size_rtx;
2529 int cost;
2531 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2532 left_rtx = XEXP (XEXP (set_src, 0), 1);
2533 size_rtx = XEXP (set_src, 1);
2534 shl_sext_kind (left_rtx, size_rtx, &cost);
2535 return cost;
2538 /* Generate rtl for this pattern */
2540 int
2541 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
2543 int kind;
2544 int left, size, insize, cost;
2545 rtx operands[3];
2547 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2548 left = INTVAL (left_rtx);
2549 size = INTVAL (size_rtx);
2550 insize = size - left;
2551 switch (kind)
2553 case 1:
2554 case 2:
2555 case 3:
2556 case 4:
2558 int ext = kind & 1 ? 8 : 16;
2559 int shift2 = size - ext;
2561 /* Don't expand fine-grained when combining, because that will
2562 make the pattern fail. */
2563 if (! currently_expanding_to_rtl
2564 && ! reload_in_progress && ! reload_completed)
2566 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2567 emit_insn (gen_movsi (dest, source));
2568 break;
2570 if (dest != source)
2571 emit_insn (gen_movsi (dest, source));
2572 operands[0] = dest;
2573 if (ext - insize)
2575 operands[2] = GEN_INT (ext - insize);
2576 gen_shifty_hi_op (ASHIFT, operands);
2578 emit_insn (kind & 1
2579 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2580 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2581 if (kind <= 2)
2583 if (shift2)
2585 operands[2] = GEN_INT (shift2);
2586 gen_shifty_op (ASHIFT, operands);
2589 else
2591 if (shift2 > 0)
2593 if (EXT_SHIFT_SIGNED (shift2))
2595 operands[2] = GEN_INT (shift2 + 1);
2596 gen_shifty_op (ASHIFT, operands);
2597 operands[2] = const1_rtx;
2598 gen_shifty_op (ASHIFTRT, operands);
2599 break;
2601 operands[2] = GEN_INT (shift2);
2602 gen_shifty_hi_op (ASHIFT, operands);
2604 else if (shift2)
2606 operands[2] = GEN_INT (-shift2);
2607 gen_shifty_hi_op (LSHIFTRT, operands);
2609 emit_insn (size <= 8
2610 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2611 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2613 break;
2615 case 5:
2617 int i = 16 - size;
2618 if (! currently_expanding_to_rtl
2619 && ! reload_in_progress && ! reload_completed)
2620 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2621 else
2623 operands[0] = dest;
2624 operands[2] = GEN_INT (16 - insize);
2625 gen_shifty_hi_op (ASHIFT, operands);
2626 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2628 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2629 while (--i >= 0)
2630 gen_ashift (ASHIFTRT, 1, dest);
2631 break;
2633 case 6:
2634 case 7:
2635 /* Don't expand fine-grained when combining, because that will
2636 make the pattern fail. */
2637 if (! currently_expanding_to_rtl
2638 && ! reload_in_progress && ! reload_completed)
2640 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2641 emit_insn (gen_movsi (dest, source));
2642 break;
2644 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2645 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2646 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2647 operands[0] = dest;
2648 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2649 gen_shifty_op (ASHIFT, operands);
2650 if (kind == 7)
2651 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
2652 break;
2653 default:
2654 return -1;
2656 return 0;
2659 /* Prefix a symbol_ref name with "datalabel". */
2661 rtx
2662 gen_datalabel_ref (rtx sym)
2664 if (GET_CODE (sym) == LABEL_REF)
2665 return gen_rtx_CONST (GET_MODE (sym),
2666 gen_rtx_UNSPEC (GET_MODE (sym),
2667 gen_rtvec (1, sym),
2668 UNSPEC_DATALABEL));
2670 if (GET_CODE (sym) != SYMBOL_REF)
2671 abort ();
2673 return sym;
2677 /* The SH cannot load a large constant into a register, constants have to
2678 come from a pc relative load. The reference of a pc relative load
2679 instruction must be less than 1k in front of the instruction. This
2680 means that we often have to dump a constant inside a function, and
2681 generate code to branch around it.
2683 It is important to minimize this, since the branches will slow things
2684 down and make things bigger.
2686 Worst case code looks like:
2688 mov.l L1,rn
2689 bra L2
2691 align
2692 L1: .long value
2696 mov.l L3,rn
2697 bra L4
2699 align
2700 L3: .long value
2704 We fix this by performing a scan before scheduling, which notices which
2705 instructions need to have their operands fetched from the constant table
2706 and builds the table.
2708 The algorithm is:
2710 scan, find an instruction which needs a pcrel move. Look forward, find the
2711 last barrier which is within MAX_COUNT bytes of the requirement.
2712 If there isn't one, make one. Process all the instructions between
2713 the find and the barrier.
2715 In the above example, we can tell that L3 is within 1k of L1, so
2716 the first move can be shrunk from the 3 insn+constant sequence into
2717 just 1 insn, and the constant moved to L3 to make:
2719 mov.l L1,rn
2721 mov.l L3,rn
2722 bra L4
2724 align
2725 L3:.long value
2726 L4:.long value
2728 Then the second move becomes the target for the shortening process. */
2730 typedef struct
2732 rtx value; /* Value in table. */
2733 rtx label; /* Label of value. */
2734 rtx wend; /* End of window. */
2735 enum machine_mode mode; /* Mode of value. */
2737 /* True if this constant is accessed as part of a post-increment
2738 sequence. Note that HImode constants are never accessed in this way. */
2739 bool part_of_sequence_p;
2740 } pool_node;
2742 /* The maximum number of constants that can fit into one pool, since
2743 the pc relative range is 0...1020 bytes and constants are at least 4
2744 bytes long. */
2746 #define MAX_POOL_SIZE (1020/4)
2747 static pool_node pool_vector[MAX_POOL_SIZE];
2748 static int pool_size;
2749 static rtx pool_window_label;
2750 static int pool_window_last;
2752 /* ??? If we need a constant in HImode which is the truncated value of a
2753 constant we need in SImode, we could combine the two entries thus saving
2754 two bytes. Is this common enough to be worth the effort of implementing
2755 it? */
2757 /* ??? This stuff should be done at the same time that we shorten branches.
2758 As it is now, we must assume that all branches are the maximum size, and
2759 this causes us to almost always output constant pools sooner than
2760 necessary. */
2762 /* Add a constant to the pool and return its label. */
2764 static rtx
2765 add_constant (rtx x, enum machine_mode mode, rtx last_value)
2767 int i;
2768 rtx lab, new, ref, newref;
2770 /* First see if we've already got it. */
2771 for (i = 0; i < pool_size; i++)
2773 if (x->code == pool_vector[i].value->code
2774 && mode == pool_vector[i].mode)
2776 if (x->code == CODE_LABEL)
2778 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
2779 continue;
2781 if (rtx_equal_p (x, pool_vector[i].value))
2783 lab = new = 0;
2784 if (! last_value
2785 || ! i
2786 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
2788 new = gen_label_rtx ();
2789 LABEL_REFS (new) = pool_vector[i].label;
2790 pool_vector[i].label = lab = new;
2792 if (lab && pool_window_label)
2794 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2795 ref = pool_vector[pool_window_last].wend;
2796 LABEL_NEXTREF (newref) = ref;
2797 pool_vector[pool_window_last].wend = newref;
2799 if (new)
2800 pool_window_label = new;
2801 pool_window_last = i;
2802 return lab;
2807 /* Need a new one. */
2808 pool_vector[pool_size].value = x;
2809 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
2811 lab = 0;
2812 pool_vector[pool_size - 1].part_of_sequence_p = true;
2814 else
2815 lab = gen_label_rtx ();
2816 pool_vector[pool_size].mode = mode;
2817 pool_vector[pool_size].label = lab;
2818 pool_vector[pool_size].wend = NULL_RTX;
2819 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
2820 if (lab && pool_window_label)
2822 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
2823 ref = pool_vector[pool_window_last].wend;
2824 LABEL_NEXTREF (newref) = ref;
2825 pool_vector[pool_window_last].wend = newref;
2827 if (lab)
2828 pool_window_label = lab;
2829 pool_window_last = pool_size;
2830 pool_size++;
2831 return lab;
2834 /* Output the literal table. START, if nonzero, is the first instruction
2835 this table is needed for, and also indicates that there is at least one
2836 casesi_worker_2 instruction; we have to emit the operand3 labels from
2837 these insns at a 4-byte aligned position. BARRIER is the barrier
2838 after which we are to place the table. */
2840 static void
2841 dump_table (rtx start, rtx barrier)
2843 rtx scan = barrier;
2844 int i;
2845 int need_align = 1;
2846 rtx lab, ref;
2847 int have_df = 0;
2849 /* Do two passes, first time dump out the HI sized constants. */
2851 for (i = 0; i < pool_size; i++)
2853 pool_node *p = &pool_vector[i];
2855 if (p->mode == HImode)
2857 if (need_align)
2859 scan = emit_insn_after (gen_align_2 (), scan);
2860 need_align = 0;
2862 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2863 scan = emit_label_after (lab, scan);
2864 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
2865 scan);
2866 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2868 lab = XEXP (ref, 0);
2869 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
2872 else if (p->mode == DFmode)
2873 have_df = 1;
2876 need_align = 1;
2878 if (start)
2880 scan = emit_insn_after (gen_align_4 (), scan);
2881 need_align = 0;
2882 for (; start != barrier; start = NEXT_INSN (start))
2883 if (GET_CODE (start) == INSN
2884 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
2886 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
2887 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
2889 scan = emit_label_after (lab, scan);
2892 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
2894 rtx align_insn = NULL_RTX;
2896 scan = emit_label_after (gen_label_rtx (), scan);
2897 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2898 need_align = 0;
2900 for (i = 0; i < pool_size; i++)
2902 pool_node *p = &pool_vector[i];
2904 switch (p->mode)
2906 case HImode:
2907 break;
2908 case SImode:
2909 case SFmode:
2910 if (align_insn && !p->part_of_sequence_p)
2912 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2913 emit_label_before (lab, align_insn);
2914 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
2915 align_insn);
2916 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2918 lab = XEXP (ref, 0);
2919 emit_insn_before (gen_consttable_window_end (lab),
2920 align_insn);
2922 delete_insn (align_insn);
2923 align_insn = NULL_RTX;
2924 continue;
2926 else
2928 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2929 scan = emit_label_after (lab, scan);
2930 scan = emit_insn_after (gen_consttable_4 (p->value,
2931 const0_rtx), scan);
2932 need_align = ! need_align;
2934 break;
2935 case DFmode:
2936 if (need_align)
2938 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
2939 align_insn = scan;
2940 need_align = 0;
2942 case DImode:
2943 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2944 scan = emit_label_after (lab, scan);
2945 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2946 scan);
2947 break;
2948 default:
2949 abort ();
2950 break;
2953 if (p->mode != HImode)
2955 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
2957 lab = XEXP (ref, 0);
2958 scan = emit_insn_after (gen_consttable_window_end (lab),
2959 scan);
2964 pool_size = 0;
2967 for (i = 0; i < pool_size; i++)
2969 pool_node *p = &pool_vector[i];
2971 switch (p->mode)
2973 case HImode:
2974 break;
2975 case SImode:
2976 case SFmode:
2977 if (need_align)
2979 need_align = 0;
2980 scan = emit_label_after (gen_label_rtx (), scan);
2981 scan = emit_insn_after (gen_align_4 (), scan);
2983 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2984 scan = emit_label_after (lab, scan);
2985 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
2986 scan);
2987 break;
2988 case DFmode:
2989 case DImode:
2990 if (need_align)
2992 need_align = 0;
2993 scan = emit_label_after (gen_label_rtx (), scan);
2994 scan = emit_insn_after (gen_align_4 (), scan);
2996 for (lab = p->label; lab; lab = LABEL_REFS (lab))
2997 scan = emit_label_after (lab, scan);
2998 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
2999 scan);
3000 break;
3001 default:
3002 abort ();
3003 break;
3006 if (p->mode != HImode)
3008 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3010 lab = XEXP (ref, 0);
3011 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3016 scan = emit_insn_after (gen_consttable_end (), scan);
3017 scan = emit_barrier_after (scan);
3018 pool_size = 0;
3019 pool_window_label = NULL_RTX;
3020 pool_window_last = 0;
3023 /* Return nonzero if constant would be an ok source for a
3024 mov.w instead of a mov.l. */
3026 static int
3027 hi_const (rtx src)
3029 return (GET_CODE (src) == CONST_INT
3030 && INTVAL (src) >= -32768
3031 && INTVAL (src) <= 32767);
3034 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3036 /* ??? For a DImode/DFmode moves, we don't need to fix it if each half of the
3037 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
3038 need to fix it if the input value is CONST_OK_FOR_I08. */
3040 static int
3041 broken_move (rtx insn)
3043 if (GET_CODE (insn) == INSN)
3045 rtx pat = PATTERN (insn);
3046 if (GET_CODE (pat) == PARALLEL)
3047 pat = XVECEXP (pat, 0, 0);
3048 if (GET_CODE (pat) == SET
3049 /* We can load any 8 bit value if we don't care what the high
3050 order bits end up as. */
3051 && GET_MODE (SET_DEST (pat)) != QImode
3052 && (CONSTANT_P (SET_SRC (pat))
3053 /* Match mova_const. */
3054 || (GET_CODE (SET_SRC (pat)) == UNSPEC
3055 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3056 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
3057 && ! (TARGET_SH2E
3058 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3059 && (fp_zero_operand (SET_SRC (pat))
3060 || fp_one_operand (SET_SRC (pat)))
3061 /* ??? If this is a -m4 or -m4-single compilation, in general
3062 we don't know the current setting of fpscr, so disable fldi.
3063 There is an exception if this was a register-register move
3064 before reload - and hence it was ascertained that we have
3065 single precision setting - and in a post-reload optimization
3066 we changed this to do a constant load. In that case
3067 we don't have an r0 clobber, hence we must use fldi. */
3068 && (! TARGET_SH4 || TARGET_FMOVD
3069 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3070 == SCRATCH))
3071 && GET_CODE (SET_DEST (pat)) == REG
3072 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3073 && ! (TARGET_SH2A
3074 && GET_MODE (SET_DEST (pat)) == SImode
3075 && GET_CODE (SET_SRC (pat)) == CONST_INT
3076 && CONST_OK_FOR_I20 (INTVAL (SET_SRC (pat))))
3077 && (GET_CODE (SET_SRC (pat)) != CONST_INT
3078 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
3079 return 1;
3082 return 0;
3085 static int
3086 mova_p (rtx insn)
3088 return (GET_CODE (insn) == INSN
3089 && GET_CODE (PATTERN (insn)) == SET
3090 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3091 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3092 /* Don't match mova_const. */
3093 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
3096 /* Fix up a mova from a switch that went out of range. */
3097 static void
3098 fixup_mova (rtx mova)
3100 if (! flag_pic)
3102 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3103 INSN_CODE (mova) = -1;
3105 else
3107 rtx worker = mova;
3108 rtx lab = gen_label_rtx ();
3109 rtx wpat, wpat0, wpat1, wsrc, diff;
3113 worker = NEXT_INSN (worker);
3114 if (! worker
3115 || GET_CODE (worker) == CODE_LABEL
3116 || GET_CODE (worker) == JUMP_INSN)
3117 abort ();
3118 } while (recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3119 wpat = PATTERN (worker);
3120 wpat0 = XVECEXP (wpat, 0, 0);
3121 wpat1 = XVECEXP (wpat, 0, 1);
3122 wsrc = SET_SRC (wpat0);
3123 PATTERN (worker) = (gen_casesi_worker_2
3124 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3125 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3126 XEXP (wpat1, 0)));
3127 INSN_CODE (worker) = -1;
3128 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3129 gen_rtx_LABEL_REF (Pmode, lab));
3130 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3131 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3132 INSN_CODE (mova) = -1;
3136 /* Find the last barrier from insn FROM which is close enough to hold the
3137 constant pool. If we can't find one, then create one near the end of
3138 the range. */
3140 static rtx
3141 find_barrier (int num_mova, rtx mova, rtx from)
3143 int count_si = 0;
3144 int count_hi = 0;
3145 int found_hi = 0;
3146 int found_si = 0;
3147 int found_di = 0;
3148 int hi_align = 2;
3149 int si_align = 2;
3150 int leading_mova = num_mova;
3151 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3152 int si_limit;
3153 int hi_limit;
3155 /* For HImode: range is 510, add 4 because pc counts from address of
3156 second instruction after this one, subtract 2 for the jump instruction
3157 that we may need to emit before the table, subtract 2 for the instruction
3158 that fills the jump delay slot (in very rare cases, reorg will take an
3159 instruction from after the constant pool or will leave the delay slot
3160 empty). This gives 510.
3161 For SImode: range is 1020, add 4 because pc counts from address of
3162 second instruction after this one, subtract 2 in case pc is 2 byte
3163 aligned, subtract 2 for the jump instruction that we may need to emit
3164 before the table, subtract 2 for the instruction that fills the jump
3165 delay slot. This gives 1018. */
3167 /* The branch will always be shortened now that the reference address for
3168 forward branches is the successor address, thus we need no longer make
3169 adjustments to the [sh]i_limit for -O0. */
3171 si_limit = 1018;
3172 hi_limit = 510;
3174 while (from && count_si < si_limit && count_hi < hi_limit)
3176 int inc = get_attr_length (from);
3177 int new_align = 1;
3179 if (GET_CODE (from) == CODE_LABEL)
3181 if (optimize)
3182 new_align = 1 << label_to_alignment (from);
3183 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3184 new_align = 1 << barrier_align (from);
3185 else
3186 new_align = 1;
3187 inc = 0;
3190 if (GET_CODE (from) == BARRIER)
3193 found_barrier = from;
3195 /* If we are at the end of the function, or in front of an alignment
3196 instruction, we need not insert an extra alignment. We prefer
3197 this kind of barrier. */
3198 if (barrier_align (from) > 2)
3199 good_barrier = from;
3202 if (broken_move (from))
3204 rtx pat, src, dst;
3205 enum machine_mode mode;
3207 pat = PATTERN (from);
3208 if (GET_CODE (pat) == PARALLEL)
3209 pat = XVECEXP (pat, 0, 0);
3210 src = SET_SRC (pat);
3211 dst = SET_DEST (pat);
3212 mode = GET_MODE (dst);
3214 /* We must explicitly check the mode, because sometimes the
3215 front end will generate code to load unsigned constants into
3216 HImode targets without properly sign extending them. */
3217 if (mode == HImode
3218 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3220 found_hi += 2;
3221 /* We put the short constants before the long constants, so
3222 we must count the length of short constants in the range
3223 for the long constants. */
3224 /* ??? This isn't optimal, but is easy to do. */
3225 si_limit -= 2;
3227 else
3229 /* We dump DF/DI constants before SF/SI ones, because
3230 the limit is the same, but the alignment requirements
3231 are higher. We may waste up to 4 additional bytes
3232 for alignment, and the DF/DI constant may have
3233 another SF/SI constant placed before it. */
3234 if (TARGET_SHCOMPACT
3235 && ! found_di
3236 && (mode == DFmode || mode == DImode))
3238 found_di = 1;
3239 si_limit -= 8;
3241 while (si_align > 2 && found_si + si_align - 2 > count_si)
3242 si_align >>= 1;
3243 if (found_si > count_si)
3244 count_si = found_si;
3245 found_si += GET_MODE_SIZE (mode);
3246 if (num_mova)
3247 si_limit -= GET_MODE_SIZE (mode);
3250 /* See the code in machine_dependent_reorg, which has a similar if
3251 statement that generates a new mova insn in many cases. */
3252 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
3253 inc += 2;
3256 if (mova_p (from))
3258 if (! num_mova++)
3260 leading_mova = 0;
3261 mova = from;
3262 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
3264 if (found_si > count_si)
3265 count_si = found_si;
3267 else if (GET_CODE (from) == JUMP_INSN
3268 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3269 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3271 if (num_mova)
3272 num_mova--;
3273 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3275 /* We have just passed the barrier in front of the
3276 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3277 the ADDR_DIFF_VEC is accessed as data, just like our pool
3278 constants, this is a good opportunity to accommodate what
3279 we have gathered so far.
3280 If we waited any longer, we could end up at a barrier in
3281 front of code, which gives worse cache usage for separated
3282 instruction / data caches. */
3283 good_barrier = found_barrier;
3284 break;
3286 else
3288 rtx body = PATTERN (from);
3289 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
3292 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3293 else if (GET_CODE (from) == JUMP_INSN
3294 && ! TARGET_SH2
3295 && ! TARGET_SMALLCODE)
3296 new_align = 4;
3298 if (found_si)
3300 count_si += inc;
3301 if (new_align > si_align)
3303 si_limit -= (count_si - 1) & (new_align - si_align);
3304 si_align = new_align;
3306 count_si = (count_si + new_align - 1) & -new_align;
3308 if (found_hi)
3310 count_hi += inc;
3311 if (new_align > hi_align)
3313 hi_limit -= (count_hi - 1) & (new_align - hi_align);
3314 hi_align = new_align;
3316 count_hi = (count_hi + new_align - 1) & -new_align;
3318 from = NEXT_INSN (from);
3321 if (num_mova)
3323 if (leading_mova)
3325 /* Try as we might, the leading mova is out of range. Change
3326 it into a load (which will become a pcload) and retry. */
3327 fixup_mova (mova);
3328 return find_barrier (0, 0, mova);
3330 else
3332 /* Insert the constant pool table before the mova instruction,
3333 to prevent the mova label reference from going out of range. */
3334 from = mova;
3335 good_barrier = found_barrier = barrier_before_mova;
3339 if (found_barrier)
3341 if (good_barrier && next_real_insn (found_barrier))
3342 found_barrier = good_barrier;
3344 else
3346 /* We didn't find a barrier in time to dump our stuff,
3347 so we'll make one. */
3348 rtx label = gen_label_rtx ();
3350 /* If we exceeded the range, then we must back up over the last
3351 instruction we looked at. Otherwise, we just need to undo the
3352 NEXT_INSN at the end of the loop. */
3353 if (count_hi > hi_limit || count_si > si_limit)
3354 from = PREV_INSN (PREV_INSN (from));
3355 else
3356 from = PREV_INSN (from);
3358 /* Walk back to be just before any jump or label.
3359 Putting it before a label reduces the number of times the branch
3360 around the constant pool table will be hit. Putting it before
3361 a jump makes it more likely that the bra delay slot will be
3362 filled. */
3363 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
3364 || GET_CODE (from) == CODE_LABEL)
3365 from = PREV_INSN (from);
3367 from = emit_jump_insn_after (gen_jump (label), from);
3368 JUMP_LABEL (from) = label;
3369 LABEL_NUSES (label) = 1;
3370 found_barrier = emit_barrier_after (from);
3371 emit_label_after (label, found_barrier);
3374 return found_barrier;
3377 /* If the instruction INSN is implemented by a special function, and we can
3378 positively find the register that is used to call the sfunc, and this
3379 register is not used anywhere else in this instruction - except as the
3380 destination of a set, return this register; else, return 0. */
3381 rtx
3382 sfunc_uses_reg (rtx insn)
3384 int i;
3385 rtx pattern, part, reg_part, reg;
3387 if (GET_CODE (insn) != INSN)
3388 return 0;
3389 pattern = PATTERN (insn);
3390 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
3391 return 0;
3393 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3395 part = XVECEXP (pattern, 0, i);
3396 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
3397 reg_part = part;
3399 if (! reg_part)
3400 return 0;
3401 reg = XEXP (reg_part, 0);
3402 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
3404 part = XVECEXP (pattern, 0, i);
3405 if (part == reg_part || GET_CODE (part) == CLOBBER)
3406 continue;
3407 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
3408 && GET_CODE (SET_DEST (part)) == REG)
3409 ? SET_SRC (part) : part)))
3410 return 0;
3412 return reg;
3415 /* See if the only way in which INSN uses REG is by calling it, or by
3416 setting it while calling it. Set *SET to a SET rtx if the register
3417 is set by INSN. */
3419 static int
3420 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
3422 rtx pattern, reg2;
3424 *set = NULL_RTX;
3426 reg2 = sfunc_uses_reg (insn);
3427 if (reg2 && REGNO (reg2) == REGNO (reg))
3429 pattern = single_set (insn);
3430 if (pattern
3431 && GET_CODE (SET_DEST (pattern)) == REG
3432 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3433 *set = pattern;
3434 return 0;
3436 if (GET_CODE (insn) != CALL_INSN)
3438 /* We don't use rtx_equal_p because we don't care if the mode is
3439 different. */
3440 pattern = single_set (insn);
3441 if (pattern
3442 && GET_CODE (SET_DEST (pattern)) == REG
3443 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3445 rtx par, part;
3446 int i;
3448 *set = pattern;
3449 par = PATTERN (insn);
3450 if (GET_CODE (par) == PARALLEL)
3451 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
3453 part = XVECEXP (par, 0, i);
3454 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
3455 return 1;
3457 return reg_mentioned_p (reg, SET_SRC (pattern));
3460 return 1;
3463 pattern = PATTERN (insn);
3465 if (GET_CODE (pattern) == PARALLEL)
3467 int i;
3469 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3470 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
3471 return 1;
3472 pattern = XVECEXP (pattern, 0, 0);
3475 if (GET_CODE (pattern) == SET)
3477 if (reg_mentioned_p (reg, SET_DEST (pattern)))
3479 /* We don't use rtx_equal_p, because we don't care if the
3480 mode is different. */
3481 if (GET_CODE (SET_DEST (pattern)) != REG
3482 || REGNO (reg) != REGNO (SET_DEST (pattern)))
3483 return 1;
3485 *set = pattern;
3488 pattern = SET_SRC (pattern);
3491 if (GET_CODE (pattern) != CALL
3492 || GET_CODE (XEXP (pattern, 0)) != MEM
3493 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
3494 return 1;
3496 return 0;
3499 /* Given a X, a pattern of an insn or a part of it, return a mask of used
3500 general registers. Bits 0..15 mean that the respective registers
3501 are used as inputs in the instruction. Bits 16..31 mean that the
3502 registers 0..15, respectively, are used as outputs, or are clobbered.
3503 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
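/* For example, for (set (reg:SI 1) (plus:SI (reg:SI 2) (reg:SI 3))) this
   returns 0x2000c: bits 2 and 3 for the inputs and bit 17 (16 + 1) for
   the output.  A DImode register sets two adjacent bits.  */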
3504 int
3505 regs_used (rtx x, int is_dest)
3507 enum rtx_code code;
3508 const char *fmt;
3509 int i, used = 0;
3511 if (! x)
3512 return used;
3513 code = GET_CODE (x);
3514 switch (code)
3516 case REG:
3517 if (REGNO (x) < 16)
3518 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3519 << (REGNO (x) + is_dest));
3520 return 0;
3521 case SUBREG:
3523 rtx y = SUBREG_REG (x);
3525 if (GET_CODE (y) != REG)
3526 break;
3527 if (REGNO (y) < 16)
3528 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3529 << (REGNO (y) +
3530 subreg_regno_offset (REGNO (y),
3531 GET_MODE (y),
3532 SUBREG_BYTE (x),
3533 GET_MODE (x)) + is_dest));
3534 return 0;
3536 case SET:
3537 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
3538 case RETURN:
3539 /* If there was a return value, it must have been indicated with USE. */
3540 return 0x00ffff00;
3541 case CLOBBER:
3542 is_dest = 1;
3543 break;
3544 case MEM:
3545 is_dest = 0;
3546 break;
3547 case CALL:
3548 used |= 0x00ff00f0;
3549 break;
3550 default:
3551 break;
3554 fmt = GET_RTX_FORMAT (code);
3556 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3558 if (fmt[i] == 'E')
3560 register int j;
3561 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3562 used |= regs_used (XVECEXP (x, i, j), is_dest);
3564 else if (fmt[i] == 'e')
3565 used |= regs_used (XEXP (x, i), is_dest);
3567 return used;
3570 /* Create an instruction that prevents redirection of a conditional branch
3571 to the destination of the JUMP with address ADDR.
3572 If the branch needs to be implemented as an indirect jump, try to find
3573 a scratch register for it.
3574 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3575 If any preceding insn that doesn't fit into a delay slot is good enough,
3576 pass 1. Pass 2 if a definite blocking insn is needed.
3577 -1 is used internally to avoid deep recursion.
3578 If a blocking instruction is made or recognized, return it. */
3580 static rtx
3581 gen_block_redirect (rtx jump, int addr, int need_block)
3583 int dead = 0;
3584 rtx prev = prev_nonnote_insn (jump);
3585 rtx dest;
3587 /* First, check if we already have an instruction that satisfies our need. */
3588 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3590 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3591 return prev;
3592 if (GET_CODE (PATTERN (prev)) == USE
3593 || GET_CODE (PATTERN (prev)) == CLOBBER
3594 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3595 prev = jump;
3596 else if ((need_block &= ~1) < 0)
3597 return prev;
3598 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
3599 need_block = 0;
3601 if (GET_CODE (PATTERN (jump)) == RETURN)
3603 if (! need_block)
3604 return prev;
3605 /* Reorg even does nasty things with return insns that cause branches
3606 to go out of range - see find_end_label and callers. */
3607 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
3609 /* We can't use JUMP_LABEL here because it might be undefined
3610 when not optimizing. */
3611 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
3612 /* If the branch is out of range, try to find a scratch register for it. */
3613 if (optimize
3614 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3615 > 4092 + 4098))
3617 rtx scan;
3618 /* Don't look for the stack pointer as a scratch register,
3619 it would cause trouble if an interrupt occurred. */
3620 unsigned try = 0x7fff, used;
3621 int jump_left = flag_expensive_optimizations + 1;
3623 /* It is likely that the most recent eligible instruction is wanted for
3624 the delay slot. Therefore, find out which registers it uses, and
3625 try to avoid using them. */
3627 for (scan = jump; (scan = PREV_INSN (scan)); )
3629 enum rtx_code code;
3631 if (INSN_DELETED_P (scan))
3632 continue;
3633 code = GET_CODE (scan);
3634 if (code == CODE_LABEL || code == JUMP_INSN)
3635 break;
3636 if (code == INSN
3637 && GET_CODE (PATTERN (scan)) != USE
3638 && GET_CODE (PATTERN (scan)) != CLOBBER
3639 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
3641 try &= ~regs_used (PATTERN (scan), 0);
3642 break;
3645 for (used = dead = 0, scan = JUMP_LABEL (jump);
3646 (scan = NEXT_INSN (scan)); )
3648 enum rtx_code code;
3650 if (INSN_DELETED_P (scan))
3651 continue;
3652 code = GET_CODE (scan);
3653 if (INSN_P (scan))
3655 used |= regs_used (PATTERN (scan), 0);
3656 if (code == CALL_INSN)
3657 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
3658 dead |= (used >> 16) & ~used;
3659 if (dead & try)
3661 dead &= try;
3662 break;
3664 if (code == JUMP_INSN)
3666 if (jump_left-- && simplejump_p (scan))
3667 scan = JUMP_LABEL (scan);
3668 else
3669 break;
3673 /* Mask out the stack pointer again, in case it was
3674 the only 'free' register we have found. */
3675 dead &= 0x7fff;
3677 /* If the immediate destination is still in range, check for possible
3678 threading with a jump beyond the delay slot insn.
3679 Don't check if we are called recursively; the jump has been or will be
3680 checked in a different invocation then. */
3682 else if (optimize && need_block >= 0)
3684 rtx next = next_active_insn (next_active_insn (dest));
3685 if (next && GET_CODE (next) == JUMP_INSN
3686 && GET_CODE (PATTERN (next)) == SET
3687 && recog_memoized (next) == CODE_FOR_jump_compact)
3689 dest = JUMP_LABEL (next);
3690 if (dest
3691 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3692 > 4092 + 4098))
3693 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
3697 if (dead)
3699 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
3701 /* It would be nice if we could convert the jump into an indirect
3702 jump / far branch right now, and thus exposing all constituent
3703 instructions to further optimization. However, reorg uses
3704 simplejump_p to determine if there is an unconditional jump where
3705 it should try to schedule instructions from the target of the
3706 branch; simplejump_p fails for indirect jumps even if they have
3707 a JUMP_LABEL. */
3708 rtx insn = emit_insn_before (gen_indirect_jump_scratch
3709 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
3710 , jump);
3711 /* ??? We would like this to have the scope of the jump, but that
3712 scope will change when a delay slot insn of an inner scope is added.
3713 Hence, after delay slot scheduling, we'll have to expect
3714 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
3715 the jump. */
3717 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
3718 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
3719 return insn;
3721 else if (need_block)
3722 /* We can't use JUMP_LABEL here because it might be undefined
3723 when not optimizing. */
3724 return emit_insn_before (gen_block_branch_redirect
3725 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
3726 , jump);
3727 return prev;
3730 #define CONDJUMP_MIN -252
3731 #define CONDJUMP_MAX 262
3732 struct far_branch
3734 /* A label (to be placed) in front of the jump
3735 that jumps to our ultimate destination. */
3736 rtx near_label;
3737 /* Where we are going to insert it if we cannot move the jump any farther,
3738 or the jump itself if we have picked up an existing jump. */
3739 rtx insert_place;
3740 /* The ultimate destination. */
3741 rtx far_label;
3742 struct far_branch *prev;
3743 /* If the branch has already been created, its address;
3744 else the address of its first prospective user. */
3745 int address;
3748 static void gen_far_branch (struct far_branch *);
3749 enum mdep_reorg_phase_e mdep_reorg_phase;
3750 static void
3751 gen_far_branch (struct far_branch *bp)
3753 rtx insn = bp->insert_place;
3754 rtx jump;
3755 rtx label = gen_label_rtx ();
3757 emit_label_after (label, insn);
3758 if (bp->far_label)
3760 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
3761 LABEL_NUSES (bp->far_label)++;
3763 else
3764 jump = emit_jump_insn_after (gen_return (), insn);
3765 /* Emit a barrier so that reorg knows that any following instructions
3766 are not reachable via a fall-through path.
3767 But don't do this when not optimizing, since we wouldn't suppress the
3768 alignment for the barrier then, and could end up with out-of-range
3769 pc-relative loads. */
3770 if (optimize)
3771 emit_barrier_after (jump);
3772 emit_label_after (bp->near_label, insn);
3773 JUMP_LABEL (jump) = bp->far_label;
3774 if (! invert_jump (insn, label, 1))
3775 abort ();
3776 /* If we are branching around a jump (rather than a return), prevent
3777 reorg from using an insn from the jump target as the delay slot insn -
3778 when reorg did this, it pessimized code (we'd rather hide the delay slot)
3779 and it could cause branches to go out of range. */
3780 if (bp->far_label)
3781 (emit_insn_after
3782 (gen_stuff_delay_slot
3783 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
3784 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
3785 insn));
3786 /* Prevent reorg from undoing our splits. */
3787 gen_block_redirect (jump, bp->address += 2, 2);
3790 /* Fix up ADDR_DIFF_VECs. */
3791 void
3792 fixup_addr_diff_vecs (rtx first)
3794 rtx insn;
3796 for (insn = first; insn; insn = NEXT_INSN (insn))
3798 rtx vec_lab, pat, prev, prevpat, x, braf_label;
3800 if (GET_CODE (insn) != JUMP_INSN
3801 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
3802 continue;
3803 pat = PATTERN (insn);
3804 vec_lab = XEXP (XEXP (pat, 0), 0);
3806 /* Search the matching casesi_jump_2. */
3807 for (prev = vec_lab; ; prev = PREV_INSN (prev))
3809 if (GET_CODE (prev) != JUMP_INSN)
3810 continue;
3811 prevpat = PATTERN (prev);
3812 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
3813 continue;
3814 x = XVECEXP (prevpat, 0, 1);
3815 if (GET_CODE (x) != USE)
3816 continue;
3817 x = XEXP (x, 0);
3818 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
3819 break;
3821 /* FIXME: This is a bug in the optimizer, but it seems harmless
3822 to just avoid panicking. */
3823 if (!prev)
3824 continue;
3826 /* Emit the reference label of the braf where it belongs, right after
3827 the casesi_jump_2 (i.e. braf). */
3828 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
3829 emit_label_after (braf_label, prev);
3831 /* Fix up the ADDR_DIFF_VEC to be relative
3832 to the reference address of the braf. */
3833 XEXP (XEXP (pat, 0), 0) = braf_label;
3837 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
3838 a barrier. Return the base 2 logarithm of the desired alignment. */
3839 int
3840 barrier_align (rtx barrier_or_label)
3842 rtx next = next_real_insn (barrier_or_label), pat, prev;
3843 int slot, credit, jump_to_next = 0;
3845 if (! next)
3846 return 0;
3848 pat = PATTERN (next);
3850 if (GET_CODE (pat) == ADDR_DIFF_VEC)
3851 return 2;
3853 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
3854 /* This is a barrier in front of a constant table. */
3855 return 0;
3857 prev = prev_real_insn (barrier_or_label);
3858 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
3860 pat = PATTERN (prev);
3861 /* If this is a very small table, we want to keep the alignment after
3862 the table to the minimum for proper code alignment. */
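/* As a rough example: assuming CACHE_LOG is 5 (32-byte cache lines),
   1 << (CACHE_LOG - 2) is 8 bytes, so an HImode ADDR_DIFF_VEC with at
   most four entries (4 * 2 bytes) keeps the alignment at the minimum
   (2 bytes on non-SHmedia), while a larger table gets the usual jump
   alignment. */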
3863 return ((TARGET_SMALLCODE
3864 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
3865 <= (unsigned) 1 << (CACHE_LOG - 2)))
3866 ? 1 << TARGET_SHMEDIA : align_jumps_log);
3869 if (TARGET_SMALLCODE)
3870 return 0;
3872 if (! TARGET_SH2 || ! optimize)
3873 return align_jumps_log;
3875 /* When fixing up pcloads, a constant table might be inserted just before
3876 the basic block that ends with the barrier. Thus, we can't trust the
3877 instruction lengths before that. */
3878 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
3880 /* Check if there is an immediately preceding branch to the insn beyond
3881 the barrier. We must weigh the cost of discarding useful information
3882 from the current cache line when executing this branch and there is
3883 an alignment, against that of fetching unneeded insns in front of the
3884 branch target when there is no alignment. */
3886 /* There are two delay_slot cases to consider. One is the simple case
3887 where the preceding branch is to the insn beyond the barrier (simple
3888 delay slot filling), and the other is where the preceding branch has
3889 a delay slot that is a duplicate of the insn after the barrier
3890 (fill_eager_delay_slots) and the branch is to the insn after the insn
3891 after the barrier. */
3893 /* PREV is presumed to be the JUMP_INSN for the barrier under
3894 investigation. Skip to the insn before it. */
3895 prev = prev_real_insn (prev);
3897 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
3898 credit >= 0 && prev && GET_CODE (prev) == INSN;
3899 prev = prev_real_insn (prev))
3901 jump_to_next = 0;
3902 if (GET_CODE (PATTERN (prev)) == USE
3903 || GET_CODE (PATTERN (prev)) == CLOBBER)
3904 continue;
3905 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
3907 prev = XVECEXP (PATTERN (prev), 0, 1);
3908 if (INSN_UID (prev) == INSN_UID (next))
3910 /* Delay slot was filled with insn at jump target. */
3911 jump_to_next = 1;
3912 continue;
3916 if (slot &&
3917 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3918 slot = 0;
3919 credit -= get_attr_length (prev);
3921 if (prev
3922 && GET_CODE (prev) == JUMP_INSN
3923 && JUMP_LABEL (prev))
3925 rtx x;
3926 if (jump_to_next
3927 || next_real_insn (JUMP_LABEL (prev)) == next
3928 /* If relax_delay_slots() decides NEXT was redundant
3929 with some previous instruction, it will have
3930 redirected PREV's jump to the following insn. */
3931 || JUMP_LABEL (prev) == next_nonnote_insn (next)
3932 /* There is no upper bound on redundant instructions
3933 that might have been skipped, but we must not put an
3934 alignment where none had been before. */
3935 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
3936 (INSN_P (x)
3937 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
3938 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
3939 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
3941 rtx pat = PATTERN (prev);
3942 if (GET_CODE (pat) == PARALLEL)
3943 pat = XVECEXP (pat, 0, 0);
3944 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
3945 return 0;
3950 return align_jumps_log;
3953 /* If we are inside a phony loop, almost any kind of label can turn up as the
3954 first one in the loop. Aligning a braf label causes incorrect switch
3955 destination addresses; we can detect braf labels because they are
3956 followed by a BARRIER.
3957 Applying loop alignment to small constant or switch tables is a waste
3958 of space, so we suppress this too. */
3960 sh_loop_align (rtx label)
3962 rtx next = label;
3965 next = next_nonnote_insn (next);
3966 while (next && GET_CODE (next) == CODE_LABEL);
3968 if (! next
3969 || ! INSN_P (next)
3970 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
3971 || recog_memoized (next) == CODE_FOR_consttable_2)
3972 return 0;
3974 return align_loops_log;
3977 /* Do a final pass over the function, just before delayed branch
3978 scheduling. */
3980 static void
3981 sh_reorg (void)
3983 rtx first, insn, mova = NULL_RTX;
3984 int num_mova;
3985 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
3986 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
3988 first = get_insns ();
3990 /* We must split call insns before introducing `mova's. If we're
3991 optimizing, they'll have already been split. Otherwise, make
3992 sure we don't split them too late. */
3993 if (! optimize)
3994 split_all_insns_noflow ();
3996 if (TARGET_SHMEDIA)
3997 return;
3999 /* If relaxing, generate pseudo-ops to associate function calls with
4000 the symbols they call. It does no harm to not generate these
4001 pseudo-ops. However, when we can generate them, it enables the
4002 linker to potentially relax the jsr to a bsr, and eliminate the
4003 register load and, possibly, the constant pool entry. */
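/* Roughly, the insn that loads the call target gets a local label emitted
   just before it, and every associated call gets a .uses directive naming
   that label; a sketch (with made-up label and symbol names) of the
   resulting assembly:

       .L5:
              mov.l   .L10,r1        ! r1 = address of foo
              ...
              .uses   .L5
              jsr     @r1            ! the linker may relax this to bsr foo
               nop

   See final_prescan_insn below for the directives actually emitted. */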
4005 mdep_reorg_phase = SH_INSERT_USES_LABELS;
4006 if (TARGET_RELAX)
4008 /* Remove all REG_LABEL notes. We want to use them for our own
4009 purposes. This works because none of the remaining passes
4010 need to look at them.
4012 ??? But it may break in the future. We should use a machine
4013 dependent REG_NOTE, or some other approach entirely. */
4014 for (insn = first; insn; insn = NEXT_INSN (insn))
4016 if (INSN_P (insn))
4018 rtx note;
4020 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
4021 remove_note (insn, note);
4025 for (insn = first; insn; insn = NEXT_INSN (insn))
4027 rtx pattern, reg, link, set, scan, dies, label;
4028 int rescan = 0, foundinsn = 0;
4030 if (GET_CODE (insn) == CALL_INSN)
4032 pattern = PATTERN (insn);
4034 if (GET_CODE (pattern) == PARALLEL)
4035 pattern = XVECEXP (pattern, 0, 0);
4036 if (GET_CODE (pattern) == SET)
4037 pattern = SET_SRC (pattern);
4039 if (GET_CODE (pattern) != CALL
4040 || GET_CODE (XEXP (pattern, 0)) != MEM)
4041 continue;
4043 reg = XEXP (XEXP (pattern, 0), 0);
4045 else
4047 reg = sfunc_uses_reg (insn);
4048 if (! reg)
4049 continue;
4052 if (GET_CODE (reg) != REG)
4053 continue;
4055 /* This is a function call via REG. If the only uses of REG
4056 between the time that it is set and the time that it dies
4057 are in function calls, then we can associate all the
4058 function calls with the setting of REG. */
4060 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
4062 if (REG_NOTE_KIND (link) != 0)
4063 continue;
4064 set = single_set (XEXP (link, 0));
4065 if (set && rtx_equal_p (reg, SET_DEST (set)))
4067 link = XEXP (link, 0);
4068 break;
4072 if (! link)
4074 /* ??? Sometimes global register allocation will have
4075 deleted the insn pointed to by LOG_LINKS. Try
4076 scanning backward to find where the register is set. */
4077 for (scan = PREV_INSN (insn);
4078 scan && GET_CODE (scan) != CODE_LABEL;
4079 scan = PREV_INSN (scan))
4081 if (! INSN_P (scan))
4082 continue;
4084 if (! reg_mentioned_p (reg, scan))
4085 continue;
4087 if (noncall_uses_reg (reg, scan, &set))
4088 break;
4090 if (set)
4092 link = scan;
4093 break;
4098 if (! link)
4099 continue;
4101 /* The register is set at LINK. */
4103 /* We can only optimize the function call if the register is
4104 being set to a symbol. In theory, we could sometimes
4105 optimize calls to a constant location, but the assembler
4106 and linker do not support that at present. */
4107 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4108 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4109 continue;
4111 /* Scan forward from LINK to the place where REG dies, and
4112 make sure that the only insns which use REG are
4113 themselves function calls. */
4115 /* ??? This doesn't work for call targets that were allocated
4116 by reload, since there may not be a REG_DEAD note for the
4117 register. */
4119 dies = NULL_RTX;
4120 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4122 rtx scanset;
4124 /* Don't try to trace forward past a CODE_LABEL if we haven't
4125 seen INSN yet. Ordinarily, we will only find the setting insn
4126 in LOG_LINKS if it is in the same basic block. However,
4127 cross-jumping can insert code labels in between the load and
4128 the call, and can result in situations where a single call
4129 insn may have two targets depending on where we came from. */
4131 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4132 break;
4134 if (! INSN_P (scan))
4135 continue;
4137 /* Don't try to trace forward past a JUMP. To optimize
4138 safely, we would have to check that all the
4139 instructions at the jump destination did not use REG. */
4141 if (GET_CODE (scan) == JUMP_INSN)
4142 break;
4144 if (! reg_mentioned_p (reg, scan))
4145 continue;
4147 if (noncall_uses_reg (reg, scan, &scanset))
4148 break;
4150 if (scan == insn)
4151 foundinsn = 1;
4153 if (scan != insn
4154 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4156 /* There is a function call to this register other
4157 than the one we are checking. If we optimize
4158 this call, we need to rescan again below. */
4159 rescan = 1;
4162 /* ??? We shouldn't have to worry about SCANSET here.
4163 We should just be able to check for a REG_DEAD note
4164 on a function call. However, the REG_DEAD notes are
4165 apparently not dependable around libcalls; c-torture
4166 execute/920501-2 is a test case. If SCANSET is set,
4167 then this insn sets the register, so it must have
4168 died earlier. Unfortunately, this will only handle
4169 the cases in which the register is, in fact, set in a
4170 later insn. */
4172 /* ??? We shouldn't have to use FOUNDINSN here.
4173 However, the LOG_LINKS fields are apparently not
4174 entirely reliable around libcalls;
4175 newlib/libm/math/e_pow.c is a test case. Sometimes
4176 an insn will appear in LOG_LINKS even though it is
4177 not the most recent insn which sets the register. */
4179 if (foundinsn
4180 && (scanset
4181 || find_reg_note (scan, REG_DEAD, reg)))
4183 dies = scan;
4184 break;
4188 if (! dies)
4190 /* Either there was a branch, or some insn used REG
4191 other than as a function call address. */
4192 continue;
4195 /* Create a code label, and put it in a REG_LABEL note on
4196 the insn which sets the register, and on each call insn
4197 which uses the register. In final_prescan_insn we look
4198 for the REG_LABEL notes, and output the appropriate label
4199 or pseudo-op. */
4201 label = gen_label_rtx ();
4202 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
4203 REG_NOTES (link));
4204 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
4205 REG_NOTES (insn));
4206 if (rescan)
4208 scan = link;
4211 rtx reg2;
4213 scan = NEXT_INSN (scan);
4214 if (scan != insn
4215 && ((GET_CODE (scan) == CALL_INSN
4216 && reg_mentioned_p (reg, scan))
4217 || ((reg2 = sfunc_uses_reg (scan))
4218 && REGNO (reg2) == REGNO (reg))))
4219 REG_NOTES (scan)
4220 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
4222 while (scan != dies);
4227 if (TARGET_SH2)
4228 fixup_addr_diff_vecs (first);
4230 if (optimize)
4232 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4233 shorten_branches (first);
4235 /* Scan the function looking for move instructions which have to be
4236 changed to pc-relative loads and insert the literal tables. */
4238 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4239 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4241 if (mova_p (insn))
4243 /* ??? basic block reordering can move a switch table dispatch
4244 below the switch table. Check if that has happened.
4245 We only have the addresses available when optimizing; but then,
4246 this check shouldn't be needed when not optimizing. */
4247 rtx label_ref = XVECEXP (SET_SRC (PATTERN (insn)), 0, 0);
4248 if (optimize
4249 && (INSN_ADDRESSES (INSN_UID (insn))
4250 > INSN_ADDRESSES (INSN_UID (XEXP (label_ref, 0)))))
4252 /* Change the mova into a load.
4253 broken_move will then return true for it. */
4254 fixup_mova (insn);
4256 else if (! num_mova++)
4257 mova = insn;
4259 else if (GET_CODE (insn) == JUMP_INSN
4260 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4261 && num_mova)
4263 rtx scan;
4264 int total;
4266 num_mova--;
4268 /* Some code might have been inserted between the mova and
4269 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4270 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4271 total += get_attr_length (scan);
4273 /* The range of mova is 1020; add 4 because pc counts from the address
4274 of the second instruction after this one, and subtract 2 in case pc
4275 is 2-byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
4276 cancels out with alignment effects of the mova itself. */
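/* That is, 1020 + 4 - 2 == 1022, the limit tested just below; if the
   accumulated length exceeds it, the mova is converted into an ordinary
   constant pool load via fixup_mova. */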
4277 if (total > 1022)
4279 /* Change the mova into a load, and restart scanning
4280 there. broken_move will then return true for mova. */
4281 fixup_mova (mova);
4282 insn = mova;
4285 if (broken_move (insn)
4286 || (GET_CODE (insn) == INSN
4287 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
4289 rtx scan;
4290 /* Scan ahead looking for a barrier to stick the constant table
4291 behind. */
4292 rtx barrier = find_barrier (num_mova, mova, insn);
4293 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4294 int need_aligned_label = 0;
4296 if (num_mova && ! mova_p (mova))
4298 /* find_barrier had to change the first mova into a
4299 pcload; thus, we have to start with this new pcload. */
4300 insn = mova;
4301 num_mova = 0;
4303 /* Now find all the moves between the points and modify them. */
4304 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
4306 if (GET_CODE (scan) == CODE_LABEL)
4307 last_float = 0;
4308 if (GET_CODE (scan) == INSN
4309 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
4310 need_aligned_label = 1;
4311 if (broken_move (scan))
4313 rtx *patp = &PATTERN (scan), pat = *patp;
4314 rtx src, dst;
4315 rtx lab;
4316 rtx newsrc;
4317 enum machine_mode mode;
4319 if (GET_CODE (pat) == PARALLEL)
4320 patp = &XVECEXP (pat, 0, 0), pat = *patp;
4321 src = SET_SRC (pat);
4322 dst = SET_DEST (pat);
4323 mode = GET_MODE (dst);
4325 if (mode == SImode && hi_const (src)
4326 && REGNO (dst) != FPUL_REG)
4328 int offset = 0;
4330 mode = HImode;
4331 while (GET_CODE (dst) == SUBREG)
4333 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
4334 GET_MODE (SUBREG_REG (dst)),
4335 SUBREG_BYTE (dst),
4336 GET_MODE (dst));
4337 dst = SUBREG_REG (dst);
4339 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
4341 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
4343 /* This must be an insn that clobbers r0. */
4344 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
4345 XVECLEN (PATTERN (scan), 0)
4346 - 1);
4347 rtx clobber = *clobberp;
4349 if (GET_CODE (clobber) != CLOBBER
4350 || ! rtx_equal_p (XEXP (clobber, 0), r0_rtx))
4351 abort ();
4353 if (last_float
4354 && reg_set_between_p (r0_rtx, last_float_move, scan))
4355 last_float = 0;
4356 if (last_float
4357 && TARGET_SHCOMPACT
4358 && GET_MODE_SIZE (mode) != 4
4359 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
4360 last_float = 0;
4361 lab = add_constant (src, mode, last_float);
4362 if (lab)
4363 emit_insn_before (gen_mova (lab), scan);
4364 else
4366 /* There will be a REG_UNUSED note for r0 on
4367 LAST_FLOAT_MOVE; we have to change it to REG_INC,
4368 since otherwise reorg:mark_target_live_regs will not
4369 consider r0 to be used, and we could end up with a delay
4370 slot insn in front of SCAN that clobbers r0. */
4371 rtx note
4372 = find_regno_note (last_float_move, REG_UNUSED, 0);
4374 /* If we are not optimizing, then there may not be
4375 a note. */
4376 if (note)
4377 PUT_MODE (note, REG_INC);
4379 *last_float_addr = r0_inc_rtx;
4381 last_float_move = scan;
4382 last_float = src;
4383 newsrc = gen_rtx_MEM (mode,
4384 (((TARGET_SH4 && ! TARGET_FMOVD)
4385 || REGNO (dst) == FPUL_REG)
4386 ? r0_inc_rtx
4387 : r0_rtx));
4388 last_float_addr = &XEXP (newsrc, 0);
4390 /* Remove the clobber of r0. */
4391 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
4392 gen_rtx_SCRATCH (Pmode));
4394 /* This is a mova needing a label. Create it. */
4395 else if (GET_CODE (src) == UNSPEC
4396 && XINT (src, 1) == UNSPEC_MOVA
4397 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
4399 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
4400 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4401 newsrc = gen_rtx_UNSPEC (SImode,
4402 gen_rtvec (1, newsrc),
4403 UNSPEC_MOVA);
4405 else
4407 lab = add_constant (src, mode, 0);
4408 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4409 newsrc = gen_const_mem (mode, newsrc);
4411 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
4412 INSN_CODE (scan) = -1;
4415 dump_table (need_aligned_label ? insn : 0, barrier);
4416 insn = barrier;
4420 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
4421 INSN_ADDRESSES_FREE ();
4422 split_branches (first);
4424 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4425 also has an effect on the register that holds the address of the sfunc.
4426 Insert an extra dummy insn in front of each sfunc that pretends to
4427 use this register. */
4428 if (flag_delayed_branch)
4430 for (insn = first; insn; insn = NEXT_INSN (insn))
4432 rtx reg = sfunc_uses_reg (insn);
4434 if (! reg)
4435 continue;
4436 emit_insn_before (gen_use_sfunc_addr (reg), insn);
4439 #if 0
4440 /* fpscr is not actually a user variable, but we pretend it is for the
4441 sake of the previous optimization passes, since we want it handled like
4442 one. However, we don't have any debugging information for it, so turn
4443 it into a non-user variable now. */
4444 if (TARGET_SH4)
4445 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4446 #endif
4447 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
4451 get_dest_uid (rtx label, int max_uid)
4453 rtx dest = next_real_insn (label);
4454 int dest_uid;
4455 if (! dest)
4456 /* This can happen for an undefined label. */
4457 return 0;
4458 dest_uid = INSN_UID (dest);
4459 /* If this is a newly created branch redirection blocking instruction,
4460 we cannot index the branch_uid or insn_addresses arrays with its
4461 uid. But then, we won't need to, because the actual destination is
4462 the following branch. */
4463 while (dest_uid >= max_uid)
4465 dest = NEXT_INSN (dest);
4466 dest_uid = INSN_UID (dest);
4468 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
4469 return 0;
4470 return dest_uid;
4473 /* Split condbranches that are out of range. Also add clobbers for
4474 scratch registers that are needed in far jumps.
4475 We do this before delay slot scheduling, so that it can take our
4476 newly created instructions into account. It also allows us to
4477 find branches with common targets more easily. */
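/* The rewrite for an out-of-range conditional branch is, roughly (a
   sketch with invented label names; see gen_far_branch above):

       bt      .Lfar          ! target too far for bt/bf

   becomes

       bf      .Lskip
       bra     .Lfar          ! bra (or rts) reaches much further
        nop
   .Lskip:

   i.e. the condition is inverted so that a short branch skips over an
   unconditional jump that can reach the distant target. */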
4479 static void
4480 split_branches (rtx first)
4482 rtx insn;
4483 struct far_branch **uid_branch, *far_branch_list = 0;
4484 int max_uid = get_max_uid ();
4486 /* Find out which branches are out of range. */
4487 shorten_branches (first);
4489 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
4490 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
4492 for (insn = first; insn; insn = NEXT_INSN (insn))
4493 if (! INSN_P (insn))
4494 continue;
4495 else if (INSN_DELETED_P (insn))
4497 /* Shorten_branches would split this instruction again,
4498 so transform it into a note. */
4499 PUT_CODE (insn, NOTE);
4500 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4501 NOTE_SOURCE_FILE (insn) = 0;
4503 else if (GET_CODE (insn) == JUMP_INSN
4504 /* Don't mess with ADDR_DIFF_VEC */
4505 && (GET_CODE (PATTERN (insn)) == SET
4506 || GET_CODE (PATTERN (insn)) == RETURN))
4508 enum attr_type type = get_attr_type (insn);
4509 if (type == TYPE_CBRANCH)
4511 rtx next, beyond;
4513 if (get_attr_length (insn) > 4)
4515 rtx src = SET_SRC (PATTERN (insn));
4516 rtx olabel = XEXP (XEXP (src, 1), 0);
4517 int addr = INSN_ADDRESSES (INSN_UID (insn));
4518 rtx label = 0;
4519 int dest_uid = get_dest_uid (olabel, max_uid);
4520 struct far_branch *bp = uid_branch[dest_uid];
4522 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4523 the label if the LABEL_NUSES count drops to zero. There is
4524 always a jump_optimize pass that sets these values, but it
4525 proceeds to delete unreferenced code, and then if not
4526 optimizing, to un-delete the deleted instructions, thus
4527 leaving labels whose use counts are too low. */
4528 if (! optimize)
4530 JUMP_LABEL (insn) = olabel;
4531 LABEL_NUSES (olabel)++;
4533 if (! bp)
4535 bp = (struct far_branch *) alloca (sizeof *bp);
4536 uid_branch[dest_uid] = bp;
4537 bp->prev = far_branch_list;
4538 far_branch_list = bp;
4539 bp->far_label
4540 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
4541 LABEL_NUSES (bp->far_label)++;
4543 else
4545 label = bp->near_label;
4546 if (! label && bp->address - addr >= CONDJUMP_MIN)
4548 rtx block = bp->insert_place;
4550 if (GET_CODE (PATTERN (block)) == RETURN)
4551 block = PREV_INSN (block);
4552 else
4553 block = gen_block_redirect (block,
4554 bp->address, 2);
4555 label = emit_label_after (gen_label_rtx (),
4556 PREV_INSN (block));
4557 bp->near_label = label;
4559 else if (label && ! NEXT_INSN (label))
4561 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4562 bp->insert_place = insn;
4563 else
4564 gen_far_branch (bp);
4567 if (! label
4568 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4570 bp->near_label = label = gen_label_rtx ();
4571 bp->insert_place = insn;
4572 bp->address = addr;
4574 if (! redirect_jump (insn, label, 1))
4575 abort ();
4577 else
4579 /* get_attr_length (insn) == 2 */
4580 /* Check if we have a pattern where reorg wants to redirect
4581 the branch to a label from an unconditional branch that
4582 is too far away. */
4583 /* We can't use JUMP_LABEL here because it might be undefined
4584 when not optimizing. */
4585 /* A syntax error might cause beyond to be NULL_RTX. */
4586 beyond
4587 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4588 0));
4590 if (beyond
4591 && (GET_CODE (beyond) == JUMP_INSN
4592 || ((beyond = next_active_insn (beyond))
4593 && GET_CODE (beyond) == JUMP_INSN))
4594 && GET_CODE (PATTERN (beyond)) == SET
4595 && recog_memoized (beyond) == CODE_FOR_jump_compact
4596 && ((INSN_ADDRESSES
4597 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4598 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4599 > 252 + 258 + 2))
4600 gen_block_redirect (beyond,
4601 INSN_ADDRESSES (INSN_UID (beyond)), 1);
4604 next = next_active_insn (insn);
4606 if ((GET_CODE (next) == JUMP_INSN
4607 || ((next = next_active_insn (next))
4608 && GET_CODE (next) == JUMP_INSN))
4609 && GET_CODE (PATTERN (next)) == SET
4610 && recog_memoized (next) == CODE_FOR_jump_compact
4611 && ((INSN_ADDRESSES
4612 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
4613 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4614 > 252 + 258 + 2))
4615 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
4617 else if (type == TYPE_JUMP || type == TYPE_RETURN)
4619 int addr = INSN_ADDRESSES (INSN_UID (insn));
4620 rtx far_label = 0;
4621 int dest_uid = 0;
4622 struct far_branch *bp;
4624 if (type == TYPE_JUMP)
4626 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
4627 dest_uid = get_dest_uid (far_label, max_uid);
4628 if (! dest_uid)
4630 /* Parse errors can lead to labels outside
4631 the insn stream. */
4632 if (! NEXT_INSN (far_label))
4633 continue;
4635 if (! optimize)
4637 JUMP_LABEL (insn) = far_label;
4638 LABEL_NUSES (far_label)++;
4640 redirect_jump (insn, NULL_RTX, 1);
4641 far_label = 0;
4644 bp = uid_branch[dest_uid];
4645 if (! bp)
4647 bp = (struct far_branch *) alloca (sizeof *bp);
4648 uid_branch[dest_uid] = bp;
4649 bp->prev = far_branch_list;
4650 far_branch_list = bp;
4651 bp->near_label = 0;
4652 bp->far_label = far_label;
4653 if (far_label)
4654 LABEL_NUSES (far_label)++;
4656 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
4657 if (addr - bp->address <= CONDJUMP_MAX)
4658 emit_label_after (bp->near_label, PREV_INSN (insn));
4659 else
4661 gen_far_branch (bp);
4662 bp->near_label = 0;
4664 else
4665 bp->near_label = 0;
4666 bp->address = addr;
4667 bp->insert_place = insn;
4668 if (! far_label)
4669 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
4670 else
4671 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
4674 /* Generate all pending far branches,
4675 and free our references to the far labels. */
4676 while (far_branch_list)
4678 if (far_branch_list->near_label
4679 && ! NEXT_INSN (far_branch_list->near_label))
4680 gen_far_branch (far_branch_list);
4681 if (optimize
4682 && far_branch_list->far_label
4683 && ! --LABEL_NUSES (far_branch_list->far_label))
4684 delete_insn (far_branch_list->far_label);
4685 far_branch_list = far_branch_list->prev;
4688 /* Instruction length information is no longer valid due to the new
4689 instructions that have been generated. */
4690 init_insn_lengths ();
4693 /* Dump out instruction addresses, which is useful for debugging the
4694 constant pool table stuff.
4696 If relaxing, output the label and pseudo-ops used to link together
4697 calls and the instruction which set the registers. */
4699 /* ??? The addresses printed by this routine for insns are nonsense for
4700 insns which are inside of a sequence where none of the inner insns have
4701 variable length. This is because the second pass of shorten_branches
4702 does not bother to update them. */
4704 void
4705 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
4706 int noperands ATTRIBUTE_UNUSED)
4708 if (TARGET_DUMPISIZE)
4709 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
4711 if (TARGET_RELAX)
4713 rtx note;
4715 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
4716 if (note)
4718 rtx pattern;
4720 pattern = PATTERN (insn);
4721 if (GET_CODE (pattern) == PARALLEL)
4722 pattern = XVECEXP (pattern, 0, 0);
4723 if (GET_CODE (pattern) == CALL
4724 || (GET_CODE (pattern) == SET
4725 && (GET_CODE (SET_SRC (pattern)) == CALL
4726 || get_attr_type (insn) == TYPE_SFUNC)))
4727 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
4728 CODE_LABEL_NUMBER (XEXP (note, 0)));
4729 else if (GET_CODE (pattern) == SET)
4730 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4731 CODE_LABEL_NUMBER (XEXP (note, 0)));
4732 else
4733 abort ();
4738 /* Dump out any constants accumulated in the final pass. These will
4739 only be labels. */
4741 const char *
4742 output_jump_label_table (void)
4744 int i;
4746 if (pool_size)
4748 fprintf (asm_out_file, "\t.align 2\n");
4749 for (i = 0; i < pool_size; i++)
4751 pool_node *p = &pool_vector[i];
4753 (*targetm.asm_out.internal_label) (asm_out_file, "L",
4754 CODE_LABEL_NUMBER (p->label));
4755 output_asm_insn (".long %O0", &p->value);
4757 pool_size = 0;
4760 return "";
4763 /* A full frame looks like:
4765 arg-5
4766 arg-4
4767 [ if current_function_anonymous_args
4768 arg-3
4769 arg-2
4770 arg-1
4771 arg-0 ]
4772 saved-fp
4773 saved-r10
4774 saved-r11
4775 saved-r12
4776 saved-pr
4777 local-n
4779 local-1
4780 local-0 <- fp points here. */
4782 /* Number of bytes pushed for anonymous args, used to pass information
4783 between expand_prologue and expand_epilogue. */
4785 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
4786 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
4787 for an epilogue and a negative value means that it's for a sibcall
4788 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
4789 all the registers that are about to be restored, and hence dead. */
4791 static void
4792 output_stack_adjust (int size, rtx reg, int epilogue_p,
4793 HARD_REG_SET *live_regs_mask)
4795 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
4796 if (size)
4798 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
4800 /* This test is bogus, as output_stack_adjust is used to re-align the
4801 stack. */
4802 #if 0
4803 if (size % align)
4804 abort ();
4805 #endif
4807 if (CONST_OK_FOR_ADD (size))
4808 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
4809 /* Try to do it with two partial adjustments; however, we must make
4810 sure that the stack is properly aligned at all times, in case
4811 an interrupt occurs between the two partial adjustments. */
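/* For instance, assuming an 8-byte STACK_BOUNDARY and SIZE of 200 (too
   large for a single add-immediate on the non-SHmedia targets), we would
   emit two adds of 96 and 104: 96 == (200 / 2) & -8 is a multiple of the
   alignment, so the stack stays aligned in between, and both constants
   fit the signed 8-bit immediate range. */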
4812 else if (CONST_OK_FOR_ADD (size / 2 & -align)
4813 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
4815 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
4816 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
4818 else
4820 rtx const_reg;
4821 rtx insn;
4822 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
4823 int i;
4825 /* If TEMP is invalid, we could temporarily save a general
4826 register to MACL. However, there is currently no need
4827 to handle this case, so just abort when we see it. */
4828 if (epilogue_p < 0
4829 || current_function_interrupt
4830 || ! call_really_used_regs[temp] || fixed_regs[temp])
4831 temp = -1;
4832 if (temp < 0 && ! current_function_interrupt
4833 && (TARGET_SHMEDIA || epilogue_p >= 0))
4835 HARD_REG_SET temps;
4836 COPY_HARD_REG_SET (temps, call_used_reg_set);
4837 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
4838 if (epilogue_p > 0)
4840 int nreg = 0;
4841 if (current_function_return_rtx)
4843 enum machine_mode mode;
4844 mode = GET_MODE (current_function_return_rtx);
4845 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
4846 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
4848 for (i = 0; i < nreg; i++)
4849 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
4850 if (current_function_calls_eh_return)
4852 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
4853 for (i = 0; i <= 3; i++)
4854 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
4857 if (TARGET_SHMEDIA && epilogue_p < 0)
4858 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
4859 CLEAR_HARD_REG_BIT (temps, i);
4860 if (epilogue_p <= 0)
4862 for (i = FIRST_PARM_REG;
4863 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
4864 CLEAR_HARD_REG_BIT (temps, i);
4865 if (cfun->static_chain_decl != NULL)
4866 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
4868 temp = scavenge_reg (&temps);
4870 if (temp < 0 && live_regs_mask)
4871 temp = scavenge_reg (live_regs_mask);
4872 if (temp < 0)
4874 /* If we reached here, the most likely case is the (sibcall)
4875 epilogue for non-SHmedia. Put a special push/pop sequence
4876 for such a case as a last resort. This looks lengthy, but it
4877 would not be a problem because it seems to be very rare. */
4878 if (! TARGET_SHMEDIA && epilogue_p)
4880 rtx adj_reg, tmp_reg, mem;
4882 /* ??? There is still the slight possibility that r4 or r5
4883 have been reserved as fixed registers or assigned as
4884 global registers, and they change during an interrupt.
4885 There are possible ways to handle this:
4886 - If we are adjusting the frame pointer (r14), we can do
4887 with a single temp register and an ordinary push / pop
4888 on the stack.
4889 - Grab any call-used or call-saved registers (i.e. not
4890 fixed or globals) for the temps we need. We might
4891 also grab r14 if we are adjusting the stack pointer.
4892 If we can't find enough available registers, issue
4893 a diagnostic and abort - the user must have reserved
4894 way too many registers.
4895 But since all this is rather unlikely to happen and
4896 would require extra testing, we just abort if r4 / r5
4897 are not available. */
4898 if (fixed_regs[4] || fixed_regs[5]
4899 || global_regs[4] || global_regs[5])
4900 abort ();
4902 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
4903 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
4904 emit_move_insn (gen_rtx_MEM (Pmode, reg), adj_reg);
4905 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
4906 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
4907 mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
4908 emit_move_insn (mem, tmp_reg);
4909 emit_move_insn (tmp_reg, gen_rtx_MEM (Pmode, reg));
4910 mem = gen_rtx_MEM (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
4911 emit_move_insn (mem, tmp_reg);
4912 emit_move_insn (reg, adj_reg);
4913 mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
4914 emit_move_insn (adj_reg, mem);
4915 mem = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, reg));
4916 emit_move_insn (tmp_reg, mem);
4917 return;
4919 else
4920 abort ();
4922 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
4924 /* If SIZE is negative, subtract the positive value.
4925 This sometimes allows a constant pool entry to be shared
4926 between prologue and epilogue code. */
4927 if (size < 0)
4929 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
4930 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
4932 else
4934 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
4935 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
4937 if (! epilogue_p)
4938 REG_NOTES (insn)
4939 = (gen_rtx_EXPR_LIST
4940 (REG_FRAME_RELATED_EXPR,
4941 gen_rtx_SET (VOIDmode, reg,
4942 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
4943 REG_NOTES (insn)));
4948 static rtx
4949 frame_insn (rtx x)
4951 x = emit_insn (x);
4952 RTX_FRAME_RELATED_P (x) = 1;
4953 return x;
4956 /* Output RTL to push register RN onto the stack. */
4958 static rtx
4959 push (int rn)
4961 rtx x;
4962 if (rn == FPUL_REG)
4963 x = gen_push_fpul ();
4964 else if (rn == FPSCR_REG)
4965 x = gen_push_fpscr ();
4966 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4967 && FP_OR_XD_REGISTER_P (rn))
4969 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4970 return NULL_RTX;
4971 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
4973 else if (TARGET_SH2E && FP_REGISTER_P (rn))
4974 x = gen_push_e (gen_rtx_REG (SFmode, rn));
4975 else
4976 x = gen_push (gen_rtx_REG (SImode, rn));
4978 x = frame_insn (x);
4979 REG_NOTES (x)
4980 = gen_rtx_EXPR_LIST (REG_INC,
4981 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
4982 return x;
4985 /* Output RTL to pop register RN from the stack. */
4987 static void
4988 pop (int rn)
4990 rtx x;
4991 if (rn == FPUL_REG)
4992 x = gen_pop_fpul ();
4993 else if (rn == FPSCR_REG)
4994 x = gen_pop_fpscr ();
4995 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
4996 && FP_OR_XD_REGISTER_P (rn))
4998 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
4999 return;
5000 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
5002 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5003 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
5004 else
5005 x = gen_pop (gen_rtx_REG (SImode, rn));
5007 x = emit_insn (x);
5008 REG_NOTES (x)
5009 = gen_rtx_EXPR_LIST (REG_INC,
5010 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5013 /* Generate code to push the regs specified in the mask. */
5015 static void
5016 push_regs (HARD_REG_SET *mask, int interrupt_handler)
5018 int i;
5019 int skip_fpscr = 0;
5021 /* Push PR last; this gives better latencies after the prologue, and
5022 candidates for the return delay slot when there are no general
5023 registers pushed. */
5024 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5026 /* If this is an interrupt handler, and the SZ bit varies,
5027 and we have to push any floating point register, we need
5028 to switch to the correct precision first. */
5029 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
5030 && hard_regs_intersect_p (mask, &reg_class_contents[DF_REGS]))
5032 HARD_REG_SET unsaved;
5034 push (FPSCR_REG);
5035 COMPL_HARD_REG_SET (unsaved, *mask);
5036 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
5037 skip_fpscr = 1;
5039 if (i != PR_REG
5040 && (i != FPSCR_REG || ! skip_fpscr)
5041 && TEST_HARD_REG_BIT (*mask, i))
5042 push (i);
5044 if (TEST_HARD_REG_BIT (*mask, PR_REG))
5045 push (PR_REG);
5048 /* Calculate how much extra space is needed to save all callee-saved
5049 target registers.
5050 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5052 static int
5053 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
5055 int reg;
5056 int stack_space = 0;
5057 int interrupt_handler = sh_cfun_interrupt_handler_p ();
5059 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5060 if ((! call_really_used_regs[reg] || interrupt_handler)
5061 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5062 /* Leave space to save this target register on the stack,
5063 in case target register allocation wants to use it. */
5064 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5065 return stack_space;
5068 /* Decide whether we should reserve space for callee-save target registers,
5069 in case target register allocation wants to use them. REGS_SAVED is
5070 the space, in bytes, that is already required for register saves.
5071 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5073 static int
5074 shmedia_reserve_space_for_target_registers_p (int regs_saved,
5075 HARD_REG_SET *live_regs_mask)
5077 if (optimize_size)
5078 return 0;
5079 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
5082 /* Decide how much space to reserve for callee-save target registers
5083 in case target register allocation wants to use them.
5084 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5086 static int
5087 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5089 if (shmedia_space_reserved_for_target_registers)
5090 return shmedia_target_regs_stack_space (live_regs_mask);
5091 else
5092 return 0;
5095 /* Work out the registers which need to be saved, both as a mask and a
5096 count of saved words. Return the count.
5098 If doing a pragma interrupt function, then push all regs used by the
5099 function, and if we call another function (we can tell by looking at PR),
5100 make sure that all the regs it clobbers are safe too. */
5102 static int
5103 calc_live_regs (HARD_REG_SET *live_regs_mask)
5105 int reg;
5106 int count;
5107 int interrupt_handler;
5108 int pr_live, has_call;
5110 interrupt_handler = sh_cfun_interrupt_handler_p ();
5112 CLEAR_HARD_REG_SET (*live_regs_mask);
5113 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
5114 && regs_ever_live[FPSCR_REG])
5115 target_flags &= ~FPU_SINGLE_BIT;
5116 /* If we can avoid a lot of saves by switching to double mode, do that. */
5117 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
5118 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5119 if (regs_ever_live[reg] && regs_ever_live[reg+1]
5120 && (! call_really_used_regs[reg]
5121 || (interrupt_handler && ! pragma_trapa))
5122 && ++count > 2)
5124 target_flags &= ~FPU_SINGLE_BIT;
5125 break;
5127 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5128 knows how to use it. That means the pseudo originally allocated for
5129 the initial value can become the PR_MEDIA_REG hard register, as seen for
5130 execute/20010122-1.c:test9. */
5131 if (TARGET_SHMEDIA)
5132 /* ??? this function is called from initial_elimination_offset, hence we
5133 can't use the result of sh_media_register_for_return here. */
5134 pr_live = sh_pr_n_sets ();
5135 else
5137 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5138 pr_live = (pr_initial
5139 ? (GET_CODE (pr_initial) != REG
5140 || REGNO (pr_initial) != (PR_REG))
5141 : regs_ever_live[PR_REG]);
5142 /* For SHcompact, if not optimizing, we end up with a memory reference
5143 using the return address pointer for __builtin_return_address even
5144 though there is no actual need to put the PR register on the stack. */
5145 pr_live |= regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM];
5147 /* Force PR to be live if the prologue has to call the SHmedia
5148 argument decoder or register saver. */
5149 if (TARGET_SHCOMPACT
5150 && ((current_function_args_info.call_cookie
5151 & ~ CALL_COOKIE_RET_TRAMP (1))
5152 || current_function_has_nonlocal_label))
5153 pr_live = 1;
5154 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
5155 for (count = 0, reg = FIRST_PSEUDO_REGISTER - 1; reg >= 0; reg--)
5157 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5158 ? pr_live
5159 : (interrupt_handler && ! pragma_trapa)
5160 ? (/* Need to save all the regs ever live. */
5161 (regs_ever_live[reg]
5162 || (call_really_used_regs[reg]
5163 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
5164 || reg == PIC_OFFSET_TABLE_REGNUM)
5165 && has_call)
5166 || (has_call && REGISTER_NATURAL_MODE (reg) == SImode
5167 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5168 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5169 && reg != RETURN_ADDRESS_POINTER_REGNUM
5170 && reg != T_REG && reg != GBR_REG
5171 /* Push fpscr only on targets which have an FPU. */
5172 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5173 : (/* Only push those regs which are used and need to be saved. */
5174 (TARGET_SHCOMPACT
5175 && flag_pic
5176 && current_function_args_info.call_cookie
5177 && reg == (int) PIC_OFFSET_TABLE_REGNUM)
5178 || (regs_ever_live[reg] && ! call_really_used_regs[reg])
5179 || (current_function_calls_eh_return
5180 && (reg == (int) EH_RETURN_DATA_REGNO (0)
5181 || reg == (int) EH_RETURN_DATA_REGNO (1)
5182 || reg == (int) EH_RETURN_DATA_REGNO (2)
5183 || reg == (int) EH_RETURN_DATA_REGNO (3)))
5184 || ((reg == MACL_REG || reg == MACH_REG)
5185 && regs_ever_live[reg]
5186 && sh_cfun_attr_renesas_p ())
5189 SET_HARD_REG_BIT (*live_regs_mask, reg);
5190 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5192 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
5193 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
5195 if (FP_REGISTER_P (reg))
5197 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
5199 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5200 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
5203 else if (XD_REGISTER_P (reg))
5205 /* Must switch to double mode to access these registers. */
5206 target_flags &= ~FPU_SINGLE_BIT;
5211 /* If we have a target register optimization pass after prologue / epilogue
5212 threading, we need to assume all target registers will be live even if
5213 they aren't now. */
5214 if (flag_branch_target_load_optimize2
5215 && TARGET_SAVE_ALL_TARGET_REGS
5216 && shmedia_space_reserved_for_target_registers)
5217 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5218 if ((! call_really_used_regs[reg] || interrupt_handler)
5219 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5221 SET_HARD_REG_BIT (*live_regs_mask, reg);
5222 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5224 /* If this is an interrupt handler, we don't have any call-clobbered
5225 registers we can conveniently use for target register save/restore.
5226 Make sure we save at least one general purpose register when we need
5227 to save target registers. */
5228 if (interrupt_handler
5229 && hard_regs_intersect_p (live_regs_mask,
5230 &reg_class_contents[TARGET_REGS])
5231 && ! hard_regs_intersect_p (live_regs_mask,
5232 &reg_class_contents[GENERAL_REGS]))
5234 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
5235 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
5238 return count;
5241 /* Code to generate prologue and epilogue sequences */
5243 /* PUSHED is the number of bytes that are being pushed on the
5244 stack for register saves. Return the frame size, padded
5245 appropriately so that the stack stays properly aligned. */
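/* For example, with an 8-byte stack boundary, get_frame_size () == 20 and
   PUSHED == 12: ((20 + 12 + 7) & -8) - 12 == 20, so pushed registers plus
   frame come to 32 bytes and the boundary is preserved. */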
5246 static HOST_WIDE_INT
5247 rounded_frame_size (int pushed)
5249 HOST_WIDE_INT size = get_frame_size ();
5250 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5252 return ((size + pushed + align - 1) & -align) - pushed;
5255 /* Choose a call-clobbered target-branch register that remains
5256 unchanged along the whole function. We set it up as the return
5257 value in the prologue. */
5259 sh_media_register_for_return (void)
5261 int regno;
5262 int tr0_used;
5264 if (! current_function_is_leaf)
5265 return -1;
5266 if (lookup_attribute ("interrupt_handler",
5267 DECL_ATTRIBUTES (current_function_decl)))
5268 return -1;
5270 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
5272 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
5273 if (call_really_used_regs[regno] && ! regs_ever_live[regno])
5274 return regno;
5276 return -1;
5279 /* The maximum registers we need to save are:
5280 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
5281 - 32 floating point registers (for each pair, we save none,
5282 one single precision value, or a double precision value).
5283 - 8 target registers
5284 - add 1 entry for a delimiter. */
5285 #define MAX_SAVED_REGS (62+32+8)
5287 typedef struct save_entry_s
5289 unsigned char reg;
5290 unsigned char mode;
5291 short offset;
5292 } save_entry;
5294 #define MAX_TEMPS 4
5296 /* There will be a delimiter entry with VOIDmode both at the start and the
5297 end of a filled in schedule. The end delimiter has the offset of the
5298 save with the smallest (i.e. most negative) offset. */
5299 typedef struct save_schedule_s
5301 save_entry entries[MAX_SAVED_REGS + 2];
5302 int temps[MAX_TEMPS+1];
5303 } save_schedule;
5305 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
5306 use reverse order. Returns the last entry written to (not counting
5307 the delimiter). OFFSET_BASE is a number to be added to all offset
5308 entries. */
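/* As a rough example, with OFFSET_BASE of 16 and only r28 (DImode) and
   fr12 (SFmode) to save, the aligned pass records (r28, DImode, offset 8)
   and the unaligned pass records (fr12, SFmode, offset 4); the schedule is
   bracketed by VOIDmode delimiters, the trailing one carrying the smallest
   offset, 4. */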
5310 static save_entry *
5311 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
5312 int offset_base)
5314 int align, i;
5315 save_entry *entry = schedule->entries;
5316 int tmpx = 0;
5317 int offset;
5319 if (! current_function_interrupt)
5320 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
5321 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
5322 && ! FUNCTION_ARG_REGNO_P (i)
5323 && i != FIRST_RET_REG
5324 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
5325 && ! (current_function_calls_eh_return
5326 && (i == EH_RETURN_STACKADJ_REGNO
5327 || ((unsigned) i <= EH_RETURN_DATA_REGNO (0)
5328 && (unsigned) i >= EH_RETURN_DATA_REGNO (3)))))
5329 schedule->temps[tmpx++] = i;
5330 entry->reg = -1;
5331 entry->mode = VOIDmode;
5332 entry->offset = offset_base;
5333 entry++;
5334 /* We loop twice: first, we save 8-byte aligned registers at the
5335 higher addresses, which are known to be aligned. Then we
5336 proceed to save 32-bit registers that don't need 8-byte
5337 alignment.
5338 If this is an interrupt function, all registers that need saving
5339 need to be saved in full. Moreover, we need to postpone saving
5340 target registers until we have saved some general purpose registers
5341 that we can then use as scratch registers. */
5342 offset = offset_base;
5343 for (align = 1; align >= 0; align--)
5345 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
5346 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5348 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5349 int reg = i;
5351 if (current_function_interrupt)
5353 if (TARGET_REGISTER_P (i))
5354 continue;
5355 if (GENERAL_REGISTER_P (i))
5356 mode = DImode;
5358 if (mode == SFmode && (i % 2) == 1
5359 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5360 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
5362 mode = DFmode;
5363 i--;
5364 reg--;
5367 /* If we're doing the aligned pass and this is not aligned,
5368 or we're doing the unaligned pass and this is aligned,
5369 skip it. */
5370 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
5371 != align)
5372 continue;
5374 if (current_function_interrupt
5375 && GENERAL_REGISTER_P (i)
5376 && tmpx < MAX_TEMPS)
5377 schedule->temps[tmpx++] = i;
5379 offset -= GET_MODE_SIZE (mode);
5380 entry->reg = i;
5381 entry->mode = mode;
5382 entry->offset = offset;
5383 entry++;
5385 if (align && current_function_interrupt)
5386 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
5387 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5389 offset -= GET_MODE_SIZE (DImode);
5390 entry->reg = i;
5391 entry->mode = DImode;
5392 entry->offset = offset;
5393 entry++;
5396 entry->reg = -1;
5397 entry->mode = VOIDmode;
5398 entry->offset = offset;
5399 schedule->temps[tmpx] = -1;
5400 return entry - 1;
5403 void
5404 sh_expand_prologue (void)
5406 HARD_REG_SET live_regs_mask;
5407 int d, i;
5408 int d_rounding = 0;
5409 int save_flags = target_flags;
5410 int pretend_args;
5412 current_function_interrupt = sh_cfun_interrupt_handler_p ();
5414 /* We have pretend args if we had an object sent partially in registers
5415 and partially on the stack, e.g. a large structure. */
5416 pretend_args = current_function_pretend_args_size;
5417 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
5418 && (NPARM_REGS(SImode)
5419 > current_function_args_info.arg_count[(int) SH_ARG_INT]))
5420 pretend_args = 0;
5421 output_stack_adjust (-pretend_args
5422 - current_function_args_info.stack_regs * 8,
5423 stack_pointer_rtx, 0, NULL);
5425 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
5426 /* We're going to use the PIC register to load the address of the
5427 incoming-argument decoder and/or of the return trampoline from
5428 the GOT, so make sure the PIC register is preserved and
5429 initialized. */
5430 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5432 if (TARGET_SHCOMPACT
5433 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5435 int reg;
5437 /* First, make all registers with incoming arguments that will
5438 be pushed onto the stack live, so that register renaming
5439 doesn't overwrite them. */
5440 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
5441 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
5442 >= NPARM_REGS (SImode) - reg)
5443 for (; reg < NPARM_REGS (SImode); reg++)
5444 emit_insn (gen_shcompact_preserve_incoming_args
5445 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5446 else if (CALL_COOKIE_INT_REG_GET
5447 (current_function_args_info.call_cookie, reg) == 1)
5448 emit_insn (gen_shcompact_preserve_incoming_args
5449 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5451 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
5452 stack_pointer_rtx);
5453 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
5454 GEN_INT (current_function_args_info.call_cookie));
5455 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
5456 gen_rtx_REG (SImode, R0_REG));
5458 else if (TARGET_SHMEDIA)
5460 int tr = sh_media_register_for_return ();
5462 if (tr >= 0)
5464 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
5465 gen_rtx_REG (DImode, PR_MEDIA_REG));
5467 /* ??? We should suppress saving pr when we don't need it, but this
5468 is tricky because of builtin_return_address. */
5470 /* If this function only exits with sibcalls, this copy
5471 will be flagged as dead. */
5472 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5473 const0_rtx,
5474 REG_NOTES (insn));
5478 /* Emit the code for SETUP_VARARGS. */
5479 if (current_function_stdarg)
5481 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
5483 /* Push arg regs as if they'd been provided by the caller on the stack. */
5484 for (i = 0; i < NPARM_REGS(SImode); i++)
5486 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
5487 rtx insn;
5489 if (i >= (NPARM_REGS(SImode)
5490 - current_function_args_info.arg_count[(int) SH_ARG_INT]
5492 break;
5493 insn = push (rn);
5494 RTX_FRAME_RELATED_P (insn) = 0;
5499 /* If we're supposed to switch stacks at function entry, do so now. */
5500 if (sp_switch)
5501 emit_insn (gen_sp_switch_1 ());
5503 d = calc_live_regs (&live_regs_mask);
5504 /* ??? Maybe we could save some switching if we can move a mode switch
5505 that already happens to be at the function start into the prologue. */
5506 if (target_flags != save_flags && ! current_function_interrupt)
5507 emit_insn (gen_toggle_sz ());
5509 if (TARGET_SH5)
5511 int offset_base, offset;
5512 rtx r0 = NULL_RTX;
5513 int offset_in_r0 = -1;
5514 int sp_in_r0 = 0;
5515 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5516 int total_size, save_size;
5517 save_schedule schedule;
5518 save_entry *entry;
5519 int *tmp_pnt;
5521 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
5522 && ! current_function_interrupt)
5523 r0 = gen_rtx_REG (Pmode, R0_REG);
5525 /* D is the actual number of bytes that we need for saving registers;
5526 however, in initial_elimination_offset we have committed to using
5527 an additional TREGS_SPACE amount of bytes. In order to keep both
5528 addresses to arguments supplied by the caller and local variables
5529 valid, we must keep this gap. Place it between the incoming
5530 arguments and the actually saved registers in a bid to optimize
5531 locality of reference. */
5532 total_size = d + tregs_space;
5533 total_size += rounded_frame_size (total_size);
5534 save_size = total_size - rounded_frame_size (d);
5535 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
5536 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5537 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
5539 /* If adjusting the stack in a single step costs nothing extra, do so.
5540 I.e. either if a single addi is enough, or we need a movi anyway,
5541 and we don't exceed the maximum offset range (the test for the
5542 latter is conservative for simplicity). */
5543 if (TARGET_SHMEDIA
5544 && (CONST_OK_FOR_I10 (-total_size)
5545 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
5546 && total_size <= 2044)))
5547 d_rounding = total_size - save_size;
5549 offset_base = d + d_rounding;
5551 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
5552 0, NULL);
5554 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
5555 tmp_pnt = schedule.temps;
5556 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
5558 enum machine_mode mode = entry->mode;
5559 int reg = entry->reg;
5560 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
5562 offset = entry->offset;
5564 reg_rtx = gen_rtx_REG (mode, reg);
5566 mem_rtx = gen_rtx_MEM (mode,
5567 gen_rtx_PLUS (Pmode,
5568 stack_pointer_rtx,
5569 GEN_INT (offset)));
5571 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
5573 if (! r0)
5574 abort ();
5575 mem_rtx = NULL_RTX;
5577 try_pre_dec:
5579 if (HAVE_PRE_DECREMENT
5580 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
5581 || mem_rtx == NULL_RTX
5582 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5584 pre_dec = gen_rtx_MEM (mode,
5585 gen_rtx_PRE_DEC (Pmode, r0));
5587 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
5588 pre_dec_ok);
5590 pre_dec = NULL_RTX;
5592 break;
5594 pre_dec_ok:
5595 mem_rtx = NULL_RTX;
5596 offset += GET_MODE_SIZE (mode);
5598 while (0);
5600 if (mem_rtx != NULL_RTX)
5601 goto addr_ok;
5603 if (offset_in_r0 == -1)
5605 emit_move_insn (r0, GEN_INT (offset));
5606 offset_in_r0 = offset;
5608 else if (offset != offset_in_r0)
5610 emit_move_insn (r0,
5611 gen_rtx_PLUS
5612 (Pmode, r0,
5613 GEN_INT (offset - offset_in_r0)));
5614 offset_in_r0 += offset - offset_in_r0;
5617 if (pre_dec != NULL_RTX)
5619 if (! sp_in_r0)
5621 emit_move_insn (r0,
5622 gen_rtx_PLUS
5623 (Pmode, r0, stack_pointer_rtx));
5624 sp_in_r0 = 1;
5627 offset -= GET_MODE_SIZE (mode);
5628 offset_in_r0 -= GET_MODE_SIZE (mode);
5630 mem_rtx = pre_dec;
5632 else if (sp_in_r0)
5633 mem_rtx = gen_rtx_MEM (mode, r0);
5634 else
5635 mem_rtx = gen_rtx_MEM (mode,
5636 gen_rtx_PLUS (Pmode,
5637 stack_pointer_rtx,
5638 r0));
5640 /* We must not use an r0-based address for target-branch
5641 registers or for special registers without pre-dec
5642 memory addresses, since we store their values in r0
5643 first. */
5644 if (TARGET_REGISTER_P (reg)
5645 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5646 && mem_rtx != pre_dec))
5647 abort ();
5649 addr_ok:
5650 if (TARGET_REGISTER_P (reg)
5651 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5652 && mem_rtx != pre_dec))
5654 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
5656 emit_move_insn (tmp_reg, reg_rtx);
5658 if (REGNO (tmp_reg) == R0_REG)
5660 offset_in_r0 = -1;
5661 sp_in_r0 = 0;
5662 if (refers_to_regno_p (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0))
5663 abort ();
5666 if (*++tmp_pnt <= 0)
5667 tmp_pnt = schedule.temps;
5669 reg_rtx = tmp_reg;
5672 rtx insn;
5674 /* Mark as interesting for the dwarf cfi generator. */
5675 insn = emit_move_insn (mem_rtx, reg_rtx);
5676 RTX_FRAME_RELATED_P (insn) = 1;
5678 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
5680 rtx reg_rtx = gen_rtx_REG (mode, reg);
5681 rtx set, note_rtx;
5682 rtx mem_rtx = gen_rtx_MEM (mode,
5683 gen_rtx_PLUS (Pmode,
5684 stack_pointer_rtx,
5685 GEN_INT (offset)));
5687 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
5688 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
5689 REG_NOTES (insn));
5690 REG_NOTES (insn) = note_rtx;
5695 if (entry->offset != d_rounding)
5696 abort ();
5698 else
5699 push_regs (&live_regs_mask, current_function_interrupt);
5701 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
5703 rtx insn = get_last_insn ();
5704 rtx last = emit_insn (gen_GOTaddr2picreg ());
5706 /* Mark these insns as possibly dead. Sometimes, flow2 may
5707 delete all uses of the PIC register. In this case, let it
5708 delete the initialization too. */
5711 insn = NEXT_INSN (insn);
5713 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5714 const0_rtx,
5715 REG_NOTES (insn));
5717 while (insn != last);
5720 if (SHMEDIA_REGS_STACK_ADJUST ())
5722 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5723 function_symbol (TARGET_FPU_ANY
5724 ? "__GCC_push_shmedia_regs"
5725 : "__GCC_push_shmedia_regs_nofpu"));
5726 /* This must NOT go through the PLT, otherwise mach and macl
5727 may be clobbered. */
5728 emit_insn (gen_shmedia_save_restore_regs_compact
5729 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
5732 if (target_flags != save_flags && ! current_function_interrupt)
5734 rtx insn = emit_insn (gen_toggle_sz ());
5736 /* If we're lucky, a mode switch in the function body will
5737 overwrite fpscr, turning this insn dead. Tell flow this
5738 insn is ok to delete. */
5739 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5740 const0_rtx,
5741 REG_NOTES (insn));
5744 target_flags = save_flags;
5746 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
5747 stack_pointer_rtx, 0, NULL);
5749 if (frame_pointer_needed)
5750 frame_insn (GEN_MOV (frame_pointer_rtx, stack_pointer_rtx));
5752 if (TARGET_SHCOMPACT
5753 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5755 /* This must NOT go through the PLT, otherwise mach and macl
5756 may be clobbered. */
5757 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5758 function_symbol ("__GCC_shcompact_incoming_args"));
5759 emit_insn (gen_shcompact_incoming_args ());
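/* Expand the epilogue for the current function: release the local
   frame (through the frame pointer if one was set up), pop any SHmedia
   register save area, restore the registers the prologue saved (r0
   post-increment loads on SH5 where legitimate, plain pops otherwise,
   toggling the fpscr SZ bit around the floating-point restores when
   needed), pop the pretend-args area, and switch back to the normal
   stack for sp_switch interrupt handlers.  SIBCALL_P is true when this
   epilogue precedes a sibling call rather than a return.  */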
5763 void
5764 sh_expand_epilogue (bool sibcall_p)
5766 HARD_REG_SET live_regs_mask;
5767 int d, i;
5768 int d_rounding = 0;
5770 int save_flags = target_flags;
5771 int frame_size, save_size;
5772 int fpscr_deferred = 0;
5773 int e = sibcall_p ? -1 : 1;
5775 d = calc_live_regs (&live_regs_mask);
5777 save_size = d;
5778 frame_size = rounded_frame_size (d);
5780 if (TARGET_SH5)
5782 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5783 int total_size;
5784 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
5785 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5786 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
5788 total_size = d + tregs_space;
5789 total_size += rounded_frame_size (total_size);
5790 save_size = total_size - frame_size;
5792 /* If adjusting the stack in a single step costs nothing extra, do so.
5793 I.e. either if a single addi is enough, or we need a movi anyway,
5794 and we don't exceed the maximum offset range (the test for the
5795 latter is conservative for simplicity). */
5796 if (TARGET_SHMEDIA
5797 && ! frame_pointer_needed
5798 && (CONST_OK_FOR_I10 (total_size)
5799 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
5800 && total_size <= 2044)))
5801 d_rounding = frame_size;
5803 frame_size -= d_rounding;
5806 if (frame_pointer_needed)
5808 output_stack_adjust (frame_size, frame_pointer_rtx, e, &live_regs_mask);
5810 /* We must avoid moving the stack pointer adjustment past code
5811 which reads from the local frame, else an interrupt could
5812 occur after the SP adjustment and clobber data in the local
5813 frame. */
5814 emit_insn (gen_blockage ());
5815 emit_insn (GEN_MOV (stack_pointer_rtx, frame_pointer_rtx));
5817 else if (frame_size)
5819 /* We must avoid moving the stack pointer adjustment past code
5820 which reads from the local frame, else an interrupt could
5821 occur after the SP adjustment and clobber data in the local
5822 frame. */
5823 emit_insn (gen_blockage ());
5824 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
5827 if (SHMEDIA_REGS_STACK_ADJUST ())
5829 emit_move_insn (gen_rtx_REG (Pmode, R0_REG),
5830 function_symbol (TARGET_FPU_ANY
5831 ? "__GCC_pop_shmedia_regs"
5832 : "__GCC_pop_shmedia_regs_nofpu"));
5833 /* This must NOT go through the PLT, otherwise mach and macl
5834 may be clobbered. */
5835 emit_insn (gen_shmedia_save_restore_regs_compact
5836 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
5839 /* Pop all the registers. */
5841 if (target_flags != save_flags && ! current_function_interrupt)
5842 emit_insn (gen_toggle_sz ());
5843 if (TARGET_SH5)
5845 int offset_base, offset;
5846 int offset_in_r0 = -1;
5847 int sp_in_r0 = 0;
5848 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
5849 save_schedule schedule;
5850 save_entry *entry;
5851 int *tmp_pnt;
5853 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
5854 offset_base = -entry[1].offset + d_rounding;
5855 tmp_pnt = schedule.temps;
5856 for (; entry->mode != VOIDmode; entry--)
5858 enum machine_mode mode = entry->mode;
5859 int reg = entry->reg;
5860 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
5862 offset = offset_base + entry->offset;
5863 reg_rtx = gen_rtx_REG (mode, reg);
5865 mem_rtx = gen_rtx_MEM (mode,
5866 gen_rtx_PLUS (Pmode,
5867 stack_pointer_rtx,
5868 GEN_INT (offset)));
5870 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
5872 mem_rtx = NULL_RTX;
5874 try_post_inc:
5876 if (HAVE_POST_INCREMENT
5877 && (offset == offset_in_r0
5878 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
5879 && mem_rtx == NULL_RTX)
5880 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5882 post_inc = gen_rtx_MEM (mode,
5883 gen_rtx_POST_INC (Pmode, r0));
5885 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
5886 post_inc_ok);
5888 post_inc = NULL_RTX;
5890 break;
5892 post_inc_ok:
5893 mem_rtx = NULL_RTX;
5895 while (0);
5897 if (mem_rtx != NULL_RTX)
5898 goto addr_ok;
5900 if (offset_in_r0 == -1)
5902 emit_move_insn (r0, GEN_INT (offset));
5903 offset_in_r0 = offset;
5905 else if (offset != offset_in_r0)
5907 emit_move_insn (r0,
5908 gen_rtx_PLUS
5909 (Pmode, r0,
5910 GEN_INT (offset - offset_in_r0)));
5911 offset_in_r0 += offset - offset_in_r0;
5914 if (post_inc != NULL_RTX)
5916 if (! sp_in_r0)
5918 emit_move_insn (r0,
5919 gen_rtx_PLUS
5920 (Pmode, r0, stack_pointer_rtx));
5921 sp_in_r0 = 1;
5924 mem_rtx = post_inc;
5926 offset_in_r0 += GET_MODE_SIZE (mode);
5928 else if (sp_in_r0)
5929 mem_rtx = gen_rtx_MEM (mode, r0);
5930 else
5931 mem_rtx = gen_rtx_MEM (mode,
5932 gen_rtx_PLUS (Pmode,
5933 stack_pointer_rtx,
5934 r0));
5936 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5937 && mem_rtx != post_inc)
5938 abort ();
5940 addr_ok:
5941 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5942 && mem_rtx != post_inc)
5944 insn = emit_move_insn (r0, mem_rtx);
5945 mem_rtx = r0;
5947 else if (TARGET_REGISTER_P (reg))
5949 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
5951 /* Give the scheduler a bit of freedom by using up to
5952 MAX_TEMPS registers in a round-robin fashion. */
5953 insn = emit_move_insn (tmp_reg, mem_rtx);
5954 mem_rtx = tmp_reg;
5955 if (*++tmp_pnt < 0)
5956 tmp_pnt = schedule.temps;
5959 insn = emit_move_insn (reg_rtx, mem_rtx);
5960 if (reg == PR_MEDIA_REG && sh_media_register_for_return () >= 0)
5961 /* This is dead, unless we return with a sibcall. */
5962 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5963 const0_rtx,
5964 REG_NOTES (insn));
5967 if (entry->offset + offset_base != d + d_rounding)
5968 abort ();
5970 else /* ! TARGET_SH5 */
5972 save_size = 0;
5973 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
5974 pop (PR_REG);
5975 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5977 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
5979 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
5980 && hard_regs_intersect_p (&live_regs_mask,
5981 &reg_class_contents[DF_REGS]))
5982 fpscr_deferred = 1;
5983 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
5984 pop (j);
5985 if (j == FIRST_FP_REG && fpscr_deferred)
5986 pop (FPSCR_REG);
5990 if (target_flags != save_flags && ! current_function_interrupt)
5991 emit_insn (gen_toggle_sz ());
5992 target_flags = save_flags;
5994 output_stack_adjust (current_function_pretend_args_size
5995 + save_size + d_rounding
5996 + current_function_args_info.stack_regs * 8,
5997 stack_pointer_rtx, e, NULL);
5999 if (current_function_calls_eh_return)
6000 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
6001 EH_RETURN_STACKADJ_RTX));
6003 /* Switch back to the normal stack if necessary. */
6004 if (sp_switch)
6005 emit_insn (gen_sp_switch_2 ());
6007 /* Tell flow the insn that pops PR isn't dead. */
6008 /* PR_REG will never be live in SHmedia mode, and we don't need to
6009 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6010 by the return pattern. */
6011 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6012 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
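/* Cache for sh_need_epilogue below: 0 means not yet computed, 1 means
   an epilogue is needed, -1 means it is not.  sh_need_epilogue finds
   out by expanding the epilogue into a throw-away sequence and seeing
   whether any insns were generated; the cache is reset per function in
   sh_output_function_epilogue.  */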
6015 static int sh_need_epilogue_known = 0;
6018 sh_need_epilogue (void)
6020 if (! sh_need_epilogue_known)
6022 rtx epilogue;
6024 start_sequence ();
6025 sh_expand_epilogue (0);
6026 epilogue = get_insns ();
6027 end_sequence ();
6028 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
6030 return sh_need_epilogue_known > 0;
6033 /* Emit code to change the current function's return address to RA.
6034 TEMP is available as a scratch register, if needed. */
6036 void
6037 sh_set_return_address (rtx ra, rtx tmp)
6039 HARD_REG_SET live_regs_mask;
6040 int d;
6041 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6042 int pr_offset;
6044 d = calc_live_regs (&live_regs_mask);
6046 /* If pr_reg isn't live, we can set it (or the register given in
6047 sh_media_register_for_return) directly. */
6048 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6050 rtx rr;
6052 if (TARGET_SHMEDIA)
6054 int rr_regno = sh_media_register_for_return ();
6056 if (rr_regno < 0)
6057 rr_regno = pr_reg;
6059 rr = gen_rtx_REG (DImode, rr_regno);
6061 else
6062 rr = gen_rtx_REG (SImode, pr_reg);
6064 emit_insn (GEN_MOV (rr, ra));
6065 /* Tell flow the register for return isn't dead. */
6066 emit_insn (gen_rtx_USE (VOIDmode, rr));
6067 return;
6070 if (TARGET_SH5)
6072 int offset;
6073 save_schedule schedule;
6074 save_entry *entry;
6076 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
6077 offset = entry[1].offset;
6078 for (; entry->mode != VOIDmode; entry--)
6079 if (entry->reg == pr_reg)
6080 goto found;
6082 /* We can't find the PR register. */
6083 abort ();
6085 found:
6086 offset = entry->offset - offset;
6087 pr_offset = (rounded_frame_size (d) + offset
6088 + SHMEDIA_REGS_STACK_ADJUST ());
6090 else
6091 pr_offset = rounded_frame_size (d);
6093 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6094 emit_insn (GEN_ADD3 (tmp, tmp, frame_pointer_rtx));
6096 tmp = gen_rtx_MEM (Pmode, tmp);
6097 emit_insn (GEN_MOV (tmp, ra));
6100 /* Clear variables at function end. */
6102 static void
6103 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6104 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6106 trap_exit = pragma_interrupt = pragma_trapa = pragma_nosave_low_regs = 0;
6107 sh_need_epilogue_known = 0;
6108 sp_switch = NULL_RTX;
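/* Expand __builtin_saveregs: make sure the unnamed argument registers
   are saved to memory and return the address of the save area.  For
   SH2E/SH4 the block is allocated on the stack, with the unnamed
   SFmode float registers stored first (emitted in reverse order so the
   saves can use pre-decrement addressing) and the unnamed integer
   registers after them; on SH5 the integer registers are instead
   flushed through the call cookie / pretend-args mechanism.  */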
6111 static rtx
6112 sh_builtin_saveregs (void)
6114 /* First unnamed integer register. */
6115 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
6116 /* Number of integer registers we need to save. */
6117 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6118 /* First unnamed SFmode float reg */
6119 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6120 /* Number of SFmode float regs to save. */
6121 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6122 rtx regbuf, fpregs;
6123 int bufsize, regno;
6124 HOST_WIDE_INT alias_set;
6126 if (TARGET_SH5)
6128 if (n_intregs)
6130 int pushregs = n_intregs;
6132 while (pushregs < NPARM_REGS (SImode) - 1
6133 && (CALL_COOKIE_INT_REG_GET
6134 (current_function_args_info.call_cookie,
6135 NPARM_REGS (SImode) - pushregs)
6136 == 1))
6138 current_function_args_info.call_cookie
6139 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6140 - pushregs, 1);
6141 pushregs++;
6144 if (pushregs == NPARM_REGS (SImode))
6145 current_function_args_info.call_cookie
6146 |= (CALL_COOKIE_INT_REG (0, 1)
6147 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6148 else
6149 current_function_args_info.call_cookie
6150 |= CALL_COOKIE_STACKSEQ (pushregs);
6152 current_function_pretend_args_size += 8 * n_intregs;
6154 if (TARGET_SHCOMPACT)
6155 return const0_rtx;
6158 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
6160 error ("__builtin_saveregs not supported by this subtarget");
6161 return const0_rtx;
6164 if (TARGET_SHMEDIA)
6165 n_floatregs = 0;
6167 /* Allocate block of memory for the regs. */
6168 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6169 Or can assign_stack_local accept a 0 SIZE argument? */
6170 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
6172 if (TARGET_SHMEDIA)
6173 regbuf = gen_rtx_MEM (BLKmode,
6174 gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
6175 else if (n_floatregs & 1)
6177 rtx addr;
6179 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6180 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
6181 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6182 regbuf = change_address (regbuf, BLKmode, addr);
6184 else
6185 regbuf = assign_stack_local (BLKmode, bufsize, 0);
6186 alias_set = get_varargs_alias_set ();
6187 set_mem_alias_set (regbuf, alias_set);
6189 /* Save int args.
6190 This is optimized to only save the regs that are necessary. Explicitly
6191 named args need not be saved. */
6192 if (n_intregs > 0)
6193 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
6194 adjust_address (regbuf, BLKmode,
6195 n_floatregs * UNITS_PER_WORD),
6196 n_intregs);
6198 if (TARGET_SHMEDIA)
6199 /* Return the address of the regbuf. */
6200 return XEXP (regbuf, 0);
6202 /* Save float args.
6203 This is optimized to only save the regs that are necessary. Explicitly
6204 named args need not be saved.
6205 We explicitly build a pointer to the buffer because it halves the insn
6206 count when not optimizing (otherwise the pointer is built for each reg
6207 saved).
6208 We emit the moves in reverse order so that we can use predecrement. */
6210 fpregs = gen_reg_rtx (Pmode);
6211 emit_move_insn (fpregs, XEXP (regbuf, 0));
6212 emit_insn (gen_addsi3 (fpregs, fpregs,
6213 GEN_INT (n_floatregs * UNITS_PER_WORD)));
6214 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
6216 rtx mem;
6217 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
6219 emit_insn (gen_addsi3 (fpregs, fpregs,
6220 GEN_INT (-2 * UNITS_PER_WORD)));
6221 mem = gen_rtx_MEM (DFmode, fpregs);
6222 set_mem_alias_set (mem, alias_set);
6223 emit_move_insn (mem,
6224 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
6226 regno = first_floatreg;
6227 if (regno & 1)
6229 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6230 mem = gen_rtx_MEM (SFmode, fpregs);
6231 set_mem_alias_set (mem, alias_set);
6232 emit_move_insn (mem,
6233 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
6234 - (TARGET_LITTLE_ENDIAN != 0)));
6237 else
6238 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
6240 rtx mem;
6242 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6243 mem = gen_rtx_MEM (SFmode, fpregs);
6244 set_mem_alias_set (mem, alias_set);
6245 emit_move_insn (mem,
6246 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
6249 /* Return the address of the regbuf. */
6250 return XEXP (regbuf, 0);
6253 /* Define the `__builtin_va_list' type for the ABI. */
6255 static tree
6256 sh_build_builtin_va_list (void)
6258 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6259 tree record;
6261 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
6262 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6263 return ptr_type_node;
6265 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6267 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
6268 ptr_type_node);
6269 f_next_o_limit = build_decl (FIELD_DECL,
6270 get_identifier ("__va_next_o_limit"),
6271 ptr_type_node);
6272 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
6273 ptr_type_node);
6274 f_next_fp_limit = build_decl (FIELD_DECL,
6275 get_identifier ("__va_next_fp_limit"),
6276 ptr_type_node);
6277 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
6278 ptr_type_node);
6280 DECL_FIELD_CONTEXT (f_next_o) = record;
6281 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
6282 DECL_FIELD_CONTEXT (f_next_fp) = record;
6283 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
6284 DECL_FIELD_CONTEXT (f_next_stack) = record;
6286 TYPE_FIELDS (record) = f_next_o;
6287 TREE_CHAIN (f_next_o) = f_next_o_limit;
6288 TREE_CHAIN (f_next_o_limit) = f_next_fp;
6289 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
6290 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
6292 layout_type (record);
6294 return record;
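/* The record built above amounts to

     struct __va_list_tag
     {
       void *__va_next_o;         next unconsumed integer register slot
       void *__va_next_o_limit;   end of the integer register save area
       void *__va_next_fp;        next unconsumed FP register slot
       void *__va_next_fp_limit;  end of the FP register save area
       void *__va_next_stack;     next argument passed on the stack
     };

   with the fields initialized by sh_va_start below from the save area
   that __builtin_saveregs returns.  */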
6297 /* Implement `va_start' for varargs and stdarg. */
6299 void
6300 sh_va_start (tree valist, rtx nextarg)
6302 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6303 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6304 tree t, u;
6305 int nfp, nint;
6307 if (TARGET_SH5)
6309 expand_builtin_saveregs ();
6310 std_expand_builtin_va_start (valist, nextarg);
6311 return;
6314 if ((! TARGET_SH2E && ! TARGET_SH4)
6315 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6317 std_expand_builtin_va_start (valist, nextarg);
6318 return;
6321 f_next_o = TYPE_FIELDS (va_list_type_node);
6322 f_next_o_limit = TREE_CHAIN (f_next_o);
6323 f_next_fp = TREE_CHAIN (f_next_o_limit);
6324 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6325 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6327 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6328 NULL_TREE);
6329 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6330 valist, f_next_o_limit, NULL_TREE);
6331 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
6332 NULL_TREE);
6333 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6334 valist, f_next_fp_limit, NULL_TREE);
6335 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6336 valist, f_next_stack, NULL_TREE);
6338 /* Call __builtin_saveregs. */
6339 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
6340 t = build (MODIFY_EXPR, ptr_type_node, next_fp, u);
6341 TREE_SIDE_EFFECTS (t) = 1;
6342 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6344 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
6345 if (nfp < 8)
6346 nfp = 8 - nfp;
6347 else
6348 nfp = 0;
6349 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6350 build_int_cst (NULL_TREE, UNITS_PER_WORD * nfp)));
6351 t = build (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
6352 TREE_SIDE_EFFECTS (t) = 1;
6353 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6355 t = build (MODIFY_EXPR, ptr_type_node, next_o, u);
6356 TREE_SIDE_EFFECTS (t) = 1;
6357 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6359 nint = current_function_args_info.arg_count[SH_ARG_INT];
6360 if (nint < 4)
6361 nint = 4 - nint;
6362 else
6363 nint = 0;
6364 u = fold (build (PLUS_EXPR, ptr_type_node, u,
6365 build_int_cst (NULL_TREE, UNITS_PER_WORD * nint)));
6366 t = build (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
6367 TREE_SIDE_EFFECTS (t) = 1;
6368 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6370 u = make_tree (ptr_type_node, nextarg);
6371 t = build (MODIFY_EXPR, ptr_type_node, next_stack, u);
6372 TREE_SIDE_EFFECTS (t) = 1;
6373 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
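/* After sh_va_start, the save area built by sh_builtin_saveregs is
   addressed roughly like this (lowest address first):

     regbuf + 0:                  unnamed SFmode argument registers
                                    next_fp .. next_fp_limit
     regbuf + 4 * n_floatregs:    unnamed integer argument registers
                                    next_o  .. next_o_limit

   while next_stack points at the first stack-passed unnamed argument.  */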
6376 /* Implement `va_arg'. */
6378 static tree
6379 sh_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p,
6380 tree *post_p ATTRIBUTE_UNUSED)
6382 HOST_WIDE_INT size, rsize;
6383 tree tmp, pptr_type_node;
6384 tree addr, lab_over, result = NULL;
6385 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
6387 if (pass_by_ref)
6388 type = build_pointer_type (type);
6390 size = int_size_in_bytes (type);
6391 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6392 pptr_type_node = build_pointer_type (ptr_type_node);
6394 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
6395 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
6397 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6398 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6399 int pass_as_float;
6400 tree lab_false;
6402 f_next_o = TYPE_FIELDS (va_list_type_node);
6403 f_next_o_limit = TREE_CHAIN (f_next_o);
6404 f_next_fp = TREE_CHAIN (f_next_o_limit);
6405 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6406 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6408 next_o = build (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6409 NULL_TREE);
6410 next_o_limit = build (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6411 valist, f_next_o_limit, NULL_TREE);
6412 next_fp = build (COMPONENT_REF, TREE_TYPE (f_next_fp),
6413 valist, f_next_fp, NULL_TREE);
6414 next_fp_limit = build (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6415 valist, f_next_fp_limit, NULL_TREE);
6416 next_stack = build (COMPONENT_REF, TREE_TYPE (f_next_stack),
6417 valist, f_next_stack, NULL_TREE);
6419 /* Structures with a single member with a distinct mode are passed
6420 like their member. This is relevant if the latter has a REAL_TYPE
6421 or COMPLEX_TYPE type. */
6422 if (TREE_CODE (type) == RECORD_TYPE
6423 && TYPE_FIELDS (type)
6424 && TREE_CODE (TYPE_FIELDS (type)) == FIELD_DECL
6425 && (TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == REAL_TYPE
6426 || TREE_CODE (TREE_TYPE (TYPE_FIELDS (type))) == COMPLEX_TYPE)
6427 && TREE_CHAIN (TYPE_FIELDS (type)) == NULL_TREE)
6428 type = TREE_TYPE (TYPE_FIELDS (type));
6430 if (TARGET_SH4)
6432 pass_as_float = ((TREE_CODE (type) == REAL_TYPE && size <= 8)
6433 || (TREE_CODE (type) == COMPLEX_TYPE
6434 && TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
6435 && size <= 16));
6437 else
6439 pass_as_float = (TREE_CODE (type) == REAL_TYPE && size == 4);
6442 addr = create_tmp_var (pptr_type_node, NULL);
6443 lab_false = create_artificial_label ();
6444 lab_over = create_artificial_label ();
6446 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
6448 if (pass_as_float)
6450 int first_floatreg
6451 = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6452 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6454 tmp = build (GE_EXPR, boolean_type_node, next_fp, next_fp_limit);
6455 tmp = build (COND_EXPR, void_type_node, tmp,
6456 build (GOTO_EXPR, void_type_node, lab_false),
6457 NULL);
6458 gimplify_and_add (tmp, pre_p);
6460 if (TYPE_ALIGN (type) > BITS_PER_WORD
6461 || (((TREE_CODE (type) == REAL_TYPE && size == 8) || size == 16)
6462 && (n_floatregs & 1)))
6464 tmp = fold_convert (ptr_type_node, size_int (UNITS_PER_WORD));
6465 tmp = build (BIT_AND_EXPR, ptr_type_node, next_fp, tmp);
6466 tmp = build (PLUS_EXPR, ptr_type_node, next_fp, tmp);
6467 tmp = build (MODIFY_EXPR, ptr_type_node, next_fp, tmp);
6468 gimplify_and_add (tmp, pre_p);
6471 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
6472 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6473 gimplify_and_add (tmp, pre_p);
6475 #ifdef FUNCTION_ARG_SCmode_WART
6476 if (TYPE_MODE (type) == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
6478 tree subtype = TREE_TYPE (type);
6479 tree real, imag;
6481 imag = std_gimplify_va_arg_expr (valist, subtype, pre_p, NULL);
6482 imag = get_initialized_tmp_var (imag, pre_p, NULL);
6484 real = std_gimplify_va_arg_expr (valist, subtype, pre_p, NULL);
6485 real = get_initialized_tmp_var (real, pre_p, NULL);
6487 result = build (COMPLEX_EXPR, type, real, imag);
6488 result = get_initialized_tmp_var (result, pre_p, NULL);
6490 #endif /* FUNCTION_ARG_SCmode_WART */
6492 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6493 gimplify_and_add (tmp, pre_p);
6495 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6496 gimplify_and_add (tmp, pre_p);
6498 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6499 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6500 gimplify_and_add (tmp, pre_p);
6502 else
6504 tmp = fold_convert (ptr_type_node, size_int (rsize));
6505 tmp = build (PLUS_EXPR, ptr_type_node, next_o, tmp);
6506 tmp = build (GT_EXPR, boolean_type_node, tmp, next_o_limit);
6507 tmp = build (COND_EXPR, void_type_node, tmp,
6508 build (GOTO_EXPR, void_type_node, lab_false),
6509 NULL);
6510 gimplify_and_add (tmp, pre_p);
6512 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
6513 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6514 gimplify_and_add (tmp, pre_p);
6516 tmp = build (GOTO_EXPR, void_type_node, lab_over);
6517 gimplify_and_add (tmp, pre_p);
6519 tmp = build (LABEL_EXPR, void_type_node, lab_false);
6520 gimplify_and_add (tmp, pre_p);
6522 if (size > 4 && ! TARGET_SH4)
6524 tmp = build (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
6525 gimplify_and_add (tmp, pre_p);
6528 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6529 tmp = build (MODIFY_EXPR, void_type_node, addr, tmp);
6530 gimplify_and_add (tmp, pre_p);
6533 if (!result)
6535 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6536 gimplify_and_add (tmp, pre_p);
6540 /* ??? In va-sh.h, there had been code to make values larger than
6541 size 8 indirect. This does not match the FUNCTION_ARG macros. */
6543 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
6544 if (result)
6546 tmp = build (MODIFY_EXPR, void_type_node, result, tmp);
6547 gimplify_and_add (tmp, pre_p);
6549 tmp = build (LABEL_EXPR, void_type_node, lab_over);
6550 gimplify_and_add (tmp, pre_p);
6552 else
6553 result = tmp;
6555 if (pass_by_ref)
6556 result = build_fold_indirect_ref (result);
6558 return result;
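/* Return nonzero if prototype promotion of small integer arguments is
   wanted when they are passed; the Hitachi / Renesas conventions and
   the renesas attribute suppress this promotion.  */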
6561 bool
6562 sh_promote_prototypes (tree type)
6564 if (TARGET_HITACHI)
6565 return 0;
6566 if (! type)
6567 return 1;
6568 return ! sh_attr_renesas_p (type);
6571 /* Whether an argument must be passed by reference. On SHcompact, we
6572 pretend arguments wider than 32-bits that would have been passed in
6573 registers are passed by reference, so that an SHmedia trampoline
6574 loads them into the full 64-bits registers. */
6576 static int
6577 shcompact_byref (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6578 tree type, bool named)
6580 unsigned HOST_WIDE_INT size;
6582 if (type)
6583 size = int_size_in_bytes (type);
6584 else
6585 size = GET_MODE_SIZE (mode);
6587 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
6588 && (!named
6589 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
6590 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
6591 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
6592 && size > 4
6593 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
6594 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
6595 return size;
6596 else
6597 return 0;
6600 static bool
6601 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6602 tree type, bool named)
6604 if (targetm.calls.must_pass_in_stack (mode, type))
6605 return true;
6607 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
6608 wants to know about pass-by-reference semantics for incoming
6609 arguments. */
6610 if (! cum)
6611 return false;
6613 if (TARGET_SHCOMPACT)
6615 cum->byref = shcompact_byref (cum, mode, type, named);
6616 return cum->byref != 0;
6619 return false;
6622 static bool
6623 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6624 tree type, bool named ATTRIBUTE_UNUSED)
6626 /* ??? How can it possibly be correct to return true only on the
6627 caller side of the equation? Is there someplace else in the
6628 sh backend that's magically producing the copies? */
6629 return (cum->outgoing
6630 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
6631 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
6634 /* Define where to put the arguments to a function.
6635 Value is zero to push the argument on the stack,
6636 or a hard register in which to store the argument.
6638 MODE is the argument's machine mode.
6639 TYPE is the data type of the argument (as a tree).
6640 This is null for libcalls where that information may
6641 not be available.
6642 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6643 the preceding args and about the function being called.
6644 NAMED is nonzero if this argument is a named parameter
6645 (otherwise it is an extra parameter matching an ellipsis).
6647 On SH the first args are normally in registers
6648 and the rest are pushed. Any arg that starts within the first
6649 NPARM_REGS words is at least partially passed in a register unless
6650 its data type forbids. */
6654 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
6655 tree type, int named)
6657 if (! TARGET_SH5 && mode == VOIDmode)
6658 return GEN_INT (ca->renesas_abi ? 1 : 0);
6660 if (! TARGET_SH5
6661 && PASS_IN_REG_P (*ca, mode, type)
6662 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
6664 int regno;
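/* On little-endian SH4, the two SFmode halves of an SCmode argument
   end up in float registers with their pair order swapped (the
   FUNCTION_ARG_SCmode_WART), so describe the placement explicitly with
   a PARALLEL giving the register used for each half.  */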
6666 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
6667 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
6669 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
6670 gen_rtx_REG (SFmode,
6671 BASE_ARG_REG (mode)
6672 + (ROUND_REG (*ca, mode) ^ 1)),
6673 const0_rtx);
6674 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
6675 gen_rtx_REG (SFmode,
6676 BASE_ARG_REG (mode)
6677 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
6678 GEN_INT (4));
6679 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
6682 /* If the alignment of a DF value causes an SF register to be
6683 skipped, we will use that skipped register for the next SF
6684 value. */
6685 if ((TARGET_HITACHI || ca->renesas_abi)
6686 && ca->free_single_fp_reg
6687 && mode == SFmode)
6688 return gen_rtx_REG (mode, ca->free_single_fp_reg);
6690 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
6691 ^ (mode == SFmode && TARGET_SH4
6692 && TARGET_LITTLE_ENDIAN != 0
6693 && ! TARGET_HITACHI && ! ca->renesas_abi);
6694 return gen_rtx_REG (mode, regno);
6698 if (TARGET_SH5)
6700 if (mode == VOIDmode && TARGET_SHCOMPACT)
6701 return GEN_INT (ca->call_cookie);
6703 /* The following test assumes unnamed arguments are promoted to
6704 DFmode. */
6705 if (mode == SFmode && ca->free_single_fp_reg)
6706 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
6708 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
6709 && (named || ! ca->prototype_p)
6710 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
6712 if (! ca->prototype_p && TARGET_SHMEDIA)
6713 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
6715 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
6716 FIRST_FP_PARM_REG
6717 + ca->arg_count[(int) SH_ARG_FLOAT]);
6720 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
6721 && (! TARGET_SHCOMPACT
6722 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
6723 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
6724 type, named))))
6726 return gen_rtx_REG (mode, (FIRST_PARM_REG
6727 + ca->arg_count[(int) SH_ARG_INT]));
6730 return 0;
6733 return 0;
6736 /* Update the data in CUM to advance over an argument
6737 of mode MODE and data type TYPE.
6738 (TYPE is null for libcalls where that information may not be
6739 available.) */
6741 void
6742 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
6743 tree type, int named)
6745 if (ca->force_mem)
6746 ca->force_mem = 0;
6747 else if (TARGET_SH5)
6749 tree type2 = (ca->byref && type
6750 ? TREE_TYPE (type)
6751 : type);
6752 enum machine_mode mode2 = (ca->byref && type
6753 ? TYPE_MODE (type2)
6754 : mode);
6755 int dwords = ((ca->byref
6756 ? ca->byref
6757 : mode2 == BLKmode
6758 ? int_size_in_bytes (type2)
6759 : GET_MODE_SIZE (mode2)) + 7) / 8;
6760 int numregs = MIN (dwords, NPARM_REGS (SImode)
6761 - ca->arg_count[(int) SH_ARG_INT]);
6763 if (numregs)
6765 ca->arg_count[(int) SH_ARG_INT] += numregs;
6766 if (TARGET_SHCOMPACT
6767 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
6769 ca->call_cookie
6770 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6771 - numregs, 1);
6772 /* N.B. We want this also for outgoing. */
6773 ca->stack_regs += numregs;
6775 else if (ca->byref)
6777 if (! ca->outgoing)
6778 ca->stack_regs += numregs;
6779 ca->byref_regs += numregs;
6780 ca->byref = 0;
6782 ca->call_cookie
6783 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6784 - numregs, 2);
6785 while (--numregs);
6786 ca->call_cookie
6787 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
6788 - 1, 1);
6790 else if (dwords > numregs)
6792 int pushregs = numregs;
6794 if (TARGET_SHCOMPACT)
6795 ca->stack_regs += numregs;
6796 while (pushregs < NPARM_REGS (SImode) - 1
6797 && (CALL_COOKIE_INT_REG_GET
6798 (ca->call_cookie,
6799 NPARM_REGS (SImode) - pushregs)
6800 == 1))
6802 ca->call_cookie
6803 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6804 - pushregs, 1);
6805 pushregs++;
6807 if (numregs == NPARM_REGS (SImode))
6808 ca->call_cookie
6809 |= CALL_COOKIE_INT_REG (0, 1)
6810 | CALL_COOKIE_STACKSEQ (numregs - 1);
6811 else
6812 ca->call_cookie
6813 |= CALL_COOKIE_STACKSEQ (numregs);
6816 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
6817 && (named || ! ca->prototype_p))
6819 if (mode2 == SFmode && ca->free_single_fp_reg)
6820 ca->free_single_fp_reg = 0;
6821 else if (ca->arg_count[(int) SH_ARG_FLOAT]
6822 < NPARM_REGS (SFmode))
6824 int numfpregs
6825 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
6826 NPARM_REGS (SFmode)
6827 - ca->arg_count[(int) SH_ARG_FLOAT]);
6829 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
6831 if (TARGET_SHCOMPACT && ! ca->prototype_p)
6833 if (ca->outgoing && numregs > 0)
6836 ca->call_cookie
6837 |= (CALL_COOKIE_INT_REG
6838 (ca->arg_count[(int) SH_ARG_INT]
6839 - numregs + ((numfpregs - 2) / 2),
6840 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
6841 - numfpregs) / 2));
6843 while (numfpregs -= 2);
6845 else if (mode2 == SFmode && (named)
6846 && (ca->arg_count[(int) SH_ARG_FLOAT]
6847 < NPARM_REGS (SFmode)))
6848 ca->free_single_fp_reg
6849 = FIRST_FP_PARM_REG - numfpregs
6850 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
6853 return;
6856 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
6858 /* Note that we've used the skipped register. */
6859 if (mode == SFmode && ca->free_single_fp_reg)
6861 ca->free_single_fp_reg = 0;
6862 return;
6864 /* When we have a DF after an SF, there's an SF register that gets
6865 skipped in order to align the DF value. We note this skipped
6866 register, because the next SF value will use it, and not the
6867 SF that follows the DF. */
6868 if (mode == DFmode
6869 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
6871 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
6872 + BASE_ARG_REG (mode));
6876 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
6877 || PASS_IN_REG_P (*ca, mode, type))
6878 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
6879 = (ROUND_REG (*ca, mode)
6880 + (mode == BLKmode
6881 ? ROUND_ADVANCE (int_size_in_bytes (type))
6882 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
6885 /* The Renesas calling convention doesn't quite fit into this scheme since
6886 the address is passed like an invisible argument, but one that is always
6887 passed in memory. */
6888 static rtx
6889 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
6891 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
6892 return 0;
6893 return gen_rtx_REG (Pmode, 2);
6896 /* Worker function for TARGET_RETURN_IN_MEMORY. */
6898 static bool
6899 sh_return_in_memory (tree type, tree fndecl)
6901 if (TARGET_SH5)
6903 if (TYPE_MODE (type) == BLKmode)
6904 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
6905 else
6906 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
6908 else
6910 return (TYPE_MODE (type) == BLKmode
6911 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
6912 && TREE_CODE (type) == RECORD_TYPE));
6916 /* We actually emit the code in sh_expand_prologue. We used to use
6917 a static variable to flag that we need to emit this code, but that
6918 doesn't work when inlining, when functions are deferred and then emitted
6919 later. Fortunately, we already have two flags that are part of struct
6920 function that tell if a function uses varargs or stdarg. */
6921 static void
6922 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
6923 enum machine_mode mode,
6924 tree type,
6925 int *pretend_arg_size,
6926 int second_time ATTRIBUTE_UNUSED)
6928 if (! current_function_stdarg)
6929 abort ();
6930 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
6932 int named_parm_regs, anon_parm_regs;
6934 named_parm_regs = (ROUND_REG (*ca, mode)
6935 + (mode == BLKmode
6936 ? ROUND_ADVANCE (int_size_in_bytes (type))
6937 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
6938 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
6939 if (anon_parm_regs > 0)
6940 *pretend_arg_size = anon_parm_regs * 4;
6944 static bool
6945 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
6947 return TARGET_SH5;
6950 static bool
6951 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
6953 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
6957 /* Define the offset between two registers, one to be eliminated, and
6958 the other its replacement, at the start of a routine. */
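/* The offsets follow the frame layout set up by sh_expand_prologue,
   roughly, from higher to lower addresses:

     incoming arguments (and SH5 by-reference register copies)
     saved registers, plus SH5 alignment rounding
     local frame (rounded_frame_size)        <- frame / stack pointer

   so eliminating the arg pointer to either FP or SP adds the saved
   register space and the local frame size (plus byref_regs slots),
   while FP and SP start out with no gap between them.  */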
6961 initial_elimination_offset (int from, int to)
6963 int regs_saved;
6964 int regs_saved_rounding = 0;
6965 int total_saved_regs_space;
6966 int total_auto_space;
6967 int save_flags = target_flags;
6968 int copy_flags;
6969 HARD_REG_SET live_regs_mask;
6971 shmedia_space_reserved_for_target_registers = false;
6972 regs_saved = calc_live_regs (&live_regs_mask);
6973 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
6975 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
6977 shmedia_space_reserved_for_target_registers = true;
6978 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
6981 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
6982 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6983 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
6985 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
6986 copy_flags = target_flags;
6987 target_flags = save_flags;
6989 total_saved_regs_space = regs_saved + regs_saved_rounding;
6991 if (from == ARG_POINTER_REGNUM && to == FRAME_POINTER_REGNUM)
6992 return total_saved_regs_space + total_auto_space
6993 + current_function_args_info.byref_regs * 8;
6995 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
6996 return total_saved_regs_space + total_auto_space
6997 + current_function_args_info.byref_regs * 8;
6999 /* Initial gap between fp and sp is 0. */
7000 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7001 return 0;
7003 if (from == RETURN_ADDRESS_POINTER_REGNUM
7004 && (to == FRAME_POINTER_REGNUM || to == STACK_POINTER_REGNUM))
7006 if (TARGET_SH5)
7008 int n = total_saved_regs_space;
7009 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7010 save_schedule schedule;
7011 save_entry *entry;
7013 n += total_auto_space;
7015 /* If it wasn't saved, there's not much we can do. */
7016 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7017 return n;
7019 target_flags = copy_flags;
7021 sh5_schedule_saves (&live_regs_mask, &schedule, n);
7022 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7023 if (entry->reg == pr_reg)
7025 target_flags = save_flags;
7026 return entry->offset;
7028 abort ();
7030 else
7031 return total_auto_space;
7034 abort ();
7037 /* Handle machine specific pragmas to be semi-compatible with the Renesas
7038 compiler. */
7040 void
7041 sh_pr_interrupt (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7043 pragma_interrupt = 1;
7046 void
7047 sh_pr_trapa (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7049 pragma_interrupt = pragma_trapa = 1;
7052 void
7053 sh_pr_nosave_low_regs (struct cpp_reader *pfile ATTRIBUTE_UNUSED)
7055 pragma_nosave_low_regs = 1;
7058 /* Generate the 'interrupt_handler' attribute for decls. */
7060 static void
7061 sh_insert_attributes (tree node, tree *attributes)
7063 if (! pragma_interrupt
7064 || TREE_CODE (node) != FUNCTION_DECL)
7065 return;
7067 /* We are only interested in declarations. */
7068 if (!DECL_P (node))
7069 return;
7071 /* Add a 'handle_interrupt' attribute. */
7072 * attributes = tree_cons (get_identifier ("interrupt_handler"), NULL, * attributes);
7074 return;
7077 /* Supported attributes:
7079 interrupt_handler -- specifies this function is an interrupt handler.
7081 sp_switch -- specifies an alternate stack for an interrupt handler
7082 to run on.
7084 trap_exit -- use a trapa to exit an interrupt function instead of
7085 an rte instruction.
7087 renesas -- use Renesas calling/layout conventions (functions and
7088 structures).
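/* As an illustration only (names and numbers here are made up), a
   handler using these attributes might look like

     #pragma interrupt
     void isr (void) __attribute__ ((sp_switch ("alt_stack"),
                                     trap_exit (11)));

   note that, as implemented below, sp_switch and trap_exit are only
   accepted once an interrupt pragma has been seen.  */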
7092 const struct attribute_spec sh_attribute_table[] =
7094 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
7095 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7096 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
7097 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
7098 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
7099 #ifdef SYMBIAN
7100 /* Symbian support adds three new attributes:
7101 dllexport - for exporting a function/variable that will live in a dll
7102 dllimport - for importing a function/variable from a dll
7104 Microsoft allows multiple declspecs in one __declspec, separating
7105 them with spaces. We do NOT support this. Instead, use __declspec
7106 multiple times. */
7107 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7108 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7109 #endif
7110 { NULL, 0, 0, false, false, false, NULL }
7113 /* Handle an "interrupt_handler" attribute; arguments as in
7114 struct attribute_spec.handler. */
7115 static tree
7116 sh_handle_interrupt_handler_attribute (tree *node, tree name,
7117 tree args ATTRIBUTE_UNUSED,
7118 int flags ATTRIBUTE_UNUSED,
7119 bool *no_add_attrs)
7121 if (TREE_CODE (*node) != FUNCTION_DECL)
7123 warning ("`%s' attribute only applies to functions",
7124 IDENTIFIER_POINTER (name));
7125 *no_add_attrs = true;
7127 else if (TARGET_SHCOMPACT)
7129 error ("attribute interrupt_handler is not compatible with -m5-compact");
7130 *no_add_attrs = true;
7133 return NULL_TREE;
7136 /* Handle an "sp_switch" attribute; arguments as in
7137 struct attribute_spec.handler. */
7138 static tree
7139 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
7140 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7142 if (TREE_CODE (*node) != FUNCTION_DECL)
7144 warning ("`%s' attribute only applies to functions",
7145 IDENTIFIER_POINTER (name));
7146 *no_add_attrs = true;
7148 else if (!pragma_interrupt)
7150 /* The sp_switch attribute only has meaning for interrupt functions. */
7151 warning ("`%s' attribute only applies to interrupt functions",
7152 IDENTIFIER_POINTER (name));
7153 *no_add_attrs = true;
7155 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
7157 /* The argument must be a constant string. */
7158 warning ("`%s' attribute argument not a string constant",
7159 IDENTIFIER_POINTER (name));
7160 *no_add_attrs = true;
7162 else
7164 sp_switch = gen_rtx_SYMBOL_REF (VOIDmode,
7165 TREE_STRING_POINTER (TREE_VALUE (args)));
7168 return NULL_TREE;
7171 /* Handle an "trap_exit" attribute; arguments as in
7172 struct attribute_spec.handler. */
7173 static tree
7174 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
7175 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7177 if (TREE_CODE (*node) != FUNCTION_DECL)
7179 warning ("`%s' attribute only applies to functions",
7180 IDENTIFIER_POINTER (name));
7181 *no_add_attrs = true;
7183 else if (!pragma_interrupt)
7185 /* The trap_exit attribute only has meaning for interrupt functions. */
7186 warning ("`%s' attribute only applies to interrupt functions",
7187 IDENTIFIER_POINTER (name));
7188 *no_add_attrs = true;
7190 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
7192 /* The argument must be a constant integer. */
7193 warning ("`%s' attribute argument not an integer constant",
7194 IDENTIFIER_POINTER (name));
7195 *no_add_attrs = true;
7197 else
7199 trap_exit = TREE_INT_CST_LOW (TREE_VALUE (args));
7202 return NULL_TREE;
7205 static tree
7206 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
7207 tree name ATTRIBUTE_UNUSED,
7208 tree args ATTRIBUTE_UNUSED,
7209 int flags ATTRIBUTE_UNUSED,
7210 bool *no_add_attrs ATTRIBUTE_UNUSED)
7212 return NULL_TREE;
7215 /* True if __attribute__((renesas)) or -mrenesas. */
7217 sh_attr_renesas_p (tree td)
7219 if (TARGET_HITACHI)
7220 return 1;
7221 if (td == 0)
7222 return 0;
7223 if (DECL_P (td))
7224 td = TREE_TYPE (td);
7225 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
7226 != NULL_TREE);
7229 /* True if __attribute__((renesas)) or -mrenesas, for the current
7230 function. */
7232 sh_cfun_attr_renesas_p (void)
7234 return sh_attr_renesas_p (current_function_decl);
7238 sh_cfun_interrupt_handler_p (void)
7240 return (lookup_attribute ("interrupt_handler",
7241 DECL_ATTRIBUTES (current_function_decl))
7242 != NULL_TREE);
7245 /* ??? target_switches in toplev.c is static, hence we have to duplicate it. */
7246 static const struct
7248 const char *const name;
7249 const int value;
7250 const char *const description;
7252 sh_target_switches[] = TARGET_SWITCHES;
7253 #define target_switches sh_target_switches
7255 /* Like default_pch_valid_p, but take flag_mask into account. */
7256 const char *
7257 sh_pch_valid_p (const void *data_p, size_t len)
7259 const char *data = (const char *)data_p;
7260 const char *flag_that_differs = NULL;
7261 size_t i;
7262 int old_flags;
7263 int flag_mask
7264 = (SH1_BIT | SH2_BIT | SH3_BIT | SH_E_BIT | HARD_SH4_BIT | FPU_SINGLE_BIT
7265 | SH4_BIT | HITACHI_BIT | LITTLE_ENDIAN_BIT);
7267 /* -fpic and -fpie also usually make a PCH invalid. */
7268 if (data[0] != flag_pic)
7269 return _("created and used with different settings of -fpic");
7270 if (data[1] != flag_pie)
7271 return _("created and used with different settings of -fpie");
7272 data += 2;
7274 /* Check target_flags. */
7275 memcpy (&old_flags, data, sizeof (target_flags));
7276 if (((old_flags ^ target_flags) & flag_mask) != 0)
7278 for (i = 0; i < ARRAY_SIZE (target_switches); i++)
7280 int bits;
7282 bits = target_switches[i].value;
7283 if (bits < 0)
7284 bits = -bits;
7285 bits &= flag_mask;
7286 if ((target_flags & bits) != (old_flags & bits))
7288 flag_that_differs = target_switches[i].name;
7289 goto make_message;
7292 abort ();
7294 data += sizeof (target_flags);
7295 len -= sizeof (target_flags);
7297 /* Check string options. */
7298 #ifdef TARGET_OPTIONS
7299 for (i = 0; i < ARRAY_SIZE (target_options); i++)
7301 const char *str = *target_options[i].variable;
7302 size_t l;
7303 if (! str)
7304 str = "";
7305 l = strlen (str) + 1;
7306 if (len < l || memcmp (data, str, l) != 0)
7308 flag_that_differs = target_options[i].prefix;
7309 goto make_message;
7311 data += l;
7312 len -= l;
7314 #endif
7316 return NULL;
7318 make_message:
7320 char *r;
7321 asprintf (&r, _("created and used with differing settings of `-m%s'"),
7322 flag_that_differs);
7323 if (r == NULL)
7324 return _("out of memory");
7325 return r;
7329 /* Predicates used by the templates. */
7331 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
7332 Used only in general_movsrc_operand. */
7335 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7337 switch (REGNO (op))
7339 case PR_REG:
7340 case MACL_REG:
7341 case MACH_REG:
7342 return 1;
7344 return 0;
7347 /* Returns 1 if OP can be source of a simple move operation.
7348 Same as general_operand, but a LABEL_REF is valid, PRE_DEC is
7349 invalid as are subregs of system registers. */
7352 general_movsrc_operand (rtx op, enum machine_mode mode)
7354 if (GET_CODE (op) == MEM)
7356 rtx inside = XEXP (op, 0);
7357 if (GET_CODE (inside) == CONST)
7358 inside = XEXP (inside, 0);
7360 if (GET_CODE (inside) == LABEL_REF)
7361 return 1;
7363 if (GET_CODE (inside) == PLUS
7364 && GET_CODE (XEXP (inside, 0)) == LABEL_REF
7365 && GET_CODE (XEXP (inside, 1)) == CONST_INT)
7366 return 1;
7368 /* Only post inc allowed. */
7369 if (GET_CODE (inside) == PRE_DEC)
7370 return 0;
7373 if ((mode == QImode || mode == HImode)
7374 && (GET_CODE (op) == SUBREG
7375 && GET_CODE (XEXP (op, 0)) == REG
7376 && system_reg_operand (XEXP (op, 0), mode)))
7377 return 0;
7379 return general_operand (op, mode);
7382 /* Returns 1 if OP can be a destination of a move.
7383 Same as general_operand, but no post-inc allowed. */
7386 general_movdst_operand (rtx op, enum machine_mode mode)
7388 /* Only pre dec allowed. */
7389 if (GET_CODE (op) == MEM && GET_CODE (XEXP (op, 0)) == POST_INC)
7390 return 0;
7392 return general_operand (op, mode);
7395 /* Returns 1 if OP is a normal arithmetic register. */
7398 arith_reg_operand (rtx op, enum machine_mode mode)
7400 if (register_operand (op, mode))
7402 int regno;
7404 if (GET_CODE (op) == REG)
7405 regno = REGNO (op);
7406 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7407 regno = REGNO (SUBREG_REG (op));
7408 else
7409 return 1;
7411 return (regno != T_REG && regno != PR_REG
7412 && ! TARGET_REGISTER_P (regno)
7413 && (regno != FPUL_REG || TARGET_SH4)
7414 && regno != MACH_REG && regno != MACL_REG);
7416 return 0;
7419 /* Like above, but for DImode destinations: forbid paradoxical DImode subregs,
7420 because this would lead to missing sign extensions when truncating from
7421 DImode to SImode. */
7423 arith_reg_dest (rtx op, enum machine_mode mode)
7425 if (mode == DImode && GET_CODE (op) == SUBREG
7426 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (op))) < 8)
7427 return 0;
7428 return arith_reg_operand (op, mode);
7432 int_gpr_dest (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7434 enum machine_mode op_mode = GET_MODE (op);
7436 if (GET_MODE_CLASS (op_mode) != MODE_INT
7437 || GET_MODE_SIZE (op_mode) >= UNITS_PER_WORD)
7438 return 0;
7439 if (! reload_completed)
7440 return 0;
7441 return true_regnum (op) <= LAST_GENERAL_REG;
7445 fp_arith_reg_operand (rtx op, enum machine_mode mode)
7447 if (register_operand (op, mode))
7449 int regno;
7451 if (GET_CODE (op) == REG)
7452 regno = REGNO (op);
7453 else if (GET_CODE (op) == SUBREG && GET_CODE (SUBREG_REG (op)) == REG)
7454 regno = REGNO (SUBREG_REG (op));
7455 else
7456 return 1;
7458 return (regno >= FIRST_PSEUDO_REGISTER
7459 || FP_REGISTER_P (regno));
7461 return 0;
7464 /* Returns 1 if OP is a valid source operand for an arithmetic insn. */
7467 arith_operand (rtx op, enum machine_mode mode)
7469 if (arith_reg_operand (op, mode))
7470 return 1;
7472 if (TARGET_SHMEDIA)
7474 /* FIXME: We should be checking whether the CONST_INT fits in a
7475 CONST_OK_FOR_I16 here, but this causes reload_cse to crash when
7476 attempting to transform a sequence of two 64-bit sets of the
7477 same register from literal constants into a set and an add,
7478 when the difference is too wide for an add. */
7479 if (GET_CODE (op) == CONST_INT
7480 || EXTRA_CONSTRAINT_C16 (op))
7481 return 1;
7482 else
7483 return 0;
7485 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I08 (INTVAL (op)))
7486 return 1;
7488 return 0;
7491 /* Returns 1 if OP is a valid source operand for a compare insn. */
7494 arith_reg_or_0_operand (rtx op, enum machine_mode mode)
7496 if (arith_reg_operand (op, mode))
7497 return 1;
7499 if (EXTRA_CONSTRAINT_Z (op))
7500 return 1;
7502 return 0;
7505 /* Return 1 if OP is a valid source operand for an SHmedia operation
7506 that takes either a register or a 6-bit immediate. */
7509 shmedia_6bit_operand (rtx op, enum machine_mode mode)
7511 return (arith_reg_operand (op, mode)
7512 || (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I06 (INTVAL (op))));
7515 /* Returns 1 if OP is a valid source operand for a logical operation. */
7518 logical_operand (rtx op, enum machine_mode mode)
7520 if (arith_reg_operand (op, mode))
7521 return 1;
7523 if (TARGET_SHMEDIA)
7525 if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_I10 (INTVAL (op)))
7526 return 1;
7527 else
7528 return 0;
7530 else if (GET_CODE (op) == CONST_INT && CONST_OK_FOR_K08 (INTVAL (op)))
7531 return 1;
7533 return 0;
7537 and_operand (rtx op, enum machine_mode mode)
7539 if (logical_operand (op, mode))
7540 return 1;
7542 /* Check mshflo.l / mshflhi.l opportunities. */
7543 if (TARGET_SHMEDIA
7544 && mode == DImode
7545 && GET_CODE (op) == CONST_INT
7546 && CONST_OK_FOR_J16 (INTVAL (op)))
7547 return 1;
7549 return 0;
7552 /* Nonzero if OP is a floating point value with value 0.0. */
7555 fp_zero_operand (rtx op)
7557 REAL_VALUE_TYPE r;
7559 if (GET_MODE (op) != SFmode)
7560 return 0;
7562 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7563 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
7566 /* Nonzero if OP is a floating point value with value 1.0. */
7569 fp_one_operand (rtx op)
7571 REAL_VALUE_TYPE r;
7573 if (GET_MODE (op) != SFmode)
7574 return 0;
7576 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7577 return REAL_VALUES_EQUAL (r, dconst1);
7580 /* For -m4 and -m4-single-only, mode switching is used. If we are
7581 compiling without -mfmovd, movsf_ie isn't taken into account for
7582 mode switching. We could check in machine_dependent_reorg for
7583 cases where we know we are in single precision mode, but there is
7584 no interface to find that out during reload, so we must avoid
7585 choosing an fldi alternative during reload and thus failing to
7586 allocate a scratch register for the constant loading. */
7588 fldi_ok (void)
7590 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
7594 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7596 enum rtx_code code = GET_CODE (op);
7597 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
7601 fpscr_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7603 return (GET_CODE (op) == REG
7604 && (REGNO (op) == FPSCR_REG
7605 || (REGNO (op) >= FIRST_PSEUDO_REGISTER
7606 && !(reload_in_progress || reload_completed)))
7607 && GET_MODE (op) == PSImode);
7611 fpul_operand (rtx op, enum machine_mode mode)
7613 if (TARGET_SHMEDIA)
7614 return fp_arith_reg_operand (op, mode);
7616 return (GET_CODE (op) == REG
7617 && (REGNO (op) == FPUL_REG || REGNO (op) >= FIRST_PSEUDO_REGISTER)
7618 && GET_MODE (op) == mode);
7622 symbol_ref_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7624 return (GET_CODE (op) == SYMBOL_REF);
7627 /* Return the TLS type for TLS symbols, 0 otherwise. */
7629 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7631 if (GET_CODE (op) != SYMBOL_REF)
7632 return 0;
7633 return SYMBOL_REF_TLS_MODEL (op);
7637 commutative_float_operator (rtx op, enum machine_mode mode)
7639 if (GET_MODE (op) != mode)
7640 return 0;
7641 switch (GET_CODE (op))
7643 case PLUS:
7644 case MULT:
7645 return 1;
7646 default:
7647 break;
7649 return 0;
7653 noncommutative_float_operator (rtx op, enum machine_mode mode)
7655 if (GET_MODE (op) != mode)
7656 return 0;
7657 switch (GET_CODE (op))
7659 case MINUS:
7660 case DIV:
7661 return 1;
7662 default:
7663 break;
7665 return 0;
7669 unary_float_operator (rtx op, enum machine_mode mode)
7671 if (GET_MODE (op) != mode)
7672 return 0;
7673 switch (GET_CODE (op))
7675 case ABS:
7676 case NEG:
7677 case SQRT:
7678 return 1;
7679 default:
7680 break;
7682 return 0;
7686 binary_float_operator (rtx op, enum machine_mode mode)
7688 if (GET_MODE (op) != mode)
7689 return 0;
7690 switch (GET_CODE (op))
7692 case PLUS:
7693 case MINUS:
7694 case MULT:
7695 case DIV:
7696 return 1;
7697 default:
7698 break;
7700 return 0;
7704 binary_logical_operator (rtx op, enum machine_mode mode)
7706 if (GET_MODE (op) != mode)
7707 return 0;
7708 switch (GET_CODE (op))
7710 case IOR:
7711 case AND:
7712 case XOR:
7713 return 1;
7714 default:
7715 break;
7717 return 0;
7721 equality_comparison_operator (rtx op, enum machine_mode mode)
7723 return ((mode == VOIDmode || GET_MODE (op) == mode)
7724 && (GET_CODE (op) == EQ || GET_CODE (op) == NE));
7728 greater_comparison_operator (rtx op, enum machine_mode mode)
7730 if (mode != VOIDmode && GET_MODE (op) == mode)
7731 return 0;
7732 switch (GET_CODE (op))
7734 case GT:
7735 case GE:
7736 case GTU:
7737 case GEU:
7738 return 1;
7739 default:
7740 return 0;
7745 less_comparison_operator (rtx op, enum machine_mode mode)
7747 if (mode != VOIDmode && GET_MODE (op) == mode)
7748 return 0;
7749 switch (GET_CODE (op))
7751 case LT:
7752 case LE:
7753 case LTU:
7754 case LEU:
7755 return 1;
7756 default:
7757 return 0;
7761 /* Accept pseudos and branch target registers. */
7763 target_reg_operand (rtx op, enum machine_mode mode)
7765 if (mode != DImode
7766 || GET_MODE (op) != DImode)
7767 return 0;
7769 if (GET_CODE (op) == SUBREG)
7770 op = XEXP (op, 0);
7772 if (GET_CODE (op) != REG)
7773 return 0;
7775 /* We must protect ourselves from matching pseudos that are virtual
7776 registers, because they will eventually be replaced with hardware
7777 registers that aren't branch-target registers. */
7778 if (REGNO (op) > LAST_VIRTUAL_REGISTER
7779 || TARGET_REGISTER_P (REGNO (op)))
7780 return 1;
7782 return 0;
7785 /* Same as target_reg_operand, except that label_refs and symbol_refs
7786 are accepted before reload. */
7788 target_operand (rtx op, enum machine_mode mode)
7790 if (mode != DImode)
7791 return 0;
7793 if ((GET_MODE (op) == DImode || GET_MODE (op) == VOIDmode)
7794 && EXTRA_CONSTRAINT_Csy (op))
7795 return ! reload_completed;
7797 return target_reg_operand (op, mode);
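/* Nonzero if OP is a CONST_INT giving a valid bit offset for the mextr
   instructions: a multiple of 8 between 8 and 56 inclusive. */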
7801 mextr_bit_offset (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7803 HOST_WIDE_INT i;
7805 if (GET_CODE (op) != CONST_INT)
7806 return 0;
7807 i = INTVAL (op);
7808 return i >= 1 * 8 && i <= 7 * 8 && (i & 7) == 0;
7812 extend_reg_operand (rtx op, enum machine_mode mode)
7814 return (GET_CODE (op) == TRUNCATE
7815 ? arith_operand
7816 : arith_reg_operand) (op, mode);
7820 trunc_hi_operand (rtx op, enum machine_mode mode)
7822 enum machine_mode op_mode = GET_MODE (op);
7824 if (op_mode != SImode && op_mode != DImode
7825 && op_mode != V4HImode && op_mode != V2SImode)
7826 return 0;
7827 return extend_reg_operand (op, mode);
7831 extend_reg_or_0_operand (rtx op, enum machine_mode mode)
7833 return (GET_CODE (op) == TRUNCATE
7834 ? arith_operand
7835 : arith_reg_or_0_operand) (op, mode);
7839 general_extend_operand (rtx op, enum machine_mode mode)
7841 return (GET_CODE (op) == TRUNCATE
7842 ? arith_operand
7843 : nonimmediate_operand) (op, mode);
7847 inqhi_operand (rtx op, enum machine_mode mode)
7849 if (GET_CODE (op) != TRUNCATE || mode != GET_MODE (op))
7850 return 0;
7851 op = XEXP (op, 0);
7852 /* Can't use true_regnum here because copy_cost wants to know about
7853 SECONDARY_INPUT_RELOAD_CLASS. */
7854 return GET_CODE (op) == REG && FP_REGISTER_P (REGNO (op));
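/* Nonzero if V is a CONST_VECTOR or PARALLEL matching MODE whose elements
   all repeat one value; for byte-sized elements, the last two elements
   form the repeating pair. */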
7858 sh_rep_vec (rtx v, enum machine_mode mode)
7860 int i;
7861 rtx x, y;
7863 if ((GET_CODE (v) != CONST_VECTOR && GET_CODE (v) != PARALLEL)
7864 || (GET_MODE (v) != mode && mode != VOIDmode))
7865 return 0;
7866 i = XVECLEN (v, 0) - 2;
7867 x = XVECEXP (v, 0, i + 1);
7868 if (GET_MODE_UNIT_SIZE (mode) == 1)
7870 y = XVECEXP (v, 0, i);
7871 for (i -= 2; i >= 0; i -= 2)
7872 if (! rtx_equal_p (XVECEXP (v, 0, i + 1), x)
7873 || ! rtx_equal_p (XVECEXP (v, 0, i), y))
7874 return 0;
7876 else
7877 for (; i >= 0; i--)
7878 if (XVECEXP (v, 0, i) != x)
7879 return 0;
7880 return 1;
7883 /* Determine if V is a constant vector matching MODE with only one element
7884 that is not a sign extension. Two byte-sized elements count as one. */
7886 sh_1el_vec (rtx v, enum machine_mode mode)
7888 int unit_size;
7889 int i, last, least, sign_ix;
7890 rtx sign;
7892 if (GET_CODE (v) != CONST_VECTOR
7893 || (GET_MODE (v) != mode && mode != VOIDmode))
7894 return 0;
7895 /* Determine numbers of last and of least significant elements. */
7896 last = XVECLEN (v, 0) - 1;
7897 least = TARGET_LITTLE_ENDIAN ? 0 : last;
7898 if (GET_CODE (XVECEXP (v, 0, least)) != CONST_INT)
7899 return 0;
7900 sign_ix = least;
7901 if (GET_MODE_UNIT_SIZE (mode) == 1)
7902 sign_ix = TARGET_LITTLE_ENDIAN ? 1 : last - 1;
7903 if (GET_CODE (XVECEXP (v, 0, sign_ix)) != CONST_INT)
7904 return 0;
7905 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (v));
7906 sign = (INTVAL (XVECEXP (v, 0, sign_ix)) >> (unit_size * BITS_PER_UNIT - 1)
7907 ? constm1_rtx : const0_rtx);
7908 i = XVECLEN (v, 0) - 1;
7910 if (i != least && i != sign_ix && XVECEXP (v, 0, i) != sign)
7911 return 0;
7912 while (--i);
7913 return 1;
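/* Nonzero if V is a CONST_VECTOR matching MODE with all elements CONST_INT. */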
7917 sh_const_vec (rtx v, enum machine_mode mode)
7919 int i;
7921 if (GET_CODE (v) != CONST_VECTOR
7922 || (GET_MODE (v) != mode && mode != VOIDmode))
7923 return 0;
7924 i = XVECLEN (v, 0) - 1;
7925 for (; i >= 0; i--)
7926 if (GET_CODE (XVECEXP (v, 0, i)) != CONST_INT)
7927 return 0;
7928 return 1;
7931 /* Return the destination address of a branch. */
7933 static int
7934 branch_dest (rtx branch)
7936 rtx dest = SET_SRC (PATTERN (branch));
7937 int dest_uid;
7939 if (GET_CODE (dest) == IF_THEN_ELSE)
7940 dest = XEXP (dest, 1);
7941 dest = XEXP (dest, 0);
7942 dest_uid = INSN_UID (dest);
7943 return INSN_ADDRESSES (dest_uid);
7946 /* Return nonzero if REG is not used after INSN.
7947 We assume REG is a reload reg, and therefore does
7948 not live past labels. It may live past calls or jumps though. */
7950 reg_unused_after (rtx reg, rtx insn)
7952 enum rtx_code code;
7953 rtx set;
7955 /* If the reg is set by this instruction, then it is safe for our
7956 case. Disregard the case where this is a store to memory, since
7957 we are checking a register used in the store address. */
7958 set = single_set (insn);
7959 if (set && GET_CODE (SET_DEST (set)) != MEM
7960 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7961 return 1;
7963 while ((insn = NEXT_INSN (insn)))
7965 rtx set;
7966 if (!INSN_P (insn))
7967 continue;
7969 code = GET_CODE (insn);
7971 #if 0
7972 /* If this is a label that existed before reload, then the register
7973 if dead here. However, if this is a label added by reorg, then
7974 the register may still be live here. We can't tell the difference,
7975 so we just ignore labels completely. */
7976 if (code == CODE_LABEL)
7977 return 1;
7978 /* else */
7979 #endif
7981 if (code == JUMP_INSN)
7982 return 0;
7984 /* If this is a sequence, we must handle them all at once.
7985 We could have for instance a call that sets the target register,
7986 and an insn in a delay slot that uses the register. In this case,
7987 we must return 0. */
7988 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
7990 int i;
7991 int retval = 0;
7993 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
7995 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
7996 rtx set = single_set (this_insn);
7998 if (GET_CODE (this_insn) == CALL_INSN)
7999 code = CALL_INSN;
8000 else if (GET_CODE (this_insn) == JUMP_INSN)
8002 if (INSN_ANNULLED_BRANCH_P (this_insn))
8003 return 0;
8004 code = JUMP_INSN;
8007 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8008 return 0;
8009 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8011 if (GET_CODE (SET_DEST (set)) != MEM)
8012 retval = 1;
8013 else
8014 return 0;
8016 if (set == 0
8017 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
8018 return 0;
8020 if (retval == 1)
8021 return 1;
8022 else if (code == JUMP_INSN)
8023 return 0;
8026 set = single_set (insn);
8027 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8028 return 0;
8029 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8030 return GET_CODE (SET_DEST (set)) != MEM;
8031 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
8032 return 0;
8034 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
8035 return 1;
8037 return 1;
8040 #include "ggc.h"
8042 static GTY(()) rtx fpscr_rtx;
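/* Return the rtx for the FPSCR register, creating it on first use. */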
8044 get_fpscr_rtx (void)
8046 if (! fpscr_rtx)
8048 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
8049 REG_USERVAR_P (fpscr_rtx) = 1;
8050 mark_user_reg (fpscr_rtx);
8052 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
8053 mark_user_reg (fpscr_rtx);
8054 return fpscr_rtx;
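/* Helpers for emitting single- and double-precision FP insn patterns;
   the expand_* variants supply the fpscr operand via get_fpscr_rtx. */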
8057 void
8058 emit_sf_insn (rtx pat)
8060 emit_insn (pat);
8063 void
8064 emit_df_insn (rtx pat)
8066 emit_insn (pat);
8069 void
8070 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8072 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8075 void
8076 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8078 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
8079 get_fpscr_rtx ()));
8082 void
8083 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8085 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8088 void
8089 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8091 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
8092 get_fpscr_rtx ()));
8095 /* ??? gcc does flow analysis strictly after common subexpression
8096 elimination. As a result, common subexpression elimination fails
8097 when there are some intervening statements setting the same register.
8098 If we did nothing about this, this would hurt the precision switching
8099 for SH4 badly. There is some cse after reload, but it is unable to
8100 undo the extra register pressure from the unused instructions, and
8101 it cannot remove auto-increment loads.
8103 A C code example that shows this flow/cse weakness for (at least) SH
8104 and sparc (as of gcc ss-970706) is this:
8106 double
8107 f(double a)
8109 double d;
8110 d = 0.1;
8111 a += d;
8112 d = 1.1;
8113 d = 0.1;
8114 a *= d;
8115 return a;
8118 So we add another pass before common subexpression elimination, to
8119 remove assignments that are dead due to a following assignment in the
8120 same basic block. */
8122 static void
8123 mark_use (rtx x, rtx *reg_set_block)
8125 enum rtx_code code;
8127 if (! x)
8128 return;
8129 code = GET_CODE (x);
8130 switch (code)
8132 case REG:
8134 int regno = REGNO (x);
8135 int nregs = (regno < FIRST_PSEUDO_REGISTER
8136 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
8137 : 1);
8140 reg_set_block[regno + nregs - 1] = 0;
8142 while (--nregs);
8143 break;
8145 case SET:
8147 rtx dest = SET_DEST (x);
8149 if (GET_CODE (dest) == SUBREG)
8150 dest = SUBREG_REG (dest);
8151 if (GET_CODE (dest) != REG)
8152 mark_use (dest, reg_set_block);
8153 mark_use (SET_SRC (x), reg_set_block);
8154 break;
8156 case CLOBBER:
8157 break;
8158 default:
8160 const char *fmt = GET_RTX_FORMAT (code);
8161 int i, j;
8162 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8164 if (fmt[i] == 'e')
8165 mark_use (XEXP (x, i), reg_set_block);
8166 else if (fmt[i] == 'E')
8167 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8168 mark_use (XVECEXP (x, i, j), reg_set_block);
8170 break;
8175 static rtx get_free_reg (HARD_REG_SET);
8177 /* This function returns a register to use to load the address to load
8178 the fpscr from. Currently it always returns r1 or r7, but when we are
8179 able to use pseudo registers after combine, or have a better mechanism
8180 for choosing a register, it should be done here. */
8181 /* REGS_LIVE is the liveness information for the point for which we
8182 need this allocation. In some bare-bones exit blocks, r1 is live at the
8183 start. We can even have all of r0..r3 being live:
8184 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8185 The INSN before which new insns are placed will clobber the register
8186 we return. If a basic block consists only of setting the return value
8187 register to a pseudo and using that register, the return value is not
8188 live before or after this block, yet we'll insert our insns right in
8189 the middle. */
8191 static rtx
8192 get_free_reg (HARD_REG_SET regs_live)
8194 if (! TEST_HARD_REG_BIT (regs_live, 1))
8195 return gen_rtx_REG (Pmode, 1);
8197 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8198 there shouldn't be anything but a jump before the function end. */
8199 if (! TEST_HARD_REG_BIT (regs_live, 7))
8200 return gen_rtx_REG (Pmode, 7);
8202 abort ();
8205 /* This function will set the fpscr from memory.
8206 MODE is the mode we are setting it to. */
8207 void
8208 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8210 enum attr_fp_mode fp_mode = mode;
8211 rtx addr_reg = get_free_reg (regs_live);
8213 if (fp_mode == (enum attr_fp_mode) ACTUAL_NORMAL_MODE (FP_MODE))
8214 emit_insn (gen_fpu_switch1 (addr_reg));
8215 else
8216 emit_insn (gen_fpu_switch0 (addr_reg));
8219 /* Is the given character a logical line separator for the assembler? */
8220 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8221 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
8222 #endif
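/* Return the number of extra bytes INSN occupies beyond its formal length:
   2 for an unfilled delay slot, plus the extra space taken by sh-dsp
   parallel-processing or repeat insns appearing in inline asm. */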
8225 sh_insn_length_adjustment (rtx insn)
8227 /* Instructions with unfilled delay slots take up an extra two bytes for
8228 the nop in the delay slot. */
8229 if (((GET_CODE (insn) == INSN
8230 && GET_CODE (PATTERN (insn)) != USE
8231 && GET_CODE (PATTERN (insn)) != CLOBBER)
8232 || GET_CODE (insn) == CALL_INSN
8233 || (GET_CODE (insn) == JUMP_INSN
8234 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8235 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8236 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8237 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8238 return 2;
8240 /* SH2e has a bug that prevents the use of annulled branches, so if
8241 the delay slot is not filled, we'll have to put a NOP in it. */
8242 if (sh_cpu == CPU_SH2E
8243 && GET_CODE (insn) == JUMP_INSN
8244 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8245 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8246 && get_attr_type (insn) == TYPE_CBRANCH
8247 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8248 return 2;
8250 /* sh-dsp parallel processing insns take four bytes instead of two. */
8252 if (GET_CODE (insn) == INSN)
8254 int sum = 0;
8255 rtx body = PATTERN (insn);
8256 const char *template;
8257 char c;
8258 int maybe_label = 1;
8260 if (GET_CODE (body) == ASM_INPUT)
8261 template = XSTR (body, 0);
8262 else if (asm_noperands (body) >= 0)
8263 template
8264 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
8265 else
8266 return 0;
8269 int ppi_adjust = 0;
8272 c = *template++;
8273 while (c == ' ' || c == '\t');
8274 /* all sh-dsp parallel-processing insns start with p.
8275 The only non-ppi sh insn starting with p is pref.
8276 The only ppi starting with pr is prnd. */
8277 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
8278 ppi_adjust = 2;
8279 /* The repeat pseudo-insn expands to three insns, a total of
8280 six bytes in size. */
8281 else if ((c == 'r' || c == 'R')
8282 && ! strncasecmp ("epeat", template, 5))
8283 ppi_adjust = 4;
8284 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
8286 /* If this is a label, it is obviously not a ppi insn. */
8287 if (c == ':' && maybe_label)
8289 ppi_adjust = 0;
8290 break;
8292 else if (c == '\'' || c == '"')
8293 maybe_label = 0;
8294 c = *template++;
8296 sum += ppi_adjust;
8297 maybe_label = c != ':';
8299 while (c);
8300 return sum;
8302 return 0;
8305 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8306 isn't protected by a PIC unspec. */
8308 nonpic_symbol_mentioned_p (rtx x)
8310 register const char *fmt;
8311 register int i;
8313 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8314 || GET_CODE (x) == PC)
8315 return 1;
8317 /* We don't want to look into the possible MEM location of a
8318 CONST_DOUBLE, since we're not going to use it, in general. */
8319 if (GET_CODE (x) == CONST_DOUBLE)
8320 return 0;
8322 if (GET_CODE (x) == UNSPEC
8323 && (XINT (x, 1) == UNSPEC_PIC
8324 || XINT (x, 1) == UNSPEC_GOT
8325 || XINT (x, 1) == UNSPEC_GOTOFF
8326 || XINT (x, 1) == UNSPEC_GOTPLT
8327 || XINT (x, 1) == UNSPEC_GOTTPOFF
8328 || XINT (x, 1) == UNSPEC_DTPOFF
8329 || XINT (x, 1) == UNSPEC_PLT))
8330 return 0;
8332 fmt = GET_RTX_FORMAT (GET_CODE (x));
8333 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8335 if (fmt[i] == 'E')
8337 register int j;
8339 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8340 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8341 return 1;
8343 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8344 return 1;
8347 return 0;
8350 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8351 @GOTOFF in `reg'. */
8353 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8354 rtx reg)
8356 if (tls_symbolic_operand (orig, Pmode))
8357 return orig;
8359 if (GET_CODE (orig) == LABEL_REF
8360 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8362 if (reg == 0)
8363 reg = gen_reg_rtx (Pmode);
8365 emit_insn (gen_symGOTOFF2reg (reg, orig));
8366 return reg;
8368 else if (GET_CODE (orig) == SYMBOL_REF)
8370 if (reg == 0)
8371 reg = gen_reg_rtx (Pmode);
8373 emit_insn (gen_symGOT2reg (reg, orig));
8374 return reg;
8376 return orig;
8379 /* Mark the use of a constant in the literal table. If the constant
8380 has multiple labels, make it unique. */
8381 static rtx
8382 mark_constant_pool_use (rtx x)
8384 rtx insn, lab, pattern;
8386 if (x == NULL)
8387 return x;
8389 switch (GET_CODE (x))
8391 case LABEL_REF:
8392 x = XEXP (x, 0);
8393 case CODE_LABEL:
8394 break;
8395 default:
8396 return x;
8399 /* Get the first label in the list of labels for the same constant
8400 and delete the other labels in the list. */
8401 lab = x;
8402 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8404 if (GET_CODE (insn) != CODE_LABEL
8405 || LABEL_REFS (insn) != NEXT_INSN (insn))
8406 break;
8407 lab = insn;
8410 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8411 INSN_DELETED_P (insn) = 1;
8413 /* Mark constants in a window. */
8414 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8416 if (GET_CODE (insn) != INSN)
8417 continue;
8419 pattern = PATTERN (insn);
8420 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8421 continue;
8423 switch (XINT (pattern, 1))
8425 case UNSPECV_CONST2:
8426 case UNSPECV_CONST4:
8427 case UNSPECV_CONST8:
8428 XVECEXP (pattern, 0, 1) = const1_rtx;
8429 break;
8430 case UNSPECV_WINDOW_END:
8431 if (XVECEXP (pattern, 0, 0) == x)
8432 return lab;
8433 break;
8434 case UNSPECV_CONST_END:
8435 return lab;
8436 default:
8437 break;
8441 return lab;
8444 /* Return true if it's possible to redirect BRANCH1 to the destination
8445 of an unconditional jump BRANCH2. We only want to do this if the
8446 resulting branch will have a short displacement. */
8448 sh_can_redirect_branch (rtx branch1, rtx branch2)
8450 if (flag_expensive_optimizations && simplejump_p (branch2))
8452 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8453 rtx insn;
8454 int distance;
8456 for (distance = 0, insn = NEXT_INSN (branch1);
8457 insn && distance < 256;
8458 insn = PREV_INSN (insn))
8460 if (insn == dest)
8461 return 1;
8462 else
8463 distance += get_attr_length (insn);
8465 for (distance = 0, insn = NEXT_INSN (branch1);
8466 insn && distance < 256;
8467 insn = NEXT_INSN (insn))
8469 if (insn == dest)
8470 return 1;
8471 else
8472 distance += get_attr_length (insn);
8475 return 0;
8478 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8480 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8481 unsigned int new_reg)
8483 /* Interrupt functions can only use registers that have already been
8484 saved by the prologue, even if they would normally be
8485 call-clobbered. */
8487 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
8488 return 0;
8490 return 1;
8493 /* Function to update the integer COST
8494 based on the relationship between INSN that is dependent on
8495 DEP_INSN through the dependence LINK. The default is to make no
8496 adjustment to COST. This can be used for example to specify to
8497 the scheduler that an output- or anti-dependence does not incur
8498 the same cost as a data-dependence. The return value should be
8499 the new value for COST. */
8500 static int
8501 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8503 rtx reg, use_pat;
8505 if (TARGET_SHMEDIA)
8507 /* On SHmedia, if the dependence is an anti-dependence or
8508 output-dependence, there is no cost. */
8509 if (REG_NOTE_KIND (link) != 0)
8510 cost = 0;
8512 if (get_attr_is_mac_media (insn)
8513 && get_attr_is_mac_media (dep_insn))
8514 cost = 1;
8516 else if (REG_NOTE_KIND (link) == 0)
8518 enum attr_type dep_type, type;
8520 if (recog_memoized (insn) < 0
8521 || recog_memoized (dep_insn) < 0)
8522 return cost;
8524 dep_type = get_attr_type (dep_insn);
8525 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
8526 cost--;
8527 if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
8528 && (type = get_attr_type (insn)) != TYPE_CALL
8529 && type != TYPE_SFUNC)
8530 cost--;
8532 /* The only input for a call that is timing-critical is the
8533 function's address. */
8534 if (GET_CODE(insn) == CALL_INSN)
8536 rtx call = PATTERN (insn);
8538 if (GET_CODE (call) == PARALLEL)
8539 call = XVECEXP (call, 0 ,0);
8540 if (GET_CODE (call) == SET)
8541 call = SET_SRC (call);
8542 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
8543 && ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn))
8544 cost = 0;
8546 /* Likewise, the most timing-critical input for an sfunc call
8547 is the function address. However, sfuncs typically start
8548 using their arguments pretty quickly.
8549 Assume a four cycle delay before they are needed. */
8550 /* All sfunc calls are parallels with at least four components.
8551 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8552 else if (GET_CODE (PATTERN (insn)) == PARALLEL
8553 && XVECLEN (PATTERN (insn), 0) >= 4
8554 && (reg = sfunc_uses_reg (insn)))
8556 if (! reg_set_p (reg, dep_insn))
8557 cost -= 4;
8559 /* When the preceding instruction loads the shift amount of
8560 the following SHAD/SHLD, the latency of the load is increased
8561 by 1 cycle. */
8562 else if (TARGET_SH4
8563 && get_attr_type (insn) == TYPE_DYN_SHIFT
8564 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
8565 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
8566 XEXP (SET_SRC (single_set (insn)),
8567 1)))
8568 cost++;
8569 /* When an LS group instruction with a latency of less than
8570 3 cycles is followed by a double-precision floating-point
8571 instruction, FIPR, or FTRV, the latency of the first
8572 instruction is increased to 3 cycles. */
8573 else if (cost < 3
8574 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
8575 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
8576 cost = 3;
8577 /* The lsw register of a double-precision computation is ready one
8578 cycle earlier. */
8579 else if (reload_completed
8580 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
8581 && (use_pat = single_set (insn))
8582 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
8583 SET_SRC (use_pat)))
8584 cost -= 1;
8586 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
8587 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
8588 cost -= 1;
8590 /* An anti-dependence penalty of two applies if the first insn is a double
8591 precision fadd / fsub / fmul. */
8592 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8593 && recog_memoized (dep_insn) >= 0
8594 && get_attr_type (dep_insn) == TYPE_DFP_ARITH
8595 /* A lot of alleged anti-flow dependences are fake,
8596 so check this one is real. */
8597 && flow_dependent_p (dep_insn, insn))
8598 cost = 2;
8601 return cost;
8604 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
8605 if DEP_INSN is anti-flow dependent on INSN. */
8606 static int
8607 flow_dependent_p (rtx insn, rtx dep_insn)
8609 rtx tmp = PATTERN (insn);
8611 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
8612 return tmp == NULL_RTX;
8615 /* A helper function for flow_dependent_p called through note_stores. */
8616 static void
8617 flow_dependent_p_1 (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
8619 rtx * pinsn = (rtx *) data;
8621 if (*pinsn && reg_referenced_p (x, *pinsn))
8622 *pinsn = NULL_RTX;
8625 /* For use by ALLOCATE_INITIAL_VALUE. Note that sh.md contains some
8626 'special function' patterns (type sfunc) that clobber pr, but that
8627 do not look like function calls to leaf_function_p. Hence we must
8628 do this extra check. */
8630 sh_pr_n_sets (void)
8632 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
8635 /* This function returns "2" to indicate dual issue for the SH4
8636 processor. To be used by the DFA pipeline description. */
8637 static int
8638 sh_issue_rate (void)
8640 if (TARGET_SUPERSCALAR)
8641 return 2;
8642 else
8643 return 1;
8646 /* Functions for ready queue reordering for sched1. */
8648 /* Get weight for mode for a set x. */
8649 static short
8650 find_set_regmode_weight (rtx x, enum machine_mode mode)
8652 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
8653 return 1;
8654 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
8656 if (GET_CODE (SET_DEST (x)) == REG)
8658 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
8659 return 1;
8660 else
8661 return 0;
8663 return 1;
8665 return 0;
8668 /* Get regmode weight for insn. */
8669 static short
8670 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
8672 short reg_weight = 0;
8673 rtx x;
8675 /* Increment weight for each register born here. */
8676 x = PATTERN (insn);
8677 reg_weight += find_set_regmode_weight (x, mode);
8678 if (GET_CODE (x) == PARALLEL)
8680 int j;
8681 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
8683 x = XVECEXP (PATTERN (insn), 0, j);
8684 reg_weight += find_set_regmode_weight (x, mode);
8687 /* Decrement weight for each register that dies here. */
8688 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
8690 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
8692 rtx note = XEXP (x, 0);
8693 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
8694 reg_weight--;
8697 return reg_weight;
8700 /* Calculate regmode weights for all insns of a basic block. */
8701 static void
8702 find_regmode_weight (int b, enum machine_mode mode)
8704 rtx insn, next_tail, head, tail;
8706 get_block_head_tail (b, &head, &tail);
8707 next_tail = NEXT_INSN (tail);
8709 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
8711 /* Handle register life information. */
8712 if (!INSN_P (insn))
8713 continue;
8715 if (mode == SFmode)
8716 INSN_REGMODE_WEIGHT (insn, mode) =
8717 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
8718 else if (mode == SImode)
8719 INSN_REGMODE_WEIGHT (insn, mode) =
8720 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
8724 /* Comparison function for ready queue sorting. */
8725 static int
8726 rank_for_reorder (const void *x, const void *y)
8728 rtx tmp = *(const rtx *) y;
8729 rtx tmp2 = *(const rtx *) x;
8731 /* The insn in a schedule group should be issued first. */
8732 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
8733 return SCHED_GROUP_P (tmp2) ? 1 : -1;
8735 /* If insns are equally good, sort by INSN_LUID (original insn order). This
8736 minimizes instruction movement, thus minimizing sched's effect on
8737 register pressure. */
8738 return INSN_LUID (tmp) - INSN_LUID (tmp2);
8741 /* Resort the array A in which only element at index N may be out of order. */
8742 static void
8743 swap_reorder (rtx *a, int n)
8745 rtx insn = a[n - 1];
8746 int i = n - 2;
8748 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
8750 a[i + 1] = a[i];
8751 i -= 1;
8753 a[i + 1] = insn;
8756 #define SCHED_REORDER(READY, N_READY) \
8757 do \
8759 if ((N_READY) == 2) \
8760 swap_reorder (READY, N_READY); \
8761 else if ((N_READY) > 2) \
8762 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
8764 while (0)
8766 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
8767 macro. */
8768 static void
8769 ready_reorder (rtx *ready, int nready)
8771 SCHED_REORDER (ready, nready);
8774 /* Calculate regmode weights for all insns of all basic blocks. */
8775 static void
8776 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
8777 int verbose ATTRIBUTE_UNUSED,
8778 int old_max_uid)
8780 basic_block b;
8782 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
8783 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
8785 FOR_EACH_BB_REVERSE (b)
8787 find_regmode_weight (b->index, SImode);
8788 find_regmode_weight (b->index, SFmode);
8791 CURR_REGMODE_PRESSURE (SImode) = 0;
8792 CURR_REGMODE_PRESSURE (SFmode) = 0;
8796 /* Cleanup. */
8797 static void
8798 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
8799 int verbose ATTRIBUTE_UNUSED)
8801 if (regmode_weight[0])
8803 free (regmode_weight[0]);
8804 regmode_weight[0] = NULL;
8806 if (regmode_weight[1])
8808 free (regmode_weight[1]);
8809 regmode_weight[1] = NULL;
8813 /* Cache the can_issue_more so that we can return it from reorder2. Also,
8814 keep count of register pressures on SImode and SFmode. */
8815 static int
8816 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
8817 int sched_verbose ATTRIBUTE_UNUSED,
8818 rtx insn,
8819 int can_issue_more)
8821 if (GET_CODE (PATTERN (insn)) != USE
8822 && GET_CODE (PATTERN (insn)) != CLOBBER)
8823 cached_can_issue_more = can_issue_more - 1;
8824 else
8825 cached_can_issue_more = can_issue_more;
8827 if (reload_completed)
8828 return cached_can_issue_more;
8830 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
8831 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
8833 return cached_can_issue_more;
8836 static void
8837 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
8838 int verbose ATTRIBUTE_UNUSED,
8839 int veclen ATTRIBUTE_UNUSED)
8841 CURR_REGMODE_PRESSURE (SImode) = 0;
8842 CURR_REGMODE_PRESSURE (SFmode) = 0;
8845 /* Some magic numbers. */
8846 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8847 functions that already have high pressure on r0. */
8848 #define R0_MAX_LIFE_REGIONS 2
8849 #define R0_MAX_LIVE_LENGTH 12
8850 /* Register Pressure thresholds for SImode and SFmode registers. */
8851 #define SIMODE_MAX_WEIGHT 5
8852 #define SFMODE_MAX_WEIGHT 10
8854 /* Return true if the pressure is high for MODE. */
8855 static short
8856 high_pressure (enum machine_mode mode)
8858 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8859 functions that already have high pressure on r0. */
8860 if ((REG_N_SETS (0) - REG_N_DEATHS (0)) >= R0_MAX_LIFE_REGIONS
8861 && REG_LIVE_LENGTH (0) >= R0_MAX_LIVE_LENGTH)
8862 return 1;
8864 if (mode == SFmode)
8865 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
8866 else
8867 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
8870 /* Reorder ready queue if register pressure is high. */
8871 static int
8872 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
8873 int sched_verbose ATTRIBUTE_UNUSED,
8874 rtx *ready,
8875 int *n_readyp,
8876 int clock_var ATTRIBUTE_UNUSED)
8878 if (reload_completed)
8879 return sh_issue_rate ();
8881 if (high_pressure (SFmode) || high_pressure (SImode))
8883 ready_reorder (ready, *n_readyp);
8886 return sh_issue_rate ();
8889 /* Skip cycles if the current register pressure is high. */
8890 static int
8891 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
8892 int sched_verbose ATTRIBUTE_UNUSED,
8893 rtx *ready ATTRIBUTE_UNUSED,
8894 int *n_readyp ATTRIBUTE_UNUSED,
8895 int clock_var ATTRIBUTE_UNUSED)
8897 if (reload_completed)
8898 return cached_can_issue_more;
8900 if (high_pressure(SFmode) || high_pressure (SImode))
8901 skip_cycles = 1;
8903 return cached_can_issue_more;
8906 /* Skip cycles without sorting the ready queue. This will move insns from
8907 Q->R. If this is the last cycle we are skipping, allow sorting of the ready
8908 queue by sh_reorder. */
8910 /* Generally, skipping this many cycles is sufficient for all insns to move
8911 from Q -> R. */
8912 #define MAX_SKIPS 8
8914 static int
8915 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
8916 int sched_verbose ATTRIBUTE_UNUSED,
8917 rtx insn ATTRIBUTE_UNUSED,
8918 int last_clock_var,
8919 int clock_var,
8920 int *sort_p)
8922 if (reload_completed)
8923 return 0;
8925 if (skip_cycles)
8927 if ((clock_var - last_clock_var) < MAX_SKIPS)
8929 *sort_p = 0;
8930 return 1;
8932 /* If this is the last cycle we are skipping, allow reordering of R. */
8933 if ((clock_var - last_clock_var) == MAX_SKIPS)
8935 *sort_p = 1;
8936 return 1;
8940 skip_cycles = 0;
8942 return 0;
8945 /* SHmedia requires registers for branches, so we can't generate new
8946 branches past reload. */
8947 static bool
8948 sh_cannot_modify_jumps_p (void)
8950 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
8953 static int
8954 sh_target_reg_class (void)
8956 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
8959 static bool
8960 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
8962 return (shmedia_space_reserved_for_target_registers
8963 && (! after_prologue_epilogue_gen || TARGET_SAVE_ALL_TARGET_REGS));
8966 static bool
8967 sh_ms_bitfield_layout_p (tree record_type ATTRIBUTE_UNUSED)
8969 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
8973 On the SH1..SH4, the trampoline looks like
8974 2 0002 D202 mov.l l2,r2
8975 1 0000 D301 mov.l l1,r3
8976 3 0004 422B jmp @r2
8977 4 0006 0009 nop
8978 5 0008 00000000 l1: .long area
8979 6 000c 00000000 l2: .long function
8981 SH5 (compact) uses r1 instead of r3 for the static chain. */
8984 /* Emit RTL insns to initialize the variable parts of a trampoline.
8985 FNADDR is an RTX for the address of the function's pure code.
8986 CXT is an RTX for the static chain value for the function. */
8988 void
8989 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
8991 if (TARGET_SHMEDIA64)
8993 rtx tramp_templ;
8994 int fixed_len;
8996 rtx movi1 = GEN_INT (0xcc000010);
8997 rtx shori1 = GEN_INT (0xc8000010);
8998 rtx src, dst;
9000 /* The following trampoline works within a +- 128 KB range for cxt:
9001 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
9002 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
9003 gettr tr1,r1; blink tr0,r63 */
9004 /* Address rounding makes it hard to compute the exact bounds of the
9005 offset for this trampoline, but we have a rather generous offset
9006 range, so frame_offset should do fine as an upper bound. */
9007 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
9009 /* ??? could optimize this trampoline initialization
9010 by writing DImode words with two insns each. */
9011 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
9012 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
9013 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
9014 insn = gen_rtx_AND (DImode, insn, mask);
9015 /* Or in ptb/u .,tr1 pattern */
9016 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
9017 insn = force_operand (insn, NULL_RTX);
9018 insn = gen_lowpart (SImode, insn);
9019 emit_move_insn (gen_rtx_MEM (SImode, tramp), insn);
9020 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
9021 insn = gen_rtx_AND (DImode, insn, mask);
9022 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
9023 insn = gen_lowpart (SImode, insn);
9024 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)), insn);
9025 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
9026 insn = gen_rtx_AND (DImode, insn, mask);
9027 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9028 insn = gen_lowpart (SImode, insn);
9029 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)), insn);
9030 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
9031 insn = gen_rtx_AND (DImode, insn, mask);
9032 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9033 insn = gen_lowpart (SImode, insn);
9034 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
9035 insn);
9036 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
9037 insn = gen_rtx_AND (DImode, insn, mask);
9038 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9039 insn = gen_lowpart (SImode, insn);
9040 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 16)),
9041 insn);
9042 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 20)),
9043 GEN_INT (0x6bf10600));
9044 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 24)),
9045 GEN_INT (0x4415fc10));
9046 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 28)),
9047 GEN_INT (0x4401fff0));
9048 emit_insn (gen_ic_invalidate_line (tramp));
9049 return;
9051 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
9052 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
9054 tramp_templ = gen_datalabel_ref (tramp_templ);
9055 dst = gen_rtx_MEM (BLKmode, tramp);
9056 src = gen_rtx_MEM (BLKmode, tramp_templ);
9057 set_mem_align (dst, 256);
9058 set_mem_align (src, 64);
9059 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
9061 emit_move_insn (gen_rtx_MEM (Pmode, plus_constant (tramp, fixed_len)),
9062 fnaddr);
9063 emit_move_insn (gen_rtx_MEM (Pmode,
9064 plus_constant (tramp,
9065 fixed_len
9066 + GET_MODE_SIZE (Pmode))),
9067 cxt);
9068 emit_insn (gen_ic_invalidate_line (tramp));
9069 return;
9071 else if (TARGET_SHMEDIA)
9073 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
9074 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
9075 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
9076 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
9077 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
9078 rotated 10 right, and the higher 16 bits of every 32 selected. */
9079 rtx movishori
9080 = force_reg (V2HImode, (simplify_gen_subreg
9081 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
9082 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
9083 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
9085 tramp = force_reg (Pmode, tramp);
9086 fnaddr = force_reg (SImode, fnaddr);
9087 cxt = force_reg (SImode, cxt);
9088 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
9089 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
9090 movishori));
9091 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
9092 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9093 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
9094 emit_move_insn (gen_rtx_MEM (DImode, tramp), quad0);
9095 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
9096 gen_rtx_SUBREG (V2HImode, cxt, 0),
9097 movishori));
9098 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
9099 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9100 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
9101 if (TARGET_LITTLE_ENDIAN)
9103 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
9104 emit_insn (gen_mextr4 (quad2, cxtload, blink));
9106 else
9108 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
9109 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
9111 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 8)), quad1);
9112 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, 16)), quad2);
9113 emit_insn (gen_ic_invalidate_line (tramp));
9114 return;
9116 else if (TARGET_SHCOMPACT)
9118 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
9119 return;
9121 emit_move_insn (gen_rtx_MEM (SImode, tramp),
9122 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
9123 SImode));
9124 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 4)),
9125 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
9126 SImode));
9127 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 8)),
9128 cxt);
9129 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 12)),
9130 fnaddr);
9131 if (TARGET_HARVARD)
9133 if (TARGET_USERMODE)
9134 emit_library_call (function_symbol ("__ic_invalidate"),
9135 0, VOIDmode, 1, tramp, SImode);
9136 else
9137 emit_insn (gen_ic_invalidate_line (tramp));
9141 /* FIXME: This is overly conservative. A SHcompact function that
9142 receives arguments ``by reference'' will have them stored in its
9143 own stack frame, so it must not pass pointers or references to
9144 these arguments to other functions by means of sibling calls. */
9145 static bool
9146 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9148 return (decl
9149 && (! TARGET_SHCOMPACT
9150 || current_function_args_info.stack_regs == 0)
9151 && ! sh_cfun_interrupt_handler_p ());
9154 /* Machine specific built-in functions. */
9156 struct builtin_description
9158 const enum insn_code icode;
9159 const char *const name;
9160 int signature;
9163 /* describe number and signedness of arguments; arg[0] == result
9164 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
9165 static const char signature_args[][4] =
9167 #define SH_BLTIN_V2SI2 0
9168 { 4, 4 },
9169 #define SH_BLTIN_V4HI2 1
9170 { 4, 4 },
9171 #define SH_BLTIN_V2SI3 2
9172 { 4, 4, 4 },
9173 #define SH_BLTIN_V4HI3 3
9174 { 4, 4, 4 },
9175 #define SH_BLTIN_V8QI3 4
9176 { 4, 4, 4 },
9177 #define SH_BLTIN_MAC_HISI 5
9178 { 1, 4, 4, 1 },
9179 #define SH_BLTIN_SH_HI 6
9180 { 4, 4, 1 },
9181 #define SH_BLTIN_SH_SI 7
9182 { 4, 4, 1 },
9183 #define SH_BLTIN_V4HI2V2SI 8
9184 { 4, 4, 4 },
9185 #define SH_BLTIN_V4HI2V8QI 9
9186 { 4, 4, 4 },
9187 #define SH_BLTIN_SISF 10
9188 { 4, 2 },
9189 #define SH_BLTIN_LDUA_L 11
9190 { 2, 8 },
9191 #define SH_BLTIN_LDUA_Q 12
9192 { 1, 8 },
9193 #define SH_BLTIN_STUA_L 13
9194 { 0, 8, 2 },
9195 #define SH_BLTIN_STUA_Q 14
9196 { 0, 8, 1 },
9197 #define SH_BLTIN_UDI 15
9198 { 0, 8, 1 },
9199 #define SH_BLTIN_NUM_SHARED_SIGNATURES 16
9200 #define SH_BLTIN_2 16
9201 #define SH_BLTIN_SU 16
9202 { 1, 2 },
9203 #define SH_BLTIN_3 17
9204 #define SH_BLTIN_SUS 17
9205 { 2, 2, 1 },
9206 #define SH_BLTIN_PSSV 18
9207 { 0, 8, 2, 2 },
9208 #define SH_BLTIN_XXUU 19
9209 #define SH_BLTIN_UUUU 19
9210 { 1, 1, 1, 1 },
9211 #define SH_BLTIN_PV 20
9212 { 0, 8 },
9214 /* mcmv: operands considered unsigned. */
9215 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9216 /* mperm: control value considered unsigned int. */
9217 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9218 /* mshards_q: returns signed short. */
9219 /* nsb: takes long long arg, returns unsigned char. */
9220 static const struct builtin_description bdesc[] =
9222 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
9223 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
9224 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
9225 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
9226 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
9227 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
9228 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
9229 #if 0
9230 { CODE_FOR_alloco32, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9231 { CODE_FOR_alloco64, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9232 #endif
9233 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
9234 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
9235 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
9236 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
9237 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
9238 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
9239 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
9240 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
9241 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
9242 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
9243 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_UDI },
9244 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_UDI },
9245 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_UDI },
9246 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_UDI },
9247 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_UDI },
9248 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_UDI },
9249 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_UDI },
9250 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
9251 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
9252 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
9253 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
9254 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
9255 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
9256 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
9257 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
9258 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
9259 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
9260 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
9261 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
9262 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
9263 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
9264 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
9265 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
9266 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
9267 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
9268 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
9269 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
9270 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
9271 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
9272 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
9273 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
9274 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
9275 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
9276 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
9277 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
9278 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
9279 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
9280 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
9281 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
9282 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
9283 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
9284 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
9285 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
9286 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
9287 #if 0
9288 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9289 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9290 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9291 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9292 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9293 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9294 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9295 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9296 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9297 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9298 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9299 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9300 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9301 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9302 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9303 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9304 #endif
9305 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
9306 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
9307 #if 0
9308 { CODE_FOR_prefetch32,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
9309 { CODE_FOR_prefetch64,"__builtin_sh_media_PREFO", SH_BLTIN_PSSV }
9310 #endif
9313 static void
9314 sh_media_init_builtins (void)
9316 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
9317 const struct builtin_description *d;
9319 memset (shared, 0, sizeof shared);
9320 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
9322 tree type, arg_type;
9323 int signature = d->signature;
9324 int i;
9326 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
9327 type = shared[signature];
9328 else
9330 int has_result = signature_args[signature][0] != 0;
9332 if (signature_args[signature][1] == 8
9333 && (insn_data[d->icode].operand[has_result].mode != Pmode))
9334 continue;
9335 if (! TARGET_FPU_ANY
9336 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
9337 continue;
9338 type = void_list_node;
9339 for (i = 3; ; i--)
9341 int arg = signature_args[signature][i];
9342 int opno = i - 1 + has_result;
9344 if (arg == 8)
9345 arg_type = ptr_type_node;
9346 else if (arg)
9347 arg_type = ((*lang_hooks.types.type_for_mode)
9348 (insn_data[d->icode].operand[opno].mode,
9349 (arg & 1)));
9350 else if (i)
9351 continue;
9352 else
9353 arg_type = void_type_node;
9354 if (i == 0)
9355 break;
9356 type = tree_cons (NULL_TREE, arg_type, type);
9358 type = build_function_type (arg_type, type);
9359 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
9360 shared[signature] = type;
9362 lang_hooks.builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
9363 NULL, NULL_TREE);
9367 /* Implements target hook vector_mode_supported_p. */
9368 bool
9369 sh_vector_mode_supported_p (enum machine_mode mode)
9371 if (TARGET_FPU_ANY
9372 && ((mode == V2SFmode)
9373 || (mode == V4SFmode)
9374 || (mode == V16SFmode)))
9375 return true;
9377 else if (TARGET_SHMEDIA
9378 && ((mode == V8QImode)
9379 || (mode == V2HImode)
9380 || (mode == V4HImode)
9381 || (mode == V2SImode)))
9382 return true;
9384 return false;
9387 static void
9388 sh_init_builtins (void)
9390 if (TARGET_SHMEDIA)
9391 sh_media_init_builtins ();
9394 /* Expand an expression EXP that calls a built-in function,
9395 with result going to TARGET if that's convenient
9396 (and in mode MODE if that's convenient).
9397 SUBTARGET may be used as the target for computing one of EXP's operands.
9398 IGNORE is nonzero if the value is to be ignored. */
9400 static rtx
9401 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9402 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
9404 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
9405 tree arglist = TREE_OPERAND (exp, 1);
9406 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9407 const struct builtin_description *d = &bdesc[fcode];
9408 enum insn_code icode = d->icode;
9409 int signature = d->signature;
9410 enum machine_mode tmode = VOIDmode;
9411 int nop = 0, i;
9412 rtx op[4];
9413 rtx pat;
9415 if (signature_args[signature][0])
9417 if (ignore)
9418 return 0;
9420 tmode = insn_data[icode].operand[0].mode;
9421 if (! target
9422 || GET_MODE (target) != tmode
9423 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9424 target = gen_reg_rtx (tmode);
9425 op[nop++] = target;
9427 else
9428 target = 0;
9430 for (i = 1; i <= 3; i++, nop++)
9432 tree arg;
9433 enum machine_mode opmode, argmode;
9435 if (! signature_args[signature][i])
9436 break;
9437 arg = TREE_VALUE (arglist);
9438 if (arg == error_mark_node)
9439 return const0_rtx;
9440 arglist = TREE_CHAIN (arglist);
9441 opmode = insn_data[icode].operand[nop].mode;
9442 argmode = TYPE_MODE (TREE_TYPE (arg));
9443 if (argmode != opmode)
9444 arg = build1 (NOP_EXPR,
9445 (*lang_hooks.types.type_for_mode) (opmode, 0), arg);
9446 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
9447 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
9448 op[nop] = copy_to_mode_reg (opmode, op[nop]);
9451 switch (nop)
9453 case 1:
9454 pat = (*insn_data[d->icode].genfun) (op[0]);
9455 break;
9456 case 2:
9457 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
9458 break;
9459 case 3:
9460 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
9461 break;
9462 case 4:
9463 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
9464 break;
9465 default:
9466 abort ();
9468 if (! pat)
9469 return 0;
9470 emit_insn (pat);
9471 return target;
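/* Expand the unary SFmode operation CODE on V2SF operand OP1 into OP0,
   one element at a time. */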
9474 void
9475 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
9477 rtx sel0 = const0_rtx;
9478 rtx sel1 = const1_rtx;
9479 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
9480 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
9482 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
9483 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
9486 void
9487 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
9489 rtx sel0 = const0_rtx;
9490 rtx sel1 = const1_rtx;
9491 rtx (*fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
9492 = gen_binary_sf_op;
9493 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
9495 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
9496 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
9499 /* Return true if a mode change from FROM to TO is invalid for the
9500 registers in class CLASS. */
9501 bool
9502 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9503 enum reg_class class)
9505 /* We want to enable the use of SUBREGs as a means to
9506 VEC_SELECT a single element of a vector. */
9507 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
9508 return (reg_classes_intersect_p (GENERAL_REGS, class));
9510 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
9512 if (TARGET_LITTLE_ENDIAN)
9514 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
9515 return reg_classes_intersect_p (DF_REGS, class);
9517 else
9519 if (GET_MODE_SIZE (from) < 8)
9520 return reg_classes_intersect_p (DF_HI_REGS, class);
9523 return 0;
9527 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
9528 that label is used. */
9530 void
9531 sh_mark_label (rtx address, int nuses)
9533 if (GOTOFF_P (address))
9535 /* Extract the label or symbol. */
9536 address = XEXP (address, 0);
9537 if (GET_CODE (address) == PLUS)
9538 address = XEXP (address, 0);
9539 address = XVECEXP (address, 0, 0);
9541 if (GET_CODE (address) == LABEL_REF
9542 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
9543 LABEL_NUSES (XEXP (address, 0)) += nuses;
9546 /* Compute extra cost of moving data between one register class
9547 and another. */
9549 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
9550 uses this information. Hence, the general register <-> floating point
9551 register information here is not used for SFmode. */
9554 sh_register_move_cost (enum machine_mode mode,
9555 enum reg_class srcclass, enum reg_class dstclass)
9557 if (dstclass == T_REGS || dstclass == PR_REGS)
9558 return 10;
9560 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
9561 return 4;
9563 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
9564 && REGCLASS_HAS_FP_REG (srcclass)
9565 && REGCLASS_HAS_FP_REG (dstclass))
9566 return 4;
9568 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
9569 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
9570 return 9;
9572 if ((REGCLASS_HAS_FP_REG (dstclass)
9573 && REGCLASS_HAS_GENERAL_REG (srcclass))
9574 || (REGCLASS_HAS_GENERAL_REG (dstclass)
9575 && REGCLASS_HAS_FP_REG (srcclass)))
9576 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
9577 * ((GET_MODE_SIZE (mode) + 7) / 8U));
9579 if ((dstclass == FPUL_REGS
9580 && REGCLASS_HAS_GENERAL_REG (srcclass))
9581 || (srcclass == FPUL_REGS
9582 && REGCLASS_HAS_GENERAL_REG (dstclass)))
9583 return 5;
9585 if ((dstclass == FPUL_REGS
9586 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
9587 || (srcclass == FPUL_REGS
9588 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
9589 return 7;
9591 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9592 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9593 return 20;
9595 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9596 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9597 return 4;
9599 if (TARGET_SHMEDIA
9600 || (TARGET_FMOVD
9601 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
9602 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
9603 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
9605 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
9608 /* Like register_operand, but take into account that SHMEDIA can use
9609 the constant zero like a general register. */
9611 sh_register_operand (rtx op, enum machine_mode mode)
9613 if (op == CONST0_RTX (mode) && TARGET_SHMEDIA)
9614 return 1;
9615 return register_operand (op, mode);
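/* Like arith_operand, but also accept the T register in SImode. */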
9619 cmpsi_operand (rtx op, enum machine_mode mode)
9621 if (GET_CODE (op) == REG && REGNO (op) == T_REG
9622 && GET_MODE (op) == SImode)
9623 return 1;
9624 return arith_operand (op, mode);
9627 static rtx emit_load_ptr (rtx, rtx);
9629 static rtx
9630 emit_load_ptr (rtx reg, rtx addr)
9632 rtx mem = gen_rtx_MEM (ptr_mode, addr);
9634 if (Pmode != ptr_mode)
9635 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
9636 return emit_move_insn (reg, mem);
9639 void
9640 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
9641 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
9642 tree function)
9644 CUMULATIVE_ARGS cum;
9645 int structure_value_byref = 0;
9646 rtx this, this_value, sibcall, insns, funexp;
9647 tree funtype = TREE_TYPE (function);
9648 int simple_add = CONST_OK_FOR_ADD (delta);
9649 int did_load = 0;
9650 rtx scratch0, scratch1, scratch2;
9652 reload_completed = 1;
9653 epilogue_completed = 1;
9654 no_new_pseudos = 1;
9655 current_function_uses_only_leaf_regs = 1;
9656 reset_block_changes ();
9658 emit_note (NOTE_INSN_PROLOGUE_END);
9660 /* Find the "this" pointer. We have such a wide range of ABIs for the
9661 SH that it's best to do this completely machine independently.
9662 "this" is passed as first argument, unless a structure return pointer
9663 comes first, in which case "this" comes second. */
9664 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
9665 #ifndef PCC_STATIC_STRUCT_RETURN
9666 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
9667 structure_value_byref = 1;
9668 #endif /* not PCC_STATIC_STRUCT_RETURN */
9669 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
9670 {
9671 tree ptype = build_pointer_type (TREE_TYPE (funtype));
9673 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
9674 }
9675 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
9677 /* For SHcompact, we only have r0 for a scratch register: r1 is the
9678 static chain pointer (even if you can't have nested virtual functions
9679 right now, someone might implement them sometime), and the rest of the
9680 registers are used for argument passing, are callee-saved, or reserved. */
9681 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
9682 if (! TARGET_SH5)
9683 {
9684 scratch1 = gen_rtx_REG (ptr_mode, 1);
9685 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
9686 to the location where struct values are to be returned.  */
9687 scratch2 = gen_rtx_REG (Pmode, 3);
9688 }
9689 else if (TARGET_SHMEDIA)
9690 {
9691 scratch1 = gen_rtx_REG (ptr_mode, 21);
9692 scratch2 = gen_rtx_REG (Pmode, TR0_REG);
9693 }
9695 this_value = plus_constant (this, delta);
9696 if (vcall_offset
9697 && (simple_add || scratch0 != scratch1)
9698 && strict_memory_address_p (ptr_mode, this_value))
9699 {
9700 emit_load_ptr (scratch0, this_value);
9701 did_load = 1;
9702 }
9704 if (!delta)
9705 ; /* Do nothing. */
9706 else if (simple_add)
9707 emit_move_insn (this, this_value);
9708 else
9709 {
9710 emit_move_insn (scratch1, GEN_INT (delta));
9711 emit_insn (gen_add2_insn (this, scratch1));
9712 }
9714 if (vcall_offset)
9715 {
9716 rtx offset_addr;
9718 if (!did_load)
9719 emit_load_ptr (scratch0, this);
9721 offset_addr = plus_constant (scratch0, vcall_offset);
9722 if (strict_memory_address_p (ptr_mode, offset_addr))
9723 ; /* Do nothing. */
9724 else if (! TARGET_SH5)
9725 {
9726 /* scratch0 != scratch1, and we have indexed loads. Get a better
9727 schedule by loading the offset into r1 and using an indexed
9728 load - then the load of r1 can issue before the load from
9729 (this + delta) finishes. */
9730 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9731 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
9732 }
9733 else if (CONST_OK_FOR_ADD (vcall_offset))
9734 {
9735 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
9736 offset_addr = scratch0;
9737 }
9738 else if (scratch0 != scratch1)
9739 {
9740 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9741 emit_insn (gen_add2_insn (scratch0, scratch1));
9742 offset_addr = scratch0;
9743 }
9744 else
9745 abort (); /* FIXME */
9746 emit_load_ptr (scratch0, offset_addr);
9748 if (Pmode != ptr_mode)
9749 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
9750 emit_insn (gen_add2_insn (this, scratch0));
9751 }
9753 /* Generate a tail call to the target function. */
9754 if (! TREE_USED (function))
9755 {
9756 assemble_external (function);
9757 TREE_USED (function) = 1;
9758 }
9759 funexp = XEXP (DECL_RTL (function), 0);
9760 emit_move_insn (scratch2, funexp);
9761 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
9762 sibcall = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
9763 SIBLING_CALL_P (sibcall) = 1;
9764 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
9765 emit_barrier ();
9767 /* Run just enough of rest_of_compilation to do scheduling and get
9768 the insns emitted. Note that use_thunk calls
9769 assemble_start_function and assemble_end_function. */
9771 insn_locators_initialize ();
9772 insns = get_insns ();
9774 if (optimize > 0 && flag_schedule_insns_after_reload)
9775 {
9776 if (! basic_block_info)
9777 init_flow ();
9778 rtl_register_cfg_hooks ();
9779 find_basic_blocks (insns, max_reg_num (), dump_file);
9780 life_analysis (dump_file, PROP_FINAL);
9782 split_all_insns (1);
9784 schedule_insns (dump_file);
9785 }
9787 sh_reorg ();
9789 if (optimize > 0 && flag_delayed_branch)
9790 dbr_schedule (insns, dump_file);
9791 shorten_branches (insns);
9792 final_start_function (insns, file, 1);
9793 final (insns, file, 1, 0);
9794 final_end_function ();
9796 if (optimize > 0 && flag_schedule_insns_after_reload)
9797 {
9798 /* Release all memory allocated by flow. */
9799 free_basic_block_vars ();
9801 /* Release all memory held by regsets now. */
9802 regset_release_memory ();
9803 }
9805 reload_completed = 0;
9806 epilogue_completed = 0;
9807 no_new_pseudos = 0;
9808 }
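/* Return a SYMBOL_REF for NAME with SYMBOL_FLAG_FUNCTION set, so that the
   rest of the compiler can tell it refers to a function.  */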
9810 rtx
9811 function_symbol (const char *name)
9812 {
9813 rtx sym = gen_rtx_SYMBOL_REF (Pmode, name);
9814 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
9815 return sym;
9816 }
9818 /* Return the number of the first general purpose register set in *S, or -1 if none is set.  */
9819 static int
9820 scavenge_reg (HARD_REG_SET *s)
9821 {
9822 int r;
9823 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
9824 if (TEST_HARD_REG_BIT (*s, r))
9825 return r;
9826 return -1;
9827 }
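/* Return an rtx holding the initial value of the PR (return address)
   register on entry to the current function.  */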
9829 rtx
9830 sh_get_pr_initial_val (void)
9831 {
9832 rtx val;
9834 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
9835 PR register on SHcompact, because it might be clobbered by the prologue.
9836 We check first if that is known to be the case. */
9837 if (TARGET_SHCOMPACT
9838 && ((current_function_args_info.call_cookie
9839 & ~ CALL_COOKIE_RET_TRAMP (1))
9840 || current_function_has_nonlocal_label))
9841 return gen_rtx_MEM (SImode, return_address_pointer_rtx);
9843 /* If we haven't finished rtl generation, there might be a nonlocal label
9844 that we haven't seen yet.
9845 ??? get_hard_reg_initial_val fails if it is called while no_new_pseudos
9846 is set, unless it has been called before for the same register. And even
9847 then, we end up in trouble if we didn't use the register in the same
9848 basic block before. So call get_hard_reg_initial_val now and wrap it
9849 in an unspec if we might need to replace it. */
9850 /* ??? We must also do this for TARGET_SH1 in general, because otherwise
9851 combine can put the pseudo returned by get_hard_reg_initial_val into
9852 instructions that need a general purpose register, which will fail to
9853 be recognized when the pseudo becomes allocated to PR. */
9854 val
9855 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
9856 if (TARGET_SH1)
9857 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
9858 return val;
9859 }
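/* Try to expand an scc of the T register (sh_compare_op0) against the
   constant sh_compare_op1 into TARGET.  Return 1 on success, 0 if the
   caller has to fall back to the generic expansion.  */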
9861 int
9862 sh_expand_t_scc (enum rtx_code code, rtx target)
9863 {
9864 rtx result = target;
9865 HOST_WIDE_INT val;
9867 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
9868 || GET_CODE (sh_compare_op1) != CONST_INT)
9869 return 0;
9870 if (GET_CODE (result) != REG)
9871 result = gen_reg_rtx (SImode);
9872 val = INTVAL (sh_compare_op1);
9873 if ((code == EQ && val == 1) || (code == NE && val == 0))
9874 emit_insn (gen_movt (result));
9875 else if ((code == EQ && val == 0) || (code == NE && val == 1))
9876 {
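/* RESULT must be 1 exactly when T is 0: subc computes
   result = result - result - T, i.e. -T, and the following add of 1
   turns that into 1 - T, the complement of the T bit.  The clobber
   marks the old contents of RESULT as dead.  */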
9877 emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
9878 emit_insn (gen_subc (result, result, result));
9879 emit_insn (gen_addsi3 (result, result, const1_rtx));
9880 }
9881 else if (code == EQ || code == NE)
9882 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
9883 else
9884 return 0;
9885 if (result != target)
9886 emit_move_insn (target, result);
9887 return 1;
9888 }
9890 /* INSN is an sfunc; return the rtx that describes the address used. */
9891 static rtx
9892 extract_sfunc_addr (rtx insn)
9893 {
9894 rtx pattern, part = NULL_RTX;
9895 int len, i;
9897 pattern = PATTERN (insn);
9898 len = XVECLEN (pattern, 0);
9899 for (i = 0; i < len; i++)
9900 {
9901 part = XVECEXP (pattern, 0, i);
9902 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
9903 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
9904 return XEXP (part, 0);
9905 }
9906 if (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE)
9907 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
9908 abort ();
9909 }
9911 /* Verify that the register in use_sfunc_addr still agrees with the address
9912 used in the sfunc. This prevents fill_slots_from_thread from changing
9913 use_sfunc_addr.
9914 INSN is the use_sfunc_addr instruction, and REG is the register it
9915 guards. */
9916 int
9917 check_use_sfunc_addr (rtx insn, rtx reg)
9918 {
9919 /* Search for the sfunc. It should really come right after INSN. */
9920 while ((insn = NEXT_INSN (insn)))
9921 {
9922 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
9923 break;
9924 if (! INSN_P (insn))
9925 continue;
9927 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
9928 insn = XVECEXP (PATTERN (insn), 0, 0);
9929 if (GET_CODE (PATTERN (insn)) != PARALLEL
9930 || get_attr_type (insn) != TYPE_SFUNC)
9931 continue;
9932 return rtx_equal_p (extract_sfunc_addr (insn), reg);
9933 }
9934 abort ();
9935 }
9937 /* Return 1 if OP is a MEM that can be the source of a simple move, i.e. a MEM whose address is a plain register or a register post-increment.  */
9939 int
9940 unaligned_load_operand (rtx op, enum machine_mode mode)
9941 {
9942 rtx inside;
9944 if (GET_CODE (op) != MEM || GET_MODE (op) != mode)
9945 return 0;
9947 inside = XEXP (op, 0);
9949 if (GET_CODE (inside) == POST_INC)
9950 inside = XEXP (inside, 0);
9952 if (GET_CODE (inside) == REG)
9953 return 1;
9955 return 0;
9956 }
9958 /* This function returns a constant rtx that represents 2**15 / pi in
9959 SFmode.  It's used to scale SFmode angles, in radians, to a
9960 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
9961 maps to 0x10000).  */
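/* 2**15 / pi = 32768 / 3.14159...  ~= 10430.378350470453, which is the
   literal used below.  */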
9963 static GTY(()) rtx sh_fsca_sf2int_rtx;
9965 rtx
9966 sh_fsca_sf2int (void)
9967 {
9968 if (! sh_fsca_sf2int_rtx)
9969 {
9970 REAL_VALUE_TYPE rv;
9972 real_from_string (&rv, "10430.378350470453");
9973 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
9974 }
9976 return sh_fsca_sf2int_rtx;
9977 }
9979 /* This function returns a constant rtx that represents 2**15 / pi in
9980 DFmode.  It's used to scale DFmode angles, in radians, to a
9981 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
9982 maps to 0x10000).  */
9984 static GTY(()) rtx sh_fsca_df2int_rtx;
9986 rtx
9987 sh_fsca_df2int (void)
9988 {
9989 if (! sh_fsca_df2int_rtx)
9990 {
9991 REAL_VALUE_TYPE rv;
9993 real_from_string (&rv, "10430.378350470453");
9994 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
9995 }
9997 return sh_fsca_df2int_rtx;
9998 }
10000 /* This function returns a constant rtx that represents pi / 2**15 in
10001 SFmode.  It's used to scale a fixed-point signed 16.16-bit fraction
10002 of a full circle back to an SFmode value (i.e., 0x10000 maps to
10003 2*pi).  */
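/* pi / 2**15 = 3.14159... / 32768 ~= 9.587379924285257e-5, which is the
   literal used below.  */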
10005 static GTY(()) rtx sh_fsca_int2sf_rtx;
10007 rtx
10008 sh_fsca_int2sf (void)
10009 {
10010 if (! sh_fsca_int2sf_rtx)
10011 {
10012 REAL_VALUE_TYPE rv;
10014 real_from_string (&rv, "9.587379924285257e-5");
10015 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
10016 }
10018 return sh_fsca_int2sf_rtx;
10019 }
10020 #include "gt-sh.h"