gcc/config/sh/sh.c
1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
22 Boston, MA 02110-1301, USA. */
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "insn-config.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "flags.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "function.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "output.h"
38 #include "insn-attr.h"
39 #include "toplev.h"
40 #include "recog.h"
41 #include "c-pragma.h"
42 #include "integrate.h"
43 #include "dwarf2.h"
44 #include "tm_p.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "real.h"
48 #include "langhooks.h"
49 #include "basic-block.h"
50 #include "cfglayout.h"
51 #include "intl.h"
52 #include "sched-int.h"
53 #include "ggc.h"
54 #include "tree-gimple.h"
55 #include "cfgloop.h"
58 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
60 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
61 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
63 /* These are some macros to abstract register modes. */
64 #define CONST_OK_FOR_ADD(size) \
65 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
66 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
67 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
68 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
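/* Editor's illustrative sketch (not part of the port): the mode-abstraction
   macros above let expanders stay word-size agnostic.  The helper below is
   hypothetical and only shows the intended shape of a use; on SHmedia64
   GEN_ADD3 resolves to gen_adddi3, elsewhere to gen_addsi3.  */
#if 0
static void
sketch_add_offset (rtx reg, HOST_WIDE_INT offset)
{
  if (CONST_OK_FOR_ADD (offset))
    /* Small constants can go straight into the add.  */
    emit_insn (GEN_ADD3 (reg, reg, GEN_INT (offset)));
  else
    {
      /* Otherwise move the constant into a scratch register first.  */
      rtx tmp = gen_reg_rtx (Pmode);
      emit_insn (GEN_MOV (tmp, GEN_INT (offset)));
      emit_insn (GEN_ADD3 (reg, reg, tmp));
    }
}
#endif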
70 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
71 int current_function_interrupt;
73 tree sh_deferred_function_attributes;
74 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
76 /* Global variables for machine-dependent things. */
78 /* Which cpu are we scheduling for. */
79 enum processor_type sh_cpu;
81 /* Definitions used in ready queue reordering for first scheduling pass. */
83 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
84 static short *regmode_weight[2];
86 /* Total SFmode and SImode weights of scheduled insns. */
87 static int curr_regmode_pressure[2];
89 /* If true, skip cycles for Q -> R movement. */
90 static int skip_cycles = 0;
92 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
93 and returned from sh_reorder2. */
94 static short cached_can_issue_more;
96 /* Saved operands from the last compare to use when we generate an scc
97 or bcc insn. */
99 rtx sh_compare_op0;
100 rtx sh_compare_op1;
102 /* Provides the class number of the smallest class containing
103 reg number. */
105 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
107 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
108 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
109 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
110 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
111 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
112 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
113 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
114 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
115 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
116 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
117 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
118 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
119 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
124 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
125 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
126 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
127 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
128 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
129 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
130 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
131 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
132 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
133 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
134 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
135 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
140 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
141 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
142 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
143 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
144 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
145 GENERAL_REGS, GENERAL_REGS,
148 char sh_register_names[FIRST_PSEUDO_REGISTER] \
149 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
151 char sh_additional_register_names[ADDREGNAMES_SIZE] \
152 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
153 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
155 /* Provide reg_class from a letter such as appears in the machine
156 description. *: target independently reserved letter.
157 reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */
159 enum reg_class reg_class_from_letter[] =
161 /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
162 /* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
163 /* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
164 /* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
165 /* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
166 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
167 /* y */ FPUL_REGS, /* z */ R0_REGS
170 int assembler_dialect;
172 static bool shmedia_space_reserved_for_target_registers;
174 static bool sh_handle_option (size_t, const char *, int);
175 static void split_branches (rtx);
176 static int branch_dest (rtx);
177 static void force_into (rtx, rtx);
178 static void print_slot (rtx);
179 static rtx add_constant (rtx, enum machine_mode, rtx);
180 static void dump_table (rtx, rtx);
181 static int hi_const (rtx);
182 static int broken_move (rtx);
183 static int mova_p (rtx);
184 static rtx find_barrier (int, rtx, rtx);
185 static int noncall_uses_reg (rtx, rtx, rtx *);
186 static rtx gen_block_redirect (rtx, int, int);
187 static void sh_reorg (void);
188 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
189 static rtx frame_insn (rtx);
190 static rtx push (int);
191 static void pop (int);
192 static void push_regs (HARD_REG_SET *, int);
193 static int calc_live_regs (HARD_REG_SET *);
194 static void mark_use (rtx, rtx *);
195 static HOST_WIDE_INT rounded_frame_size (int);
196 static rtx mark_constant_pool_use (rtx);
197 const struct attribute_spec sh_attribute_table[];
198 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
199 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
200 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
201 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
202 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
203 static void sh_insert_attributes (tree, tree *);
204 static const char *sh_check_pch_target_flags (int);
205 static int sh_adjust_cost (rtx, rtx, rtx, int);
206 static int sh_issue_rate (void);
207 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
208 static short find_set_regmode_weight (rtx, enum machine_mode);
209 static short find_insn_regmode_weight (rtx, enum machine_mode);
210 static void find_regmode_weight (int, enum machine_mode);
211 static void sh_md_init_global (FILE *, int, int);
212 static void sh_md_finish_global (FILE *, int);
213 static int rank_for_reorder (const void *, const void *);
214 static void swap_reorder (rtx *, int);
215 static void ready_reorder (rtx *, int);
216 static short high_pressure (enum machine_mode);
217 static int sh_reorder (FILE *, int, rtx *, int *, int);
218 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
219 static void sh_md_init (FILE *, int, int);
220 static int sh_variable_issue (FILE *, int, rtx, int);
222 static bool sh_function_ok_for_sibcall (tree, tree);
224 static bool sh_cannot_modify_jumps_p (void);
225 static int sh_target_reg_class (void);
226 static bool sh_optimize_target_register_callee_saved (bool);
227 static bool sh_ms_bitfield_layout_p (tree);
229 static void sh_init_builtins (void);
230 static void sh_media_init_builtins (void);
231 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
232 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
233 static void sh_file_start (void);
234 static int flow_dependent_p (rtx, rtx);
235 static void flow_dependent_p_1 (rtx, rtx, void *);
236 static int shiftcosts (rtx);
237 static int andcosts (rtx);
238 static int addsubcosts (rtx);
239 static int multcosts (rtx);
240 static bool unspec_caller_rtx_p (rtx);
241 static bool sh_cannot_copy_insn_p (rtx);
242 static bool sh_rtx_costs (rtx, int, int, int *);
243 static int sh_address_cost (rtx);
244 #ifdef TARGET_ADJUST_UNROLL_MAX
245 static int sh_adjust_unroll_max (struct loop *, int, int, int, int);
246 #endif
247 static int sh_pr_n_sets (void);
248 static rtx sh_allocate_initial_value (rtx);
249 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
250 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
251 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
252 static int scavenge_reg (HARD_REG_SET *s);
253 struct save_schedule_s;
254 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
255 struct save_schedule_s *, int);
257 static rtx sh_struct_value_rtx (tree, int);
258 static bool sh_return_in_memory (tree, tree);
259 static rtx sh_builtin_saveregs (void);
260 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
261 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
262 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
263 static tree sh_build_builtin_va_list (void);
264 static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
265 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
266 tree, bool);
267 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
268 tree, bool);
269 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
270 tree, bool);
271 static int sh_dwarf_calling_convention (tree);
272 static int hard_regs_intersect_p (HARD_REG_SET *, HARD_REG_SET *);
275 /* Initialize the GCC target structure. */
276 #undef TARGET_ATTRIBUTE_TABLE
277 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
279 /* The next two are used for debug info when compiling with -gdwarf. */
280 #undef TARGET_ASM_UNALIGNED_HI_OP
281 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
282 #undef TARGET_ASM_UNALIGNED_SI_OP
283 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
285 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
286 #undef TARGET_ASM_UNALIGNED_DI_OP
287 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
288 #undef TARGET_ASM_ALIGNED_DI_OP
289 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
291 #undef TARGET_ASM_FUNCTION_EPILOGUE
292 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
294 #undef TARGET_ASM_OUTPUT_MI_THUNK
295 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
297 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
298 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
300 #undef TARGET_ASM_FILE_START
301 #define TARGET_ASM_FILE_START sh_file_start
302 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
303 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
305 #undef TARGET_DEFAULT_TARGET_FLAGS
306 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
307 #undef TARGET_HANDLE_OPTION
308 #define TARGET_HANDLE_OPTION sh_handle_option
310 #undef TARGET_INSERT_ATTRIBUTES
311 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
313 #undef TARGET_SCHED_ADJUST_COST
314 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
316 #undef TARGET_SCHED_ISSUE_RATE
317 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
319 /* The next 5 hooks have been implemented to re-enable sched1. With the
320 help of these macros we limit the movement of insns in sched1 in order to
321 reduce register pressure. The overall idea is to keep count of the SImode
322 and SFmode regs required by already scheduled insns. When these counts
323 cross some threshold values, give priority to insns that free registers.
324 The insn that frees registers is most likely to be the insn with the lowest
325 LUID (original insn order), but such an insn might be sitting in the stalled
326 queue (Q) instead of the ready queue (R). To solve this, we skip up to a
327 maximum of 8 cycles so that such insns may move from Q -> R.
329 The descriptions of the hooks are as follows:
331 TARGET_SCHED_INIT_GLOBAL: A new target hook in the generic
332 scheduler; it is called inside the sched_init function just after the
333 find_insn_reg_weights function call. It is used to calculate the SImode
334 and SFmode weights of the insns of basic blocks, much like what
335 find_insn_reg_weights does.
336 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
338 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
339 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
340 (Q)->(R).
342 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
343 high, reorder the ready queue so that the insn with the lowest LUID will
344 be issued next.
346 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
347 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
349 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
350 can be returned from TARGET_SCHED_REORDER2.
352 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
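/* Editor's illustrative sketch (not part of the port): the decision rule the
   comment above describes, in simplified form.  The helper names
   (sketch_pressure, sketch_prefer_low_luid) and the threshold macros are
   hypothetical; the real logic lives in high_pressure, sh_reorder,
   sh_reorder2 and sh_dfa_new_cycle further down in this file.  */
#if 0
static void
sketch_reorder (rtx *ready, int n_ready)
{
  /* When the insns scheduled so far already require many SImode or SFmode
     registers, prefer the insn that appeared earliest in the original
     order (lowest LUID), since it is the most likely to free a register.  */
  if (sketch_pressure (SImode) > SKETCH_SIMODE_THRESHOLD
      || sketch_pressure (SFmode) > SKETCH_SFMODE_THRESHOLD)
    sketch_prefer_low_luid (ready, n_ready);
}
#endif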
354 #undef TARGET_SCHED_DFA_NEW_CYCLE
355 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
357 #undef TARGET_SCHED_INIT_GLOBAL
358 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
360 #undef TARGET_SCHED_FINISH_GLOBAL
361 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
363 #undef TARGET_SCHED_VARIABLE_ISSUE
364 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
366 #undef TARGET_SCHED_REORDER
367 #define TARGET_SCHED_REORDER sh_reorder
369 #undef TARGET_SCHED_REORDER2
370 #define TARGET_SCHED_REORDER2 sh_reorder2
372 #undef TARGET_SCHED_INIT
373 #define TARGET_SCHED_INIT sh_md_init
375 #undef TARGET_CANNOT_MODIFY_JUMPS_P
376 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
377 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
378 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
379 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
380 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
381 sh_optimize_target_register_callee_saved
383 #undef TARGET_MS_BITFIELD_LAYOUT_P
384 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
386 #undef TARGET_INIT_BUILTINS
387 #define TARGET_INIT_BUILTINS sh_init_builtins
388 #undef TARGET_EXPAND_BUILTIN
389 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
391 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
392 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
394 #undef TARGET_CANNOT_COPY_INSN_P
395 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
396 #undef TARGET_RTX_COSTS
397 #define TARGET_RTX_COSTS sh_rtx_costs
398 #undef TARGET_ADDRESS_COST
399 #define TARGET_ADDRESS_COST sh_address_cost
400 #undef TARGET_ALLOCATE_INITIAL_VALUE
401 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
403 #undef TARGET_MACHINE_DEPENDENT_REORG
404 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
406 #ifdef HAVE_AS_TLS
407 #undef TARGET_HAVE_TLS
408 #define TARGET_HAVE_TLS true
409 #endif
411 #undef TARGET_PROMOTE_PROTOTYPES
412 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
413 #undef TARGET_PROMOTE_FUNCTION_ARGS
414 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
415 #undef TARGET_PROMOTE_FUNCTION_RETURN
416 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
418 #undef TARGET_STRUCT_VALUE_RTX
419 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
420 #undef TARGET_RETURN_IN_MEMORY
421 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
423 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
424 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
425 #undef TARGET_SETUP_INCOMING_VARARGS
426 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
427 #undef TARGET_STRICT_ARGUMENT_NAMING
428 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
429 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
430 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
431 #undef TARGET_MUST_PASS_IN_STACK
432 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
433 #undef TARGET_PASS_BY_REFERENCE
434 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
435 #undef TARGET_CALLEE_COPIES
436 #define TARGET_CALLEE_COPIES sh_callee_copies
437 #undef TARGET_ARG_PARTIAL_BYTES
438 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
440 #undef TARGET_BUILD_BUILTIN_VA_LIST
441 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
442 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
443 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
445 #undef TARGET_VECTOR_MODE_SUPPORTED_P
446 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
448 #undef TARGET_CHECK_PCH_TARGET_FLAGS
449 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
451 #undef TARGET_DWARF_CALLING_CONVENTION
452 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
454 /* Return regmode weight for insn. */
455 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
457 /* Return current register pressure for regmode. */
458 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
460 #ifdef SYMBIAN
462 #undef TARGET_ENCODE_SECTION_INFO
463 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
464 #undef TARGET_STRIP_NAME_ENCODING
465 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
466 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
467 #define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class
469 #endif /* SYMBIAN */
471 #ifdef TARGET_ADJUST_UNROLL_MAX
472 #undef TARGET_ADJUST_UNROLL_MAX
473 #define TARGET_ADJUST_UNROLL_MAX sh_adjust_unroll_max
474 #endif
476 #undef TARGET_SECONDARY_RELOAD
477 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
479 struct gcc_target targetm = TARGET_INITIALIZER;
481 /* Implement TARGET_HANDLE_OPTION. */
483 static bool
484 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
485 int value ATTRIBUTE_UNUSED)
487 switch (code)
489 case OPT_m1:
490 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
491 return true;
493 case OPT_m2:
494 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
495 return true;
497 case OPT_m2a:
498 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
499 return true;
501 case OPT_m2a_nofpu:
502 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
503 return true;
505 case OPT_m2a_single:
506 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
507 return true;
509 case OPT_m2a_single_only:
510 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
511 return true;
513 case OPT_m2e:
514 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
515 return true;
517 case OPT_m3:
518 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
519 return true;
521 case OPT_m3e:
522 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
523 return true;
525 case OPT_m4:
526 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
527 return true;
529 case OPT_m4_nofpu:
530 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
531 return true;
533 case OPT_m4_single:
534 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
535 return true;
537 case OPT_m4_single_only:
538 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
539 return true;
541 case OPT_m4a:
542 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
543 return true;
545 case OPT_m4a_nofpu:
546 case OPT_m4al:
547 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
548 return true;
550 case OPT_m4a_single:
551 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
552 return true;
554 case OPT_m4a_single_only:
555 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
556 return true;
558 case OPT_m5_32media:
559 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
560 return true;
562 case OPT_m5_32media_nofpu:
563 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
564 return true;
566 case OPT_m5_64media:
567 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
568 return true;
570 case OPT_m5_64media_nofpu:
571 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
572 return true;
574 case OPT_m5_compact:
575 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
576 return true;
578 case OPT_m5_compact_nofpu:
579 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
580 return true;
582 default:
583 return true;
587 /* Print the operand address in x to the stream. */
589 void
590 print_operand_address (FILE *stream, rtx x)
592 switch (GET_CODE (x))
594 case REG:
595 case SUBREG:
596 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
597 break;
599 case PLUS:
601 rtx base = XEXP (x, 0);
602 rtx index = XEXP (x, 1);
604 switch (GET_CODE (index))
606 case CONST_INT:
607 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
608 reg_names[true_regnum (base)]);
609 break;
611 case REG:
612 case SUBREG:
614 int base_num = true_regnum (base);
615 int index_num = true_regnum (index);
617 fprintf (stream, "@(r0,%s)",
618 reg_names[MAX (base_num, index_num)]);
619 break;
622 default:
623 gcc_unreachable ();
626 break;
628 case PRE_DEC:
629 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
630 break;
632 case POST_INC:
633 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
634 break;
636 default:
637 x = mark_constant_pool_use (x);
638 output_addr_const (stream, x);
639 break;
643 /* Print operand x (an rtx) in assembler syntax to file stream
644 according to modifier code.
646 '.' print a .s if insn needs delay slot
647 ',' print LOCAL_LABEL_PREFIX
648 '@' print trap, rte or rts depending upon pragma interruptness
649 '#' output a nop if there is nothing to put in the delay slot
650 ''' print likelihood suffix (/u for unlikely).
651 '>' print branch target if -fverbose-asm
652 'O' print a constant without the #
653 'R' print the LSW of a dp value - changes if in little endian
654 'S' print the MSW of a dp value - changes if in little endian
655 'T' print the next word of a dp value - same as 'R' in big endian mode.
656 'M' print an `x' if `m' will print `base,index'.
657 'N' print 'r63' if the operand is (const_int 0).
658 'd' print a V2SF reg as dN instead of fpN.
659 'm' print a pair `base,offset' or `base,index', for LD and ST.
660 'U' Likewise for {LD,ST}{HI,LO}.
661 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
662 'o' output an operator. */
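/* Editor's illustrative sketch (not part of the port): a few of the codes
   documented above as they appear in output templates.  The wrapper
   function is hypothetical; the template strings mirror real uses such as
   "mov.w %O0,%1; braf %1" in output_far_jump below.  */
#if 0
static void
sketch_print_operand_example (rtx *operands)
{
  /* '%O0' prints operand 0 (a constant-pool label) without the leading
     '#', and '%1' prints operand 1 normally.  */
  output_asm_insn ("mov.w %O0,%1", operands);
  /* '%#' appends "\n\tnop" when the delay slot could not be filled.  */
  output_asm_insn ("jmp @%1%#", operands);
}
#endif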
664 void
665 print_operand (FILE *stream, rtx x, int code)
667 int regno;
668 enum machine_mode mode;
670 switch (code)
672 tree trapa_attr;
674 case '.':
675 if (final_sequence
676 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
677 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
678 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
679 break;
680 case ',':
681 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
682 break;
683 case '@':
684 trapa_attr = lookup_attribute ("trap_exit",
685 DECL_ATTRIBUTES (current_function_decl));
686 if (trapa_attr)
687 fprintf (stream, "trapa #%ld",
688 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
689 else if (sh_cfun_interrupt_handler_p ())
690 fprintf (stream, "rte");
691 else
692 fprintf (stream, "rts");
693 break;
694 case '#':
695 /* Output a nop if there's nothing in the delay slot. */
696 if (dbr_sequence_length () == 0)
697 fprintf (stream, "\n\tnop");
698 break;
699 case '\'':
701 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
703 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
704 fputs ("/u", stream);
705 break;
707 case '>':
708 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
710 fputs ("\t! target: ", stream);
711 output_addr_const (stream, JUMP_LABEL (current_output_insn));
713 break;
714 case 'O':
715 x = mark_constant_pool_use (x);
716 output_addr_const (stream, x);
717 break;
718 /* N.B.: %R / %S / %T adjust memory addresses by four.
719 For SHMEDIA, that means they can be used to access the first and
720 second 32 bit part of a 64 bit (or larger) value that
721 might be held in floating point registers or memory.
722 While they can be used to access 64 bit parts of a larger value
723 held in general purpose registers, that won't work with memory -
724 neither for fp registers, since the frxx names are used. */
725 case 'R':
726 if (REG_P (x) || GET_CODE (x) == SUBREG)
728 regno = true_regnum (x);
729 regno += FP_REGISTER_P (regno) ? 1 : LSW;
730 fputs (reg_names[regno], (stream));
732 else if (MEM_P (x))
734 x = adjust_address (x, SImode, 4 * LSW);
735 print_operand_address (stream, XEXP (x, 0));
737 else
739 rtx sub = NULL_RTX;
741 mode = GET_MODE (x);
742 if (mode == VOIDmode)
743 mode = DImode;
744 if (GET_MODE_SIZE (mode) >= 8)
745 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
746 if (sub)
747 print_operand (stream, sub, 0);
748 else
749 output_operand_lossage ("invalid operand to %%R");
751 break;
752 case 'S':
753 if (REG_P (x) || GET_CODE (x) == SUBREG)
755 regno = true_regnum (x);
756 regno += FP_REGISTER_P (regno) ? 0 : MSW;
757 fputs (reg_names[regno], (stream));
759 else if (MEM_P (x))
761 x = adjust_address (x, SImode, 4 * MSW);
762 print_operand_address (stream, XEXP (x, 0));
764 else
766 rtx sub = NULL_RTX;
768 mode = GET_MODE (x);
769 if (mode == VOIDmode)
770 mode = DImode;
771 if (GET_MODE_SIZE (mode) >= 8)
772 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
773 if (sub)
774 print_operand (stream, sub, 0);
775 else
776 output_operand_lossage ("invalid operand to %%S");
778 break;
779 case 'T':
780 /* Next word of a double. */
781 switch (GET_CODE (x))
783 case REG:
784 fputs (reg_names[REGNO (x) + 1], (stream));
785 break;
786 case MEM:
787 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
788 && GET_CODE (XEXP (x, 0)) != POST_INC)
789 x = adjust_address (x, SImode, 4);
790 print_operand_address (stream, XEXP (x, 0));
791 break;
792 default:
793 break;
795 break;
796 case 'o':
797 switch (GET_CODE (x))
799 case PLUS: fputs ("add", stream); break;
800 case MINUS: fputs ("sub", stream); break;
801 case MULT: fputs ("mul", stream); break;
802 case DIV: fputs ("div", stream); break;
803 case EQ: fputs ("eq", stream); break;
804 case NE: fputs ("ne", stream); break;
805 case GT: case LT: fputs ("gt", stream); break;
806 case GE: case LE: fputs ("ge", stream); break;
807 case GTU: case LTU: fputs ("gtu", stream); break;
808 case GEU: case LEU: fputs ("geu", stream); break;
809 default:
810 break;
812 break;
813 case 'M':
814 if (GET_CODE (x) == MEM
815 && GET_CODE (XEXP (x, 0)) == PLUS
816 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
817 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
818 fputc ('x', stream);
819 break;
821 case 'm':
822 gcc_assert (GET_CODE (x) == MEM);
823 x = XEXP (x, 0);
824 /* Fall through. */
825 case 'U':
826 switch (GET_CODE (x))
828 case REG:
829 case SUBREG:
830 print_operand (stream, x, 0);
831 fputs (", 0", stream);
832 break;
834 case PLUS:
835 print_operand (stream, XEXP (x, 0), 0);
836 fputs (", ", stream);
837 print_operand (stream, XEXP (x, 1), 0);
838 break;
840 default:
841 gcc_unreachable ();
843 break;
845 case 'd':
846 gcc_assert (GET_CODE (x) == REG && GET_MODE (x) == V2SFmode);
848 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
849 break;
851 case 'N':
852 if (x == CONST0_RTX (GET_MODE (x)))
854 fprintf ((stream), "r63");
855 break;
857 goto default_output;
858 case 'u':
859 if (GET_CODE (x) == CONST_INT)
861 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
862 break;
864 /* Fall through. */
866 default_output:
867 default:
868 regno = 0;
869 mode = GET_MODE (x);
871 switch (GET_CODE (x))
873 case TRUNCATE:
875 rtx inner = XEXP (x, 0);
876 int offset = 0;
877 enum machine_mode inner_mode;
879 /* We might see SUBREGs with vector mode registers inside. */
880 if (GET_CODE (inner) == SUBREG
881 && (GET_MODE_SIZE (GET_MODE (inner))
882 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
883 && subreg_lowpart_p (inner))
884 inner = SUBREG_REG (inner);
885 if (GET_CODE (inner) == CONST_INT)
887 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
888 goto default_output;
890 inner_mode = GET_MODE (inner);
891 if (GET_CODE (inner) == SUBREG
892 && (GET_MODE_SIZE (GET_MODE (inner))
893 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
894 && GET_CODE (SUBREG_REG (inner)) == REG)
896 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
897 GET_MODE (SUBREG_REG (inner)),
898 SUBREG_BYTE (inner),
899 GET_MODE (inner));
900 inner = SUBREG_REG (inner);
902 if (GET_CODE (inner) != REG || GET_MODE_SIZE (inner_mode) > 8)
903 abort ();
904 /* Floating point register pairs are always big endian;
905 general purpose registers are 64 bit wide. */
906 regno = REGNO (inner);
907 regno = (HARD_REGNO_NREGS (regno, inner_mode)
908 - HARD_REGNO_NREGS (regno, mode))
909 + offset;
910 x = inner;
911 goto reg;
913 case SIGN_EXTEND:
914 x = XEXP (x, 0);
915 goto reg;
916 /* FIXME: We need this on SHmedia32 because reload generates
917 some sign-extended HI or QI loads into DImode registers
918 but, because Pmode is SImode, the address ends up with a
919 subreg:SI of the DImode register. Maybe reload should be
920 fixed so as to apply alter_subreg to such loads? */
921 case IF_THEN_ELSE:
922 gcc_assert (trapping_target_operand (x, VOIDmode));
923 x = XEXP (XEXP (x, 2), 0);
924 goto default_output;
925 case SUBREG:
926 gcc_assert (SUBREG_BYTE (x) == 0
927 && GET_CODE (SUBREG_REG (x)) == REG);
929 x = SUBREG_REG (x);
930 /* Fall through. */
932 reg:
933 case REG:
934 regno += REGNO (x);
935 if (FP_REGISTER_P (regno)
936 && mode == V16SFmode)
937 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
938 else if (FP_REGISTER_P (REGNO (x))
939 && mode == V4SFmode)
940 fprintf ((stream), "fv%s", reg_names[regno] + 2);
941 else if (GET_CODE (x) == REG
942 && mode == V2SFmode)
943 fprintf ((stream), "fp%s", reg_names[regno] + 2);
944 else if (FP_REGISTER_P (REGNO (x))
945 && GET_MODE_SIZE (mode) > 4)
946 fprintf ((stream), "d%s", reg_names[regno] + 1);
947 else
948 fputs (reg_names[regno], (stream));
949 break;
951 case MEM:
952 output_address (XEXP (x, 0));
953 break;
955 case CONST:
956 if (TARGET_SHMEDIA
957 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
958 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
959 && (GET_MODE (XEXP (x, 0)) == DImode
960 || GET_MODE (XEXP (x, 0)) == SImode)
961 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
962 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
964 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
965 rtx val2 = val;
966 bool nested_expr = false;
968 fputc ('(', stream);
969 if (GET_CODE (val) == ASHIFTRT)
971 fputc ('(', stream);
972 val2 = XEXP (val, 0);
974 if (GET_CODE (val2) == CONST
975 || GET_RTX_CLASS (GET_CODE (val2)) != RTX_OBJ)
977 fputc ('(', stream);
978 nested_expr = true;
980 output_addr_const (stream, val2);
981 if (nested_expr)
982 fputc (')', stream);
983 if (GET_CODE (val) == ASHIFTRT)
985 fputs (" >> ", stream);
986 output_addr_const (stream, XEXP (val, 1));
987 fputc (')', stream);
989 fputs (" & 65535)", stream);
990 break;
993 /* Fall through. */
994 default:
995 if (TARGET_SH1)
996 fputc ('#', stream);
997 output_addr_const (stream, x);
998 break;
1000 break;
1004 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1005 static void
1006 force_into (rtx value, rtx target)
1008 value = force_operand (value, target);
1009 if (! rtx_equal_p (value, target))
1010 emit_insn (gen_move_insn (target, value));
1013 /* Emit code to perform a block move. Choose the best method.
1015 OPERANDS[0] is the destination.
1016 OPERANDS[1] is the source.
1017 OPERANDS[2] is the size.
1018 OPERANDS[3] is the alignment safe to use. */
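/* Editor's note (illustration, not part of the port): for a constant
   12-byte copy with 4-byte alignment on TARGET_HARD_SH4 this routine
   dispatches to the __movmemSI12_i4 library entry; a constant 72-byte copy
   on other SH targets (without TARGET_SMALLCODE) falls through to the
   generic __movmem helper with r6 = 14, i.e. one pass through the big loop
   and a final switch of -2, as the comment near that call spells out.  */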
1020 int
1021 expand_block_move (rtx *operands)
1023 int align = INTVAL (operands[3]);
1024 int constp = (GET_CODE (operands[2]) == CONST_INT);
1025 int bytes = (constp ? INTVAL (operands[2]) : 0);
1027 if (! constp)
1028 return 0;
1030 /* If we could use mov.l to move words and dest is word-aligned, we
1031 can use movua.l for loads and still generate a relatively short
1032 and efficient sequence. */
1033 if (TARGET_SH4A_ARCH && align < 4
1034 && MEM_ALIGN (operands[0]) >= 32
1035 && can_move_by_pieces (bytes, 32))
1037 rtx dest = copy_rtx (operands[0]);
1038 rtx src = copy_rtx (operands[1]);
1039 /* We could use different pseudos for each copied word, but
1040 since movua can only load into r0, it's kind of
1041 pointless. */
1042 rtx temp = gen_reg_rtx (SImode);
1043 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1044 int copied = 0;
1046 while (copied + 4 <= bytes)
1048 rtx to = adjust_address (dest, SImode, copied);
1049 rtx from = adjust_automodify_address (src, SImode, src_addr, copied);
1051 emit_insn (gen_movua (temp, from));
1052 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1053 emit_move_insn (to, temp);
1054 copied += 4;
1057 if (copied < bytes)
1058 move_by_pieces (adjust_address (dest, BLKmode, copied),
1059 adjust_automodify_address (src, BLKmode,
1060 src_addr, copied),
1061 bytes - copied, align, 0);
1063 return 1;
1066 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1067 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1068 if (align < 4 || (bytes % 4 != 0))
1069 return 0;
1071 if (TARGET_HARD_SH4)
1073 if (bytes < 12)
1074 return 0;
1075 else if (bytes == 12)
1077 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1078 rtx r4 = gen_rtx_REG (SImode, 4);
1079 rtx r5 = gen_rtx_REG (SImode, 5);
1081 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1082 force_into (XEXP (operands[0], 0), r4);
1083 force_into (XEXP (operands[1], 0), r5);
1084 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1085 return 1;
1087 else if (! TARGET_SMALLCODE)
1089 const char *entry_name;
1090 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1091 int dwords;
1092 rtx r4 = gen_rtx_REG (SImode, 4);
1093 rtx r5 = gen_rtx_REG (SImode, 5);
1094 rtx r6 = gen_rtx_REG (SImode, 6);
1096 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1097 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1098 force_into (XEXP (operands[0], 0), r4);
1099 force_into (XEXP (operands[1], 0), r5);
1101 dwords = bytes >> 3;
1102 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1103 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1104 return 1;
1106 else
1107 return 0;
1109 if (bytes < 64)
1111 char entry[30];
1112 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1113 rtx r4 = gen_rtx_REG (SImode, 4);
1114 rtx r5 = gen_rtx_REG (SImode, 5);
1116 sprintf (entry, "__movmemSI%d", bytes);
1117 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1118 force_into (XEXP (operands[0], 0), r4);
1119 force_into (XEXP (operands[1], 0), r5);
1120 emit_insn (gen_block_move_real (func_addr_rtx));
1121 return 1;
1124 /* This is the same number of bytes as a memcpy call, but to a different,
1125 less common function name, so this will occasionally use more space. */
1126 if (! TARGET_SMALLCODE)
1128 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1129 int final_switch, while_loop;
1130 rtx r4 = gen_rtx_REG (SImode, 4);
1131 rtx r5 = gen_rtx_REG (SImode, 5);
1132 rtx r6 = gen_rtx_REG (SImode, 6);
1134 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1135 force_into (XEXP (operands[0], 0), r4);
1136 force_into (XEXP (operands[1], 0), r5);
1138 /* r6 controls the size of the move. 16 is decremented from it
1139 for each 64 bytes moved. Then the negative bit left over is used
1140 as an index into a list of move instructions. e.g., a 72 byte move
1141 would be set up with size(r6) = 14, for one iteration through the
1142 big while loop, and a switch of -2 for the last part. */
1144 final_switch = 16 - ((bytes / 4) % 16);
1145 while_loop = ((bytes / 4) / 16 - 1) * 16;
1146 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1147 emit_insn (gen_block_lump_real (func_addr_rtx));
1148 return 1;
1151 return 0;
1154 /* Prepare operands for a move define_expand; specifically, one of the
1155 operands must be in a register. */
1157 int
1158 prepare_move_operands (rtx operands[], enum machine_mode mode)
1160 if ((mode == SImode || mode == DImode)
1161 && flag_pic
1162 && ! ((mode == Pmode || mode == ptr_mode)
1163 && tls_symbolic_operand (operands[1], Pmode) != 0))
1165 rtx temp;
1166 if (SYMBOLIC_CONST_P (operands[1]))
1168 if (GET_CODE (operands[0]) == MEM)
1169 operands[1] = force_reg (Pmode, operands[1]);
1170 else if (TARGET_SHMEDIA
1171 && GET_CODE (operands[1]) == LABEL_REF
1172 && target_reg_operand (operands[0], mode))
1173 /* It's ok. */;
1174 else
1176 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
1177 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1180 else if (GET_CODE (operands[1]) == CONST
1181 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1182 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1184 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
1185 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1186 mode, temp);
1187 operands[1] = expand_binop (mode, add_optab, temp,
1188 XEXP (XEXP (operands[1], 0), 1),
1189 no_new_pseudos ? temp
1190 : gen_reg_rtx (Pmode),
1191 0, OPTAB_LIB_WIDEN);
1195 if (! reload_in_progress && ! reload_completed)
1197 /* Copy the source to a register if both operands aren't registers. */
1198 if (! register_operand (operands[0], mode)
1199 && ! sh_register_operand (operands[1], mode))
1200 operands[1] = copy_to_mode_reg (mode, operands[1]);
1202 if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
1204 /* This is like change_address_1 (operands[0], mode, 0, 1) ,
1205 except that we can't use that function because it is static. */
1206 rtx new = change_address (operands[0], mode, 0);
1207 MEM_COPY_ATTRIBUTES (new, operands[0]);
1208 operands[0] = new;
1211 /* This case can happen while generating code to move the result
1212 of a library call to the target. Reject `st r0,@(rX,rY)' because
1213 reload will fail to find a spill register for rX, since r0 is already
1214 being used for the source. */
1215 else if (TARGET_SH1
1216 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1217 && GET_CODE (operands[0]) == MEM
1218 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1219 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
1220 operands[1] = copy_to_mode_reg (mode, operands[1]);
1223 if (mode == Pmode || mode == ptr_mode)
1225 rtx op0, op1, opc;
1226 enum tls_model tls_kind;
1228 op0 = operands[0];
1229 op1 = operands[1];
1230 if (GET_CODE (op1) == CONST
1231 && GET_CODE (XEXP (op1, 0)) == PLUS
1232 && tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode))
1234 opc = XEXP (XEXP (op1, 0), 1);
1235 op1 = XEXP (XEXP (op1, 0), 0);
1237 else
1238 opc = NULL_RTX;
1240 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
1242 rtx tga_op1, tga_ret, tmp, tmp2;
1244 switch (tls_kind)
1246 case TLS_MODEL_GLOBAL_DYNAMIC:
1247 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1248 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1249 op1 = tga_ret;
1250 break;
1252 case TLS_MODEL_LOCAL_DYNAMIC:
1253 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1254 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1256 tmp = gen_reg_rtx (Pmode);
1257 emit_move_insn (tmp, tga_ret);
1259 if (register_operand (op0, Pmode))
1260 tmp2 = op0;
1261 else
1262 tmp2 = gen_reg_rtx (Pmode);
1264 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1265 op1 = tmp2;
1266 break;
1268 case TLS_MODEL_INITIAL_EXEC:
1269 if (! flag_pic)
1271 /* Don't schedule insns for getting GOT address when
1272 the first scheduling is enabled, to avoid spill
1273 failures for R0. */
1274 if (flag_schedule_insns)
1275 emit_insn (gen_blockage ());
1276 emit_insn (gen_GOTaddr2picreg ());
1277 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode,
1278 PIC_REG)));
1279 if (flag_schedule_insns)
1280 emit_insn (gen_blockage ());
1282 tga_op1 = no_new_pseudos ? op0 : gen_reg_rtx (Pmode);
1283 tmp = gen_sym2GOTTPOFF (op1);
1284 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1285 op1 = tga_op1;
1286 break;
1288 case TLS_MODEL_LOCAL_EXEC:
1289 tmp2 = gen_reg_rtx (Pmode);
1290 emit_insn (gen_load_gbr (tmp2));
1291 tmp = gen_reg_rtx (Pmode);
1292 emit_insn (gen_symTPOFF2reg (tmp, op1));
1294 if (register_operand (op0, Pmode))
1295 op1 = op0;
1296 else
1297 op1 = gen_reg_rtx (Pmode);
1299 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1300 break;
1302 default:
1303 gcc_unreachable ();
1305 if (opc)
1306 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1307 operands[1] = op1;
1311 return 0;
1314 /* Prepare the operands for an scc instruction; make sure that the
1315 compare has been done. */
1316 rtx
1317 prepare_scc_operands (enum rtx_code code)
1319 rtx t_reg = gen_rtx_REG (SImode, T_REG);
1320 enum rtx_code oldcode = code;
1321 enum machine_mode mode;
1323 /* First need a compare insn. */
1324 switch (code)
1326 case NE:
1327 /* It isn't possible to handle this case. */
1328 gcc_unreachable ();
1329 case LT:
1330 code = GT;
1331 break;
1332 case LE:
1333 code = GE;
1334 break;
1335 case LTU:
1336 code = GTU;
1337 break;
1338 case LEU:
1339 code = GEU;
1340 break;
1341 default:
1342 break;
1344 if (code != oldcode)
1346 rtx tmp = sh_compare_op0;
1347 sh_compare_op0 = sh_compare_op1;
1348 sh_compare_op1 = tmp;
1351 mode = GET_MODE (sh_compare_op0);
1352 if (mode == VOIDmode)
1353 mode = GET_MODE (sh_compare_op1);
1355 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1356 if ((code != EQ && code != NE
1357 && (sh_compare_op1 != const0_rtx
1358 || code == GTU || code == GEU || code == LTU || code == LEU))
1359 || (mode == DImode && sh_compare_op1 != const0_rtx)
1360 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1361 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1363 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1364 (mode == SFmode ? emit_sf_insn : emit_df_insn)
1365 (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
1366 gen_rtx_SET (VOIDmode, t_reg,
1367 gen_rtx_fmt_ee (code, SImode,
1368 sh_compare_op0, sh_compare_op1)),
1369 gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
1370 else
1371 emit_insn (gen_rtx_SET (VOIDmode, t_reg,
1372 gen_rtx_fmt_ee (code, SImode,
1373 sh_compare_op0, sh_compare_op1)));
1375 return t_reg;
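/* Editor's note (illustration, not part of the port): SH only has the
   "greater" flavours of T-bit compares (cmp/gt, cmp/ge, cmp/hi, cmp/hs),
   so e.g. prepare_scc_operands (LT) swaps sh_compare_op0 and
   sh_compare_op1 and emits a GT comparison instead; the caller then uses
   the returned T_REG rtx as the scc result.  */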
1378 /* Called from the md file, set up the operands of a compare instruction. */
1380 void
1381 from_compare (rtx *operands, int code)
1383 enum machine_mode mode = GET_MODE (sh_compare_op0);
1384 rtx insn;
1385 if (mode == VOIDmode)
1386 mode = GET_MODE (sh_compare_op1);
1387 if (code != EQ
1388 || mode == DImode
1389 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1391 /* Force args into regs, since we can't use constants here. */
1392 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1393 if (sh_compare_op1 != const0_rtx
1394 || code == GTU || code == GEU
1395 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1396 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1398 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
1400 from_compare (operands, GT);
1401 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
1403 else
1404 insn = gen_rtx_SET (VOIDmode,
1405 gen_rtx_REG (SImode, T_REG),
1406 gen_rtx_fmt_ee (code, SImode,
1407 sh_compare_op0, sh_compare_op1));
1408 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1410 insn = gen_rtx_PARALLEL (VOIDmode,
1411 gen_rtvec (2, insn,
1412 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
1413 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
1415 else
1416 emit_insn (insn);
1419 /* Functions to output assembly code. */
1421 /* Return a sequence of instructions to perform DI or DF move.
1423 Since the SH cannot move a DI or DF in one instruction, we have
1424 to take care when we see overlapping source and dest registers. */
1426 const char *
1427 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
1428 enum machine_mode mode)
1430 rtx dst = operands[0];
1431 rtx src = operands[1];
1433 if (GET_CODE (dst) == MEM
1434 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
1435 return "mov.l %T1,%0\n\tmov.l %1,%0";
1437 if (register_operand (dst, mode)
1438 && register_operand (src, mode))
1440 if (REGNO (src) == MACH_REG)
1441 return "sts mach,%S0\n\tsts macl,%R0";
1443 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1444 when mov.d r1,r0 do r1->r0 then r2->r1. */
1446 if (REGNO (src) + 1 == REGNO (dst))
1447 return "mov %T1,%T0\n\tmov %1,%0";
1448 else
1449 return "mov %1,%0\n\tmov %T1,%T0";
1451 else if (GET_CODE (src) == CONST_INT)
1453 if (INTVAL (src) < 0)
1454 output_asm_insn ("mov #-1,%S0", operands);
1455 else
1456 output_asm_insn ("mov #0,%S0", operands);
1458 return "mov %1,%R0";
1460 else if (GET_CODE (src) == MEM)
1462 int ptrreg = -1;
1463 int dreg = REGNO (dst);
1464 rtx inside = XEXP (src, 0);
1466 switch (GET_CODE (inside))
1468 case REG:
1469 ptrreg = REGNO (inside);
1470 break;
1472 case SUBREG:
1473 ptrreg = subreg_regno (inside);
1474 break;
1476 case PLUS:
1477 ptrreg = REGNO (XEXP (inside, 0));
1478 /* ??? A r0+REG address shouldn't be possible here, because it isn't
1479 an offsettable address. Unfortunately, offsettable addresses use
1480 QImode to check the offset, and a QImode offsettable address
1481 requires r0 for the other operand, which is not currently
1482 supported, so we can't use the 'o' constraint.
1483 Thus we must check for and handle r0+REG addresses here.
1484 We punt for now, since this is likely very rare. */
1485 gcc_assert (GET_CODE (XEXP (inside, 1)) != REG);
1486 break;
1488 case LABEL_REF:
1489 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1490 case POST_INC:
1491 return "mov.l %1,%0\n\tmov.l %1,%T0";
1492 default:
1493 gcc_unreachable ();
1496 /* Work out the safe way to copy. Copy into the second half first. */
1497 if (dreg == ptrreg)
1498 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1501 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1504 /* Print an instruction which would have gone into a delay slot after
1505 another instruction, but couldn't because the other instruction expanded
1506 into a sequence where putting the slot insn at the end wouldn't work. */
1508 static void
1509 print_slot (rtx insn)
1511 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
1513 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
1516 const char *
1517 output_far_jump (rtx insn, rtx op)
1519 struct { rtx lab, reg, op; } this;
1520 rtx braf_base_lab = NULL_RTX;
1521 const char *jump;
1522 int far;
1523 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1524 rtx prev;
1526 this.lab = gen_label_rtx ();
1528 if (TARGET_SH2
1529 && offset >= -32764
1530 && offset - get_attr_length (insn) <= 32766)
1532 far = 0;
1533 jump = "mov.w %O0,%1; braf %1";
1535 else
1537 far = 1;
1538 if (flag_pic)
1540 if (TARGET_SH2)
1541 jump = "mov.l %O0,%1; braf %1";
1542 else
1543 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1545 else
1546 jump = "mov.l %O0,%1; jmp @%1";
1548 /* If we have a scratch register available, use it. */
1549 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1550 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1552 this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1553 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1554 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1555 output_asm_insn (jump, &this.lab);
1556 if (dbr_sequence_length ())
1557 print_slot (final_sequence);
1558 else
1559 output_asm_insn ("nop", 0);
1561 else
1563 /* Output the delay slot insn first if any. */
1564 if (dbr_sequence_length ())
1565 print_slot (final_sequence);
1567 this.reg = gen_rtx_REG (SImode, 13);
1568 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1569 Fortunately, MACL is fixed and call-clobbered, and we never
1570 need its value across jumps, so save r13 in it instead of in
1571 the stack. */
1572 if (TARGET_SH5)
1573 output_asm_insn ("lds r13, macl", 0);
1574 else
1575 output_asm_insn ("mov.l r13,@-r15", 0);
1576 output_asm_insn (jump, &this.lab);
1577 if (TARGET_SH5)
1578 output_asm_insn ("sts macl, r13", 0);
1579 else
1580 output_asm_insn ("mov.l @r15+,r13", 0);
1582 if (far && flag_pic && TARGET_SH2)
1584 braf_base_lab = gen_label_rtx ();
1585 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1586 CODE_LABEL_NUMBER (braf_base_lab));
1588 if (far)
1589 output_asm_insn (".align 2", 0);
1590 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1591 this.op = op;
1592 if (far && flag_pic)
1594 if (TARGET_SH2)
1595 this.lab = braf_base_lab;
1596 output_asm_insn (".long %O2-%O0", &this.lab);
1598 else
1599 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
1600 return "";
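/* Editor's note (illustration, not part of the port): for the non-PIC far
   case above the emitted sequence looks roughly like

       mov.l   .LCn,rN
       jmp     @rN
       (delay-slot insn or nop)
       .align  2
   .LCn:
       .long   target

   where rN is either a scratch register provided by an
   indirect_jump_scratch insn or r13 saved around the jump, and ".LCn" is a
   made-up label name standing in for the one gen_label_rtx produces.  */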
1603 /* Local label counter, used for constants in the pool and inside
1604 pattern branches. */
1606 static int lf = 100;
1608 /* Output code for ordinary branches. */
1610 const char *
1611 output_branch (int logic, rtx insn, rtx *operands)
1613 switch (get_attr_length (insn))
1615 case 6:
1616 /* This can happen if filling the delay slot has caused a forward
1617 branch to exceed its range (we could reverse it, but only
1618 when we know we won't overextend other branches; this should
1619 best be handled by relaxation).
1620 It can also happen when other condbranches hoist delay slot insn
1621 from their destination, thus leading to code size increase.
1622 But the branch will still be in the range -4092..+4098 bytes. */
1624 if (! TARGET_RELAX)
1626 int label = lf++;
1627 /* The call to print_slot will clobber the operands. */
1628 rtx op0 = operands[0];
1630 /* If the instruction in the delay slot is annulled (true), then
1631 there is no delay slot where we can put it now. The only safe
1632 place for it is after the label. final will do that by default. */
1634 if (final_sequence
1635 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1636 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1638 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
1639 ASSEMBLER_DIALECT ? "/" : ".", label);
1640 print_slot (final_sequence);
1642 else
1643 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
1645 output_asm_insn ("bra\t%l0", &op0);
1646 fprintf (asm_out_file, "\tnop\n");
1647 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1649 return "";
1651 /* When relaxing, handle this like a short branch. The linker
1652 will fix it up if it still doesn't fit after relaxation. */
1653 case 2:
1654 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
1656 /* These are for SH2e, in which we have to account for the
1657 extra nop because of the hardware bug in annulled branches. */
1658 case 8:
1659 if (! TARGET_RELAX)
1661 int label = lf++;
1663 gcc_assert (!final_sequence
1664 || !(INSN_ANNULLED_BRANCH_P
1665 (XVECEXP (final_sequence, 0, 0))));
1666 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
1667 logic ? "f" : "t",
1668 ASSEMBLER_DIALECT ? "/" : ".", label);
1669 fprintf (asm_out_file, "\tnop\n");
1670 output_asm_insn ("bra\t%l0", operands);
1671 fprintf (asm_out_file, "\tnop\n");
1672 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1674 return "";
1676 /* When relaxing, fall through. */
1677 case 4:
1679 char buffer[10];
1681 sprintf (buffer, "b%s%ss\t%%l0",
1682 logic ? "t" : "f",
1683 ASSEMBLER_DIALECT ? "/" : ".");
1684 output_asm_insn (buffer, &operands[0]);
1685 return "nop";
1688 default:
1689 /* There should be no longer branches now - that would
1690 indicate that something has destroyed the branches set
1691 up in machine_dependent_reorg. */
1692 gcc_unreachable ();
1696 const char *
1697 output_branchy_insn (enum rtx_code code, const char *template,
1698 rtx insn, rtx *operands)
1700 rtx next_insn = NEXT_INSN (insn);
1702 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
1704 rtx src = SET_SRC (PATTERN (next_insn));
1705 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
1707 /* Following branch not taken */
1708 operands[9] = gen_label_rtx ();
1709 emit_label_after (operands[9], next_insn);
1710 INSN_ADDRESSES_NEW (operands[9],
1711 INSN_ADDRESSES (INSN_UID (next_insn))
1712 + get_attr_length (next_insn));
1713 return template;
1715 else
1717 int offset = (branch_dest (next_insn)
1718 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
1719 if (offset >= -252 && offset <= 258)
1721 if (GET_CODE (src) == IF_THEN_ELSE)
1722 /* branch_true */
1723 src = XEXP (src, 1);
1724 operands[9] = src;
1725 return template;
1729 operands[9] = gen_label_rtx ();
1730 emit_label_after (operands[9], insn);
1731 INSN_ADDRESSES_NEW (operands[9],
1732 INSN_ADDRESSES (INSN_UID (insn))
1733 + get_attr_length (insn));
1734 return template;
1737 const char *
1738 output_ieee_ccmpeq (rtx insn, rtx *operands)
1740 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
1741 insn, operands);
1744 /* Output the start of the assembler file. */
1746 static void
1747 sh_file_start (void)
1749 default_file_start ();
1751 #ifdef SYMBIAN
1752 /* Declare the .directive section before it is used. */
1753 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
1754 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
1755 #endif
1757 if (TARGET_ELF)
1758 /* We need to show the text section with the proper
1759 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
1760 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
1761 will complain. We can teach GAS specifically about the
1762 default attributes for our choice of text section, but
1763 then we would have to change GAS again if/when we change
1764 the text section name. */
1765 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
1766 else
1767 /* Switch to the data section so that the coffsem symbol
1768 isn't in the text section. */
1769 switch_to_section (data_section);
1771 if (TARGET_LITTLE_ENDIAN)
1772 fputs ("\t.little\n", asm_out_file);
1774 if (!TARGET_ELF)
1776 if (TARGET_SHCOMPACT)
1777 fputs ("\t.mode\tSHcompact\n", asm_out_file);
1778 else if (TARGET_SHMEDIA)
1779 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1780 TARGET_SHMEDIA64 ? 64 : 32);
1784 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
1786 static bool
1787 unspec_caller_rtx_p (rtx pat)
1789 switch (GET_CODE (pat))
1791 case CONST:
1792 return unspec_caller_rtx_p (XEXP (pat, 0));
1793 case PLUS:
1794 case MINUS:
1795 if (unspec_caller_rtx_p (XEXP (pat, 0)))
1796 return true;
1797 return unspec_caller_rtx_p (XEXP (pat, 1));
1798 case UNSPEC:
1799 if (XINT (pat, 1) == UNSPEC_CALLER)
1800 return true;
1801 default:
1802 break;
1805 return false;
1808 /* Indicate that INSN cannot be duplicated. This is true for an insn
1809 that generates a unique label. */
1811 static bool
1812 sh_cannot_copy_insn_p (rtx insn)
1814 rtx pat;
1816 if (!reload_completed || !flag_pic)
1817 return false;
1819 if (GET_CODE (insn) != INSN)
1820 return false;
1821 if (asm_noperands (insn) >= 0)
1822 return false;
1824 pat = PATTERN (insn);
1825 if (GET_CODE (pat) != SET)
1826 return false;
1827 pat = SET_SRC (pat);
1829 if (unspec_caller_rtx_p (pat))
1830 return true;
1832 return false;
1835 /* Actual number of instructions used to make a shift by N. */
1836 static const char ashiftrt_insns[] =
1837 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
1839 /* Left shift and logical right shift are the same. */
1840 static const char shift_insns[] =
1841 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1843 /* Individual shift amounts needed to get the above length sequences.
1844 One bit right shifts clobber the T bit, so when possible, put one bit
1845 shifts in the middle of the sequence, so the ends are eligible for
1846 branch delay slots. */
1847 static const short shift_amounts[32][5] = {
1848 {0}, {1}, {2}, {2, 1},
1849 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
1850 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1851 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
1852 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1853 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1854 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1855 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
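/* For example, a shift by 11 is decomposed as {8, 1, 2}, i.e. three
   constant shifts (roughly shll8 / shll / shll2 for a left shift), which
   agrees with shift_insns[11] == 3.  A negative entry means a shift in the
   opposite direction: a shift by 14 uses {8, -2, 8}, shifting left by 8,
   back right by 2 and left by 8 again, again three instructions.  */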
1857 /* Likewise, but for shift amounts < 16, up to three highmost bits
1858 might be clobbered. This is typically used when combined with some
1859 kind of sign or zero extension. */
1861 static const char ext_shift_insns[] =
1862 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1864 static const short ext_shift_amounts[32][4] = {
1865 {0}, {1}, {2}, {2, 1},
1866 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
1867 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1868 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
1869 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1870 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1871 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1872 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
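/* E.g. ext_shift_amounts[6] is {8, -2}: shifting left by 8 and back right
   by 2 gives a net left shift by 6 in two instructions instead of the three
   that shift_amounts[6] needs, at the price of zeroing the two topmost
   bits - harmless when the result is zero or sign extended anyway.  */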
1874 /* Assuming we have a value that has been sign-extended by at least one bit,
1875 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
1876 to shift it by N without data loss, and quicker than by other means? */
1877 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
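/* (n | 8) == 15 holds exactly for n == 7 and n == 15, the two shift counts
   whose ext_shift_amounts sequences ({8, -1} and {16, -1}) end in a one-bit
   right shift that can be turned into an arithmetic shift to restore the
   sign bit.  */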
1879 /* This is used in length attributes in sh.md to help compute the length
1880 of arbitrary constant shift instructions. */
1883 shift_insns_rtx (rtx insn)
1885 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1886 int shift_count = INTVAL (XEXP (set_src, 1));
1887 enum rtx_code shift_code = GET_CODE (set_src);
1889 switch (shift_code)
1891 case ASHIFTRT:
1892 return ashiftrt_insns[shift_count];
1893 case LSHIFTRT:
1894 case ASHIFT:
1895 return shift_insns[shift_count];
1896 default:
1897 gcc_unreachable ();
1901 /* Return the cost of a shift. */
1903 static inline int
1904 shiftcosts (rtx x)
1906 int value;
1908 if (TARGET_SHMEDIA)
1909 return 1;
1911 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
1913 if (GET_MODE (x) == DImode
1914 && GET_CODE (XEXP (x, 1)) == CONST_INT
1915 && INTVAL (XEXP (x, 1)) == 1)
1916 return 2;
1918 /* Everything else is invalid, because there is no pattern for it. */
1919 return 10000;
1922 /* If shift by a non-constant, then this will be expensive. */
1922 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1923 return SH_DYNAMIC_SHIFT_COST;
1925 value = INTVAL (XEXP (x, 1));
1927 /* Otherwise, return the true cost in instructions. */
1928 if (GET_CODE (x) == ASHIFTRT)
1930 int cost = ashiftrt_insns[value];
1931 /* If SH3, then we put the constant in a reg and use shad. */
1932 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1933 cost = 1 + SH_DYNAMIC_SHIFT_COST;
1934 return cost;
1936 else
1937 return shift_insns[value];
1940 /* Return the cost of an AND operation. */
1942 static inline int
1943 andcosts (rtx x)
1945 int i;
1947 /* Anding with a register is a single cycle and instruction. */
1948 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1949 return 1;
1951 i = INTVAL (XEXP (x, 1));
1953 if (TARGET_SHMEDIA)
1955 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1956 && (CONST_OK_FOR_I10 (INTVAL (XEXP (x, 1)))
1957 || CONST_OK_FOR_J16 (INTVAL (XEXP (x, 1)))))
1958 return 1;
1959 else
1960 return 1 + rtx_cost (XEXP (x, 1), AND);
1963 /* These constants are single cycle extu.[bw] instructions. */
1964 if (i == 0xff || i == 0xffff)
1965 return 1;
1966 /* Constants that can be used in an and immediate instruction in a single
1967 cycle, but this requires r0, so make it a little more expensive. */
1968 if (CONST_OK_FOR_K08 (i))
1969 return 2;
1970 /* Constants that can be loaded with a mov immediate and an and.
1971 This case is probably unnecessary. */
1972 if (CONST_OK_FOR_I08 (i))
1973 return 2;
1974 /* Any other constant requires a 2 cycle pc-relative load plus an and.
1975 This case is probably unnecessary. */
1976 return 3;
1979 /* Return the cost of an addition or a subtraction. */
1981 static inline int
1982 addsubcosts (rtx x)
1984 /* Adding a register is a single cycle insn. */
1985 if (GET_CODE (XEXP (x, 1)) == REG
1986 || GET_CODE (XEXP (x, 1)) == SUBREG)
1987 return 1;
1989 /* Likewise for small constants. */
1990 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1991 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
1992 return 1;
1994 if (TARGET_SHMEDIA)
1995 switch (GET_CODE (XEXP (x, 1)))
1997 case CONST:
1998 case LABEL_REF:
1999 case SYMBOL_REF:
2000 return TARGET_SHMEDIA64 ? 5 : 3;
2002 case CONST_INT:
2003 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2004 return 2;
2005 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2006 return 3;
2007 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2008 return 4;
2010 /* Fall through. */
2011 default:
2012 return 5;
2015 /* Any other constant requires a 2 cycle pc-relative load plus an
2016 addition. */
2017 return 3;
2020 /* Return the cost of a multiply. */
2021 static inline int
2022 multcosts (rtx x ATTRIBUTE_UNUSED)
2024 if (sh_multcost >= 0)
2025 return sh_multcost;
2026 if (TARGET_SHMEDIA)
2027 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2028 accept constants. Ideally, we would use a cost of one or two and
2029 add the cost of the operand, but disregard the latter when inside loops
2030 and loop invariant code motion is still to follow.
2031 Using a multiply first and splitting it later if it's a loss
2032 doesn't work because of different sign / zero extension semantics
2033 of multiplies vs. shifts. */
2034 return TARGET_SMALLCODE ? 2 : 3;
2036 if (TARGET_SH2)
2038 /* We have a mul insn, so we can never take more than the mul and the
2039 read of the mac reg, but count more because of the latency and extra
2040 reg usage. */
2041 if (TARGET_SMALLCODE)
2042 return 2;
2043 return 3;
2046 /* If we're aiming at small code, then just count the number of
2047 insns in a multiply call sequence. */
2048 if (TARGET_SMALLCODE)
2049 return 5;
2051 /* Otherwise count all the insns in the routine we'd be calling too. */
2052 return 20;
2055 /* Compute a (partial) cost for rtx X. Return true if the complete
2056 cost has been computed, and false if subexpressions should be
2057 scanned. In either case, *TOTAL contains the cost result. */
2059 static bool
2060 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
2062 switch (code)
2064 case CONST_INT:
2065 if (TARGET_SHMEDIA)
2067 if (INTVAL (x) == 0)
2068 *total = 0;
2069 else if (outer_code == AND && and_operand ((x), DImode))
2070 *total = 0;
2071 else if ((outer_code == IOR || outer_code == XOR
2072 || outer_code == PLUS)
2073 && CONST_OK_FOR_I10 (INTVAL (x)))
2074 *total = 0;
2075 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2076 *total = COSTS_N_INSNS (outer_code != SET);
2077 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2078 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2079 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2080 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
2081 else
2082 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
2083 return true;
2085 if (CONST_OK_FOR_I08 (INTVAL (x)))
2086 *total = 0;
2087 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2088 && CONST_OK_FOR_K08 (INTVAL (x)))
2089 *total = 1;
2090 else
2091 *total = 8;
2092 return true;
2094 case CONST:
2095 case LABEL_REF:
2096 case SYMBOL_REF:
2097 if (TARGET_SHMEDIA64)
2098 *total = COSTS_N_INSNS (4);
2099 else if (TARGET_SHMEDIA32)
2100 *total = COSTS_N_INSNS (2);
2101 else
2102 *total = 5;
2103 return true;
2105 case CONST_DOUBLE:
2106 if (TARGET_SHMEDIA)
2107 *total = COSTS_N_INSNS (4);
2108 else
2109 *total = 10;
2110 return true;
2111 case CONST_VECTOR:
2112 if (x == CONST0_RTX (GET_MODE (x)))
2113 *total = 0;
2114 else if (sh_1el_vec (x, VOIDmode))
2115 *total = outer_code != SET;
2116 if (sh_rep_vec (x, VOIDmode))
2117 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2118 + (outer_code != SET));
2119 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2120 return true;
2122 case PLUS:
2123 case MINUS:
2124 *total = COSTS_N_INSNS (addsubcosts (x));
2125 return true;
2127 case AND:
2128 *total = COSTS_N_INSNS (andcosts (x));
2129 return true;
2131 case MULT:
2132 *total = COSTS_N_INSNS (multcosts (x));
2133 return true;
2135 case ASHIFT:
2136 case ASHIFTRT:
2137 case LSHIFTRT:
2138 *total = COSTS_N_INSNS (shiftcosts (x));
2139 return true;
2141 case DIV:
2142 case UDIV:
2143 case MOD:
2144 case UMOD:
2145 *total = COSTS_N_INSNS (20);
2146 return true;
2148 case PARALLEL:
2149 if (sh_1el_vec (x, VOIDmode))
2150 *total = outer_code != SET;
2151 if (sh_rep_vec (x, VOIDmode))
2152 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2153 + (outer_code != SET));
2154 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2155 return true;
2157 case FLOAT:
2158 case FIX:
2159 *total = 100;
2160 return true;
2162 default:
2163 return false;
2167 /* Compute the cost of an address. For the SH, all valid addresses are
2168 the same cost. Use a slightly higher cost for reg + reg addressing,
2169 since it increases pressure on r0. */
2171 static int
2172 sh_address_cost (rtx X)
2174 return (GET_CODE (X) == PLUS
2175 && ! CONSTANT_P (XEXP (X, 1))
2176 && ! TARGET_SHMEDIA ? 1 : 0);
2179 /* Code to expand a shift. */
2181 void
2182 gen_ashift (int type, int n, rtx reg)
2184 /* Negative values here come from the shift_amounts array. */
2185 if (n < 0)
2187 if (type == ASHIFT)
2188 type = LSHIFTRT;
2189 else
2190 type = ASHIFT;
2191 n = -n;
2194 switch (type)
2196 case ASHIFTRT:
2197 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
2198 break;
2199 case LSHIFTRT:
2200 if (n == 1)
2201 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
2202 else
2203 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
2204 break;
2205 case ASHIFT:
2206 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
2207 break;
2211 /* Same for HImode */
2213 void
2214 gen_ashift_hi (int type, int n, rtx reg)
2216 /* Negative values here come from the shift_amounts array. */
2217 if (n < 0)
2219 if (type == ASHIFT)
2220 type = LSHIFTRT;
2221 else
2222 type = ASHIFT;
2223 n = -n;
2226 switch (type)
2228 case ASHIFTRT:
2229 case LSHIFTRT:
2230 /* We don't have HImode right shift operations because using the
2231 ordinary 32 bit shift instructions for that doesn't generate proper
2232 zero/sign extension.
2233 gen_ashift_hi is only called in contexts where we know that the
2234 sign extension works out correctly. */
2236 int offset = 0;
2237 if (GET_CODE (reg) == SUBREG)
2239 offset = SUBREG_BYTE (reg);
2240 reg = SUBREG_REG (reg);
2242 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
2243 break;
2245 case ASHIFT:
2246 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
2247 break;
2251 /* Output RTL to split a constant shift into its component SH constant
2252 shift instructions. */
2254 void
2255 gen_shifty_op (int code, rtx *operands)
2257 int value = INTVAL (operands[2]);
2258 int max, i;
2260 /* Truncate the shift count in case it is out of bounds. */
2261 value = value & 0x1f;
2263 if (value == 31)
2265 if (code == LSHIFTRT)
2267 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
2268 emit_insn (gen_movt (operands[0]));
2269 return;
2271 else if (code == ASHIFT)
2273 /* There is a two instruction sequence for 31 bit left shifts,
2274 but it requires r0. */
2275 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
2277 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
2278 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
2279 return;
2283 else if (value == 0)
2285 /* This can happen even when optimizing, if there were subregs before
2286 reload. Don't output a nop here, as this is never optimized away;
2287 use a no-op move instead. */
2288 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
2289 return;
2292 max = shift_insns[value];
2293 for (i = 0; i < max; i++)
2294 gen_ashift (code, shift_amounts[value][i], operands[0]);
2297 /* Same as above, but optimized for values where the topmost bits don't
2298 matter. */
2300 void
2301 gen_shifty_hi_op (int code, rtx *operands)
2303 int value = INTVAL (operands[2]);
2304 int max, i;
2305 void (*gen_fun) (int, int, rtx);
2307 /* This operation is used by and_shl for SImode values with a few
2308 high bits known to be cleared. */
2309 value &= 31;
2310 if (value == 0)
2312 emit_insn (gen_nop ());
2313 return;
2316 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2317 if (code == ASHIFT)
2319 max = ext_shift_insns[value];
2320 for (i = 0; i < max; i++)
2321 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2323 else
2324 /* When shifting right, emit the shifts in reverse order, so that
2325 solitary negative values come first. */
2326 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2327 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2330 /* Output RTL for an arithmetic right shift. */
2332 /* ??? Rewrite to use super-optimizer sequences. */
2335 expand_ashiftrt (rtx *operands)
2337 rtx wrk;
2338 char func[18];
2339 int value;
2341 if (TARGET_SH3)
2343 if (GET_CODE (operands[2]) != CONST_INT)
2345 rtx count = copy_to_mode_reg (SImode, operands[2]);
2346 emit_insn (gen_negsi2 (count, count));
2347 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2348 return 1;
2350 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2351 > 1 + SH_DYNAMIC_SHIFT_COST)
2353 rtx count
2354 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2355 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2356 return 1;
2359 if (GET_CODE (operands[2]) != CONST_INT)
2360 return 0;
2362 value = INTVAL (operands[2]) & 31;
2364 if (value == 31)
2366 /* If we are called from abs expansion, arrange things so that we
2367 can use a single MT instruction that doesn't clobber the source,
2368 if LICM can hoist out the load of the constant zero. */
2369 if (currently_expanding_to_rtl)
2371 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
2372 operands[1]));
2373 emit_insn (gen_mov_neg_si_t (operands[0]));
2374 return 1;
2376 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2377 return 1;
2379 else if (value >= 16 && value <= 19)
2381 wrk = gen_reg_rtx (SImode);
2382 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2383 value -= 16;
2384 while (value--)
2385 gen_ashift (ASHIFTRT, 1, wrk);
2386 emit_move_insn (operands[0], wrk);
2387 return 1;
2389 /* Expand a short sequence inline; for longer ones, call a magic routine. */
2390 else if (value <= 5)
2392 wrk = gen_reg_rtx (SImode);
2393 emit_move_insn (wrk, operands[1]);
2394 while (value--)
2395 gen_ashift (ASHIFTRT, 1, wrk);
2396 emit_move_insn (operands[0], wrk);
2397 return 1;
2400 wrk = gen_reg_rtx (Pmode);
2402 /* Load the value into an arg reg and call a helper. */
2403 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2404 sprintf (func, "__ashiftrt_r4_%d", value);
2405 function_symbol (wrk, func, SFUNC_STATIC);
2406 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2407 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
2408 return 1;
2412 sh_dynamicalize_shift_p (rtx count)
2414 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
2417 /* Try to find a good way to implement the combiner pattern
2418 [(set (match_operand:SI 0 "register_operand" "r")
2419 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2420 (match_operand:SI 2 "const_int_operand" "n"))
2421 (match_operand:SI 3 "const_int_operand" "n"))) .
2422 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2423 return 0 for simple right / left or left/right shift combination.
2424 return 1 for a combination of shifts with zero_extend.
2425 return 2 for a combination of shifts with an AND that needs r0.
2426 return 3 for a combination of shifts with an AND that needs an extra
2427 scratch register, when the three highmost bits of the AND mask are clear.
2428 return 4 for a combination of shifts with an AND that needs an extra
2429 scratch register, when any of the three highmost bits of the AND mask
2430 is set.
2431 If ATTRP is set, store an initial right shift width in ATTRP[0],
2432 and the instruction length in ATTRP[1] . These values are not valid
2433 when returning 0.
2434 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2435 shift_amounts for the last shift value that is to be used before the
2436 sign extend. */
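/* As an illustration, (x << 2) & 0x3fc only depends on the low byte of x,
   so the zero-extend variant (return value 1) applies: a byte zero
   extension followed by a single shift by two - roughly extu.b plus shll2 -
   with cost 1 + ext_shift_insns[0] + ext_shift_insns[2] == 2.  */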
2438 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2440 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2441 int left = INTVAL (left_rtx), right;
2442 int best = 0;
2443 int cost, best_cost = 10000;
2444 int best_right = 0, best_len = 0;
2445 int i;
2446 int can_ext;
2448 if (left < 0 || left > 31)
2449 return 0;
2450 if (GET_CODE (mask_rtx) == CONST_INT)
2451 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2452 else
2453 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2454 /* Can this be expressed as a right shift / left shift pair? */
2455 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2456 right = exact_log2 (lsb);
2457 mask2 = ~(mask + lsb - 1);
2458 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2459 /* mask has no zeroes except trailing zeroes <==> ! mask2 */
2460 if (! mask2)
2461 best_cost = shift_insns[right] + shift_insns[right + left];
2462 /* mask has no trailing zeroes <==> ! right */
2463 else if (! right && mask2 == ~(lsb2 - 1))
2465 int late_right = exact_log2 (lsb2);
2466 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2468 /* Try to use zero extend. */
2469 if (mask2 == ~(lsb2 - 1))
2471 int width, first;
2473 for (width = 8; width <= 16; width += 8)
2475 /* Can we zero-extend right away? */
2476 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2478 cost
2479 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2480 if (cost < best_cost)
2482 best = 1;
2483 best_cost = cost;
2484 best_right = right;
2485 best_len = cost;
2486 if (attrp)
2487 attrp[2] = -1;
2489 continue;
2491 /* ??? Could try to put zero extend into initial right shift,
2492 or even shift a bit left before the right shift. */
2493 /* Determine value of first part of left shift, to get to the
2494 zero extend cut-off point. */
2495 first = width - exact_log2 (lsb2) + right;
2496 if (first >= 0 && right + left - first >= 0)
2498 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2499 + ext_shift_insns[right + left - first];
2500 if (cost < best_cost)
2502 best = 1;
2503 best_cost = cost;
2504 best_right = right;
2505 best_len = cost;
2506 if (attrp)
2507 attrp[2] = first;
2512 /* Try to use r0 AND pattern */
2513 for (i = 0; i <= 2; i++)
2515 if (i > right)
2516 break;
2517 if (! CONST_OK_FOR_K08 (mask >> i))
2518 continue;
2519 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2520 if (cost < best_cost)
2522 best = 2;
2523 best_cost = cost;
2524 best_right = i;
2525 best_len = cost - 1;
2528 /* Try to use a scratch register to hold the AND operand. */
2529 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2530 for (i = 0; i <= 2; i++)
2532 if (i > right)
2533 break;
2534 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2535 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2536 if (cost < best_cost)
2538 best = 4 - can_ext;
2539 best_cost = cost;
2540 best_right = i;
2541 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2545 if (attrp)
2547 attrp[0] = best_right;
2548 attrp[1] = best_len;
2550 return best;
2553 /* This is used in length attributes of the unnamed instructions
2554 corresponding to shl_and_kind return values of 1 and 2. */
2556 shl_and_length (rtx insn)
2558 rtx set_src, left_rtx, mask_rtx;
2559 int attributes[3];
2561 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2562 left_rtx = XEXP (XEXP (set_src, 0), 1);
2563 mask_rtx = XEXP (set_src, 1);
2564 shl_and_kind (left_rtx, mask_rtx, attributes);
2565 return attributes[1];
2568 /* This is used in the length attribute of the and_shl_scratch instruction. */
2571 shl_and_scr_length (rtx insn)
2573 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2574 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2575 rtx op = XEXP (set_src, 0);
2576 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2577 op = XEXP (XEXP (op, 0), 0);
2578 return len + shift_insns[INTVAL (XEXP (op, 1))];
2581 /* Generate rtl for instructions for which shl_and_kind advised a particular
2582 method of generating them, i.e. returned zero. */
2585 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2587 int attributes[3];
2588 unsigned HOST_WIDE_INT mask;
2589 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2590 int right, total_shift;
2591 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2593 right = attributes[0];
2594 total_shift = INTVAL (left_rtx) + right;
2595 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2596 switch (kind)
2598 default:
2599 return -1;
2600 case 1:
2602 int first = attributes[2];
2603 rtx operands[3];
2605 if (first < 0)
2607 emit_insn ((mask << right) <= 0xff
2608 ? gen_zero_extendqisi2 (dest,
2609 gen_lowpart (QImode, source))
2610 : gen_zero_extendhisi2 (dest,
2611 gen_lowpart (HImode, source)));
2612 source = dest;
2614 if (source != dest)
2615 emit_insn (gen_movsi (dest, source));
2616 operands[0] = dest;
2617 if (right)
2619 operands[2] = GEN_INT (right);
2620 gen_shifty_hi_op (LSHIFTRT, operands);
2622 if (first > 0)
2624 operands[2] = GEN_INT (first);
2625 gen_shifty_hi_op (ASHIFT, operands);
2626 total_shift -= first;
2627 mask <<= first;
2629 if (first >= 0)
2630 emit_insn (mask <= 0xff
2631 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
2632 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2633 if (total_shift > 0)
2635 operands[2] = GEN_INT (total_shift);
2636 gen_shifty_hi_op (ASHIFT, operands);
2638 break;
2640 case 4:
2641 shift_gen_fun = gen_shifty_op;
2642 case 3:
2643 /* If the topmost bit that matters is set, set the topmost bits
2644 that don't matter. This way, we might be able to get a shorter
2645 signed constant. */
2646 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
2647 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
2648 case 2:
2649 /* Don't expand fine-grained when combining, because that will
2650 make the pattern fail. */
2651 if (currently_expanding_to_rtl
2652 || reload_in_progress || reload_completed)
2654 rtx operands[3];
2656 /* Cases 3 and 4 should be handled by this split
2657 only while combining */
2658 gcc_assert (kind <= 2);
2659 if (right)
2661 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2662 source = dest;
2664 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
2665 if (total_shift)
2667 operands[0] = dest;
2668 operands[1] = dest;
2669 operands[2] = GEN_INT (total_shift);
2670 shift_gen_fun (ASHIFT, operands);
2672 break;
2674 else
2676 int neg = 0;
2677 if (kind != 4 && total_shift < 16)
2679 neg = -ext_shift_amounts[total_shift][1];
2680 if (neg > 0)
2681 neg -= ext_shift_amounts[total_shift][2];
2682 else
2683 neg = 0;
2685 emit_insn (gen_and_shl_scratch (dest, source,
2686 GEN_INT (right),
2687 GEN_INT (mask),
2688 GEN_INT (total_shift + neg),
2689 GEN_INT (neg)));
2690 emit_insn (gen_movsi (dest, dest));
2691 break;
2694 return 0;
2697 /* Try to find a good way to implement the combiner pattern
2698 [(set (match_operand:SI 0 "register_operand" "=r")
2699 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2700 (match_operand:SI 2 "const_int_operand" "n")
2701 (match_operand:SI 3 "const_int_operand" "n")
2702 (const_int 0)))
2703 (clobber (reg:SI T_REG))]
2704 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2705 return 0 for simple left / right shift combination.
2706 return 1 for left shift / 8 bit sign extend / left shift.
2707 return 2 for left shift / 16 bit sign extend / left shift.
2708 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2709 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2710 return 5 for left shift / 16 bit sign extend / right shift
2711 return 6 for < 8 bit sign extend / left shift.
2712 return 7 for < 8 bit sign extend / left shift / single right shift.
2713 If COSTP is nonzero, assign the calculated cost to *COSTP. */
2716 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
2718 int left, size, insize, ext;
2719 int cost = 0, best_cost;
2720 int kind;
2722 left = INTVAL (left_rtx);
2723 size = INTVAL (size_rtx);
2724 insize = size - left;
2725 gcc_assert (insize > 0);
2726 /* Default to left / right shift. */
2727 kind = 0;
2728 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
2729 if (size <= 16)
2731 /* 16 bit shift / sign extend / 16 bit shift */
2732 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2733 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2734 below, by alternative 3 or something even better. */
2735 if (cost < best_cost)
2737 kind = 5;
2738 best_cost = cost;
2741 /* Try a plain sign extend between two shifts. */
2742 for (ext = 16; ext >= insize; ext -= 8)
2744 if (ext <= size)
2746 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2747 if (cost < best_cost)
2749 kind = ext / (unsigned) 8;
2750 best_cost = cost;
2753 /* Check if we can do a sloppy shift with a final signed shift
2754 restoring the sign. */
2755 if (EXT_SHIFT_SIGNED (size - ext))
2756 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2757 /* If not, maybe it's still cheaper to do the second shift sloppy,
2758 and do a final sign extend? */
2759 else if (size <= 16)
2760 cost = ext_shift_insns[ext - insize] + 1
2761 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2762 else
2763 continue;
2764 if (cost < best_cost)
2766 kind = ext / (unsigned) 8 + 2;
2767 best_cost = cost;
2770 /* Check if we can sign extend in r0 */
2771 if (insize < 8)
2773 cost = 3 + shift_insns[left];
2774 if (cost < best_cost)
2776 kind = 6;
2777 best_cost = cost;
2779 /* Try the same with a final signed shift. */
2780 if (left < 31)
2782 cost = 3 + ext_shift_insns[left + 1] + 1;
2783 if (cost < best_cost)
2785 kind = 7;
2786 best_cost = cost;
2790 if (TARGET_SH3)
2792 /* Try to use a dynamic shift. */
2793 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2794 if (cost < best_cost)
2796 kind = 0;
2797 best_cost = cost;
2800 if (costp)
2801 *costp = cost;
2802 return kind;
2805 /* Function to be used in the length attribute of the instructions
2806 implementing this pattern. */
2809 shl_sext_length (rtx insn)
2811 rtx set_src, left_rtx, size_rtx;
2812 int cost;
2814 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2815 left_rtx = XEXP (XEXP (set_src, 0), 1);
2816 size_rtx = XEXP (set_src, 1);
2817 shl_sext_kind (left_rtx, size_rtx, &cost);
2818 return cost;
2821 /* Generate rtl for this pattern */
2824 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
2826 int kind;
2827 int left, size, insize, cost;
2828 rtx operands[3];
2830 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2831 left = INTVAL (left_rtx);
2832 size = INTVAL (size_rtx);
2833 insize = size - left;
2834 switch (kind)
2836 case 1:
2837 case 2:
2838 case 3:
2839 case 4:
2841 int ext = kind & 1 ? 8 : 16;
2842 int shift2 = size - ext;
2844 /* Don't expand fine-grained when combining, because that will
2845 make the pattern fail. */
2846 if (! currently_expanding_to_rtl
2847 && ! reload_in_progress && ! reload_completed)
2849 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2850 emit_insn (gen_movsi (dest, source));
2851 break;
2853 if (dest != source)
2854 emit_insn (gen_movsi (dest, source));
2855 operands[0] = dest;
2856 if (ext - insize)
2858 operands[2] = GEN_INT (ext - insize);
2859 gen_shifty_hi_op (ASHIFT, operands);
2861 emit_insn (kind & 1
2862 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2863 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2864 if (kind <= 2)
2866 if (shift2)
2868 operands[2] = GEN_INT (shift2);
2869 gen_shifty_op (ASHIFT, operands);
2872 else
2874 if (shift2 > 0)
2876 if (EXT_SHIFT_SIGNED (shift2))
2878 operands[2] = GEN_INT (shift2 + 1);
2879 gen_shifty_op (ASHIFT, operands);
2880 operands[2] = const1_rtx;
2881 gen_shifty_op (ASHIFTRT, operands);
2882 break;
2884 operands[2] = GEN_INT (shift2);
2885 gen_shifty_hi_op (ASHIFT, operands);
2887 else if (shift2)
2889 operands[2] = GEN_INT (-shift2);
2890 gen_shifty_hi_op (LSHIFTRT, operands);
2892 emit_insn (size <= 8
2893 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2894 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2896 break;
2898 case 5:
2900 int i = 16 - size;
2901 if (! currently_expanding_to_rtl
2902 && ! reload_in_progress && ! reload_completed)
2903 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2904 else
2906 operands[0] = dest;
2907 operands[2] = GEN_INT (16 - insize);
2908 gen_shifty_hi_op (ASHIFT, operands);
2909 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2911 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2912 while (--i >= 0)
2913 gen_ashift (ASHIFTRT, 1, dest);
2914 break;
2916 case 6:
2917 case 7:
2918 /* Don't expand fine-grained when combining, because that will
2919 make the pattern fail. */
2920 if (! currently_expanding_to_rtl
2921 && ! reload_in_progress && ! reload_completed)
2923 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2924 emit_insn (gen_movsi (dest, source));
2925 break;
2927 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2928 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2929 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2930 operands[0] = dest;
2931 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2932 gen_shifty_op (ASHIFT, operands);
2933 if (kind == 7)
2934 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
2935 break;
2936 default:
2937 return -1;
2939 return 0;
2942 /* Prefix a symbol_ref name with "datalabel". */
2945 gen_datalabel_ref (rtx sym)
2947 const char *str;
2949 if (GET_CODE (sym) == LABEL_REF)
2950 return gen_rtx_CONST (GET_MODE (sym),
2951 gen_rtx_UNSPEC (GET_MODE (sym),
2952 gen_rtvec (1, sym),
2953 UNSPEC_DATALABEL));
2955 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
2957 str = XSTR (sym, 0);
2958 /* Share all SYMBOL_REF strings with the same value - that is important
2959 for cse. */
2960 str = IDENTIFIER_POINTER (get_identifier (str));
2961 XSTR (sym, 0) = str;
2963 return sym;
2967 /* The SH cannot load a large constant into a register, constants have to
2968 come from a pc relative load. The reference of a pc relative load
2969 instruction must be less than 1k in front of the instruction. This
2970 means that we often have to dump a constant inside a function, and
2971 generate code to branch around it.
2973 It is important to minimize this, since the branches will slow things
2974 down and make things bigger.
2976 Worst case code looks like:
2978 mov.l L1,rn
2979 bra L2
2981 align
2982 L1: .long value
2986 mov.l L3,rn
2987 bra L4
2989 align
2990 L3: .long value
2994 We fix this by performing a scan before scheduling, which notices which
2995 instructions need to have their operands fetched from the constant table
2996 and builds the table.
2998 The algorithm is:
3000 scan, find an instruction which needs a pcrel move. Look forward, find the
3001 last barrier which is within MAX_COUNT bytes of the requirement.
3002 If there isn't one, make one. Process all the instructions between
3003 the find and the barrier.
3005 In the above example, we can tell that L3 is within 1k of L1, so
3006 the first move can be shrunk from the 3 insn+constant sequence into
3007 just 1 insn, and the constant moved to L3 to make:
3009 mov.l L1,rn
3011 mov.l L3,rn
3012 bra L4
3014 align
3015 L3:.long value
3016 L4:.long value
3018 Then the second move becomes the target for the shortening process. */
3020 typedef struct
3022 rtx value; /* Value in table. */
3023 rtx label; /* Label of value. */
3024 rtx wend; /* End of window. */
3025 enum machine_mode mode; /* Mode of value. */
3027 /* True if this constant is accessed as part of a post-increment
3028 sequence. Note that HImode constants are never accessed in this way. */
3029 bool part_of_sequence_p;
3030 } pool_node;
3032 /* The maximum number of constants that can fit into one pool, since
3033 constants in the range 0..510 are at least 2 bytes long, and in the
3034 range from there to 1018 at least 4 bytes. */
3036 #define MAX_POOL_SIZE 372
3037 static pool_node pool_vector[MAX_POOL_SIZE];
3038 static int pool_size;
3039 static rtx pool_window_label;
3040 static int pool_window_last;
3042 /* ??? If we need a constant in HImode which is the truncated value of a
3043 constant we need in SImode, we could combine the two entries thus saving
3044 two bytes. Is this common enough to be worth the effort of implementing
3045 it? */
3047 /* ??? This stuff should be done at the same time that we shorten branches.
3048 As it is now, we must assume that all branches are the maximum size, and
3049 this causes us to almost always output constant pools sooner than
3050 necessary. */
3052 /* Add a constant to the pool and return its label. */
3054 static rtx
3055 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3057 int i;
3058 rtx lab, new, ref, newref;
3060 /* First see if we've already got it. */
3061 for (i = 0; i < pool_size; i++)
3063 if (x->code == pool_vector[i].value->code
3064 && mode == pool_vector[i].mode)
3066 if (x->code == CODE_LABEL)
3068 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3069 continue;
3071 if (rtx_equal_p (x, pool_vector[i].value))
3073 lab = new = 0;
3074 if (! last_value
3075 || ! i
3076 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3078 new = gen_label_rtx ();
3079 LABEL_REFS (new) = pool_vector[i].label;
3080 pool_vector[i].label = lab = new;
3082 if (lab && pool_window_label)
3084 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
3085 ref = pool_vector[pool_window_last].wend;
3086 LABEL_NEXTREF (newref) = ref;
3087 pool_vector[pool_window_last].wend = newref;
3089 if (new)
3090 pool_window_label = new;
3091 pool_window_last = i;
3092 return lab;
3097 /* Need a new one. */
3098 pool_vector[pool_size].value = x;
3099 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
3101 lab = 0;
3102 pool_vector[pool_size - 1].part_of_sequence_p = true;
3104 else
3105 lab = gen_label_rtx ();
3106 pool_vector[pool_size].mode = mode;
3107 pool_vector[pool_size].label = lab;
3108 pool_vector[pool_size].wend = NULL_RTX;
3109 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
3110 if (lab && pool_window_label)
3112 newref = gen_rtx_LABEL_REF (VOIDmode, pool_window_label);
3113 ref = pool_vector[pool_window_last].wend;
3114 LABEL_NEXTREF (newref) = ref;
3115 pool_vector[pool_window_last].wend = newref;
3117 if (lab)
3118 pool_window_label = lab;
3119 pool_window_last = pool_size;
3120 pool_size++;
3121 return lab;
3124 /* Output the literal table. START, if nonzero, is the first instruction
3125 this table is needed for, and also indicates that there is at least one
3126 casesi_worker_2 instruction; we have to emit the operand3 labels from
3127 these insns at a 4-byte aligned position. BARRIER is the barrier
3128 after which we are to place the table. */
3130 static void
3131 dump_table (rtx start, rtx barrier)
3133 rtx scan = barrier;
3134 int i;
3135 int need_align = 1;
3136 rtx lab, ref;
3137 int have_df = 0;
3139 /* Do two passes, first time dump out the HI sized constants. */
3141 for (i = 0; i < pool_size; i++)
3143 pool_node *p = &pool_vector[i];
3145 if (p->mode == HImode)
3147 if (need_align)
3149 scan = emit_insn_after (gen_align_2 (), scan);
3150 need_align = 0;
3152 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3153 scan = emit_label_after (lab, scan);
3154 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
3155 scan);
3156 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3158 lab = XEXP (ref, 0);
3159 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3162 else if (p->mode == DFmode)
3163 have_df = 1;
3166 need_align = 1;
3168 if (start)
3170 scan = emit_insn_after (gen_align_4 (), scan);
3171 need_align = 0;
3172 for (; start != barrier; start = NEXT_INSN (start))
3173 if (GET_CODE (start) == INSN
3174 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
3176 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
3177 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
3179 scan = emit_label_after (lab, scan);
3182 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
3184 rtx align_insn = NULL_RTX;
3186 scan = emit_label_after (gen_label_rtx (), scan);
3187 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3188 need_align = 0;
3190 for (i = 0; i < pool_size; i++)
3192 pool_node *p = &pool_vector[i];
3194 switch (p->mode)
3196 case HImode:
3197 break;
3198 case SImode:
3199 case SFmode:
3200 if (align_insn && !p->part_of_sequence_p)
3202 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3203 emit_label_before (lab, align_insn);
3204 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
3205 align_insn);
3206 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3208 lab = XEXP (ref, 0);
3209 emit_insn_before (gen_consttable_window_end (lab),
3210 align_insn);
3212 delete_insn (align_insn);
3213 align_insn = NULL_RTX;
3214 continue;
3216 else
3218 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3219 scan = emit_label_after (lab, scan);
3220 scan = emit_insn_after (gen_consttable_4 (p->value,
3221 const0_rtx), scan);
3222 need_align = ! need_align;
3224 break;
3225 case DFmode:
3226 if (need_align)
3228 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3229 align_insn = scan;
3230 need_align = 0;
3232 case DImode:
3233 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3234 scan = emit_label_after (lab, scan);
3235 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3236 scan);
3237 break;
3238 default:
3239 gcc_unreachable ();
3242 if (p->mode != HImode)
3244 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3246 lab = XEXP (ref, 0);
3247 scan = emit_insn_after (gen_consttable_window_end (lab),
3248 scan);
3253 pool_size = 0;
3256 for (i = 0; i < pool_size; i++)
3258 pool_node *p = &pool_vector[i];
3260 switch (p->mode)
3262 case HImode:
3263 break;
3264 case SImode:
3265 case SFmode:
3266 if (need_align)
3268 need_align = 0;
3269 scan = emit_label_after (gen_label_rtx (), scan);
3270 scan = emit_insn_after (gen_align_4 (), scan);
3272 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3273 scan = emit_label_after (lab, scan);
3274 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
3275 scan);
3276 break;
3277 case DFmode:
3278 case DImode:
3279 if (need_align)
3281 need_align = 0;
3282 scan = emit_label_after (gen_label_rtx (), scan);
3283 scan = emit_insn_after (gen_align_4 (), scan);
3285 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3286 scan = emit_label_after (lab, scan);
3287 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3288 scan);
3289 break;
3290 default:
3291 gcc_unreachable ();
3294 if (p->mode != HImode)
3296 for (ref = p->wend; ref; ref = LABEL_NEXTREF (ref))
3298 lab = XEXP (ref, 0);
3299 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3304 scan = emit_insn_after (gen_consttable_end (), scan);
3305 scan = emit_barrier_after (scan);
3306 pool_size = 0;
3307 pool_window_label = NULL_RTX;
3308 pool_window_last = 0;
3311 /* Return nonzero if constant would be an ok source for a
3312 mov.w instead of a mov.l. */
3314 static int
3315 hi_const (rtx src)
3317 return (GET_CODE (src) == CONST_INT
3318 && INTVAL (src) >= -32768
3319 && INTVAL (src) <= 32767);
3322 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3324 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
3325 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
3326 need to fix it if the input value is CONST_OK_FOR_I08. */
3328 static int
3329 broken_move (rtx insn)
3331 if (GET_CODE (insn) == INSN)
3333 rtx pat = PATTERN (insn);
3334 if (GET_CODE (pat) == PARALLEL)
3335 pat = XVECEXP (pat, 0, 0);
3336 if (GET_CODE (pat) == SET
3337 /* We can load any 8 bit value if we don't care what the high
3338 order bits end up as. */
3339 && GET_MODE (SET_DEST (pat)) != QImode
3340 && (CONSTANT_P (SET_SRC (pat))
3341 /* Match mova_const. */
3342 || (GET_CODE (SET_SRC (pat)) == UNSPEC
3343 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3344 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
3345 && ! (TARGET_SH2E
3346 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3347 && (fp_zero_operand (SET_SRC (pat))
3348 || fp_one_operand (SET_SRC (pat)))
3349 /* ??? If this is a -m4 or -m4-single compilation, in general
3350 we don't know the current setting of fpscr, so disable fldi.
3351 There is an exception if this was a register-register move
3352 before reload - and hence it was ascertained that we have
3353 single precision setting - and in a post-reload optimization
3354 we changed this to do a constant load. In that case
3355 we don't have an r0 clobber, hence we must use fldi. */
3356 && (! TARGET_SH4 || TARGET_FMOVD
3357 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3358 == SCRATCH))
3359 && GET_CODE (SET_DEST (pat)) == REG
3360 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3361 && ! (TARGET_SH2A
3362 && GET_MODE (SET_DEST (pat)) == SImode
3363 && GET_CODE (SET_SRC (pat)) == CONST_INT
3364 && CONST_OK_FOR_I20 (INTVAL (SET_SRC (pat))))
3365 && (GET_CODE (SET_SRC (pat)) != CONST_INT
3366 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
3367 return 1;
3370 return 0;
3373 static int
3374 mova_p (rtx insn)
3376 return (GET_CODE (insn) == INSN
3377 && GET_CODE (PATTERN (insn)) == SET
3378 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3379 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3380 /* Don't match mova_const. */
3381 && GET_CODE (XVECEXP (SET_SRC (PATTERN (insn)), 0, 0)) == LABEL_REF);
3384 /* Fix up a mova from a switch that went out of range. */
3385 static void
3386 fixup_mova (rtx mova)
3388 if (! flag_pic)
3390 SET_SRC (PATTERN (mova)) = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
3391 INSN_CODE (mova) = -1;
3393 else
3395 rtx worker = mova;
3396 rtx lab = gen_label_rtx ();
3397 rtx wpat, wpat0, wpat1, wsrc, diff;
3401 worker = NEXT_INSN (worker);
3402 gcc_assert (worker
3403 && GET_CODE (worker) != CODE_LABEL
3404 && GET_CODE (worker) != JUMP_INSN);
3405 } while (GET_CODE (worker) == NOTE
3406 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3407 wpat = PATTERN (worker);
3408 wpat0 = XVECEXP (wpat, 0, 0);
3409 wpat1 = XVECEXP (wpat, 0, 1);
3410 wsrc = SET_SRC (wpat0);
3411 PATTERN (worker) = (gen_casesi_worker_2
3412 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3413 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3414 XEXP (wpat1, 0)));
3415 INSN_CODE (worker) = -1;
3416 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3417 gen_rtx_LABEL_REF (Pmode, lab));
3418 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3419 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3420 INSN_CODE (mova) = -1;
3424 /* Find the last barrier from insn FROM which is close enough to hold the
3425 constant pool. If we can't find one, then create one near the end of
3426 the range. */
3428 static rtx
3429 find_barrier (int num_mova, rtx mova, rtx from)
3431 int count_si = 0;
3432 int count_hi = 0;
3433 int found_hi = 0;
3434 int found_si = 0;
3435 int found_di = 0;
3436 int hi_align = 2;
3437 int si_align = 2;
3438 int leading_mova = num_mova;
3439 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3440 int si_limit;
3441 int hi_limit;
3443 /* For HImode: range is 510, add 4 because pc counts from address of
3444 second instruction after this one, subtract 2 for the jump instruction
3445 that we may need to emit before the table, subtract 2 for the instruction
3446 that fills the jump delay slot (in very rare cases, reorg will take an
3447 instruction from after the constant pool or will leave the delay slot
3448 empty). This gives 510.
3449 For SImode: range is 1020, add 4 because pc counts from address of
3450 second instruction after this one, subtract 2 in case pc is 2 byte
3451 aligned, subtract 2 for the jump instruction that we may need to emit
3452 before the table, subtract 2 for the instruction that fills the jump
3453 delay slot. This gives 1018. */
3455 /* The branch will always be shortened now that the reference address for
3456 forward branches is the successor address, thus we need no longer make
3457 adjustments to the [sh]i_limit for -O0. */
3459 si_limit = 1018;
3460 hi_limit = 510;
3462 while (from && count_si < si_limit && count_hi < hi_limit)
3464 int inc = get_attr_length (from);
3465 int new_align = 1;
3467 if (GET_CODE (from) == CODE_LABEL)
3469 if (optimize)
3470 new_align = 1 << label_to_alignment (from);
3471 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3472 new_align = 1 << barrier_align (from);
3473 else
3474 new_align = 1;
3475 inc = 0;
3478 if (GET_CODE (from) == BARRIER)
3481 found_barrier = from;
3483 /* If we are at the end of the function, or in front of an alignment
3484 instruction, we need not insert an extra alignment. We prefer
3485 this kind of barrier. */
3486 if (barrier_align (from) > 2)
3487 good_barrier = from;
3490 if (broken_move (from))
3492 rtx pat, src, dst;
3493 enum machine_mode mode;
3495 pat = PATTERN (from);
3496 if (GET_CODE (pat) == PARALLEL)
3497 pat = XVECEXP (pat, 0, 0);
3498 src = SET_SRC (pat);
3499 dst = SET_DEST (pat);
3500 mode = GET_MODE (dst);
3502 /* We must explicitly check the mode, because sometimes the
3503 front end will generate code to load unsigned constants into
3504 HImode targets without properly sign extending them. */
3505 if (mode == HImode
3506 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3508 found_hi += 2;
3509 /* We put the short constants before the long constants, so
3510 we must count the length of short constants in the range
3511 for the long constants. */
3512 /* ??? This isn't optimal, but is easy to do. */
3513 si_limit -= 2;
3515 else
3517 /* We dump DF/DI constants before SF/SI ones, because
3518 the limit is the same, but the alignment requirements
3519 are higher. We may waste up to 4 additional bytes
3520 for alignment, and the DF/DI constant may have
3521 another SF/SI constant placed before it. */
3522 if (TARGET_SHCOMPACT
3523 && ! found_di
3524 && (mode == DFmode || mode == DImode))
3526 found_di = 1;
3527 si_limit -= 8;
3529 while (si_align > 2 && found_si + si_align - 2 > count_si)
3530 si_align >>= 1;
3531 if (found_si > count_si)
3532 count_si = found_si;
3533 found_si += GET_MODE_SIZE (mode);
3534 if (num_mova)
3535 si_limit -= GET_MODE_SIZE (mode);
3539 if (mova_p (from))
3541 if (! num_mova++)
3543 leading_mova = 0;
3544 mova = from;
3545 barrier_before_mova = good_barrier ? good_barrier : found_barrier;
3547 if (found_si > count_si)
3548 count_si = found_si;
3550 else if (GET_CODE (from) == JUMP_INSN
3551 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3552 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3554 if (num_mova)
3555 num_mova--;
3556 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3558 /* We have just passed the barrier in front of the
3559 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3560 the ADDR_DIFF_VEC is accessed as data, just like our pool
3561 constants, this is a good opportunity to accommodate what
3562 we have gathered so far.
3563 If we waited any longer, we could end up at a barrier in
3564 front of code, which gives worse cache usage for separated
3565 instruction / data caches. */
3566 good_barrier = found_barrier;
3567 break;
3569 else
3571 rtx body = PATTERN (from);
3572 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
3575 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3576 else if (GET_CODE (from) == JUMP_INSN
3577 && ! TARGET_SH2
3578 && ! TARGET_SMALLCODE)
3579 new_align = 4;
3581 if (found_si)
3583 count_si += inc;
3584 if (new_align > si_align)
3586 si_limit -= (count_si - 1) & (new_align - si_align);
3587 si_align = new_align;
3589 count_si = (count_si + new_align - 1) & -new_align;
3591 if (found_hi)
3593 count_hi += inc;
3594 if (new_align > hi_align)
3596 hi_limit -= (count_hi - 1) & (new_align - hi_align);
3597 hi_align = new_align;
3599 count_hi = (count_hi + new_align - 1) & -new_align;
3601 from = NEXT_INSN (from);
3604 if (num_mova)
3606 if (leading_mova)
3608 /* Try as we might, the leading mova is out of range. Change
3609 it into a load (which will become a pcload) and retry. */
3610 fixup_mova (mova);
3611 return find_barrier (0, 0, mova);
3613 else
3615 /* Insert the constant pool table before the mova instruction,
3616 to prevent the mova label reference from going out of range. */
3617 from = mova;
3618 good_barrier = found_barrier = barrier_before_mova;
3622 if (found_barrier)
3624 if (good_barrier && next_real_insn (found_barrier))
3625 found_barrier = good_barrier;
3627 else
3629 /* We didn't find a barrier in time to dump our stuff,
3630 so we'll make one. */
3631 rtx label = gen_label_rtx ();
3633 /* If we exceeded the range, then we must back up over the last
3634 instruction we looked at. Otherwise, we just need to undo the
3635 NEXT_INSN at the end of the loop. */
3636 if (count_hi > hi_limit || count_si > si_limit)
3637 from = PREV_INSN (PREV_INSN (from));
3638 else
3639 from = PREV_INSN (from);
3641 /* Walk back to be just before any jump or label.
3642 Putting it before a label reduces the number of times the branch
3643 around the constant pool table will be hit. Putting it before
3644 a jump makes it more likely that the bra delay slot will be
3645 filled. */
3646 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
3647 || GET_CODE (from) == CODE_LABEL)
3648 from = PREV_INSN (from);
3650 from = emit_jump_insn_after (gen_jump (label), from);
3651 JUMP_LABEL (from) = label;
3652 LABEL_NUSES (label) = 1;
3653 found_barrier = emit_barrier_after (from);
3654 emit_label_after (label, found_barrier);
3657 return found_barrier;
3660 /* If the instruction INSN is implemented by a special function, and we can
3661 positively find the register that is used to call the sfunc, and this
3662 register is not used anywhere else in this instruction - except as the
3663 destination of a set, return this register; else, return 0. */
3665 sfunc_uses_reg (rtx insn)
3667 int i;
3668 rtx pattern, part, reg_part, reg;
3670 if (GET_CODE (insn) != INSN)
3671 return 0;
3672 pattern = PATTERN (insn);
3673 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
3674 return 0;
3676 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3678 part = XVECEXP (pattern, 0, i);
3679 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
3680 reg_part = part;
3682 if (! reg_part)
3683 return 0;
3684 reg = XEXP (reg_part, 0);
3685 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
3687 part = XVECEXP (pattern, 0, i);
3688 if (part == reg_part || GET_CODE (part) == CLOBBER)
3689 continue;
3690 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
3691 && GET_CODE (SET_DEST (part)) == REG)
3692 ? SET_SRC (part) : part)))
3693 return 0;
3695 return reg;
3698 /* See if the only way in which INSN uses REG is by calling it, or by
3699 setting it while calling it. Set *SET to a SET rtx if the register
3700 is set by INSN. */
3702 static int
3703 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
3705 rtx pattern, reg2;
3707 *set = NULL_RTX;
3709 reg2 = sfunc_uses_reg (insn);
3710 if (reg2 && REGNO (reg2) == REGNO (reg))
3712 pattern = single_set (insn);
3713 if (pattern
3714 && GET_CODE (SET_DEST (pattern)) == REG
3715 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3716 *set = pattern;
3717 return 0;
3719 if (GET_CODE (insn) != CALL_INSN)
3721 /* We don't use rtx_equal_p because we don't care if the mode is
3722 different. */
3723 pattern = single_set (insn);
3724 if (pattern
3725 && GET_CODE (SET_DEST (pattern)) == REG
3726 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3728 rtx par, part;
3729 int i;
3731 *set = pattern;
3732 par = PATTERN (insn);
3733 if (GET_CODE (par) == PARALLEL)
3734 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
3736 part = XVECEXP (par, 0, i);
3737 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
3738 return 1;
3740 return reg_mentioned_p (reg, SET_SRC (pattern));
3743 return 1;
3746 pattern = PATTERN (insn);
3748 if (GET_CODE (pattern) == PARALLEL)
3750 int i;
3752 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3753 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
3754 return 1;
3755 pattern = XVECEXP (pattern, 0, 0);
3758 if (GET_CODE (pattern) == SET)
3760 if (reg_mentioned_p (reg, SET_DEST (pattern)))
3762 /* We don't use rtx_equal_p, because we don't care if the
3763 mode is different. */
3764 if (GET_CODE (SET_DEST (pattern)) != REG
3765 || REGNO (reg) != REGNO (SET_DEST (pattern)))
3766 return 1;
3768 *set = pattern;
3771 pattern = SET_SRC (pattern);
3774 if (GET_CODE (pattern) != CALL
3775 || GET_CODE (XEXP (pattern, 0)) != MEM
3776 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
3777 return 1;
3779 return 0;
3782 /* Given X, a pattern of an insn or a part of it, return a mask of used
3783 general registers. Bits 0..15 mean that the respective registers
3784 are used as inputs in the instruction. Bits 16..31 mean that the
3785 registers 0..15, respectively, are used as outputs, or are clobbered.
3786 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
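/* For example, for (set (reg:SI 4) (plus:SI (reg:SI 4) (reg:SI 1))) the
   returned mask has bits 1 and 4 set for the inputs and bit 20 (16 + 4)
   set for the output register.  */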
3788 regs_used (rtx x, int is_dest)
3790 enum rtx_code code;
3791 const char *fmt;
3792 int i, used = 0;
3794 if (! x)
3795 return used;
3796 code = GET_CODE (x);
3797 switch (code)
3799 case REG:
3800 if (REGNO (x) < 16)
3801 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3802 << (REGNO (x) + is_dest));
3803 return 0;
3804 case SUBREG:
3806 rtx y = SUBREG_REG (x);
3808 if (GET_CODE (y) != REG)
3809 break;
3810 if (REGNO (y) < 16)
3811 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3812 << (REGNO (y) +
3813 subreg_regno_offset (REGNO (y),
3814 GET_MODE (y),
3815 SUBREG_BYTE (x),
3816 GET_MODE (x)) + is_dest));
3817 return 0;
3819 case SET:
3820 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
3821 case RETURN:
3822 /* If there was a return value, it must have been indicated with USE. */
3823 return 0x00ffff00;
3824 case CLOBBER:
3825 is_dest = 1;
3826 break;
3827 case MEM:
3828 is_dest = 0;
3829 break;
3830 case CALL:
3831 used |= 0x00ff00f0;
3832 break;
3833 default:
3834 break;
3837 fmt = GET_RTX_FORMAT (code);
3839 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3841 if (fmt[i] == 'E')
3843 register int j;
3844 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3845 used |= regs_used (XVECEXP (x, i, j), is_dest);
3847 else if (fmt[i] == 'e')
3848 used |= regs_used (XEXP (x, i), is_dest);
3850 return used;
3853 /* Create an instruction that prevents redirection of a conditional branch
3854 to the destination of the JUMP with address ADDR.
3855 If the branch needs to be implemented as an indirect jump, try to find
3856 a scratch register for it.
3857 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3858 If any preceding insn that doesn't fit into a delay slot is good enough,
3859 pass 1. Pass 2 if a definite blocking insn is needed.
3860 -1 is used internally to avoid deep recursion.
3861 If a blocking instruction is made or recognized, return it. */
3863 static rtx
3864 gen_block_redirect (rtx jump, int addr, int need_block)
3866 int dead = 0;
3867 rtx prev = prev_nonnote_insn (jump);
3868 rtx dest;
3870 /* First, check if we already have an instruction that satisfies our need. */
3871 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3873 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3874 return prev;
3875 if (GET_CODE (PATTERN (prev)) == USE
3876 || GET_CODE (PATTERN (prev)) == CLOBBER
3877 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3878 prev = jump;
3879 else if ((need_block &= ~1) < 0)
3880 return prev;
3881 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
3882 need_block = 0;
3884 if (GET_CODE (PATTERN (jump)) == RETURN)
3886 if (! need_block)
3887 return prev;
3888 /* Reorg even does nasty things with return insns that cause branches
3889 to go out of range - see find_end_label and callers. */
3890 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
3892 /* We can't use JUMP_LABEL here because it might be undefined
3893 when not optimizing. */
3894 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
3895 /* If the branch is out of range, try to find a scratch register for it. */
3896 if (optimize
3897 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3898 > 4092 + 4098))
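/* The unsigned addition in the test above folds a two-sided range check
   into a single compare: the displacement dest - addr is out of range
   exactly when it lies below -4092 or above 4098.  For example, a
   displacement of -5000 yields (unsigned) (-5000 + 4092), which wraps to
   a huge value and exceeds 4092 + 4098, while a displacement of 2000
   yields 6092, which does not.  */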
3900 rtx scan;
3901 /* Don't look for the stack pointer as a scratch register;
3902 it would cause trouble if an interrupt occurred. */
3903 unsigned try = 0x7fff, used;
3904 int jump_left = flag_expensive_optimizations + 1;
3906 /* It is likely that the most recent eligible instruction is wanted for
3907 the delay slot. Therefore, find out which registers it uses, and
3908 try to avoid using them. */
3910 for (scan = jump; (scan = PREV_INSN (scan)); )
3912 enum rtx_code code;
3914 if (INSN_DELETED_P (scan))
3915 continue;
3916 code = GET_CODE (scan);
3917 if (code == CODE_LABEL || code == JUMP_INSN)
3918 break;
3919 if (code == INSN
3920 && GET_CODE (PATTERN (scan)) != USE
3921 && GET_CODE (PATTERN (scan)) != CLOBBER
3922 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
3924 try &= ~regs_used (PATTERN (scan), 0);
3925 break;
3928 for (used = dead = 0, scan = JUMP_LABEL (jump);
3929 (scan = NEXT_INSN (scan)); )
3931 enum rtx_code code;
3933 if (INSN_DELETED_P (scan))
3934 continue;
3935 code = GET_CODE (scan);
3936 if (INSN_P (scan))
3938 used |= regs_used (PATTERN (scan), 0);
3939 if (code == CALL_INSN)
3940 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
3941 dead |= (used >> 16) & ~used;
3942 if (dead & try)
3944 dead &= try;
3945 break;
3947 if (code == JUMP_INSN)
3949 if (jump_left-- && simplejump_p (scan))
3950 scan = JUMP_LABEL (scan);
3951 else
3952 break;
3956 /* Mask out the stack pointer again, in case it was
3957 the only 'free' register we have found. */
3958 dead &= 0x7fff;
3960 /* If the immediate destination is still in range, check for possible
3961 threading with a jump beyond the delay slot insn.
3962 Don't check if we are called recursively; the jump has been or will be
3963 checked in a different invocation in that case. */
3965 else if (optimize && need_block >= 0)
3967 rtx next = next_active_insn (next_active_insn (dest));
3968 if (next && GET_CODE (next) == JUMP_INSN
3969 && GET_CODE (PATTERN (next)) == SET
3970 && recog_memoized (next) == CODE_FOR_jump_compact)
3972 dest = JUMP_LABEL (next);
3973 if (dest
3974 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
3975 > 4092 + 4098))
3976 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
3980 if (dead)
3982 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
3984 /* It would be nice if we could convert the jump into an indirect
3985 jump / far branch right now, thus exposing all constituent
3986 instructions to further optimization. However, reorg uses
3987 simplejump_p to determine if there is an unconditional jump where
3988 it should try to schedule instructions from the target of the
3989 branch; simplejump_p fails for indirect jumps even if they have
3990 a JUMP_LABEL. */
3991 rtx insn = emit_insn_before (gen_indirect_jump_scratch
3992 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
3993 , jump);
3994 /* ??? We would like this to have the scope of the jump, but that
3995 scope will change when a delay slot insn of an inner scope is added.
3996 Hence, after delay slot scheduling, we'll have to expect
3997 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
3998 the jump. */
4000 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
4001 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
4002 return insn;
4004 else if (need_block)
4005 /* We can't use JUMP_LABEL here because it might be undefined
4006 when not optimizing. */
4007 return emit_insn_before (gen_block_branch_redirect
4008 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
4009 , jump);
4010 return prev;
4013 #define CONDJUMP_MIN -252
4014 #define CONDJUMP_MAX 262
4015 struct far_branch
4017 /* A label (to be placed) in front of the jump
4018 that jumps to our ultimate destination. */
4019 rtx near_label;
4020 /* Where we are going to insert it if we cannot move the jump any farther,
4021 or the jump itself if we have picked up an existing jump. */
4022 rtx insert_place;
4023 /* The ultimate destination. */
4024 rtx far_label;
4025 struct far_branch *prev;
4026 /* If the branch has already been created, its address;
4027 else the address of its first prospective user. */
4028 int address;
4031 static void gen_far_branch (struct far_branch *);
4032 enum mdep_reorg_phase_e mdep_reorg_phase;
4033 static void
4034 gen_far_branch (struct far_branch *bp)
4036 rtx insn = bp->insert_place;
4037 rtx jump;
4038 rtx label = gen_label_rtx ();
4039 int ok;
4041 emit_label_after (label, insn);
4042 if (bp->far_label)
4044 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
4045 LABEL_NUSES (bp->far_label)++;
4047 else
4048 jump = emit_jump_insn_after (gen_return (), insn);
4049 /* Emit a barrier so that reorg knows that any following instructions
4050 are not reachable via a fall-through path.
4051 But don't do this when not optimizing, since we wouldn't suppress the
4052 alignment for the barrier then, and could end up with out-of-range
4053 pc-relative loads. */
4054 if (optimize)
4055 emit_barrier_after (jump);
4056 emit_label_after (bp->near_label, insn);
4057 JUMP_LABEL (jump) = bp->far_label;
4058 ok = invert_jump (insn, label, 1);
4059 gcc_assert (ok);
4061 /* If we are branching around a jump (rather than a return), prevent
4062 reorg from using an insn from the jump target as the delay slot insn -
4063 when reorg did this, it pessimized code (we would rather hide the delay slot)
4064 and it could cause branches to go out of range. */
4065 if (bp->far_label)
4066 (emit_insn_after
4067 (gen_stuff_delay_slot
4068 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
4069 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
4070 insn));
4071 /* Prevent reorg from undoing our splits. */
4072 gen_block_redirect (jump, bp->address += 2, 2);
4075 /* Fix up ADDR_DIFF_VECs. */
4076 void
4077 fixup_addr_diff_vecs (rtx first)
4079 rtx insn;
4081 for (insn = first; insn; insn = NEXT_INSN (insn))
4083 rtx vec_lab, pat, prev, prevpat, x, braf_label;
4085 if (GET_CODE (insn) != JUMP_INSN
4086 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
4087 continue;
4088 pat = PATTERN (insn);
4089 vec_lab = XEXP (XEXP (pat, 0), 0);
4091 /* Search the matching casesi_jump_2. */
4092 for (prev = vec_lab; ; prev = PREV_INSN (prev))
4094 if (GET_CODE (prev) != JUMP_INSN)
4095 continue;
4096 prevpat = PATTERN (prev);
4097 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
4098 continue;
4099 x = XVECEXP (prevpat, 0, 1);
4100 if (GET_CODE (x) != USE)
4101 continue;
4102 x = XEXP (x, 0);
4103 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
4104 break;
4106 /* FIXME: This is a bug in the optimizer, but it seems harmless
4107 to just avoid panicking. */
4108 if (!prev)
4109 continue;
4111 /* Emit the reference label of the braf where it belongs, right after
4112 the casesi_jump_2 (i.e. braf). */
4113 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
4114 emit_label_after (braf_label, prev);
4116 /* Fix up the ADDR_DIFF_VEC to be relative
4117 to the reference address of the braf. */
4118 XEXP (XEXP (pat, 0), 0) = braf_label;
4122 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
4123 a barrier. Return the base 2 logarithm of the desired alignment. */
4125 barrier_align (rtx barrier_or_label)
4127 rtx next = next_real_insn (barrier_or_label), pat, prev;
4128 int slot, credit, jump_to_next = 0;
4130 if (! next)
4131 return 0;
4133 pat = PATTERN (next);
4135 if (GET_CODE (pat) == ADDR_DIFF_VEC)
4136 return 2;
4138 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
4139 /* This is a barrier in front of a constant table. */
4140 return 0;
4142 prev = prev_real_insn (barrier_or_label);
4143 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
4145 pat = PATTERN (prev);
4146 /* If this is a very small table, we want to keep the alignment after
4147 the table to the minimum for proper code alignment. */
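/* For instance, assuming CACHE_LOG == 5 (32-byte cache lines), the
   threshold below is 1 << 3 == 8 bytes, so a four-entry HImode table
   (4 * 2 == 8 bytes) still gets only the minimal alignment.  */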
4148 return ((TARGET_SMALLCODE
4149 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
4150 <= (unsigned) 1 << (CACHE_LOG - 2)))
4151 ? 1 << TARGET_SHMEDIA : align_jumps_log);
4154 if (TARGET_SMALLCODE)
4155 return 0;
4157 if (! TARGET_SH2 || ! optimize)
4158 return align_jumps_log;
4160 /* When fixing up pcloads, a constant table might be inserted just before
4161 the basic block that ends with the barrier. Thus, we can't trust the
4162 instruction lengths before that. */
4163 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
4165 /* Check if there is an immediately preceding branch to the insn beyond
4166 the barrier. We must weigh the cost of discarding useful information
4167 from the current cache line when executing this branch and there is
4168 an alignment, against that of fetching unneeded insns in front of the
4169 branch target when there is no alignment. */
4171 /* There are two delay_slot cases to consider. One is the simple case
4172 where the preceding branch is to the insn beyond the barrier (simple
4173 delay slot filling), and the other is where the preceding branch has
4174 a delay slot that is a duplicate of the insn after the barrier
4175 (fill_eager_delay_slots) and the branch is to the insn after the insn
4176 after the barrier. */
4178 /* PREV is presumed to be the JUMP_INSN for the barrier under
4179 investigation. Skip to the insn before it. */
4180 prev = prev_real_insn (prev);
4182 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
4183 credit >= 0 && prev && GET_CODE (prev) == INSN;
4184 prev = prev_real_insn (prev))
4186 jump_to_next = 0;
4187 if (GET_CODE (PATTERN (prev)) == USE
4188 || GET_CODE (PATTERN (prev)) == CLOBBER)
4189 continue;
4190 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
4192 prev = XVECEXP (PATTERN (prev), 0, 1);
4193 if (INSN_UID (prev) == INSN_UID (next))
4195 /* Delay slot was filled with insn at jump target. */
4196 jump_to_next = 1;
4197 continue;
4201 if (slot &&
4202 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4203 slot = 0;
4204 credit -= get_attr_length (prev);
4206 if (prev
4207 && GET_CODE (prev) == JUMP_INSN
4208 && JUMP_LABEL (prev))
4210 rtx x;
4211 if (jump_to_next
4212 || next_real_insn (JUMP_LABEL (prev)) == next
4213 /* If relax_delay_slots() decides NEXT was redundant
4214 with some previous instruction, it will have
4215 redirected PREV's jump to the following insn. */
4216 || JUMP_LABEL (prev) == next_nonnote_insn (next)
4217 /* There is no upper bound on redundant instructions
4218 that might have been skipped, but we must not put an
4219 alignment where none had been before. */
4220 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
4221 (INSN_P (x)
4222 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
4223 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
4224 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
4226 rtx pat = PATTERN (prev);
4227 if (GET_CODE (pat) == PARALLEL)
4228 pat = XVECEXP (pat, 0, 0);
4229 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
4230 return 0;
4235 return align_jumps_log;
4238 /* If we are inside a phony loop, almost any kind of label can turn up as the
4239 first one in the loop. Aligning a braf label causes incorrect switch
4240 destination addresses; we can detect braf labels because they are
4241 followed by a BARRIER.
4242 Applying loop alignment to small constant or switch tables is a waste
4243 of space, so we suppress this too. */
4245 sh_loop_align (rtx label)
4247 rtx next = label;
4250 next = next_nonnote_insn (next);
4251 while (next && GET_CODE (next) == CODE_LABEL);
4253 if (! next
4254 || ! INSN_P (next)
4255 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
4256 || recog_memoized (next) == CODE_FOR_consttable_2)
4257 return 0;
4259 return align_loops_log;
4262 /* Do a final pass over the function, just before delayed branch
4263 scheduling. */
4265 static void
4266 sh_reorg (void)
4268 rtx first, insn, mova = NULL_RTX;
4269 int num_mova;
4270 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
4271 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
4273 first = get_insns ();
4275 /* We must split call insns before introducing `mova's. If we're
4276 optimizing, they'll have already been split. Otherwise, make
4277 sure we don't split them too late. */
4278 if (! optimize)
4279 split_all_insns_noflow ();
4281 if (TARGET_SHMEDIA)
4282 return;
4284 /* If relaxing, generate pseudo-ops to associate function calls with
4285 the symbols they call. It does no harm to not generate these
4286 pseudo-ops. However, when we can generate them, it enables the
4287 linker to potentially relax the jsr to a bsr, and eliminate the
4288 register load and, possibly, the constant pool entry. */
4290 mdep_reorg_phase = SH_INSERT_USES_LABELS;
4291 if (TARGET_RELAX)
4293 /* Remove all REG_LABEL notes. We want to use them for our own
4294 purposes. This works because none of the remaining passes
4295 need to look at them.
4297 ??? But it may break in the future. We should use a machine
4298 dependent REG_NOTE, or some other approach entirely. */
4299 for (insn = first; insn; insn = NEXT_INSN (insn))
4301 if (INSN_P (insn))
4303 rtx note;
4305 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
4306 remove_note (insn, note);
4310 for (insn = first; insn; insn = NEXT_INSN (insn))
4312 rtx pattern, reg, link, set, scan, dies, label;
4313 int rescan = 0, foundinsn = 0;
4315 if (GET_CODE (insn) == CALL_INSN)
4317 pattern = PATTERN (insn);
4319 if (GET_CODE (pattern) == PARALLEL)
4320 pattern = XVECEXP (pattern, 0, 0);
4321 if (GET_CODE (pattern) == SET)
4322 pattern = SET_SRC (pattern);
4324 if (GET_CODE (pattern) != CALL
4325 || GET_CODE (XEXP (pattern, 0)) != MEM)
4326 continue;
4328 reg = XEXP (XEXP (pattern, 0), 0);
4330 else
4332 reg = sfunc_uses_reg (insn);
4333 if (! reg)
4334 continue;
4337 if (GET_CODE (reg) != REG)
4338 continue;
4340 /* This is a function call via REG. If the only uses of REG
4341 between the time that it is set and the time that it dies
4342 are in function calls, then we can associate all the
4343 function calls with the setting of REG. */
4345 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
4347 if (REG_NOTE_KIND (link) != 0)
4348 continue;
4349 set = single_set (XEXP (link, 0));
4350 if (set && rtx_equal_p (reg, SET_DEST (set)))
4352 link = XEXP (link, 0);
4353 break;
4357 if (! link)
4359 /* ??? Sometimes global register allocation will have
4360 deleted the insn pointed to by LOG_LINKS. Try
4361 scanning backward to find where the register is set. */
4362 for (scan = PREV_INSN (insn);
4363 scan && GET_CODE (scan) != CODE_LABEL;
4364 scan = PREV_INSN (scan))
4366 if (! INSN_P (scan))
4367 continue;
4369 if (! reg_mentioned_p (reg, scan))
4370 continue;
4372 if (noncall_uses_reg (reg, scan, &set))
4373 break;
4375 if (set)
4377 link = scan;
4378 break;
4383 if (! link)
4384 continue;
4386 /* The register is set at LINK. */
4388 /* We can only optimize the function call if the register is
4389 being set to a symbol. In theory, we could sometimes
4390 optimize calls to a constant location, but the assembler
4391 and linker do not support that at present. */
4392 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4393 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4394 continue;
4396 /* Scan forward from LINK to the place where REG dies, and
4397 make sure that the only insns which use REG are
4398 themselves function calls. */
4400 /* ??? This doesn't work for call targets that were allocated
4401 by reload, since there may not be a REG_DEAD note for the
4402 register. */
4404 dies = NULL_RTX;
4405 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4407 rtx scanset;
4409 /* Don't try to trace forward past a CODE_LABEL if we haven't
4410 seen INSN yet. Ordinarily, we will only find the setting insn
4411 in LOG_LINKS if it is in the same basic block. However,
4412 cross-jumping can insert code labels in between the load and
4413 the call, and can result in situations where a single call
4414 insn may have two targets depending on where we came from. */
4416 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4417 break;
4419 if (! INSN_P (scan))
4420 continue;
4422 /* Don't try to trace forward past a JUMP. To optimize
4423 safely, we would have to check that all the
4424 instructions at the jump destination did not use REG. */
4426 if (GET_CODE (scan) == JUMP_INSN)
4427 break;
4429 if (! reg_mentioned_p (reg, scan))
4430 continue;
4432 if (noncall_uses_reg (reg, scan, &scanset))
4433 break;
4435 if (scan == insn)
4436 foundinsn = 1;
4438 if (scan != insn
4439 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4441 /* There is a function call to this register other
4442 than the one we are checking. If we optimize
4443 this call, we need to rescan again below. */
4444 rescan = 1;
4447 /* ??? We shouldn't have to worry about SCANSET here.
4448 We should just be able to check for a REG_DEAD note
4449 on a function call. However, the REG_DEAD notes are
4450 apparently not dependable around libcalls; c-torture
4451 execute/920501-2 is a test case. If SCANSET is set,
4452 then this insn sets the register, so it must have
4453 died earlier. Unfortunately, this will only handle
4454 the cases in which the register is, in fact, set in a
4455 later insn. */
4457 /* ??? We shouldn't have to use FOUNDINSN here.
4458 However, the LOG_LINKS fields are apparently not
4459 entirely reliable around libcalls;
4460 newlib/libm/math/e_pow.c is a test case. Sometimes
4461 an insn will appear in LOG_LINKS even though it is
4462 not the most recent insn which sets the register. */
4464 if (foundinsn
4465 && (scanset
4466 || find_reg_note (scan, REG_DEAD, reg)))
4468 dies = scan;
4469 break;
4473 if (! dies)
4475 /* Either there was a branch, or some insn used REG
4476 other than as a function call address. */
4477 continue;
4480 /* Create a code label, and put it in a REG_LABEL note on
4481 the insn which sets the register, and on each call insn
4482 which uses the register. In final_prescan_insn we look
4483 for the REG_LABEL notes, and output the appropriate label
4484 or pseudo-op. */
4486 label = gen_label_rtx ();
4487 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
4488 REG_NOTES (link));
4489 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
4490 REG_NOTES (insn));
4491 if (rescan)
4493 scan = link;
4496 rtx reg2;
4498 scan = NEXT_INSN (scan);
4499 if (scan != insn
4500 && ((GET_CODE (scan) == CALL_INSN
4501 && reg_mentioned_p (reg, scan))
4502 || ((reg2 = sfunc_uses_reg (scan))
4503 && REGNO (reg2) == REGNO (reg))))
4504 REG_NOTES (scan)
4505 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
4507 while (scan != dies);
4512 if (TARGET_SH2)
4513 fixup_addr_diff_vecs (first);
4515 if (optimize)
4517 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4518 shorten_branches (first);
4520 /* Scan the function looking for move instructions which have to be
4521 changed to pc-relative loads and insert the literal tables. */
4523 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4524 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4526 if (mova_p (insn))
4528 /* ??? basic block reordering can move a switch table dispatch
4529 below the switch table. Check if that has happened.
4530 We only have the addresses available when optimizing; but then,
4531 this check shouldn't be needed when not optimizing. */
4532 rtx label_ref = XVECEXP (SET_SRC (PATTERN (insn)), 0, 0);
4533 if (optimize
4534 && (INSN_ADDRESSES (INSN_UID (insn))
4535 > INSN_ADDRESSES (INSN_UID (XEXP (label_ref, 0)))))
4537 /* Change the mova into a load.
4538 broken_move will then return true for it. */
4539 fixup_mova (insn);
4541 else if (! num_mova++)
4542 mova = insn;
4544 else if (GET_CODE (insn) == JUMP_INSN
4545 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4546 && num_mova)
4548 rtx scan;
4549 int total;
4551 num_mova--;
4553 /* Some code might have been inserted between the mova and
4554 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4555 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4556 total += get_attr_length (scan);
4558 /* The range of the mova is 1020; add 4 because the pc counts from the
4559 address of the second instruction after this one, and subtract 2 in
4560 case the pc is 2-byte aligned. Any alignment needed for the ADDR_DIFF_VEC
4561 cancels out with the alignment effects of the mova itself. */
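/* That is, the threshold checked below is 1020 + 4 - 2 == 1022.  */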
4562 if (total > 1022)
4564 /* Change the mova into a load, and restart scanning
4565 there. broken_move will then return true for mova. */
4566 fixup_mova (mova);
4567 insn = mova;
4570 if (broken_move (insn)
4571 || (GET_CODE (insn) == INSN
4572 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
4574 rtx scan;
4575 /* Scan ahead looking for a barrier to stick the constant table
4576 behind. */
4577 rtx barrier = find_barrier (num_mova, mova, insn);
4578 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4579 int need_aligned_label = 0;
4581 if (num_mova && ! mova_p (mova))
4583 /* find_barrier had to change the first mova into a
4584 pcload; thus, we have to start with this new pcload. */
4585 insn = mova;
4586 num_mova = 0;
4588 /* Now find all the moves between the points and modify them. */
4589 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
4591 if (GET_CODE (scan) == CODE_LABEL)
4592 last_float = 0;
4593 if (GET_CODE (scan) == INSN
4594 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
4595 need_aligned_label = 1;
4596 if (broken_move (scan))
4598 rtx *patp = &PATTERN (scan), pat = *patp;
4599 rtx src, dst;
4600 rtx lab;
4601 rtx newsrc;
4602 enum machine_mode mode;
4604 if (GET_CODE (pat) == PARALLEL)
4605 patp = &XVECEXP (pat, 0, 0), pat = *patp;
4606 src = SET_SRC (pat);
4607 dst = SET_DEST (pat);
4608 mode = GET_MODE (dst);
4610 if (mode == SImode && hi_const (src)
4611 && REGNO (dst) != FPUL_REG)
4613 int offset = 0;
4615 mode = HImode;
4616 while (GET_CODE (dst) == SUBREG)
4618 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
4619 GET_MODE (SUBREG_REG (dst)),
4620 SUBREG_BYTE (dst),
4621 GET_MODE (dst));
4622 dst = SUBREG_REG (dst);
4624 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
4626 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
4628 /* This must be an insn that clobbers r0. */
4629 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
4630 XVECLEN (PATTERN (scan), 0)
4631 - 1);
4632 rtx clobber = *clobberp;
4634 gcc_assert (GET_CODE (clobber) == CLOBBER
4635 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
4637 if (last_float
4638 && reg_set_between_p (r0_rtx, last_float_move, scan))
4639 last_float = 0;
4640 if (last_float
4641 && TARGET_SHCOMPACT
4642 && GET_MODE_SIZE (mode) != 4
4643 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
4644 last_float = 0;
4645 lab = add_constant (src, mode, last_float);
4646 if (lab)
4647 emit_insn_before (gen_mova (lab), scan);
4648 else
4650 /* There will be a REG_UNUSED note for r0 on
4651 LAST_FLOAT_MOVE; we have to change it to REG_INC,
4652 since otherwise reorg:mark_target_live_regs will not
4653 consider r0 to be used, and we could end up with a delay
4654 slot insn in front of SCAN that clobbers r0.
4655 rtx note
4656 = find_regno_note (last_float_move, REG_UNUSED, 0);
4658 /* If we are not optimizing, then there may not be
4659 a note. */
4660 if (note)
4661 PUT_MODE (note, REG_INC);
4663 *last_float_addr = r0_inc_rtx;
4665 last_float_move = scan;
4666 last_float = src;
4667 newsrc = gen_const_mem (mode,
4668 (((TARGET_SH4 && ! TARGET_FMOVD)
4669 || REGNO (dst) == FPUL_REG)
4670 ? r0_inc_rtx
4671 : r0_rtx));
4672 last_float_addr = &XEXP (newsrc, 0);
4674 /* Remove the clobber of r0. */
4675 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
4676 gen_rtx_SCRATCH (Pmode));
4678 /* This is a mova needing a label. Create it. */
4679 else if (GET_CODE (src) == UNSPEC
4680 && XINT (src, 1) == UNSPEC_MOVA
4681 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
4683 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
4684 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4685 newsrc = gen_rtx_UNSPEC (SImode,
4686 gen_rtvec (1, newsrc),
4687 UNSPEC_MOVA);
4689 else
4691 lab = add_constant (src, mode, 0);
4692 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4693 newsrc = gen_const_mem (mode, newsrc);
4695 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
4696 INSN_CODE (scan) = -1;
4699 dump_table (need_aligned_label ? insn : 0, barrier);
4700 insn = barrier;
4704 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
4705 INSN_ADDRESSES_FREE ();
4706 split_branches (first);
4708 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4709 also has an effect on the register that holds the address of the sfunc.
4710 Insert an extra dummy insn in front of each sfunc that pretends to
4711 use this register. */
4712 if (flag_delayed_branch)
4714 for (insn = first; insn; insn = NEXT_INSN (insn))
4716 rtx reg = sfunc_uses_reg (insn);
4718 if (! reg)
4719 continue;
4720 emit_insn_before (gen_use_sfunc_addr (reg), insn);
4723 #if 0
4724 /* fpscr is not actually a user variable, but we pretend it is for the
4725 sake of the previous optimization passes, since we want it handled like
4726 one. However, we don't have any debugging information for it, so turn
4727 it into a non-user variable now. */
4728 if (TARGET_SH4)
4729 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4730 #endif
4731 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
4735 get_dest_uid (rtx label, int max_uid)
4737 rtx dest = next_real_insn (label);
4738 int dest_uid;
4739 if (! dest)
4740 /* This can happen for an undefined label. */
4741 return 0;
4742 dest_uid = INSN_UID (dest);
4743 /* If this is a newly created branch redirection blocking instruction,
4744 we cannot index the branch_uid or insn_addresses arrays with its
4745 uid. But then, we won't need to, because the actual destination is
4746 the following branch. */
4747 while (dest_uid >= max_uid)
4749 dest = NEXT_INSN (dest);
4750 dest_uid = INSN_UID (dest);
4752 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
4753 return 0;
4754 return dest_uid;
4757 /* Split condbranches that are out of range. Also add clobbers for
4758 scratch registers that are needed in far jumps.
4759 We do this before delay slot scheduling, so that it can take our
4760 newly created instructions into account. It also allows us to
4761 find branches with common targets more easily. */
4763 static void
4764 split_branches (rtx first)
4766 rtx insn;
4767 struct far_branch **uid_branch, *far_branch_list = 0;
4768 int max_uid = get_max_uid ();
4769 int ok;
4771 /* Find out which branches are out of range. */
4772 shorten_branches (first);
4774 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
4775 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
4777 for (insn = first; insn; insn = NEXT_INSN (insn))
4778 if (! INSN_P (insn))
4779 continue;
4780 else if (INSN_DELETED_P (insn))
4782 /* Shorten_branches would split this instruction again,
4783 so transform it into a note. */
4784 PUT_CODE (insn, NOTE);
4785 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4786 NOTE_SOURCE_FILE (insn) = 0;
4788 else if (GET_CODE (insn) == JUMP_INSN
4789 /* Don't mess with ADDR_DIFF_VEC */
4790 && (GET_CODE (PATTERN (insn)) == SET
4791 || GET_CODE (PATTERN (insn)) == RETURN))
4793 enum attr_type type = get_attr_type (insn);
4794 if (type == TYPE_CBRANCH)
4796 rtx next, beyond;
4798 if (get_attr_length (insn) > 4)
4800 rtx src = SET_SRC (PATTERN (insn));
4801 rtx olabel = XEXP (XEXP (src, 1), 0);
4802 int addr = INSN_ADDRESSES (INSN_UID (insn));
4803 rtx label = 0;
4804 int dest_uid = get_dest_uid (olabel, max_uid);
4805 struct far_branch *bp = uid_branch[dest_uid];
4807 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4808 the label if the LABEL_NUSES count drops to zero. There is
4809 always a jump_optimize pass that sets these values, but it
4810 proceeds to delete unreferenced code, and then if not
4811 optimizing, to un-delete the deleted instructions, thus
4812 leaving labels with use counts that are too low. */
4813 if (! optimize)
4815 JUMP_LABEL (insn) = olabel;
4816 LABEL_NUSES (olabel)++;
4818 if (! bp)
4820 bp = (struct far_branch *) alloca (sizeof *bp);
4821 uid_branch[dest_uid] = bp;
4822 bp->prev = far_branch_list;
4823 far_branch_list = bp;
4824 bp->far_label
4825 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
4826 LABEL_NUSES (bp->far_label)++;
4828 else
4830 label = bp->near_label;
4831 if (! label && bp->address - addr >= CONDJUMP_MIN)
4833 rtx block = bp->insert_place;
4835 if (GET_CODE (PATTERN (block)) == RETURN)
4836 block = PREV_INSN (block);
4837 else
4838 block = gen_block_redirect (block,
4839 bp->address, 2);
4840 label = emit_label_after (gen_label_rtx (),
4841 PREV_INSN (block));
4842 bp->near_label = label;
4844 else if (label && ! NEXT_INSN (label))
4846 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4847 bp->insert_place = insn;
4848 else
4849 gen_far_branch (bp);
4852 if (! label
4853 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4855 bp->near_label = label = gen_label_rtx ();
4856 bp->insert_place = insn;
4857 bp->address = addr;
4859 ok = redirect_jump (insn, label, 1);
4860 gcc_assert (ok);
4862 else
4864 /* get_attr_length (insn) == 2 */
4865 /* Check if we have a pattern where reorg wants to redirect
4866 the branch to a label from an unconditional branch that
4867 is too far away. */
4868 /* We can't use JUMP_LABEL here because it might be undefined
4869 when not optimizing. */
4870 /* A syntax error might cause beyond to be NULL_RTX. */
4871 beyond
4872 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4873 0));
4875 if (beyond
4876 && (GET_CODE (beyond) == JUMP_INSN
4877 || ((beyond = next_active_insn (beyond))
4878 && GET_CODE (beyond) == JUMP_INSN))
4879 && GET_CODE (PATTERN (beyond)) == SET
4880 && recog_memoized (beyond) == CODE_FOR_jump_compact
4881 && ((INSN_ADDRESSES
4882 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4883 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4884 > 252 + 258 + 2))
4885 gen_block_redirect (beyond,
4886 INSN_ADDRESSES (INSN_UID (beyond)), 1);
4889 next = next_active_insn (insn);
4891 if ((GET_CODE (next) == JUMP_INSN
4892 || ((next = next_active_insn (next))
4893 && GET_CODE (next) == JUMP_INSN))
4894 && GET_CODE (PATTERN (next)) == SET
4895 && recog_memoized (next) == CODE_FOR_jump_compact
4896 && ((INSN_ADDRESSES
4897 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
4898 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4899 > 252 + 258 + 2))
4900 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
4902 else if (type == TYPE_JUMP || type == TYPE_RETURN)
4904 int addr = INSN_ADDRESSES (INSN_UID (insn));
4905 rtx far_label = 0;
4906 int dest_uid = 0;
4907 struct far_branch *bp;
4909 if (type == TYPE_JUMP)
4911 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
4912 dest_uid = get_dest_uid (far_label, max_uid);
4913 if (! dest_uid)
4915 /* Parse errors can lead to labels outside
4916 the insn stream. */
4917 if (! NEXT_INSN (far_label))
4918 continue;
4920 if (! optimize)
4922 JUMP_LABEL (insn) = far_label;
4923 LABEL_NUSES (far_label)++;
4925 redirect_jump (insn, NULL_RTX, 1);
4926 far_label = 0;
4929 bp = uid_branch[dest_uid];
4930 if (! bp)
4932 bp = (struct far_branch *) alloca (sizeof *bp);
4933 uid_branch[dest_uid] = bp;
4934 bp->prev = far_branch_list;
4935 far_branch_list = bp;
4936 bp->near_label = 0;
4937 bp->far_label = far_label;
4938 if (far_label)
4939 LABEL_NUSES (far_label)++;
4941 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
4942 if (addr - bp->address <= CONDJUMP_MAX)
4943 emit_label_after (bp->near_label, PREV_INSN (insn));
4944 else
4946 gen_far_branch (bp);
4947 bp->near_label = 0;
4949 else
4950 bp->near_label = 0;
4951 bp->address = addr;
4952 bp->insert_place = insn;
4953 if (! far_label)
4954 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
4955 else
4956 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
4959 /* Generate all pending far branches,
4960 and free our references to the far labels. */
4961 while (far_branch_list)
4963 if (far_branch_list->near_label
4964 && ! NEXT_INSN (far_branch_list->near_label))
4965 gen_far_branch (far_branch_list);
4966 if (optimize
4967 && far_branch_list->far_label
4968 && ! --LABEL_NUSES (far_branch_list->far_label))
4969 delete_insn (far_branch_list->far_label);
4970 far_branch_list = far_branch_list->prev;
4973 /* Instruction length information is no longer valid due to the new
4974 instructions that have been generated. */
4975 init_insn_lengths ();
4978 /* Dump out instruction addresses, which is useful for debugging the
4979 constant pool table stuff.
4981 If relaxing, output the label and pseudo-ops used to link together
4982 calls and the instruction which set the registers. */
4984 /* ??? The addresses printed by this routine for insns are nonsense for
4985 insns which are inside of a sequence where none of the inner insns have
4986 variable length. This is because the second pass of shorten_branches
4987 does not bother to update them. */
4989 void
4990 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
4991 int noperands ATTRIBUTE_UNUSED)
4993 if (TARGET_DUMPISIZE)
4994 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
4996 if (TARGET_RELAX)
4998 rtx note;
5000 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
5001 if (note)
5003 rtx pattern;
5005 pattern = PATTERN (insn);
5006 if (GET_CODE (pattern) == PARALLEL)
5007 pattern = XVECEXP (pattern, 0, 0);
5008 switch (GET_CODE (pattern))
5010 case SET:
5011 if (GET_CODE (SET_SRC (pattern)) != CALL
5012 && get_attr_type (insn) != TYPE_SFUNC)
5014 targetm.asm_out.internal_label
5015 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
5016 break;
5018 /* else FALLTHROUGH */
5019 case CALL:
5020 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
5021 CODE_LABEL_NUMBER (XEXP (note, 0)));
5022 break;
5024 default:
5025 gcc_unreachable ();
5031 /* Dump out any constants accumulated in the final pass. These will
5032 only be labels. */
5034 const char *
5035 output_jump_label_table (void)
5037 int i;
5039 if (pool_size)
5041 fprintf (asm_out_file, "\t.align 2\n");
5042 for (i = 0; i < pool_size; i++)
5044 pool_node *p = &pool_vector[i];
5046 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5047 CODE_LABEL_NUMBER (p->label));
5048 output_asm_insn (".long %O0", &p->value);
5050 pool_size = 0;
5053 return "";
5056 /* A full frame looks like:
5058 arg-5
5059 arg-4
5060 [ if current_function_anonymous_args
5061 arg-3
5062 arg-2
5063 arg-1
5064 arg-0 ]
5065 saved-fp
5066 saved-r10
5067 saved-r11
5068 saved-r12
5069 saved-pr
5070 local-n
5072 local-1
5073 local-0 <- fp points here. */
5075 /* Number of bytes pushed for anonymous args, used to pass information
5076 between expand_prologue and expand_epilogue. */
5078 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
5079 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
5080 for an epilogue and a negative value means that it's for a sibcall
5081 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
5082 all the registers that are about to be restored, and hence dead. */
5084 static void
5085 output_stack_adjust (int size, rtx reg, int epilogue_p,
5086 HARD_REG_SET *live_regs_mask)
5088 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
5089 if (size)
5091 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5093 /* This test is bogus, as output_stack_adjust is used to re-align the
5094 stack. */
5095 #if 0
5096 gcc_assert (!(size % align));
5097 #endif
5099 if (CONST_OK_FOR_ADD (size))
5100 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
5101 /* Try to do it with two partial adjustments; however, we must make
5102 sure that the stack is properly aligned at all times, in case
5103 an interrupt occurs between the two partial adjustments. */
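/* A worked example, assuming a 32-bit STACK_BOUNDARY (align == 4) and
   that CONST_OK_FOR_ADD accepts signed 8-bit constants: size == 200 is
   too big for a single adjustment, but size / 2 & -align == 100 and the
   remainder 200 - 100 == 100 both fit, so two add #100 instructions are
   emitted and the stack stays 4-byte aligned after each one.  */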
5104 else if (CONST_OK_FOR_ADD (size / 2 & -align)
5105 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
5107 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
5108 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
5110 else
5112 rtx const_reg;
5113 rtx insn;
5114 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
5115 int i;
5117 /* If TEMP is invalid, we could temporarily save a general
5118 register to MACL. However, there is currently no need
5119 to handle this case, so just die when we see it. */
5120 if (epilogue_p < 0
5121 || current_function_interrupt
5122 || ! call_really_used_regs[temp] || fixed_regs[temp])
5123 temp = -1;
5124 if (temp < 0 && ! current_function_interrupt
5125 && (TARGET_SHMEDIA || epilogue_p >= 0))
5127 HARD_REG_SET temps;
5128 COPY_HARD_REG_SET (temps, call_used_reg_set);
5129 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
5130 if (epilogue_p > 0)
5132 int nreg = 0;
5133 if (current_function_return_rtx)
5135 enum machine_mode mode;
5136 mode = GET_MODE (current_function_return_rtx);
5137 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
5138 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
5140 for (i = 0; i < nreg; i++)
5141 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
5142 if (current_function_calls_eh_return)
5144 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
5145 for (i = 0; i <= 3; i++)
5146 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
5149 if (TARGET_SHMEDIA && epilogue_p < 0)
5150 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
5151 CLEAR_HARD_REG_BIT (temps, i);
5152 if (epilogue_p <= 0)
5154 for (i = FIRST_PARM_REG;
5155 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
5156 CLEAR_HARD_REG_BIT (temps, i);
5157 if (cfun->static_chain_decl != NULL)
5158 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
5160 temp = scavenge_reg (&temps);
5162 if (temp < 0 && live_regs_mask)
5163 temp = scavenge_reg (live_regs_mask);
5164 if (temp < 0)
5166 rtx adj_reg, tmp_reg, mem;
5168 /* If we reached here, the most likely case is the (sibcall)
5169 epilogue for non-SHmedia. Put a special push/pop sequence
5170 for such cases as a last resort. This looks lengthy, but it
5171 would not be a problem because it seems to be very
5172 rare. */
5174 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
5177 /* ??? There is still the slight possibility that r4 or
5178 r5 have been reserved as fixed registers or assigned
5179 as global registers, and they change during an
5180 interrupt. There are possible ways to handle this:
5182 - If we are adjusting the frame pointer (r14), we can do
5183 with a single temp register and an ordinary push / pop
5184 on the stack.
5185 - Grab any call-used or call-saved registers (i.e. not
5186 fixed or globals) for the temps we need. We might
5187 also grab r14 if we are adjusting the stack pointer.
5188 If we can't find enough available registers, issue
5189 a diagnostic and die - the user must have reserved
5190 way too many registers.
5191 But since all this is rather unlikely to happen and
5192 would require extra testing, we just die if r4 / r5
5193 are not available. */
5194 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
5195 && !global_regs[4] && !global_regs[5]);
5197 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
5198 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
5199 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
5200 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
5201 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
5202 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5203 emit_move_insn (mem, tmp_reg);
5204 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
5205 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5206 emit_move_insn (mem, tmp_reg);
5207 emit_move_insn (reg, adj_reg);
5208 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5209 emit_move_insn (adj_reg, mem);
5210 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5211 emit_move_insn (tmp_reg, mem);
5212 return;
5214 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
5216 /* If SIZE is negative, subtract the positive value.
5217 This sometimes allows a constant pool entry to be shared
5218 between prologue and epilogue code. */
5219 if (size < 0)
5221 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
5222 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
5224 else
5226 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
5227 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
5229 if (! epilogue_p)
5230 REG_NOTES (insn)
5231 = (gen_rtx_EXPR_LIST
5232 (REG_FRAME_RELATED_EXPR,
5233 gen_rtx_SET (VOIDmode, reg,
5234 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
5235 REG_NOTES (insn)));
5240 static rtx
5241 frame_insn (rtx x)
5243 x = emit_insn (x);
5244 RTX_FRAME_RELATED_P (x) = 1;
5245 return x;
5248 /* Output RTL to push register RN onto the stack. */
5250 static rtx
5251 push (int rn)
5253 rtx x;
5254 if (rn == FPUL_REG)
5255 x = gen_push_fpul ();
5256 else if (rn == FPSCR_REG)
5257 x = gen_push_fpscr ();
5258 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5259 && FP_OR_XD_REGISTER_P (rn))
5261 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5262 return NULL_RTX;
5263 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
5265 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5266 x = gen_push_e (gen_rtx_REG (SFmode, rn));
5267 else
5268 x = gen_push (gen_rtx_REG (SImode, rn));
5270 x = frame_insn (x);
5271 REG_NOTES (x)
5272 = gen_rtx_EXPR_LIST (REG_INC,
5273 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5274 return x;
5277 /* Output RTL to pop register RN from the stack. */
5279 static void
5280 pop (int rn)
5282 rtx x;
5283 if (rn == FPUL_REG)
5284 x = gen_pop_fpul ();
5285 else if (rn == FPSCR_REG)
5286 x = gen_pop_fpscr ();
5287 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5288 && FP_OR_XD_REGISTER_P (rn))
5290 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5291 return;
5292 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
5294 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5295 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
5296 else
5297 x = gen_pop (gen_rtx_REG (SImode, rn));
5299 x = emit_insn (x);
5300 REG_NOTES (x)
5301 = gen_rtx_EXPR_LIST (REG_INC,
5302 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5305 /* Generate code to push the regs specified in the mask. */
5307 static void
5308 push_regs (HARD_REG_SET *mask, int interrupt_handler)
5310 int i;
5311 int skip_fpscr = 0;
5313 /* Push PR last; this gives better latencies after the prologue, and
5314 candidates for the return delay slot when there are no general
5315 registers pushed. */
5316 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5318 /* If this is an interrupt handler, and the SZ bit varies,
5319 and we have to push any floating point register, we need
5320 to switch to the correct precision first. */
5321 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
5322 && hard_regs_intersect_p (mask, &reg_class_contents[DF_REGS]))
5324 HARD_REG_SET unsaved;
5326 push (FPSCR_REG);
5327 COMPL_HARD_REG_SET (unsaved, *mask);
5328 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
5329 skip_fpscr = 1;
5331 if (i != PR_REG
5332 && (i != FPSCR_REG || ! skip_fpscr)
5333 && TEST_HARD_REG_BIT (*mask, i))
5334 push (i);
5336 if (TEST_HARD_REG_BIT (*mask, PR_REG))
5337 push (PR_REG);
5340 /* Calculate how much extra space is needed to save all callee-saved
5341 target registers.
5342 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5344 static int
5345 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
5347 int reg;
5348 int stack_space = 0;
5349 int interrupt_handler = sh_cfun_interrupt_handler_p ();
5351 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5352 if ((! call_really_used_regs[reg] || interrupt_handler)
5353 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5354 /* Leave space to save this target register on the stack,
5355 in case target register allocation wants to use it. */
5356 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5357 return stack_space;
5360 /* Decide whether we should reserve space for callee-save target registers,
5361 in case target register allocation wants to use them. REGS_SAVED is
5362 the space, in bytes, that is already required for register saves.
5363 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5365 static int
5366 shmedia_reserve_space_for_target_registers_p (int regs_saved,
5367 HARD_REG_SET *live_regs_mask)
5369 if (optimize_size)
5370 return 0;
5371 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
5374 /* Decide how much space to reserve for callee-save target registers
5375 in case target register allocation wants to use them.
5376 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5378 static int
5379 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5381 if (shmedia_space_reserved_for_target_registers)
5382 return shmedia_target_regs_stack_space (live_regs_mask);
5383 else
5384 return 0;
5387 /* Work out the registers which need to be saved, both as a mask and a
5388 count of saved words. Return the count.
5390 If doing a pragma interrupt function, then push all regs used by the
5391 function, and if we call another function (we can tell by looking at PR),
5392 make sure that all the regs it clobbers are safe too. */
5394 static int
5395 calc_live_regs (HARD_REG_SET *live_regs_mask)
5397 unsigned int reg;
5398 int count;
5399 tree attrs;
5400 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
5401 bool nosave_low_regs;
5402 int pr_live, has_call;
5404 attrs = DECL_ATTRIBUTES (current_function_decl);
5405 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
5406 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
5407 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
5408 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
5410 CLEAR_HARD_REG_SET (*live_regs_mask);
5411 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
5412 && regs_ever_live[FPSCR_REG])
5413 target_flags &= ~MASK_FPU_SINGLE;
5414 /* If we can avoid a lot of saves by switching to double mode, do that. */
5415 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
5416 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5417 if (regs_ever_live[reg] && regs_ever_live[reg+1]
5418 && (! call_really_used_regs[reg]
5419 || interrupt_handler)
5420 && ++count > 2)
5422 target_flags &= ~MASK_FPU_SINGLE;
5423 break;
5425 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5426 knows how to use it. That means the pseudo originally allocated for
5427 the initial value can become the PR_MEDIA_REG hard register, as seen for
5428 execute/20010122-1.c:test9. */
5429 if (TARGET_SHMEDIA)
5430 /* ??? this function is called from initial_elimination_offset, hence we
5431 can't use the result of sh_media_register_for_return here. */
5432 pr_live = sh_pr_n_sets ();
5433 else
5435 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5436 pr_live = (pr_initial
5437 ? (GET_CODE (pr_initial) != REG
5438 || REGNO (pr_initial) != (PR_REG))
5439 : regs_ever_live[PR_REG]);
5440 /* For SHcompact, if not optimizing, we end up with a memory reference
5441 using the return address pointer for __builtin_return_address even
5442 though there is no actual need to put the PR register on the stack. */
5443 pr_live |= regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM];
5445 /* Force PR to be live if the prologue has to call the SHmedia
5446 argument decoder or register saver. */
5447 if (TARGET_SHCOMPACT
5448 && ((current_function_args_info.call_cookie
5449 & ~ CALL_COOKIE_RET_TRAMP (1))
5450 || current_function_has_nonlocal_label))
5451 pr_live = 1;
5452 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
5453 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
5455 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5456 ? pr_live
5457 : interrupt_handler
5458 ? (/* Need to save all the regs ever live. */
5459 (regs_ever_live[reg]
5460 || (call_really_used_regs[reg]
5461 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
5462 || reg == PIC_OFFSET_TABLE_REGNUM)
5463 && has_call)
5464 || (TARGET_SHMEDIA && has_call
5465 && REGISTER_NATURAL_MODE (reg) == SImode
5466 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5467 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5468 && reg != RETURN_ADDRESS_POINTER_REGNUM
5469 && reg != T_REG && reg != GBR_REG
5470 /* Push fpscr only on targets which have an FPU. */
5471 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5472 : (/* Only push those regs which are used and need to be saved. */
5473 (TARGET_SHCOMPACT
5474 && flag_pic
5475 && current_function_args_info.call_cookie
5476 && reg == PIC_OFFSET_TABLE_REGNUM)
5477 || (regs_ever_live[reg]
5478 && (!call_really_used_regs[reg]
5479 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
5480 || (current_function_calls_eh_return
5481 && (reg == EH_RETURN_DATA_REGNO (0)
5482 || reg == EH_RETURN_DATA_REGNO (1)
5483 || reg == EH_RETURN_DATA_REGNO (2)
5484 || reg == EH_RETURN_DATA_REGNO (3)))
5485 || ((reg == MACL_REG || reg == MACH_REG)
5486 && regs_ever_live[reg]
5487 && sh_cfun_attr_renesas_p ())
5490 SET_HARD_REG_BIT (*live_regs_mask, reg);
5491 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5493 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
5494 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
5496 if (FP_REGISTER_P (reg))
5498 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
5500 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5501 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
5504 else if (XD_REGISTER_P (reg))
5506 /* Must switch to double mode to access these registers. */
5507 target_flags &= ~MASK_FPU_SINGLE;
5511 if (nosave_low_regs && reg == R8_REG)
5512 break;
5514 /* If we have a target register optimization pass after prologue / epilogue
5515 threading, we need to assume all target registers will be live even if
5516 they aren't now. */
5517 if (flag_branch_target_load_optimize2
5518 && TARGET_SAVE_ALL_TARGET_REGS
5519 && shmedia_space_reserved_for_target_registers)
5520 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5521 if ((! call_really_used_regs[reg] || interrupt_handler)
5522 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5524 SET_HARD_REG_BIT (*live_regs_mask, reg);
5525 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5527 /* If this is an interrupt handler, we don't have any call-clobbered
5528 registers we can conveniently use for target register save/restore.
5529 Make sure we save at least one general purpose register when we need
5530 to save target registers. */
5531 if (interrupt_handler
5532 && hard_regs_intersect_p (live_regs_mask,
5533 &reg_class_contents[TARGET_REGS])
5534 && ! hard_regs_intersect_p (live_regs_mask,
5535 &reg_class_contents[GENERAL_REGS]))
5537 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
5538 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
5541 return count;
5544 /* Code to generate prologue and epilogue sequences */
5546 /* PUSHED is the number of bytes that are being pushed on the
5547 stack for register saves. Return the frame size, padded
5548 appropriately so that the stack stays properly aligned. */
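/* For example, assuming an 8-byte STACK_BOUNDARY: with
   get_frame_size () == 20 and PUSHED == 12, the result is
   ((20 + 12 + 7) & -8) - 12 == 32 - 12 == 20, so locals plus register
   saves occupy 32 bytes in total and the stack stays 8-byte aligned.  */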
5549 static HOST_WIDE_INT
5550 rounded_frame_size (int pushed)
5552 HOST_WIDE_INT size = get_frame_size ();
5553 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5555 return ((size + pushed + align - 1) & -align) - pushed;
5558 /* Choose a call-clobbered target-branch register that remains
5559 unchanged along the whole function. We set it up as the return
5560 value in the prologue. */
5562 sh_media_register_for_return (void)
5564 int regno;
5565 int tr0_used;
5567 if (! current_function_is_leaf)
5568 return -1;
5569 if (lookup_attribute ("interrupt_handler",
5570 DECL_ATTRIBUTES (current_function_decl)))
5571 return -1;
5572 if (sh_cfun_interrupt_handler_p ())
5573 return -1;
5575 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
5577 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
5578 if (call_really_used_regs[regno] && ! regs_ever_live[regno])
5579 return regno;
5581 return -1;
5584 /* The maximum registers we need to save are:
5585 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
5586 - 32 floating point registers (for each pair, we save none,
5587 one single precision value, or a double precision value).
5588 - 8 target registers
5589 - add 1 entry for a delimiter. */
5590 #define MAX_SAVED_REGS (62+32+8)
5592 typedef struct save_entry_s
5594 unsigned char reg;
5595 unsigned char mode;
5596 short offset;
5597 } save_entry;
5599 #define MAX_TEMPS 4
5601 /* There will be a delimiter entry with VOIDmode both at the start and the
5602 end of a filled in schedule. The end delimiter has the offset of the
5603 save with the smallest (i.e. most negative) offset. */
5604 typedef struct save_schedule_s
5606 save_entry entries[MAX_SAVED_REGS + 2];
5607 int temps[MAX_TEMPS+1];
5608 } save_schedule;
5610 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
5611 use reverse order. Returns the last entry written to (not counting
5612 the delimiter). OFFSET_BASE is a number to be added to all offset
5613 entries. */
5615 static save_entry *
5616 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
5617 int offset_base)
5619 int align, i;
5620 save_entry *entry = schedule->entries;
5621 int tmpx = 0;
5622 int offset;
5624 if (! current_function_interrupt)
5625 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
5626 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
5627 && ! FUNCTION_ARG_REGNO_P (i)
5628 && i != FIRST_RET_REG
5629 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
5630 && ! (current_function_calls_eh_return
5631 && (i == EH_RETURN_STACKADJ_REGNO
5632 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
5633 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
5634 schedule->temps[tmpx++] = i;
5635 entry->reg = -1;
5636 entry->mode = VOIDmode;
5637 entry->offset = offset_base;
5638 entry++;
5639 /* We loop twice: first, we save 8-byte aligned registers in the
5640 higher addresses, which are known to be aligned. Then we
5641 proceed to saving 32-bit registers that don't need 8-byte
5642 alignment.
5643 If this is an interrupt function, all registers that need saving
5644 need to be saved in full. Moreover, we need to postpone saving
5645 target registers until we have saved some general purpose registers
5646 that we can then use as scratch registers. */
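/* An illustrative schedule, assuming STACK_BOUNDARY / BITS_PER_UNIT == 8
   and OFFSET_BASE == 0: a live DFmode register is recorded by the
   aligned pass at offset -8, a live SFmode register by the second pass
   at offset -12, and the closing delimiter then carries offset -12.  */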
5647 offset = offset_base;
5648 for (align = 1; align >= 0; align--)
5650 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
5651 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5653 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5654 int reg = i;
5656 if (current_function_interrupt)
5658 if (TARGET_REGISTER_P (i))
5659 continue;
5660 if (GENERAL_REGISTER_P (i))
5661 mode = DImode;
5663 if (mode == SFmode && (i % 2) == 1
5664 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5665 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
5667 mode = DFmode;
5668 i--;
5669 reg--;
5672 /* If we're doing the aligned pass and this is not aligned,
5673 or we're doing the unaligned pass and this is aligned,
5674 skip it. */
5675 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
5676 != align)
5677 continue;
5679 if (current_function_interrupt
5680 && GENERAL_REGISTER_P (i)
5681 && tmpx < MAX_TEMPS)
5682 schedule->temps[tmpx++] = i;
5684 offset -= GET_MODE_SIZE (mode);
5685 entry->reg = i;
5686 entry->mode = mode;
5687 entry->offset = offset;
5688 entry++;
5690 if (align && current_function_interrupt)
5691 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
5692 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5694 offset -= GET_MODE_SIZE (DImode);
5695 entry->reg = i;
5696 entry->mode = DImode;
5697 entry->offset = offset;
5698 entry++;
5701 entry->reg = -1;
5702 entry->mode = VOIDmode;
5703 entry->offset = offset;
5704 schedule->temps[tmpx] = -1;
5705 return entry - 1;
5708 void
5709 sh_expand_prologue (void)
5711 HARD_REG_SET live_regs_mask;
5712 int d, i;
5713 int d_rounding = 0;
5714 int save_flags = target_flags;
5715 int pretend_args;
5716 tree sp_switch_attr
5717 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
5719 current_function_interrupt = sh_cfun_interrupt_handler_p ();
5721 /* We have pretend args if we had an object sent partially in registers
5722 and partially on the stack, e.g. a large structure. */
5723 pretend_args = current_function_pretend_args_size;
5724 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
5725 && (NPARM_REGS(SImode)
5726 > current_function_args_info.arg_count[(int) SH_ARG_INT]))
5727 pretend_args = 0;
5728 output_stack_adjust (-pretend_args
5729 - current_function_args_info.stack_regs * 8,
5730 stack_pointer_rtx, 0, NULL);
5732 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
5733 /* We're going to use the PIC register to load the address of the
5734 incoming-argument decoder and/or of the return trampoline from
5735 the GOT, so make sure the PIC register is preserved and
5736 initialized. */
5737 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5739 if (TARGET_SHCOMPACT
5740 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5742 int reg;
5744 /* First, make all registers with incoming arguments that will
5745 be pushed onto the stack live, so that register renaming
5746 doesn't overwrite them. */
5747 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
5748 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
5749 >= NPARM_REGS (SImode) - reg)
5750 for (; reg < NPARM_REGS (SImode); reg++)
5751 emit_insn (gen_shcompact_preserve_incoming_args
5752 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5753 else if (CALL_COOKIE_INT_REG_GET
5754 (current_function_args_info.call_cookie, reg) == 1)
5755 emit_insn (gen_shcompact_preserve_incoming_args
5756 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5758 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
5759 stack_pointer_rtx);
5760 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
5761 GEN_INT (current_function_args_info.call_cookie));
5762 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
5763 gen_rtx_REG (SImode, R0_REG));
5765 else if (TARGET_SHMEDIA)
5767 int tr = sh_media_register_for_return ();
5769 if (tr >= 0)
5771 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
5772 gen_rtx_REG (DImode, PR_MEDIA_REG));
5774 /* ??? We should suppress saving pr when we don't need it, but this
5775 is tricky because of builtin_return_address. */
5777 /* If this function only exits with sibcalls, this copy
5778 will be flagged as dead. */
5779 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5780 const0_rtx,
5781 REG_NOTES (insn));
5785 /* Emit the code for SETUP_VARARGS. */
5786 if (current_function_stdarg)
5788 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
5790 /* Push arg regs as if they'd been provided by the caller on the stack. */
5791 for (i = 0; i < NPARM_REGS(SImode); i++)
5793 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
5794 rtx insn;
5796 if (i >= (NPARM_REGS(SImode)
5797 - current_function_args_info.arg_count[(int) SH_ARG_INT]
5799 break;
5800 insn = push (rn);
5801 RTX_FRAME_RELATED_P (insn) = 0;
5806 /* If we're supposed to switch stacks at function entry, do so now. */
5807 if (sp_switch_attr)
5809 /* The argument specifies a variable holding the address of the
5810 stack the interrupt function should switch to/from at entry/exit. */
5811 const char *s
5812 = ggc_strdup (TREE_STRING_POINTER (TREE_VALUE (sp_switch_attr)));
5813 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
5815 emit_insn (gen_sp_switch_1 (sp_switch));
5818 d = calc_live_regs (&live_regs_mask);
5819 /* ??? Maybe we could save some switching if we can move a mode switch
5820 that already happens to be at the function start into the prologue. */
5821 if (target_flags != save_flags && ! current_function_interrupt)
5822 emit_insn (gen_toggle_sz ());
5824 if (TARGET_SH5)
5826 int offset_base, offset;
5827 rtx r0 = NULL_RTX;
5828 int offset_in_r0 = -1;
5829 int sp_in_r0 = 0;
5830 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5831 int total_size, save_size;
5832 save_schedule schedule;
5833 save_entry *entry;
5834 int *tmp_pnt;
5836 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
5837 && ! current_function_interrupt)
5838 r0 = gen_rtx_REG (Pmode, R0_REG);
5840 /* D is the actual number of bytes that we need for saving registers;
5841 however, in initial_elimination_offset we have committed to using
5842 an additional TREGS_SPACE bytes. In order to keep both
5843 addresses to arguments supplied by the caller and local variables
5844 valid, we must keep this gap. Place it between the incoming
5845 arguments and the actually saved registers in a bid to optimize
5846 locality of reference. */
5847 total_size = d + tregs_space;
5848 total_size += rounded_frame_size (total_size);
5849 save_size = total_size - rounded_frame_size (d);
5850 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
5851 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5852 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
5854 /* If adjusting the stack in a single step costs nothing extra, do so.
5855 I.e. either if a single addi is enough, or we need a movi anyway,
5856 and we don't exceed the maximum offset range (the test for the
5857 latter is conservative for simplicity). */
5858 if (TARGET_SHMEDIA
5859 && (CONST_OK_FOR_I10 (-total_size)
5860 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
5861 && total_size <= 2044)))
5862 d_rounding = total_size - save_size;
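/* When the test above succeeds, d_rounding becomes rounded_frame_size (d),
   so the output_stack_adjust call below allocates the whole TOTAL_SIZE in
   one step and the later -rounded_frame_size (d) + d_rounding adjustment
   degenerates to zero.  */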
5864 offset_base = d + d_rounding;
5866 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
5867 0, NULL);
5869 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
5870 tmp_pnt = schedule.temps;
5871 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
5873 enum machine_mode mode = entry->mode;
5874 unsigned int reg = entry->reg;
5875 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
5876 rtx orig_reg_rtx;
5878 offset = entry->offset;
5880 reg_rtx = gen_rtx_REG (mode, reg);
5882 mem_rtx = gen_frame_mem (mode,
5883 gen_rtx_PLUS (Pmode,
5884 stack_pointer_rtx,
5885 GEN_INT (offset)));
5887 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
5889 gcc_assert (r0);
5890 mem_rtx = NULL_RTX;
5892 try_pre_dec:
5894 if (HAVE_PRE_DECREMENT
5895 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
5896 || mem_rtx == NULL_RTX
5897 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
5899 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
5901 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
5902 pre_dec_ok);
5904 pre_dec = NULL_RTX;
5906 break;
5908 pre_dec_ok:
5909 mem_rtx = NULL_RTX;
5910 offset += GET_MODE_SIZE (mode);
5912 while (0);
5914 if (mem_rtx != NULL_RTX)
5915 goto addr_ok;
5917 if (offset_in_r0 == -1)
5919 emit_move_insn (r0, GEN_INT (offset));
5920 offset_in_r0 = offset;
5922 else if (offset != offset_in_r0)
5924 emit_move_insn (r0,
5925 gen_rtx_PLUS
5926 (Pmode, r0,
5927 GEN_INT (offset - offset_in_r0)));
5928 offset_in_r0 += offset - offset_in_r0;
5931 if (pre_dec != NULL_RTX)
5933 if (! sp_in_r0)
5935 emit_move_insn (r0,
5936 gen_rtx_PLUS
5937 (Pmode, r0, stack_pointer_rtx));
5938 sp_in_r0 = 1;
5941 offset -= GET_MODE_SIZE (mode);
5942 offset_in_r0 -= GET_MODE_SIZE (mode);
5944 mem_rtx = pre_dec;
5946 else if (sp_in_r0)
5947 mem_rtx = gen_frame_mem (mode, r0);
5948 else
5949 mem_rtx = gen_frame_mem (mode,
5950 gen_rtx_PLUS (Pmode,
5951 stack_pointer_rtx,
5952 r0));
5954 /* We must not use an r0-based address for target-branch
5955 registers or for special registers without pre-dec
5956 memory addresses, since we store their values in r0
5957 first. */
5958 gcc_assert (!TARGET_REGISTER_P (reg)
5959 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
5960 || mem_rtx == pre_dec));
5962 addr_ok:
5963 orig_reg_rtx = reg_rtx;
5964 if (TARGET_REGISTER_P (reg)
5965 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
5966 && mem_rtx != pre_dec))
5968 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
5970 emit_move_insn (tmp_reg, reg_rtx);
5972 if (REGNO (tmp_reg) == R0_REG)
5974 offset_in_r0 = -1;
5975 sp_in_r0 = 0;
5976 gcc_assert (!refers_to_regno_p
5977 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
5980 if (*++tmp_pnt <= 0)
5981 tmp_pnt = schedule.temps;
5983 reg_rtx = tmp_reg;
5986 rtx insn;
5988 /* Mark as interesting for the DWARF CFI generator. */
5989 insn = emit_move_insn (mem_rtx, reg_rtx);
5990 RTX_FRAME_RELATED_P (insn) = 1;
5991 /* If we use an intermediate register for the save, we can't
5992 describe this exactly in cfi as a copy of the to-be-saved
5993 register into the temporary register and then of the temporary
5994 register onto the stack, because the temporary register can
5995 have a different natural size than the to-be-saved register.
5996 Thus, we gloss over the intermediate copy and pretend we do
5997 a direct save from the to-be-saved register. */
5998 if (REGNO (reg_rtx) != reg)
6000 rtx set, note_rtx;
6002 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
6003 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6004 REG_NOTES (insn));
6005 REG_NOTES (insn) = note_rtx;
6008 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
6010 rtx reg_rtx = gen_rtx_REG (mode, reg);
6011 rtx set, note_rtx;
6012 rtx mem_rtx = gen_frame_mem (mode,
6013 gen_rtx_PLUS (Pmode,
6014 stack_pointer_rtx,
6015 GEN_INT (offset)));
6017 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
6018 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6019 REG_NOTES (insn));
6020 REG_NOTES (insn) = note_rtx;
6025 gcc_assert (entry->offset == d_rounding);
6027 else
6028 push_regs (&live_regs_mask, current_function_interrupt);
6030 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
6032 rtx insn = get_last_insn ();
6033 rtx last = emit_insn (gen_GOTaddr2picreg ());
6035 /* Mark these insns as possibly dead. Sometimes, flow2 may
6036 delete all uses of the PIC register. In this case, let it
6037 delete the initialization too. */
6040 insn = NEXT_INSN (insn);
6042 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6043 const0_rtx,
6044 REG_NOTES (insn));
6046 while (insn != last);
6049 if (SHMEDIA_REGS_STACK_ADJUST ())
6051 /* This must NOT go through the PLT, otherwise mach and macl
6052 may be clobbered. */
6053 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6054 (TARGET_FPU_ANY
6055 ? "__GCC_push_shmedia_regs"
6056 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
6057 emit_insn (gen_shmedia_save_restore_regs_compact
6058 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
6061 if (target_flags != save_flags && ! current_function_interrupt)
6063 rtx insn = emit_insn (gen_toggle_sz ());
6065 /* If we're lucky, a mode switch in the function body will
6066 overwrite fpscr, making this insn dead. Tell flow this
6067 insn is ok to delete. */
6068 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6069 const0_rtx,
6070 REG_NOTES (insn));
6073 target_flags = save_flags;
6075 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
6076 stack_pointer_rtx, 0, NULL);
6078 if (frame_pointer_needed)
6079 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
6081 if (TARGET_SHCOMPACT
6082 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6084 /* This must NOT go through the PLT, otherwise mach and macl
6085 may be clobbered. */
6086 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6087 "__GCC_shcompact_incoming_args", SFUNC_GOT);
6088 emit_insn (gen_shcompact_incoming_args ());
6092 void
6093 sh_expand_epilogue (bool sibcall_p)
6095 HARD_REG_SET live_regs_mask;
6096 int d, i;
6097 int d_rounding = 0;
6099 int save_flags = target_flags;
6100 int frame_size, save_size;
6101 int fpscr_deferred = 0;
6102 int e = sibcall_p ? -1 : 1;
6104 d = calc_live_regs (&live_regs_mask);
6106 save_size = d;
6107 frame_size = rounded_frame_size (d);
6109 if (TARGET_SH5)
6111 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6112 int total_size;
6113 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
6114 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6115 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
6117 total_size = d + tregs_space;
6118 total_size += rounded_frame_size (total_size);
6119 save_size = total_size - frame_size;
6121 /* If adjusting the stack in a single step costs nothing extra, do so.
6122 I.e. either if a single addi is enough, or we need a movi anyway,
6123 and we don't exceed the maximum offset range (the test for the
6124 latter is conservative for simplicity). */
6125 if (TARGET_SHMEDIA
6126 && ! frame_pointer_needed
6127 && (CONST_OK_FOR_I10 (total_size)
6128 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
6129 && total_size <= 2044)))
6130 d_rounding = frame_size;
6132 frame_size -= d_rounding;
6135 if (frame_pointer_needed)
6137 /* We must avoid scheduling the epilogue with previous basic blocks
6138 when exception handling is enabled. See PR/18032. */
6139 if (flag_exceptions)
6140 emit_insn (gen_blockage ());
6141 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
6142 &live_regs_mask);
6144 /* We must avoid moving the stack pointer adjustment past code
6145 which reads from the local frame, else an interrupt could
6146 occur after the SP adjustment and clobber data in the local
6147 frame. */
6148 emit_insn (gen_blockage ());
6149 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
6151 else if (frame_size)
6153 /* We must avoid moving the stack pointer adjustment past code
6154 which reads from the local frame, else an interrupt could
6155 occur after the SP adjustment and clobber data in the local
6156 frame. */
6157 emit_insn (gen_blockage ());
6158 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
6161 if (SHMEDIA_REGS_STACK_ADJUST ())
6163 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6164 (TARGET_FPU_ANY
6165 ? "__GCC_pop_shmedia_regs"
6166 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
6167 /* This must NOT go through the PLT, otherwise mach and macl
6168 may be clobbered. */
6169 emit_insn (gen_shmedia_save_restore_regs_compact
6170 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
6173 /* Pop all the registers. */
6175 if (target_flags != save_flags && ! current_function_interrupt)
6176 emit_insn (gen_toggle_sz ());
6177 if (TARGET_SH5)
6179 int offset_base, offset;
6180 int offset_in_r0 = -1;
6181 int sp_in_r0 = 0;
6182 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
6183 save_schedule schedule;
6184 save_entry *entry;
6185 int *tmp_pnt;
6187 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
6188 offset_base = -entry[1].offset + d_rounding;
6189 tmp_pnt = schedule.temps;
6190 for (; entry->mode != VOIDmode; entry--)
6192 enum machine_mode mode = entry->mode;
6193 int reg = entry->reg;
6194 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
6196 offset = offset_base + entry->offset;
6197 reg_rtx = gen_rtx_REG (mode, reg);
6199 mem_rtx = gen_frame_mem (mode,
6200 gen_rtx_PLUS (Pmode,
6201 stack_pointer_rtx,
6202 GEN_INT (offset)));
6204 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
6206 mem_rtx = NULL_RTX;
6208 try_post_inc:
6210 if (HAVE_POST_INCREMENT
6211 && (offset == offset_in_r0
6212 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
6213 && mem_rtx == NULL_RTX)
6214 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6216 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
6218 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
6219 post_inc_ok);
6221 post_inc = NULL_RTX;
6223 break;
6225 post_inc_ok:
6226 mem_rtx = NULL_RTX;
6228 while (0);
6230 if (mem_rtx != NULL_RTX)
6231 goto addr_ok;
6233 if (offset_in_r0 == -1)
6235 emit_move_insn (r0, GEN_INT (offset));
6236 offset_in_r0 = offset;
6238 else if (offset != offset_in_r0)
6240 emit_move_insn (r0,
6241 gen_rtx_PLUS
6242 (Pmode, r0,
6243 GEN_INT (offset - offset_in_r0)));
6244 offset_in_r0 += offset - offset_in_r0;
6247 if (post_inc != NULL_RTX)
6249 if (! sp_in_r0)
6251 emit_move_insn (r0,
6252 gen_rtx_PLUS
6253 (Pmode, r0, stack_pointer_rtx));
6254 sp_in_r0 = 1;
6257 mem_rtx = post_inc;
6259 offset_in_r0 += GET_MODE_SIZE (mode);
6261 else if (sp_in_r0)
6262 mem_rtx = gen_frame_mem (mode, r0);
6263 else
6264 mem_rtx = gen_frame_mem (mode,
6265 gen_rtx_PLUS (Pmode,
6266 stack_pointer_rtx,
6267 r0));
6269 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6270 || mem_rtx == post_inc);
6272 addr_ok:
6273 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6274 && mem_rtx != post_inc)
6276 insn = emit_move_insn (r0, mem_rtx);
6277 mem_rtx = r0;
6279 else if (TARGET_REGISTER_P (reg))
6281 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
6283 /* Give the scheduler a bit of freedom by using up to
6284 MAX_TEMPS registers in a round-robin fashion. */
6285 insn = emit_move_insn (tmp_reg, mem_rtx);
6286 mem_rtx = tmp_reg;
6287 if (*++tmp_pnt < 0)
6288 tmp_pnt = schedule.temps;
6291 insn = emit_move_insn (reg_rtx, mem_rtx);
6292 if (reg == PR_MEDIA_REG && sh_media_register_for_return () >= 0)
6293 /* This is dead, unless we return with a sibcall. */
6294 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6295 const0_rtx,
6296 REG_NOTES (insn));
6299 gcc_assert (entry->offset + offset_base == d + d_rounding);
6301 else /* ! TARGET_SH5 */
6303 save_size = 0;
6304 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6305 pop (PR_REG);
6306 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
6308 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
6310 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
6311 && hard_regs_intersect_p (&live_regs_mask,
6312 &reg_class_contents[DF_REGS]))
6313 fpscr_deferred = 1;
6314 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
6315 pop (j);
6316 if (j == FIRST_FP_REG && fpscr_deferred)
6317 pop (FPSCR_REG);
6321 if (target_flags != save_flags && ! current_function_interrupt)
6322 emit_insn (gen_toggle_sz ());
6323 target_flags = save_flags;
6325 output_stack_adjust (current_function_pretend_args_size
6326 + save_size + d_rounding
6327 + current_function_args_info.stack_regs * 8,
6328 stack_pointer_rtx, e, NULL);
6330 if (current_function_calls_eh_return)
6331 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
6332 EH_RETURN_STACKADJ_RTX));
6334 /* Switch back to the normal stack if necessary. */
6335 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
6336 emit_insn (gen_sp_switch_2 ());
6338 /* Tell flow the insn that pops PR isn't dead. */
6339 /* PR_REG will never be live in SHmedia mode, and we don't need to
6340 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6341 by the return pattern. */
6342 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6343 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
6346 static int sh_need_epilogue_known = 0;
6349 sh_need_epilogue (void)
6351 if (! sh_need_epilogue_known)
6353 rtx epilogue;
6355 start_sequence ();
6356 sh_expand_epilogue (0);
6357 epilogue = get_insns ();
6358 end_sequence ();
6359 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
6361 return sh_need_epilogue_known > 0;
6364 /* Emit code to change the current function's return address to RA.
6365 TEMP is available as a scratch register, if needed. */
6367 void
6368 sh_set_return_address (rtx ra, rtx tmp)
6370 HARD_REG_SET live_regs_mask;
6371 int d;
6372 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6373 int pr_offset;
6375 d = calc_live_regs (&live_regs_mask);
6377 /* If pr_reg isn't live, we can set it (or the register given in
6378 sh_media_register_for_return) directly. */
6379 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6381 rtx rr;
6383 if (TARGET_SHMEDIA)
6385 int rr_regno = sh_media_register_for_return ();
6387 if (rr_regno < 0)
6388 rr_regno = pr_reg;
6390 rr = gen_rtx_REG (DImode, rr_regno);
6392 else
6393 rr = gen_rtx_REG (SImode, pr_reg);
6395 emit_insn (GEN_MOV (rr, ra));
6396 /* Tell flow the register for return isn't dead. */
6397 emit_insn (gen_rtx_USE (VOIDmode, rr));
6398 return;
6401 if (TARGET_SH5)
6403 int offset;
6404 save_schedule schedule;
6405 save_entry *entry;
6407 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
6408 offset = entry[1].offset;
6409 for (; entry->mode != VOIDmode; entry--)
6410 if (entry->reg == pr_reg)
6411 goto found;
6413 /* We can't find the pr register. */
6414 gcc_unreachable ();
6416 found:
6417 offset = entry->offset - offset;
6418 pr_offset = (rounded_frame_size (d) + offset
6419 + SHMEDIA_REGS_STACK_ADJUST ());
6421 else
6422 pr_offset = rounded_frame_size (d);
6424 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6425 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
6427 tmp = gen_frame_mem (Pmode, tmp);
6428 emit_insn (GEN_MOV (tmp, ra));
6431 /* Clear variables at function end. */
6433 static void
6434 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6435 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6437 sh_need_epilogue_known = 0;
6440 static rtx
6441 sh_builtin_saveregs (void)
6443 /* First unnamed integer register. */
6444 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
6445 /* Number of integer registers we need to save. */
6446 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6447 /* First unnamed SFmode float reg. */
6448 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6449 /* Number of SFmode float regs to save. */
6450 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6451 rtx regbuf, fpregs;
6452 int bufsize, regno;
6453 HOST_WIDE_INT alias_set;
6455 if (TARGET_SH5)
6457 if (n_intregs)
6459 int pushregs = n_intregs;
6461 while (pushregs < NPARM_REGS (SImode) - 1
6462 && (CALL_COOKIE_INT_REG_GET
6463 (current_function_args_info.call_cookie,
6464 NPARM_REGS (SImode) - pushregs)
6465 == 1))
6467 current_function_args_info.call_cookie
6468 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6469 - pushregs, 1);
6470 pushregs++;
6473 if (pushregs == NPARM_REGS (SImode))
6474 current_function_args_info.call_cookie
6475 |= (CALL_COOKIE_INT_REG (0, 1)
6476 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6477 else
6478 current_function_args_info.call_cookie
6479 |= CALL_COOKIE_STACKSEQ (pushregs);
6481 current_function_pretend_args_size += 8 * n_intregs;
6483 if (TARGET_SHCOMPACT)
6484 return const0_rtx;
6487 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
6489 error ("__builtin_saveregs not supported by this subtarget");
6490 return const0_rtx;
6493 if (TARGET_SHMEDIA)
6494 n_floatregs = 0;
6496 /* Allocate block of memory for the regs. */
6497 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6498 Or can assign_stack_local accept a 0 SIZE argument? */
6499 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
6501 if (TARGET_SHMEDIA)
6502 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
6503 else if (n_floatregs & 1)
6505 rtx addr;
6507 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6508 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
6509 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6510 regbuf = change_address (regbuf, BLKmode, addr);
6512 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
6514 rtx addr, mask;
6516 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6517 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
6518 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
6519 emit_insn (gen_andsi3 (addr, addr, mask));
6520 regbuf = change_address (regbuf, BLKmode, addr);
6522 else
6523 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
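/* A sketch of the two alignment fix-ups above, assuming UNITS_PER_WORD
   is 4: with an odd number of SFmode registers to save, OR-ing the
   word-aligned buffer address with 4 makes it congruent to 4 mod 8, so
   that regbuf + n_floatregs * UNITS_PER_WORD (the point the DFmode
   stores below count down from) ends up 8-byte aligned; with
   STACK_BOUNDARY < 64, (addr + 4) & -8 rounds the address up to the
   next 8-byte boundary, leaving it unchanged if already aligned.  In
   both cases the buffer is over-allocated by one word to make room for
   the adjustment.  */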
6524 alias_set = get_varargs_alias_set ();
6525 set_mem_alias_set (regbuf, alias_set);
6527 /* Save int args.
6528 This is optimized to only save the regs that are necessary. Explicitly
6529 named args need not be saved. */
6530 if (n_intregs > 0)
6531 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
6532 adjust_address (regbuf, BLKmode,
6533 n_floatregs * UNITS_PER_WORD),
6534 n_intregs);
6536 if (TARGET_SHMEDIA)
6537 /* Return the address of the regbuf. */
6538 return XEXP (regbuf, 0);
6540 /* Save float args.
6541 This is optimized to only save the regs that are necessary. Explicitly
6542 named args need not be saved.
6543 We explicitly build a pointer to the buffer because it halves the insn
6544 count when not optimizing (otherwise the pointer is built for each reg
6545 saved).
6546 We emit the moves in reverse order so that we can use predecrement. */
6548 fpregs = copy_to_mode_reg (Pmode,
6549 plus_constant (XEXP (regbuf, 0),
6550 n_floatregs * UNITS_PER_WORD));
6551 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
6553 rtx mem;
6554 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
6556 emit_insn (gen_addsi3 (fpregs, fpregs,
6557 GEN_INT (-2 * UNITS_PER_WORD)));
6558 mem = change_address (regbuf, DFmode, fpregs);
6559 emit_move_insn (mem,
6560 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
6562 regno = first_floatreg;
6563 if (regno & 1)
6565 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6566 mem = change_address (regbuf, SFmode, fpregs);
6567 emit_move_insn (mem,
6568 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
6569 - (TARGET_LITTLE_ENDIAN != 0)));
6572 else
6573 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
6575 rtx mem;
6577 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6578 mem = change_address (regbuf, SFmode, fpregs);
6579 emit_move_insn (mem,
6580 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
6583 /* Return the address of the regbuf. */
6584 return XEXP (regbuf, 0);
6587 /* Define the `__builtin_va_list' type for the ABI. */
6589 static tree
6590 sh_build_builtin_va_list (void)
6592 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6593 tree record;
6595 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
6596 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6597 return ptr_type_node;
6599 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6601 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
6602 ptr_type_node);
6603 f_next_o_limit = build_decl (FIELD_DECL,
6604 get_identifier ("__va_next_o_limit"),
6605 ptr_type_node);
6606 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
6607 ptr_type_node);
6608 f_next_fp_limit = build_decl (FIELD_DECL,
6609 get_identifier ("__va_next_fp_limit"),
6610 ptr_type_node);
6611 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
6612 ptr_type_node);
6614 DECL_FIELD_CONTEXT (f_next_o) = record;
6615 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
6616 DECL_FIELD_CONTEXT (f_next_fp) = record;
6617 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
6618 DECL_FIELD_CONTEXT (f_next_stack) = record;
6620 TYPE_FIELDS (record) = f_next_o;
6621 TREE_CHAIN (f_next_o) = f_next_o_limit;
6622 TREE_CHAIN (f_next_o_limit) = f_next_fp;
6623 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
6624 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
6626 layout_type (record);
6628 return record;
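/* The record built above corresponds roughly to the following
   hypothetical C declaration (a sketch; the real type is built as
   trees in the code above):

       typedef struct {
         void *__va_next_o;         next integer register slot
         void *__va_next_o_limit;   end of the integer register save area
         void *__va_next_fp;        next FP register slot
         void *__va_next_fp_limit;  end of the FP register save area
         void *__va_next_stack;     next stack-passed argument
       } __builtin_va_list;

   sh_va_start below fills these fields in from the buffer returned by
   __builtin_saveregs.  */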
6631 /* Implement `va_start' for varargs and stdarg. */
6633 void
6634 sh_va_start (tree valist, rtx nextarg)
6636 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6637 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6638 tree t, u;
6639 int nfp, nint;
6641 if (TARGET_SH5)
6643 expand_builtin_saveregs ();
6644 std_expand_builtin_va_start (valist, nextarg);
6645 return;
6648 if ((! TARGET_SH2E && ! TARGET_SH4)
6649 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6651 std_expand_builtin_va_start (valist, nextarg);
6652 return;
6655 f_next_o = TYPE_FIELDS (va_list_type_node);
6656 f_next_o_limit = TREE_CHAIN (f_next_o);
6657 f_next_fp = TREE_CHAIN (f_next_o_limit);
6658 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6659 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6661 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6662 NULL_TREE);
6663 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6664 valist, f_next_o_limit, NULL_TREE);
6665 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
6666 NULL_TREE);
6667 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6668 valist, f_next_fp_limit, NULL_TREE);
6669 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
6670 valist, f_next_stack, NULL_TREE);
6672 /* Call __builtin_saveregs. */
6673 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
6674 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
6675 TREE_SIDE_EFFECTS (t) = 1;
6676 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6678 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
6679 if (nfp < 8)
6680 nfp = 8 - nfp;
6681 else
6682 nfp = 0;
6683 u = fold_build2 (PLUS_EXPR, ptr_type_node, u,
6684 build_int_cst (NULL_TREE, UNITS_PER_WORD * nfp));
6685 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
6686 TREE_SIDE_EFFECTS (t) = 1;
6687 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6689 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
6690 TREE_SIDE_EFFECTS (t) = 1;
6691 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6693 nint = current_function_args_info.arg_count[SH_ARG_INT];
6694 if (nint < 4)
6695 nint = 4 - nint;
6696 else
6697 nint = 0;
6698 u = fold_build2 (PLUS_EXPR, ptr_type_node, u,
6699 build_int_cst (NULL_TREE, UNITS_PER_WORD * nint));
6700 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
6701 TREE_SIDE_EFFECTS (t) = 1;
6702 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6704 u = make_tree (ptr_type_node, nextarg);
6705 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
6706 TREE_SIDE_EFFECTS (t) = 1;
6707 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
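/* In summary, after the assignments above the fields describe the
   register save area laid out by sh_builtin_saveregs, roughly:

       next_fp       = __builtin_saveregs ()
       next_fp_limit = next_fp + MAX (0, 8 - <named FP args>) * UNITS_PER_WORD
       next_o        = next_fp_limit
       next_o_limit  = next_o + MAX (0, 4 - <named int args>) * UNITS_PER_WORD
       next_stack    = NEXTARG  */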
6710 /* TYPE is a RECORD_TYPE. If there is only a single non-zero-sized
6711 member, return it. */
6712 static tree
6713 find_sole_member (tree type)
6715 tree field, member = NULL_TREE;
6717 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6719 if (TREE_CODE (field) != FIELD_DECL)
6720 continue;
6721 if (!DECL_SIZE (field))
6722 return NULL_TREE;
6723 if (integer_zerop (DECL_SIZE (field)))
6724 continue;
6725 if (member)
6726 return NULL_TREE;
6727 member = field;
6729 return member;
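/* For example, with these hypothetical types

       struct s1 { double d; };              sole member: d
       struct s2 { int pad[0]; float f; };   sole member: f (pad has zero size)
       struct s3 { float a, b; };            no sole member

   find_sole_member returns the single field with nonzero size, or
   NULL_TREE when there is no such unique field.  */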
6731 /* Implement `va_arg'. */
6733 static tree
6734 sh_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p,
6735 tree *post_p ATTRIBUTE_UNUSED)
6737 HOST_WIDE_INT size, rsize;
6738 tree tmp, pptr_type_node;
6739 tree addr, lab_over = NULL, result = NULL;
6740 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
6742 if (pass_by_ref)
6743 type = build_pointer_type (type);
6745 size = int_size_in_bytes (type);
6746 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6747 pptr_type_node = build_pointer_type (ptr_type_node);
6749 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
6750 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
6752 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6753 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6754 int pass_as_float;
6755 tree lab_false;
6756 tree member;
6758 f_next_o = TYPE_FIELDS (va_list_type_node);
6759 f_next_o_limit = TREE_CHAIN (f_next_o);
6760 f_next_fp = TREE_CHAIN (f_next_o_limit);
6761 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6762 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6764 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6765 NULL_TREE);
6766 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6767 valist, f_next_o_limit, NULL_TREE);
6768 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
6769 valist, f_next_fp, NULL_TREE);
6770 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6771 valist, f_next_fp_limit, NULL_TREE);
6772 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
6773 valist, f_next_stack, NULL_TREE);
6775 /* Structures with a single member with a distinct mode are passed
6776 like their member. This is relevant if the latter has a REAL_TYPE
6777 or COMPLEX_TYPE type. */
6778 while (TREE_CODE (type) == RECORD_TYPE
6779 && (member = find_sole_member (type))
6780 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
6781 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
6782 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
6784 tree field_type = TREE_TYPE (member);
6786 if (TYPE_MODE (type) == TYPE_MODE (field_type))
6787 type = field_type;
6788 else
6790 gcc_assert ((TYPE_ALIGN (type)
6791 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
6792 || (TYPE_ALIGN (type)
6793 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
6794 break;
6798 if (TARGET_SH4)
6800 pass_as_float = ((TREE_CODE (type) == REAL_TYPE && size <= 8)
6801 || (TREE_CODE (type) == COMPLEX_TYPE
6802 && TREE_CODE (TREE_TYPE (type)) == REAL_TYPE
6803 && size <= 16));
6805 else
6807 pass_as_float = (TREE_CODE (type) == REAL_TYPE && size == 4);
6810 addr = create_tmp_var (pptr_type_node, NULL);
6811 lab_false = create_artificial_label ();
6812 lab_over = create_artificial_label ();
6814 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
6816 if (pass_as_float)
6818 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
6819 tree cmp;
6820 bool is_double = size == 8 && TREE_CODE (type) == REAL_TYPE;
6822 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
6823 tmp = build2 (MODIFY_EXPR, void_type_node, addr, tmp);
6824 gimplify_and_add (tmp, pre_p);
6826 tmp = build2 (MODIFY_EXPR, ptr_type_node, next_fp_tmp, valist);
6827 gimplify_and_add (tmp, pre_p);
6828 tmp = next_fp_limit;
6829 if (size > 4 && !is_double)
6830 tmp = build2 (PLUS_EXPR, TREE_TYPE (tmp), tmp,
6831 fold_convert (TREE_TYPE (tmp), size_int (4 - size)));
6832 tmp = build2 (GE_EXPR, boolean_type_node, next_fp_tmp, tmp);
6833 cmp = build3 (COND_EXPR, void_type_node, tmp,
6834 build1 (GOTO_EXPR, void_type_node, lab_false),
6835 NULL_TREE);
6836 if (!is_double)
6837 gimplify_and_add (cmp, pre_p);
6839 if (TYPE_ALIGN (type) > BITS_PER_WORD || (is_double || size == 16))
6841 tmp = fold_convert (ptr_type_node, size_int (UNITS_PER_WORD));
6842 tmp = build2 (BIT_AND_EXPR, ptr_type_node, next_fp_tmp, tmp);
6843 tmp = build2 (PLUS_EXPR, ptr_type_node, next_fp_tmp, tmp);
6844 tmp = build2 (MODIFY_EXPR, ptr_type_node, next_fp_tmp, tmp);
6845 gimplify_and_add (tmp, pre_p);
6847 if (is_double)
6848 gimplify_and_add (cmp, pre_p);
6850 #ifdef FUNCTION_ARG_SCmode_WART
6851 if (TYPE_MODE (type) == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
6853 tree subtype = TREE_TYPE (type);
6854 tree real, imag;
6856 imag
6857 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
6858 imag = get_initialized_tmp_var (imag, pre_p, NULL);
6860 real
6861 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
6862 real = get_initialized_tmp_var (real, pre_p, NULL);
6864 result = build2 (COMPLEX_EXPR, type, real, imag);
6865 result = get_initialized_tmp_var (result, pre_p, NULL);
6867 #endif /* FUNCTION_ARG_SCmode_WART */
6869 tmp = build1 (GOTO_EXPR, void_type_node, lab_over);
6870 gimplify_and_add (tmp, pre_p);
6872 tmp = build1 (LABEL_EXPR, void_type_node, lab_false);
6873 gimplify_and_add (tmp, pre_p);
6875 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6876 tmp = build2 (MODIFY_EXPR, void_type_node, addr, tmp);
6877 gimplify_and_add (tmp, pre_p);
6878 tmp = build2 (MODIFY_EXPR, ptr_type_node, next_fp_tmp, valist);
6879 gimplify_and_add (tmp, pre_p);
6881 tmp = build2 (MODIFY_EXPR, ptr_type_node, valist, next_fp_tmp);
6882 gimplify_and_add (tmp, post_p);
6883 valist = next_fp_tmp;
6885 else
6887 tmp = fold_convert (ptr_type_node, size_int (rsize));
6888 tmp = build2 (PLUS_EXPR, ptr_type_node, next_o, tmp);
6889 tmp = build2 (GT_EXPR, boolean_type_node, tmp, next_o_limit);
6890 tmp = build3 (COND_EXPR, void_type_node, tmp,
6891 build1 (GOTO_EXPR, void_type_node, lab_false),
6892 NULL_TREE);
6893 gimplify_and_add (tmp, pre_p);
6895 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
6896 tmp = build2 (MODIFY_EXPR, void_type_node, addr, tmp);
6897 gimplify_and_add (tmp, pre_p);
6899 tmp = build1 (GOTO_EXPR, void_type_node, lab_over);
6900 gimplify_and_add (tmp, pre_p);
6902 tmp = build1 (LABEL_EXPR, void_type_node, lab_false);
6903 gimplify_and_add (tmp, pre_p);
6905 if (size > 4 && ! TARGET_SH4)
6907 tmp = build2 (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
6908 gimplify_and_add (tmp, pre_p);
6911 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6912 tmp = build2 (MODIFY_EXPR, void_type_node, addr, tmp);
6913 gimplify_and_add (tmp, pre_p);
6916 if (!result)
6918 tmp = build1 (LABEL_EXPR, void_type_node, lab_over);
6919 gimplify_and_add (tmp, pre_p);
6923 /* ??? In va-sh.h, there had been code to make values larger than
6924 size 8 indirect. This does not match the FUNCTION_ARG macros. */
6926 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
6927 if (result)
6929 tmp = build2 (MODIFY_EXPR, void_type_node, result, tmp);
6930 gimplify_and_add (tmp, pre_p);
6932 tmp = build1 (LABEL_EXPR, void_type_node, lab_over);
6933 gimplify_and_add (tmp, pre_p);
6935 else
6936 result = tmp;
6938 if (pass_by_ref)
6939 result = build_va_arg_indirect_ref (result);
6941 return result;
6944 bool
6945 sh_promote_prototypes (tree type)
6947 if (TARGET_HITACHI)
6948 return 0;
6949 if (! type)
6950 return 1;
6951 return ! sh_attr_renesas_p (type);
6954 /* Whether an argument must be passed by reference. On SHcompact, we
6955 pretend arguments wider than 32 bits that would have been passed in
6956 registers are passed by reference, so that an SHmedia trampoline
6957 loads them into the full 64-bit registers. */
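/* For instance, a hypothetical 8-byte long long argument that would
   otherwise start in one of the first integer argument registers is
   passed as a pointer instead; the SHmedia trampoline then loads the
   full 64-bit value through that pointer.  */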
6959 static int
6960 shcompact_byref (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6961 tree type, bool named)
6963 unsigned HOST_WIDE_INT size;
6965 if (type)
6966 size = int_size_in_bytes (type);
6967 else
6968 size = GET_MODE_SIZE (mode);
6970 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
6971 && (!named
6972 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
6973 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
6974 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
6975 && size > 4
6976 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
6977 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
6978 return size;
6979 else
6980 return 0;
6983 static bool
6984 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
6985 tree type, bool named)
6987 if (targetm.calls.must_pass_in_stack (mode, type))
6988 return true;
6990 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
6991 wants to know about pass-by-reference semantics for incoming
6992 arguments. */
6993 if (! cum)
6994 return false;
6996 if (TARGET_SHCOMPACT)
6998 cum->byref = shcompact_byref (cum, mode, type, named);
6999 return cum->byref != 0;
7002 return false;
7005 static bool
7006 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7007 tree type, bool named ATTRIBUTE_UNUSED)
7009 /* ??? How can it possibly be correct to return true only on the
7010 caller side of the equation? Is there someplace else in the
7011 sh backend that's magically producing the copies? */
7012 return (cum->outgoing
7013 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
7014 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
7017 static int
7018 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7019 tree type, bool named ATTRIBUTE_UNUSED)
7021 int words = 0;
7023 if (!TARGET_SH5
7024 && PASS_IN_REG_P (*cum, mode, type)
7025 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
7026 && (ROUND_REG (*cum, mode)
7027 + (mode != BLKmode
7028 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7029 : ROUND_ADVANCE (int_size_in_bytes (type)))
7030 > NPARM_REGS (mode)))
7031 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
7033 else if (!TARGET_SHCOMPACT
7034 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7035 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
7037 return words * UNITS_PER_WORD;
7041 /* Define where to put the arguments to a function.
7042 Value is zero to push the argument on the stack,
7043 or a hard register in which to store the argument.
7045 MODE is the argument's machine mode.
7046 TYPE is the data type of the argument (as a tree).
7047 This is null for libcalls where that information may
7048 not be available.
7049 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7050 the preceding args and about the function being called.
7051 NAMED is nonzero if this argument is a named parameter
7052 (otherwise it is an extra parameter matching an ellipsis).
7054 On SH the first args are normally in registers
7055 and the rest are pushed. Any arg that starts within the first
7056 NPARM_REGS words is at least partially passed in a register unless
7057 its data type forbids. */
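/* For example, a hypothetical call g (int a, long long b, int c) with
   the default (non-Renesas) SH ABI passes A in r4, B in r5/r6 and C in
   r7; a further integer word would go on the stack.  */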
7061 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7062 tree type, int named)
7064 if (! TARGET_SH5 && mode == VOIDmode)
7065 return GEN_INT (ca->renesas_abi ? 1 : 0);
7067 if (! TARGET_SH5
7068 && PASS_IN_REG_P (*ca, mode, type)
7069 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
7071 int regno;
7073 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
7074 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
7076 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
7077 gen_rtx_REG (SFmode,
7078 BASE_ARG_REG (mode)
7079 + (ROUND_REG (*ca, mode) ^ 1)),
7080 const0_rtx);
7081 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
7082 gen_rtx_REG (SFmode,
7083 BASE_ARG_REG (mode)
7084 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
7085 GEN_INT (4));
7086 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
7089 /* If the alignment of a DF value causes an SF register to be
7090 skipped, we will use that skipped register for the next SF
7091 value. */
7092 if ((TARGET_HITACHI || ca->renesas_abi)
7093 && ca->free_single_fp_reg
7094 && mode == SFmode)
7095 return gen_rtx_REG (mode, ca->free_single_fp_reg);
7097 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
7098 ^ (mode == SFmode && TARGET_SH4
7099 && TARGET_LITTLE_ENDIAN != 0
7100 && ! TARGET_HITACHI && ! ca->renesas_abi);
7101 return gen_rtx_REG (mode, regno);
7105 if (TARGET_SH5)
7107 if (mode == VOIDmode && TARGET_SHCOMPACT)
7108 return GEN_INT (ca->call_cookie);
7110 /* The following test assumes unnamed arguments are promoted to
7111 DFmode. */
7112 if (mode == SFmode && ca->free_single_fp_reg)
7113 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
7115 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
7116 && (named || ! ca->prototype_p)
7117 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
7119 if (! ca->prototype_p && TARGET_SHMEDIA)
7120 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
7122 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
7123 FIRST_FP_PARM_REG
7124 + ca->arg_count[(int) SH_ARG_FLOAT]);
7127 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
7128 && (! TARGET_SHCOMPACT
7129 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
7130 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
7131 type, named))))
7133 return gen_rtx_REG (mode, (FIRST_PARM_REG
7134 + ca->arg_count[(int) SH_ARG_INT]));
7137 return 0;
7140 return 0;
7143 /* Update the data in CUM to advance over an argument
7144 of mode MODE and data type TYPE.
7145 (TYPE is null for libcalls where that information may not be
7146 available.) */
7148 void
7149 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7150 tree type, int named)
7152 if (ca->force_mem)
7153 ca->force_mem = 0;
7154 else if (TARGET_SH5)
7156 tree type2 = (ca->byref && type
7157 ? TREE_TYPE (type)
7158 : type);
7159 enum machine_mode mode2 = (ca->byref && type
7160 ? TYPE_MODE (type2)
7161 : mode);
7162 int dwords = ((ca->byref
7163 ? ca->byref
7164 : mode2 == BLKmode
7165 ? int_size_in_bytes (type2)
7166 : GET_MODE_SIZE (mode2)) + 7) / 8;
7167 int numregs = MIN (dwords, NPARM_REGS (SImode)
7168 - ca->arg_count[(int) SH_ARG_INT]);
7170 if (numregs)
7172 ca->arg_count[(int) SH_ARG_INT] += numregs;
7173 if (TARGET_SHCOMPACT
7174 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
7176 ca->call_cookie
7177 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7178 - numregs, 1);
7179 /* N.B. We want this also for outgoing. */
7180 ca->stack_regs += numregs;
7182 else if (ca->byref)
7184 if (! ca->outgoing)
7185 ca->stack_regs += numregs;
7186 ca->byref_regs += numregs;
7187 ca->byref = 0;
7189 ca->call_cookie
7190 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7191 - numregs, 2);
7192 while (--numregs);
7193 ca->call_cookie
7194 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7195 - 1, 1);
7197 else if (dwords > numregs)
7199 int pushregs = numregs;
7201 if (TARGET_SHCOMPACT)
7202 ca->stack_regs += numregs;
7203 while (pushregs < NPARM_REGS (SImode) - 1
7204 && (CALL_COOKIE_INT_REG_GET
7205 (ca->call_cookie,
7206 NPARM_REGS (SImode) - pushregs)
7207 == 1))
7209 ca->call_cookie
7210 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7211 - pushregs, 1);
7212 pushregs++;
7214 if (numregs == NPARM_REGS (SImode))
7215 ca->call_cookie
7216 |= CALL_COOKIE_INT_REG (0, 1)
7217 | CALL_COOKIE_STACKSEQ (numregs - 1);
7218 else
7219 ca->call_cookie
7220 |= CALL_COOKIE_STACKSEQ (numregs);
7223 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
7224 && (named || ! ca->prototype_p))
7226 if (mode2 == SFmode && ca->free_single_fp_reg)
7227 ca->free_single_fp_reg = 0;
7228 else if (ca->arg_count[(int) SH_ARG_FLOAT]
7229 < NPARM_REGS (SFmode))
7231 int numfpregs
7232 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
7233 NPARM_REGS (SFmode)
7234 - ca->arg_count[(int) SH_ARG_FLOAT]);
7236 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
7238 if (TARGET_SHCOMPACT && ! ca->prototype_p)
7240 if (ca->outgoing && numregs > 0)
7243 ca->call_cookie
7244 |= (CALL_COOKIE_INT_REG
7245 (ca->arg_count[(int) SH_ARG_INT]
7246 - numregs + ((numfpregs - 2) / 2),
7247 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
7248 - numfpregs) / 2));
7250 while (numfpregs -= 2);
7252 else if (mode2 == SFmode && (named)
7253 && (ca->arg_count[(int) SH_ARG_FLOAT]
7254 < NPARM_REGS (SFmode)))
7255 ca->free_single_fp_reg
7256 = FIRST_FP_PARM_REG - numfpregs
7257 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
7260 return;
7263 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
7265 /* Note that we've used the skipped register. */
7266 if (mode == SFmode && ca->free_single_fp_reg)
7268 ca->free_single_fp_reg = 0;
7269 return;
7271 /* When we have a DF after an SF, there's an SF register that gets
7272 skipped in order to align the DF value. We note this skipped
7273 register, because the next SF value will use it, and not the
7274 SF that follows the DF. */
7275 if (mode == DFmode
7276 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
7278 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
7279 + BASE_ARG_REG (mode));
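/* For example, with a hypothetical prototype f (float a, double b, float c)
   under the Renesas convention on a double-precision FPU, A uses fr4, B is
   aligned to the fr6/fr7 pair (skipping fr5), and C then picks up the
   recorded fr5 instead of the next free register.  */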
7283 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
7284 || PASS_IN_REG_P (*ca, mode, type))
7285 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
7286 = (ROUND_REG (*ca, mode)
7287 + (mode == BLKmode
7288 ? ROUND_ADVANCE (int_size_in_bytes (type))
7289 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
7292 /* The Renesas calling convention doesn't quite fit into this scheme since
7293 the address is passed like an invisible argument, but one that is always
7294 passed in memory. */
7295 static rtx
7296 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
7298 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7299 return 0;
7300 return gen_rtx_REG (Pmode, 2);
7303 /* Worker function for TARGET_RETURN_IN_MEMORY. */
7305 static bool
7306 sh_return_in_memory (tree type, tree fndecl)
7308 if (TARGET_SH5)
7310 if (TYPE_MODE (type) == BLKmode)
7311 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
7312 else
7313 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
7315 else
7317 return (TYPE_MODE (type) == BLKmode
7318 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7319 && TREE_CODE (type) == RECORD_TYPE));
7323 /* We actually emit the code in sh_expand_prologue. We used to use
7324 a static variable to flag that we need to emit this code, but that
7325 doesn't work when inlining, when functions are deferred and then emitted
7326 later. Fortunately, we already have two flags that are part of struct
7327 function that tell if a function uses varargs or stdarg. */
7328 static void
7329 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
7330 enum machine_mode mode,
7331 tree type,
7332 int *pretend_arg_size,
7333 int second_time ATTRIBUTE_UNUSED)
7335 gcc_assert (current_function_stdarg);
7336 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7338 int named_parm_regs, anon_parm_regs;
7340 named_parm_regs = (ROUND_REG (*ca, mode)
7341 + (mode == BLKmode
7342 ? ROUND_ADVANCE (int_size_in_bytes (type))
7343 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
7344 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
7345 if (anon_parm_regs > 0)
7346 *pretend_arg_size = anon_parm_regs * 4;
7350 static bool
7351 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
7353 return TARGET_SH5;
7356 static bool
7357 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
7359 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
7363 /* Define the offset between two registers, one to be eliminated, and
7364 the other its replacement, at the start of a routine. */
7367 initial_elimination_offset (int from, int to)
7369 int regs_saved;
7370 int regs_saved_rounding = 0;
7371 int total_saved_regs_space;
7372 int total_auto_space;
7373 int save_flags = target_flags;
7374 int copy_flags;
7375 HARD_REG_SET live_regs_mask;
7377 shmedia_space_reserved_for_target_registers = false;
7378 regs_saved = calc_live_regs (&live_regs_mask);
7379 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
7381 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
7383 shmedia_space_reserved_for_target_registers = true;
7384 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
7387 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
7388 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7389 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
7391 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
7392 copy_flags = target_flags;
7393 target_flags = save_flags;
7395 total_saved_regs_space = regs_saved + regs_saved_rounding;
7397 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7398 return total_saved_regs_space + total_auto_space
7399 + current_function_args_info.byref_regs * 8;
7401 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7402 return total_saved_regs_space + total_auto_space
7403 + current_function_args_info.byref_regs * 8;
7405 /* Initial gap between fp and sp is 0. */
7406 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7407 return 0;
7409 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7410 return rounded_frame_size (0);
7412 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7413 return rounded_frame_size (0);
7415 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
7416 && (to == HARD_FRAME_POINTER_REGNUM
7417 || to == STACK_POINTER_REGNUM));
7418 if (TARGET_SH5)
7420 int n = total_saved_regs_space;
7421 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7422 save_schedule schedule;
7423 save_entry *entry;
7425 n += total_auto_space;
7427 /* If it wasn't saved, there's not much we can do. */
7428 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7429 return n;
7431 target_flags = copy_flags;
7433 sh5_schedule_saves (&live_regs_mask, &schedule, n);
7434 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7435 if (entry->reg == pr_reg)
7437 target_flags = save_flags;
7438 return entry->offset;
7440 gcc_unreachable ();
7442 else
7443 return total_auto_space;
7446 /* Insert any deferred function attributes from earlier pragmas. */
7447 static void
7448 sh_insert_attributes (tree node, tree *attributes)
7450 tree attrs;
7452 if (TREE_CODE (node) != FUNCTION_DECL)
7453 return;
7455 /* We are only interested in fields. */
7456 if (!DECL_P (node))
7457 return;
7459 /* Append the attributes to the deferred attributes. */
7460 *sh_deferred_function_attributes_tail = *attributes;
7461 attrs = sh_deferred_function_attributes;
7462 if (!attrs)
7463 return;
7465 /* Some attributes imply or require the interrupt attribute. */
7466 if (!lookup_attribute ("interrupt_handler", attrs)
7467 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
7469 /* If we have a trapa_handler, but no interrupt_handler attribute,
7470 insert an interrupt_handler attribute. */
7471 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
7472 /* We can't use sh_pr_interrupt here because that's not in the
7473 java frontend. */
7474 attrs
7475 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
7476 /* However, for sp_switch, trap_exit and nosave_low_regs, if the
7477 interrupt attribute is missing, we ignore the attribute and warn. */
7478 else if (lookup_attribute ("sp_switch", attrs)
7479 || lookup_attribute ("trap_exit", attrs)
7480 || lookup_attribute ("nosave_low_regs", attrs))
7482 tree *tail;
7484 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
7486 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
7487 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
7488 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs)))
7489 warning (OPT_Wattributes,
7490 "%qs attribute only applies to interrupt functions",
7491 IDENTIFIER_POINTER (TREE_PURPOSE (attrs)));
7492 else
7494 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
7495 NULL_TREE);
7496 tail = &TREE_CHAIN (*tail);
7499 attrs = *attributes;
7503 /* Install the processed list. */
7504 *attributes = attrs;
7506 /* Clear deferred attributes. */
7507 sh_deferred_function_attributes = NULL_TREE;
7508 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
7510 return;
7513 /* Supported attributes:
7515 interrupt_handler -- specifies this function is an interrupt handler.
7517 trapa_handler -- like above, but don't save all registers.
7519 sp_switch -- specifies an alternate stack for an interrupt handler
7520 to run on.
7522 trap_exit -- use a trapa to exit an interrupt function instead of
7523 an rte instruction.
7525 nosave_low_regs -- don't save r0..r7 in an interrupt handler.
7526 This is useful on the SH3 and upwards,
7527 which have a separate set of low regs for User and Supervisor modes.
7528 This should only be used for the lowest level of interrupts. Higher levels
7529 of interrupts must save the registers in case they themselves are
7530 interrupted.
7532 renesas -- use Renesas calling/layout conventions (functions and
7533 structures).
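   For illustration, a hypothetical low-level handler combining several
   of these attributes could be declared as

       void handler (void)
            __attribute__ ((interrupt_handler,
                            sp_switch ("alt_stack"),
                            trap_exit (11),
                            nosave_low_regs));

   where alt_stack is a variable holding the address of the alternate
   stack and 11 is the trapa number used at exit.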
7537 const struct attribute_spec sh_attribute_table[] =
7539 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
7540 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7541 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
7542 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
7543 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
7544 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7545 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7546 #ifdef SYMBIAN
7547 /* Symbian support adds three new attributes:
7548 dllexport - for exporting a function/variable that will live in a dll
7549 dllimport - for importing a function/variable from a dll
7551 Microsoft allows multiple declspecs in one __declspec, separating
7552 them with spaces. We do NOT support this. Instead, use __declspec
7553 multiple times. */
7554 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7555 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7556 #endif
7557 { NULL, 0, 0, false, false, false, NULL }
7560 /* Handle an "interrupt_handler" attribute; arguments as in
7561 struct attribute_spec.handler. */
7562 static tree
7563 sh_handle_interrupt_handler_attribute (tree *node, tree name,
7564 tree args ATTRIBUTE_UNUSED,
7565 int flags ATTRIBUTE_UNUSED,
7566 bool *no_add_attrs)
7568 if (TREE_CODE (*node) != FUNCTION_DECL)
7570 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7571 IDENTIFIER_POINTER (name));
7572 *no_add_attrs = true;
7574 else if (TARGET_SHCOMPACT)
7576 error ("attribute interrupt_handler is not compatible with -m5-compact");
7577 *no_add_attrs = true;
7580 return NULL_TREE;
7583 /* Handle an "sp_switch" attribute; arguments as in
7584 struct attribute_spec.handler. */
7585 static tree
7586 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
7587 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7589 if (TREE_CODE (*node) != FUNCTION_DECL)
7591 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7592 IDENTIFIER_POINTER (name));
7593 *no_add_attrs = true;
7595 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
7597 /* The argument must be a constant string. */
7598 warning (OPT_Wattributes, "%qs attribute argument not a string constant",
7599 IDENTIFIER_POINTER (name));
7600 *no_add_attrs = true;
7603 return NULL_TREE;
7606 /* Handle a "trap_exit" attribute; arguments as in
7607 struct attribute_spec.handler. */
7608 static tree
7609 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
7610 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7612 if (TREE_CODE (*node) != FUNCTION_DECL)
7614 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7615 IDENTIFIER_POINTER (name));
7616 *no_add_attrs = true;
7618 /* The argument specifies a trap number to be used in a trapa instruction
7619 at function exit (instead of an rte instruction). */
7620 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
7622 /* The argument must be a constant integer. */
7623 warning (OPT_Wattributes, "%qs attribute argument not an "
7624 "integer constant", IDENTIFIER_POINTER (name));
7625 *no_add_attrs = true;
7628 return NULL_TREE;
7631 static tree
7632 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
7633 tree name ATTRIBUTE_UNUSED,
7634 tree args ATTRIBUTE_UNUSED,
7635 int flags ATTRIBUTE_UNUSED,
7636 bool *no_add_attrs ATTRIBUTE_UNUSED)
7638 return NULL_TREE;
7641 /* True if __attribute__((renesas)) or -mrenesas. */
7643 sh_attr_renesas_p (tree td)
7645 if (TARGET_HITACHI)
7646 return 1;
7647 if (td == 0)
7648 return 0;
7649 if (DECL_P (td))
7650 td = TREE_TYPE (td);
7651 if (td == error_mark_node)
7652 return 0;
7653 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
7654 != NULL_TREE);
7657 /* True if __attribute__((renesas)) or -mrenesas, for the current
7658 function. */
7660 sh_cfun_attr_renesas_p (void)
7662 return sh_attr_renesas_p (current_function_decl);
7666 sh_cfun_interrupt_handler_p (void)
7668 return (lookup_attribute ("interrupt_handler",
7669 DECL_ATTRIBUTES (current_function_decl))
7670 != NULL_TREE);
7673 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
7675 static const char *
7676 sh_check_pch_target_flags (int old_flags)
7678 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
7679 | MASK_SH_E | MASK_HARD_SH4
7680 | MASK_FPU_SINGLE | MASK_SH4))
7681 return _("created and used with different architectures / ABIs");
7682 if ((old_flags ^ target_flags) & MASK_HITACHI)
7683 return _("created and used with different ABIs");
7684 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
7685 return _("created and used with different endianness");
7686 return NULL;
7689 /* Predicates used by the templates. */
7691 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
7692 Used only in general_movsrc_operand. */
7695 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7697 switch (REGNO (op))
7699 case PR_REG:
7700 case MACL_REG:
7701 case MACH_REG:
7702 return 1;
7704 return 0;
7707 /* Nonzero if OP is a floating point value with value 0.0. */
7710 fp_zero_operand (rtx op)
7712 REAL_VALUE_TYPE r;
7714 if (GET_MODE (op) != SFmode)
7715 return 0;
7717 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7718 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
7721 /* Nonzero if OP is a floating point value with value 1.0. */
7724 fp_one_operand (rtx op)
7726 REAL_VALUE_TYPE r;
7728 if (GET_MODE (op) != SFmode)
7729 return 0;
7731 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7732 return REAL_VALUES_EQUAL (r, dconst1);
7735 /* For -m4 and -m4-single-only, mode switching is used. If we are
7736 compiling without -mfmovd, movsf_ie isn't taken into account for
7737 mode switching. We could check in machine_dependent_reorg for
7739 cases where we know we are in single precision mode, but there is
7740 no interface to find that out during reload, so we must avoid
7740 choosing an fldi alternative during reload and thus failing to
7741 allocate a scratch register for the constant loading. */
7743 fldi_ok (void)
7745 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
7749 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7751 enum rtx_code code = GET_CODE (op);
7752 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
7755 /* Return the TLS type for TLS symbols, 0 otherwise. */
7757 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7759 if (GET_CODE (op) != SYMBOL_REF)
7760 return 0;
7761 return SYMBOL_REF_TLS_MODEL (op);
7764 /* Return the destination address of a branch. */
7766 static int
7767 branch_dest (rtx branch)
7769 rtx dest = SET_SRC (PATTERN (branch));
7770 int dest_uid;
7772 if (GET_CODE (dest) == IF_THEN_ELSE)
7773 dest = XEXP (dest, 1);
7774 dest = XEXP (dest, 0);
7775 dest_uid = INSN_UID (dest);
7776 return INSN_ADDRESSES (dest_uid);
7779 /* Return nonzero if REG is not used after INSN.
7780 We assume REG is a reload reg, and therefore does
7781 not live past labels. It may live past calls or jumps though. */
7783 reg_unused_after (rtx reg, rtx insn)
7785 enum rtx_code code;
7786 rtx set;
7788 /* If the reg is set by this instruction, then it is safe for our
7789 case. Disregard the case where this is a store to memory, since
7790 we are checking a register used in the store address. */
7791 set = single_set (insn);
7792 if (set && GET_CODE (SET_DEST (set)) != MEM
7793 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7794 return 1;
7796 while ((insn = NEXT_INSN (insn)))
7798 rtx set;
7799 if (!INSN_P (insn))
7800 continue;
7802 code = GET_CODE (insn);
7804 #if 0
7805 /* If this is a label that existed before reload, then the register
7806 is dead here. However, if this is a label added by reorg, then
7807 the register may still be live here. We can't tell the difference,
7808 so we just ignore labels completely. */
7809 if (code == CODE_LABEL)
7810 return 1;
7811 /* else */
7812 #endif
7814 if (code == JUMP_INSN)
7815 return 0;
7817 /* If this is a sequence, we must handle them all at once.
7818 We could have for instance a call that sets the target register,
7819 and an insn in a delay slot that uses the register. In this case,
7820 we must return 0. */
7821 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
7823 int i;
7824 int retval = 0;
7826 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
7828 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
7829 rtx set = single_set (this_insn);
7831 if (GET_CODE (this_insn) == CALL_INSN)
7832 code = CALL_INSN;
7833 else if (GET_CODE (this_insn) == JUMP_INSN)
7835 if (INSN_ANNULLED_BRANCH_P (this_insn))
7836 return 0;
7837 code = JUMP_INSN;
7840 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7841 return 0;
7842 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7844 if (GET_CODE (SET_DEST (set)) != MEM)
7845 retval = 1;
7846 else
7847 return 0;
7849 if (set == 0
7850 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
7851 return 0;
7853 if (retval == 1)
7854 return 1;
7855 else if (code == JUMP_INSN)
7856 return 0;
7859 set = single_set (insn);
7860 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7861 return 0;
7862 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7863 return GET_CODE (SET_DEST (set)) != MEM;
7864 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
7865 return 0;
7867 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
7868 return 1;
7870 return 1;
7873 #include "ggc.h"
7875 static GTY(()) rtx fpscr_rtx;
7877 get_fpscr_rtx (void)
7879 if (! fpscr_rtx)
7881 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
7882 REG_USERVAR_P (fpscr_rtx) = 1;
7883 mark_user_reg (fpscr_rtx);
7885 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
7886 mark_user_reg (fpscr_rtx);
7887 return fpscr_rtx;
7890 static GTY(()) tree fpscr_values;
7892 static void
7893 emit_fpu_switch (rtx scratch, int index)
7895 rtx dst, src;
7897 if (fpscr_values == NULL)
7899 tree t;
7901 t = build_index_type (integer_one_node);
7902 t = build_array_type (integer_type_node, t);
7903 t = build_decl (VAR_DECL, get_identifier ("__fpscr_values"), t);
7904 DECL_ARTIFICIAL (t) = 1;
7905 DECL_IGNORED_P (t) = 1;
7906 DECL_EXTERNAL (t) = 1;
7907 TREE_STATIC (t) = 1;
7908 TREE_PUBLIC (t) = 1;
7909 TREE_USED (t) = 1;
7911 fpscr_values = t;
7914 src = DECL_RTL (fpscr_values);
7915 if (no_new_pseudos)
7917 emit_move_insn (scratch, XEXP (src, 0));
7918 if (index != 0)
7919 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
7920 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
7922 else
7923 src = adjust_address (src, PSImode, index * 4);
7925 dst = get_fpscr_rtx ();
7926 emit_move_insn (dst, src);
7929 void
7930 emit_sf_insn (rtx pat)
7932 emit_insn (pat);
7935 void
7936 emit_df_insn (rtx pat)
7938 emit_insn (pat);
7941 void
7942 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
7944 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7947 void
7948 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
7950 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
7951 get_fpscr_rtx ()));
7954 void
7955 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
7957 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
7960 void
7961 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
7963 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
7964 get_fpscr_rtx ()));
7967 /* ??? gcc does flow analysis strictly after common subexpression
7968 elimination. As a result, common subexpression elimination fails
7969 when there are some intervening statements setting the same register.
7970 If we did nothing about this, this would hurt the precision switching
7971 for SH4 badly. There is some cse after reload, but it is unable to
7972 undo the extra register pressure from the unused instructions, and
7973 it cannot remove auto-increment loads.
7975 A C code example that shows this flow/cse weakness for (at least) SH
7976 and sparc (as of gcc ss-970706) is this:
7978 double
7979 f(double a)
7981 double d;
7982 d = 0.1;
7983 a += d;
7984 d = 1.1;
7985 d = 0.1;
7986 a *= d;
7987 return a;
7990 So we add another pass before common subexpression elimination, to
7991 remove assignments that are dead due to a following assignment in the
7992 same basic block. */
7994 static void
7995 mark_use (rtx x, rtx *reg_set_block)
7997 enum rtx_code code;
7999 if (! x)
8000 return;
8001 code = GET_CODE (x);
8002 switch (code)
8004 case REG:
8006 int regno = REGNO (x);
8007 int nregs = (regno < FIRST_PSEUDO_REGISTER
8008 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
8009 : 1);
8012 reg_set_block[regno + nregs - 1] = 0;
8014 while (--nregs);
8015 break;
8017 case SET:
8019 rtx dest = SET_DEST (x);
8021 if (GET_CODE (dest) == SUBREG)
8022 dest = SUBREG_REG (dest);
8023 if (GET_CODE (dest) != REG)
8024 mark_use (dest, reg_set_block);
8025 mark_use (SET_SRC (x), reg_set_block);
8026 break;
8028 case CLOBBER:
8029 break;
8030 default:
8032 const char *fmt = GET_RTX_FORMAT (code);
8033 int i, j;
8034 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8036 if (fmt[i] == 'e')
8037 mark_use (XEXP (x, i), reg_set_block);
8038 else if (fmt[i] == 'E')
8039 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8040 mark_use (XVECEXP (x, i, j), reg_set_block);
8042 break;
8047 static rtx get_free_reg (HARD_REG_SET);
8049 /* This function returns a register to use for loading the address from
8050 which the fpscr is loaded. Currently it always returns r1 or r7, but when we are
8051 able to use pseudo registers after combine, or have a better mechanism
8052 for choosing a register, it should be done here. */
8053 /* REGS_LIVE is the liveness information for the point for which we
8054 need this allocation. In some bare-bones exit blocks, r1 is live at the
8055 start. We can even have all of r0..r3 being live:
8056 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8057 The INSN before which new insns are placed will clobber the register
8058 we return. If a basic block consists only of setting the return value
8059 register to a pseudo and using that register, the return value is not
8060 live before or after this block, yet we'll insert our insns right in
8061 the middle. */
8063 static rtx
8064 get_free_reg (HARD_REG_SET regs_live)
8066 if (! TEST_HARD_REG_BIT (regs_live, 1))
8067 return gen_rtx_REG (Pmode, 1);
8069 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8070 there shouldn't be anything but a jump before the function end. */
8071 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
8072 return gen_rtx_REG (Pmode, 7);
8075 /* This function will set the fpscr from memory.
8076 MODE is the mode we are setting it to. */
8077 void
8078 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8080 enum attr_fp_mode fp_mode = mode;
8081 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
8082 rtx addr_reg = get_free_reg (regs_live);
8084 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
8087 /* Is the given character a logical line separator for the assembler? */
8088 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8089 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
8090 #endif
8093 sh_insn_length_adjustment (rtx insn)
8095 /* Instructions with unfilled delay slots take up an extra two bytes for
8096 the nop in the delay slot. */
8097 if (((GET_CODE (insn) == INSN
8098 && GET_CODE (PATTERN (insn)) != USE
8099 && GET_CODE (PATTERN (insn)) != CLOBBER)
8100 || GET_CODE (insn) == CALL_INSN
8101 || (GET_CODE (insn) == JUMP_INSN
8102 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8103 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8104 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8105 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8106 return 2;
8108 /* SH2e has a bug that prevents the use of annulled branches, so if
8109 the delay slot is not filled, we'll have to put a NOP in it. */
8110 if (sh_cpu == CPU_SH2E
8111 && GET_CODE (insn) == JUMP_INSN
8112 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8113 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8114 && get_attr_type (insn) == TYPE_CBRANCH
8115 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8116 return 2;
8118 /* sh-dsp parallel processing insns take four bytes instead of two. */
8120 if (GET_CODE (insn) == INSN)
8122 int sum = 0;
8123 rtx body = PATTERN (insn);
8124 const char *template;
8125 char c;
8126 int maybe_label = 1;
8128 if (GET_CODE (body) == ASM_INPUT)
8129 template = XSTR (body, 0);
8130 else if (asm_noperands (body) >= 0)
8131 template
8132 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
8133 else
8134 return 0;
8137 int ppi_adjust = 0;
8140 c = *template++;
8141 while (c == ' ' || c == '\t');
8142 /* all sh-dsp parallel-processing insns start with p.
8143 The only non-ppi sh insn starting with p is pref.
8144 The only ppi starting with pr is prnd. */
8145 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
8146 ppi_adjust = 2;
8147 /* The repeat pseudo-insn expands to three insns, a total of
8148 six bytes in size. */
8149 else if ((c == 'r' || c == 'R')
8150 && ! strncasecmp ("epeat", template, 5))
8151 ppi_adjust = 4;
8152 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
8154 /* If this is a label, it is obviously not a ppi insn. */
8155 if (c == ':' && maybe_label)
8157 ppi_adjust = 0;
8158 break;
8160 else if (c == '\'' || c == '"')
8161 maybe_label = 0;
8162 c = *template++;
8164 sum += ppi_adjust;
8165 maybe_label = c != ':';
8167 while (c);
8168 return sum;
8170 return 0;
8173 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8174 isn't protected by a PIC unspec. */
8176 nonpic_symbol_mentioned_p (rtx x)
8178 register const char *fmt;
8179 register int i;
8181 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8182 || GET_CODE (x) == PC)
8183 return 1;
8185 /* We don't want to look into the possible MEM location of a
8186 CONST_DOUBLE, since we're not going to use it, in general. */
8187 if (GET_CODE (x) == CONST_DOUBLE)
8188 return 0;
8190 if (GET_CODE (x) == UNSPEC
8191 && (XINT (x, 1) == UNSPEC_PIC
8192 || XINT (x, 1) == UNSPEC_GOT
8193 || XINT (x, 1) == UNSPEC_GOTOFF
8194 || XINT (x, 1) == UNSPEC_GOTPLT
8195 || XINT (x, 1) == UNSPEC_GOTTPOFF
8196 || XINT (x, 1) == UNSPEC_DTPOFF
8197 || XINT (x, 1) == UNSPEC_PLT))
8198 return 0;
8200 fmt = GET_RTX_FORMAT (GET_CODE (x));
8201 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8203 if (fmt[i] == 'E')
8205 register int j;
8207 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8208 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8209 return 1;
8211 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8212 return 1;
8215 return 0;
8218 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8219 @GOTOFF in `reg'. */
8221 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8222 rtx reg)
8224 if (tls_symbolic_operand (orig, Pmode))
8225 return orig;
8227 if (GET_CODE (orig) == LABEL_REF
8228 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8230 if (reg == 0)
8231 reg = gen_reg_rtx (Pmode);
8233 emit_insn (gen_symGOTOFF2reg (reg, orig));
8234 return reg;
8236 else if (GET_CODE (orig) == SYMBOL_REF)
8238 if (reg == 0)
8239 reg = gen_reg_rtx (Pmode);
8241 emit_insn (gen_symGOT2reg (reg, orig));
8242 return reg;
8244 return orig;
8247 /* Mark the use of a constant in the literal table. If the constant
8248 has multiple labels, make it unique. */
8249 static rtx
8250 mark_constant_pool_use (rtx x)
8252 rtx insn, lab, pattern;
8254 if (x == NULL)
8255 return x;
8257 switch (GET_CODE (x))
8259 case LABEL_REF:
8260 x = XEXP (x, 0);
8261 case CODE_LABEL:
8262 break;
8263 default:
8264 return x;
8267 /* Get the first label in the list of labels for the same constant
8268 and delete the other labels in the list. */
8269 lab = x;
8270 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8272 if (GET_CODE (insn) != CODE_LABEL
8273 || LABEL_REFS (insn) != NEXT_INSN (insn))
8274 break;
8275 lab = insn;
8278 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8279 INSN_DELETED_P (insn) = 1;
8281 /* Mark constants in a window. */
8282 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8284 if (GET_CODE (insn) != INSN)
8285 continue;
8287 pattern = PATTERN (insn);
8288 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8289 continue;
8291 switch (XINT (pattern, 1))
8293 case UNSPECV_CONST2:
8294 case UNSPECV_CONST4:
8295 case UNSPECV_CONST8:
8296 XVECEXP (pattern, 0, 1) = const1_rtx;
8297 break;
8298 case UNSPECV_WINDOW_END:
8299 if (XVECEXP (pattern, 0, 0) == x)
8300 return lab;
8301 break;
8302 case UNSPECV_CONST_END:
8303 return lab;
8304 default:
8305 break;
8309 return lab;
8312 /* Return true if it's possible to redirect BRANCH1 to the destination
8313 of an unconditional jump BRANCH2. We only want to do this if the
8314 resulting branch will have a short displacement. */
8316 sh_can_redirect_branch (rtx branch1, rtx branch2)
8318 if (flag_expensive_optimizations && simplejump_p (branch2))
8320 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8321 rtx insn;
8322 int distance;
8324 for (distance = 0, insn = NEXT_INSN (branch1);
8325 insn && distance < 256;
8326 insn = PREV_INSN (insn))
8328 if (insn == dest)
8329 return 1;
8330 else
8331 distance += get_attr_length (insn);
8333 for (distance = 0, insn = NEXT_INSN (branch1);
8334 insn && distance < 256;
8335 insn = NEXT_INSN (insn))
8337 if (insn == dest)
8338 return 1;
8339 else
8340 distance += get_attr_length (insn);
8343 return 0;
8346 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8348 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8349 unsigned int new_reg)
8351 /* Interrupt functions can only use registers that have already been
8352 saved by the prologue, even if they would normally be
8353 call-clobbered. */
8355 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
8356 return 0;
8358 return 1;
8361 /* Function to update the integer COST
8362 based on the relationship between INSN that is dependent on
8363 DEP_INSN through the dependence LINK. The default is to make no
8364 adjustment to COST. This can be used for example to specify to
8365 the scheduler that an output- or anti-dependence does not incur
8366 the same cost as a data-dependence. The return value should be
8367 the new value for COST. */
8368 static int
8369 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8371 rtx reg, use_pat;
8373 if (TARGET_SHMEDIA)
8375 /* On SHmedia, if the dependence is an anti-dependence or
8376 output-dependence, there is no cost. */
8377 if (REG_NOTE_KIND (link) != 0)
8379 /* However, dependencies between target register loads and
8380 uses of the register in a subsequent block that are separated
8381 by a conditional branch are not modelled - we have to make do with
8382 the anti-dependency between the target register load and the
8383 conditional branch that ends the current block. */
8384 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8385 && GET_CODE (PATTERN (dep_insn)) == SET
8386 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
8387 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
8388 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
8390 int orig_cost = cost;
8391 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
8392 rtx target = ((! note
8393 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
8394 ? insn : JUMP_LABEL (insn));
8395 /* On the likely path, the branch costs 1, on the unlikely path,
8396 it costs 3. */
8397 cost--;
8399 target = next_active_insn (target);
8400 while (target && ! flow_dependent_p (target, dep_insn)
8401 && --cost > 0);
8402 /* If two branches are executed in immediate succession, with the
8403 first branch properly predicted, this causes a stall at the
8404 second branch, hence we won't need the target for the
8405 second branch for two cycles after the launch of the first
8406 branch. */
8407 if (cost > orig_cost - 2)
8408 cost = orig_cost - 2;
8410 else
8411 cost = 0;
8414 else if (get_attr_is_mac_media (insn)
8415 && get_attr_is_mac_media (dep_insn))
8416 cost = 1;
8418 else if (! reload_completed
8419 && GET_CODE (PATTERN (insn)) == SET
8420 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
8421 && GET_CODE (PATTERN (dep_insn)) == SET
8422 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
8423 && cost < 4)
8424 cost = 4;
8425 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
8426 that is needed at the target. */
8427 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
8428 && ! flow_dependent_p (insn, dep_insn))
8429 cost--;
8431 else if (REG_NOTE_KIND (link) == 0)
8433 enum attr_type dep_type, type;
8435 if (recog_memoized (insn) < 0
8436 || recog_memoized (dep_insn) < 0)
8437 return cost;
8439 dep_type = get_attr_type (dep_insn);
8440 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
8441 cost--;
8442 if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
8443 && (type = get_attr_type (insn)) != TYPE_CALL
8444 && type != TYPE_SFUNC)
8445 cost--;
8447 /* The only input for a call that is timing-critical is the
8448 function's address. */
8449 if (GET_CODE(insn) == CALL_INSN)
8451 rtx call = PATTERN (insn);
8453 if (GET_CODE (call) == PARALLEL)
8454 call = XVECEXP (call, 0 ,0);
8455 if (GET_CODE (call) == SET)
8456 call = SET_SRC (call);
8457 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
8458 /* sibcalli_thunk uses a symbol_ref in an unspec. */
8459 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
8460 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
8461 cost = 0;
8463 /* Likewise, the most timing-critical input for an sfunc call
8464 is the function address. However, sfuncs typically start
8465 using their arguments pretty quickly.
8466 Assume a four cycle delay before they are needed. */
8467 /* All sfunc calls are parallels with at least four components.
8468 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8469 else if (GET_CODE (PATTERN (insn)) == PARALLEL
8470 && XVECLEN (PATTERN (insn), 0) >= 4
8471 && (reg = sfunc_uses_reg (insn)))
8473 if (! reg_set_p (reg, dep_insn))
8474 cost -= 4;
8476 /* When the preceding instruction loads the shift amount of
8477 the following SHAD/SHLD, the latency of the load is increased
8478 by 1 cycle. */
8479 else if (TARGET_SH4
8480 && get_attr_type (insn) == TYPE_DYN_SHIFT
8481 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
8482 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
8483 XEXP (SET_SRC (single_set (insn)),
8484 1)))
8485 cost++;
8486 /* When an LS group instruction with a latency of less than
8487 3 cycles is followed by a double-precision floating-point
8488 instruction, FIPR, or FTRV, the latency of the first
8489 instruction is increased to 3 cycles. */
8490 else if (cost < 3
8491 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
8492 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
8493 cost = 3;
8494 /* The lsw register of a double-precision computation is ready one
8495 cycle earlier. */
8496 else if (reload_completed
8497 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
8498 && (use_pat = single_set (insn))
8499 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
8500 SET_SRC (use_pat)))
8501 cost -= 1;
8503 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
8504 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
8505 cost -= 1;
8507 /* An anti-dependence penalty of two applies if the first insn is a double
8508 precision fadd / fsub / fmul. */
8509 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8510 && recog_memoized (dep_insn) >= 0
8511 && get_attr_type (dep_insn) == TYPE_DFP_ARITH
8512 /* A lot of alleged anti-flow dependences are fake,
8513 so check this one is real. */
8514 && flow_dependent_p (dep_insn, insn))
8515 cost = 2;
8518 return cost;
8521 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
8522 if DEP_INSN is anti-flow dependent on INSN. */
8523 static int
8524 flow_dependent_p (rtx insn, rtx dep_insn)
8526 rtx tmp = PATTERN (insn);
8528 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
8529 return tmp == NULL_RTX;
8532 /* A helper function for flow_dependent_p called through note_stores. */
8533 static void
8534 flow_dependent_p_1 (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
8536 rtx * pinsn = (rtx *) data;
8538 if (*pinsn && reg_referenced_p (x, *pinsn))
8539 *pinsn = NULL_RTX;
8542 /* For use by sh_allocate_initial_value. Note that sh.md contains some
8543 'special function' patterns (type sfunc) that clobber pr, but that
8544 do not look like function calls to leaf_function_p. Hence we must
8545 do this extra check. */
8546 static int
8547 sh_pr_n_sets (void)
8549 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
8552 /* Return where to allocate pseudo for a given hard register initial
8553 value. */
8554 static rtx
8555 sh_allocate_initial_value (rtx hard_reg)
8557 rtx x;
8559 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
8561 if (current_function_is_leaf
8562 && ! sh_pr_n_sets ()
8563 && ! (TARGET_SHCOMPACT
8564 && ((current_function_args_info.call_cookie
8565 & ~ CALL_COOKIE_RET_TRAMP (1))
8566 || current_function_has_nonlocal_label)))
8567 x = hard_reg;
8568 else
8569 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
8571 else
8572 x = NULL_RTX;
8574 return x;
8577 /* This function returns "2" to indicate dual issue for the SH4
8578 processor. To be used by the DFA pipeline description. */
8579 static int
8580 sh_issue_rate (void)
8582 if (TARGET_SUPERSCALAR)
8583 return 2;
8584 else
8585 return 1;
8588 /* Functions for ready queue reordering for sched1. */
8590 /* Get weight for mode for a set x. */
8591 static short
8592 find_set_regmode_weight (rtx x, enum machine_mode mode)
8594 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
8595 return 1;
8596 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
8598 if (GET_CODE (SET_DEST (x)) == REG)
8600 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
8601 return 1;
8602 else
8603 return 0;
8605 return 1;
8607 return 0;
8610 /* Get regmode weight for insn. */
8611 static short
8612 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
8614 short reg_weight = 0;
8615 rtx x;
8617 /* Increment weight for each register born here. */
8618 x = PATTERN (insn);
8619 reg_weight += find_set_regmode_weight (x, mode);
8620 if (GET_CODE (x) == PARALLEL)
8622 int j;
8623 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
8625 x = XVECEXP (PATTERN (insn), 0, j);
8626 reg_weight += find_set_regmode_weight (x, mode);
8629 /* Decrement weight for each register that dies here. */
8630 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
8632 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
8634 rtx note = XEXP (x, 0);
8635 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
8636 reg_weight--;
8639 return reg_weight;
8642 /* Calculate regmode weights for all insns of a basic block. */
8643 static void
8644 find_regmode_weight (int b, enum machine_mode mode)
8646 rtx insn, next_tail, head, tail;
8648 get_block_head_tail (b, &head, &tail);
8649 next_tail = NEXT_INSN (tail);
8651 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
8653 /* Handle register life information. */
8654 if (!INSN_P (insn))
8655 continue;
8657 if (mode == SFmode)
8658 INSN_REGMODE_WEIGHT (insn, mode) =
8659 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
8660 else if (mode == SImode)
8661 INSN_REGMODE_WEIGHT (insn, mode) =
8662 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
8666 /* Comparison function for ready queue sorting. */
8667 static int
8668 rank_for_reorder (const void *x, const void *y)
8670 rtx tmp = *(const rtx *) y;
8671 rtx tmp2 = *(const rtx *) x;
8673 /* The insn in a schedule group should be issued first. */
8674 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
8675 return SCHED_GROUP_P (tmp2) ? 1 : -1;
8677 /* If insns are equally good, sort by INSN_LUID (original insn order). This
8678 minimizes instruction movement, thus minimizing sched's effect on
8679 register pressure. */
8680 return INSN_LUID (tmp) - INSN_LUID (tmp2);
8683 /* Resort the array A in which only the element at index N may be out of order. */
8684 static void
8685 swap_reorder (rtx *a, int n)
8687 rtx insn = a[n - 1];
8688 int i = n - 2;
8690 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
8692 a[i + 1] = a[i];
8693 i -= 1;
8695 a[i + 1] = insn;
8698 #define SCHED_REORDER(READY, N_READY) \
8699 do \
8701 if ((N_READY) == 2) \
8702 swap_reorder (READY, N_READY); \
8703 else if ((N_READY) > 2) \
8704 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
8706 while (0)
8708 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
8709 macro. */
8710 static void
8711 ready_reorder (rtx *ready, int nready)
8713 SCHED_REORDER (ready, nready);
8716 /* Calculate regmode weights for all insns of all basic blocks. */
8717 static void
8718 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
8719 int verbose ATTRIBUTE_UNUSED,
8720 int old_max_uid)
8722 basic_block b;
8724 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
8725 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
8727 FOR_EACH_BB_REVERSE (b)
8729 find_regmode_weight (b->index, SImode);
8730 find_regmode_weight (b->index, SFmode);
8733 CURR_REGMODE_PRESSURE (SImode) = 0;
8734 CURR_REGMODE_PRESSURE (SFmode) = 0;
8738 /* Cleanup. */
8739 static void
8740 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
8741 int verbose ATTRIBUTE_UNUSED)
8743 if (regmode_weight[0])
8745 free (regmode_weight[0]);
8746 regmode_weight[0] = NULL;
8748 if (regmode_weight[1])
8750 free (regmode_weight[1]);
8751 regmode_weight[1] = NULL;
8755 /* Cache the can_issue_more so that we can return it from reorder2. Also,
8756 keep count of register pressures on SImode and SFmode. */
8757 static int
8758 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
8759 int sched_verbose ATTRIBUTE_UNUSED,
8760 rtx insn,
8761 int can_issue_more)
8763 if (GET_CODE (PATTERN (insn)) != USE
8764 && GET_CODE (PATTERN (insn)) != CLOBBER)
8765 cached_can_issue_more = can_issue_more - 1;
8766 else
8767 cached_can_issue_more = can_issue_more;
8769 if (reload_completed)
8770 return cached_can_issue_more;
8772 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
8773 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
8775 return cached_can_issue_more;
8778 static void
8779 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
8780 int verbose ATTRIBUTE_UNUSED,
8781 int veclen ATTRIBUTE_UNUSED)
8783 CURR_REGMODE_PRESSURE (SImode) = 0;
8784 CURR_REGMODE_PRESSURE (SFmode) = 0;
8787 /* Some magic numbers. */
8788 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8789 functions that already have high pressure on r0. */
8790 #define R0_MAX_LIFE_REGIONS 2
8791 #define R0_MAX_LIVE_LENGTH 12
8792 /* Register Pressure thresholds for SImode and SFmode registers. */
8793 #define SIMODE_MAX_WEIGHT 5
8794 #define SFMODE_MAX_WEIGHT 10
8796 /* Return true if the pressure is high for MODE. */
8797 static short
8798 high_pressure (enum machine_mode mode)
8800 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8801 functions that already have high pressure on r0. */
8802 if ((REG_N_SETS (0) - REG_N_DEATHS (0)) >= R0_MAX_LIFE_REGIONS
8803 && REG_LIVE_LENGTH (0) >= R0_MAX_LIVE_LENGTH)
8804 return 1;
8806 if (mode == SFmode)
8807 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
8808 else
8809 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
8812 /* Reorder ready queue if register pressure is high. */
8813 static int
8814 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
8815 int sched_verbose ATTRIBUTE_UNUSED,
8816 rtx *ready,
8817 int *n_readyp,
8818 int clock_var ATTRIBUTE_UNUSED)
8820 if (reload_completed)
8821 return sh_issue_rate ();
8823 if (high_pressure (SFmode) || high_pressure (SImode))
8825 ready_reorder (ready, *n_readyp);
8828 return sh_issue_rate ();
8831 /* Skip cycles if the current register pressure is high. */
8832 static int
8833 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
8834 int sched_verbose ATTRIBUTE_UNUSED,
8835 rtx *ready ATTRIBUTE_UNUSED,
8836 int *n_readyp ATTRIBUTE_UNUSED,
8837 int clock_var ATTRIBUTE_UNUSED)
8839 if (reload_completed)
8840 return cached_can_issue_more;
8842 if (high_pressure(SFmode) || high_pressure (SImode))
8843 skip_cycles = 1;
8845 return cached_can_issue_more;
8848 /* Skip cycles without sorting the ready queue. This will move insns from
8849 Q->R. If this is the last cycle we are skipping, allow sorting of the ready
8850 queue by sh_reorder. */
8852 /* Generally, skipping this many cycles is sufficient for all insns to move
8853 from Q -> R. */
8854 #define MAX_SKIPS 8
8856 static int
8857 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
8858 int sched_verbose ATTRIBUTE_UNUSED,
8859 rtx insn ATTRIBUTE_UNUSED,
8860 int last_clock_var,
8861 int clock_var,
8862 int *sort_p)
8864 if (reload_completed)
8865 return 0;
8867 if (skip_cycles)
8869 if ((clock_var - last_clock_var) < MAX_SKIPS)
8871 *sort_p = 0;
8872 return 1;
8874 /* If this is the last cycle we are skipping, allow reordering of R. */
8875 if ((clock_var - last_clock_var) == MAX_SKIPS)
8877 *sort_p = 1;
8878 return 1;
8882 skip_cycles = 0;
8884 return 0;
8887 /* SHmedia requires registers for branches, so we can't generate new
8888 branches past reload. */
8889 static bool
8890 sh_cannot_modify_jumps_p (void)
8892 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
8895 static int
8896 sh_target_reg_class (void)
8898 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
8901 static bool
8902 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
8904 HARD_REG_SET dummy;
8905 rtx insn;
8907 if (! shmedia_space_reserved_for_target_registers)
8908 return 0;
8909 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
8910 return 0;
8911 if (calc_live_regs (&dummy) >= 6 * 8)
8912 return 1;
8913 /* This is a borderline case. See if we got a nested loop, or a loop
8914 with a call, or with more than 4 labels inside. */
8915 for (insn = get_insns(); insn; insn = NEXT_INSN (insn))
8917 if (GET_CODE (insn) == NOTE
8918 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
8920 int labels = 0;
8924 insn = NEXT_INSN (insn);
8925 if ((GET_CODE (insn) == NOTE
8926 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
8927 || GET_CODE (insn) == CALL_INSN
8928 || (GET_CODE (insn) == CODE_LABEL && ++labels > 4))
8929 return 1;
8931 while (GET_CODE (insn) != NOTE
8932 || NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_END);
8935 return 0;
8938 static bool
8939 sh_ms_bitfield_layout_p (tree record_type ATTRIBUTE_UNUSED)
8941 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
8945 /* On the SH1..SH4, the trampoline looks like
8946 2 0002 D202 mov.l l2,r2
8947 1 0000 D301 mov.l l1,r3
8948 3 0004 422B jmp @r2
8949 4 0006 0009 nop
8950 5 0008 00000000 l1: .long area
8951 6 000c 00000000 l2: .long function
8953 SH5 (compact) uses r1 instead of r3 for the static chain. */
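/* A reading aid for sh_initialize_trampoline below (a sketch derived from
   the listing above, not new information): for SH1..SH4 the SImode
   constants stored at offsets 0 and 4 encode the two mov.l instructions,
   the jmp @r2 and the nop (the little-endian and big-endian constants
   differ only in how the two halfwords are packed into each 32-bit word),
   while the words at offsets 8 and 12 receive the static chain (l1) and
   the function address (l2).  */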
8956 /* Emit RTL insns to initialize the variable parts of a trampoline.
8957 FNADDR is an RTX for the address of the function's pure code.
8958 CXT is an RTX for the static chain value for the function. */
8960 void
8961 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
8963 rtx tramp_mem = gen_frame_mem (BLKmode, tramp);
8965 if (TARGET_SHMEDIA64)
8967 rtx tramp_templ;
8968 int fixed_len;
8970 rtx movi1 = GEN_INT (0xcc000010);
8971 rtx shori1 = GEN_INT (0xc8000010);
8972 rtx src, dst;
8974 /* The following trampoline works within a +- 128 KB range for cxt:
8975 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
8976 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
8977 gettr tr1,r1; blink tr0,r63 */
8978 /* Address rounding makes it hard to compute the exact bounds of the
8979 offset for this trampoline, but we have a rather generous offset
8980 range, so frame_offset should do fine as an upper bound. */
8981 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
8983 /* ??? could optimize this trampoline initialization
8984 by writing DImode words with two insns each. */
8985 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
8986 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
8987 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
8988 insn = gen_rtx_AND (DImode, insn, mask);
8989 /* Or in ptb/u .,tr1 pattern */
8990 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
8991 insn = force_operand (insn, NULL_RTX);
8992 insn = gen_lowpart (SImode, insn);
8993 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
8994 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
8995 insn = gen_rtx_AND (DImode, insn, mask);
8996 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
8997 insn = gen_lowpart (SImode, insn);
8998 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
8999 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
9000 insn = gen_rtx_AND (DImode, insn, mask);
9001 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9002 insn = gen_lowpart (SImode, insn);
9003 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
9004 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
9005 insn = gen_rtx_AND (DImode, insn, mask);
9006 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9007 insn = gen_lowpart (SImode, insn);
9008 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
9009 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
9010 insn = gen_rtx_AND (DImode, insn, mask);
9011 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9012 insn = gen_lowpart (SImode, insn);
9013 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
9014 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
9015 GEN_INT (0x6bf10600));
9016 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
9017 GEN_INT (0x4415fc10));
9018 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
9019 GEN_INT (0x4401fff0));
9020 emit_insn (gen_ic_invalidate_line (tramp));
9021 return;
9023 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
9024 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
9026 tramp_templ = gen_datalabel_ref (tramp_templ);
9027 dst = tramp_mem;
9028 src = gen_const_mem (BLKmode, tramp_templ);
9029 set_mem_align (dst, 256);
9030 set_mem_align (src, 64);
9031 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
9033 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
9034 emit_move_insn (adjust_address (tramp_mem, Pmode,
9035 fixed_len + GET_MODE_SIZE (Pmode)),
9036 cxt);
9037 emit_insn (gen_ic_invalidate_line (tramp));
9038 return;
9040 else if (TARGET_SHMEDIA)
9042 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
9043 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
9044 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
9045 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
9046 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
9047 rotated 10 right, and higher 16 bit of every 32 selected. */
9048 rtx movishori
9049 = force_reg (V2HImode, (simplify_gen_subreg
9050 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
9051 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
9052 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
9054 tramp = force_reg (Pmode, tramp);
9055 fnaddr = force_reg (SImode, fnaddr);
9056 cxt = force_reg (SImode, cxt);
9057 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
9058 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
9059 movishori));
9060 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
9061 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9062 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
9063 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
9064 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
9065 gen_rtx_SUBREG (V2HImode, cxt, 0),
9066 movishori));
9067 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
9068 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9069 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
9070 if (TARGET_LITTLE_ENDIAN)
9072 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
9073 emit_insn (gen_mextr4 (quad2, cxtload, blink));
9075 else
9077 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
9078 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
9080 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
9081 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
9082 emit_insn (gen_ic_invalidate_line (tramp));
9083 return;
9085 else if (TARGET_SHCOMPACT)
9087 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
9088 return;
9090 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
9091 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
9092 SImode));
9093 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
9094 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
9095 SImode));
9096 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
9097 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
9098 if (TARGET_HARVARD)
9100 if (TARGET_USERMODE)
9101 emit_library_call (function_symbol (NULL, "__ic_invalidate",
9102 FUNCTION_ORDINARY),
9103 0, VOIDmode, 1, tramp, SImode);
9104 else
9105 emit_insn (gen_ic_invalidate_line (tramp));
9109 /* FIXME: This is overly conservative. A SHcompact function that
9110 receives arguments ``by reference'' will have them stored in its
9111 own stack frame, so it must not pass pointers or references to
9112 these arguments to other functions by means of sibling calls. */
9113 /* If PIC, we cannot make sibling calls to global functions
9114 because the PLT requires r12 to be live. */
9115 static bool
9116 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9118 return (1
9119 && (! TARGET_SHCOMPACT
9120 || current_function_args_info.stack_regs == 0)
9121 && ! sh_cfun_interrupt_handler_p ()
9122 && (! flag_pic
9123 || (decl && ! TREE_PUBLIC (decl))
9124 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
9127 /* Machine specific built-in functions. */
9129 struct builtin_description
9131 const enum insn_code icode;
9132 const char *const name;
9133 int signature;
9136 /* Describe number and signedness of arguments; arg[0] == result
9137 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
9138 /* 9: 64 bit pointer, 10: 32 bit pointer */
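/* Worked reading of two entries below (an illustration of the codes
   above): SH_BLTIN_MAC_HISI is { 1, 4, 4, 1 }, i.e. an unsigned result,
   two operands whose signedness doesn't matter, and a final unsigned
   operand; SH_BLTIN_LDUA_L is { 2, 10 }, i.e. a signed result computed
   from a single 32-bit pointer argument.  */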
9139 static const char signature_args[][4] =
9141 #define SH_BLTIN_V2SI2 0
9142 { 4, 4 },
9143 #define SH_BLTIN_V4HI2 1
9144 { 4, 4 },
9145 #define SH_BLTIN_V2SI3 2
9146 { 4, 4, 4 },
9147 #define SH_BLTIN_V4HI3 3
9148 { 4, 4, 4 },
9149 #define SH_BLTIN_V8QI3 4
9150 { 4, 4, 4 },
9151 #define SH_BLTIN_MAC_HISI 5
9152 { 1, 4, 4, 1 },
9153 #define SH_BLTIN_SH_HI 6
9154 { 4, 4, 1 },
9155 #define SH_BLTIN_SH_SI 7
9156 { 4, 4, 1 },
9157 #define SH_BLTIN_V4HI2V2SI 8
9158 { 4, 4, 4 },
9159 #define SH_BLTIN_V4HI2V8QI 9
9160 { 4, 4, 4 },
9161 #define SH_BLTIN_SISF 10
9162 { 4, 2 },
9163 #define SH_BLTIN_LDUA_L 11
9164 { 2, 10 },
9165 #define SH_BLTIN_LDUA_Q 12
9166 { 1, 10 },
9167 #define SH_BLTIN_STUA_L 13
9168 { 0, 10, 2 },
9169 #define SH_BLTIN_STUA_Q 14
9170 { 0, 10, 1 },
9171 #define SH_BLTIN_LDUA_L64 15
9172 { 2, 9 },
9173 #define SH_BLTIN_LDUA_Q64 16
9174 { 1, 9 },
9175 #define SH_BLTIN_STUA_L64 17
9176 { 0, 9, 2 },
9177 #define SH_BLTIN_STUA_Q64 18
9178 { 0, 9, 1 },
9179 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
9180 #define SH_BLTIN_2 19
9181 #define SH_BLTIN_SU 19
9182 { 1, 2 },
9183 #define SH_BLTIN_3 20
9184 #define SH_BLTIN_SUS 20
9185 { 2, 2, 1 },
9186 #define SH_BLTIN_PSSV 21
9187 { 0, 8, 2, 2 },
9188 #define SH_BLTIN_XXUU 22
9189 #define SH_BLTIN_UUUU 22
9190 { 1, 1, 1, 1 },
9191 #define SH_BLTIN_PV 23
9192 { 0, 8 },
9194 /* mcmv: operands considered unsigned. */
9195 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9196 /* mperm: control value considered unsigned int. */
9197 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9198 /* mshards_q: returns signed short. */
9199 /* nsb: takes long long arg, returns unsigned char. */
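/* As one concrete pairing: the MSHARDS_Q entry below uses SH_BLTIN_SUS
   ({ 2, 2, 1 }), which is how the "returns signed short" and "shift count
   is unsigned int" notes above are expressed in signature_args.  */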
9200 static const struct builtin_description bdesc[] =
9202 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
9203 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
9204 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
9205 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
9206 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
9207 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
9208 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
9209 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9210 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
9211 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
9212 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
9213 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
9214 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
9215 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
9216 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
9217 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
9218 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
9219 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
9220 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3 },
9221 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3 },
9222 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3 },
9223 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3 },
9224 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3 },
9225 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3 },
9226 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3 },
9227 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
9228 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
9229 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
9230 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
9231 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
9232 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
9233 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
9234 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
9235 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
9236 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
9237 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
9238 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
9239 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
9240 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
9241 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
9242 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
9243 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
9244 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
9245 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
9246 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
9247 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
9248 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
9249 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
9250 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
9251 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
9252 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
9253 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
9254 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
9255 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
9256 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
9257 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
9258 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
9259 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
9260 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
9261 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
9262 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
9263 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3 },
9264 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2 },
9265 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2 },
9266 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
9267 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9268 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9269 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9270 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9271 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9272 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9273 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9274 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9275 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64 },
9276 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64 },
9277 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64 },
9278 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64 },
9279 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64 },
9280 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64 },
9281 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64 },
9282 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64 },
9283 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
9284 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
9285 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
9288 static void
9289 sh_media_init_builtins (void)
9291 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
9292 const struct builtin_description *d;
9294 memset (shared, 0, sizeof shared);
9295 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
9297 tree type, arg_type = 0;
9298 int signature = d->signature;
9299 int i;
9301 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
9302 type = shared[signature];
9303 else
9305 int has_result = signature_args[signature][0] != 0;
9307 if ((signature_args[signature][1] & 8)
9308 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
9309 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
9310 continue;
9311 if (! TARGET_FPU_ANY
9312 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
9313 continue;
9314 type = void_list_node;
9315 for (i = 3; ; i--)
9317 int arg = signature_args[signature][i];
9318 int opno = i - 1 + has_result;
9320 if (arg & 8)
9321 arg_type = ptr_type_node;
9322 else if (arg)
9323 arg_type = (*lang_hooks.types.type_for_mode)
9324 (insn_data[d->icode].operand[opno].mode,
9325 (arg & 1));
9326 else if (i)
9327 continue;
9328 else
9329 arg_type = void_type_node;
9330 if (i == 0)
9331 break;
9332 type = tree_cons (NULL_TREE, arg_type, type);
9334 type = build_function_type (arg_type, type);
9335 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
9336 shared[signature] = type;
9338 lang_hooks.builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
9339 NULL, NULL_TREE);
9343 /* Implements target hook vector_mode_supported_p. */
9344 bool
9345 sh_vector_mode_supported_p (enum machine_mode mode)
9347 if (TARGET_FPU_ANY
9348 && ((mode == V2SFmode)
9349 || (mode == V4SFmode)
9350 || (mode == V16SFmode)))
9351 return true;
9353 else if (TARGET_SHMEDIA
9354 && ((mode == V8QImode)
9355 || (mode == V2HImode)
9356 || (mode == V4HImode)
9357 || (mode == V2SImode)))
9358 return true;
9360 return false;
9363 /* Implements target hook dwarf_calling_convention. Return an enum
9364 of dwarf_calling_convention. */
9366 sh_dwarf_calling_convention (tree func)
9368 if (sh_attr_renesas_p (func))
9369 return DW_CC_GNU_renesas_sh;
9371 return DW_CC_normal;
9374 static void
9375 sh_init_builtins (void)
9377 if (TARGET_SHMEDIA)
9378 sh_media_init_builtins ();
9381 /* Expand an expression EXP that calls a built-in function,
9382 with result going to TARGET if that's convenient
9383 (and in mode MODE if that's convenient).
9384 SUBTARGET may be used as the target for computing one of EXP's operands.
9385 IGNORE is nonzero if the value is to be ignored. */
9387 static rtx
9388 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9389 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
9391 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
9392 tree arglist = TREE_OPERAND (exp, 1);
9393 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9394 const struct builtin_description *d = &bdesc[fcode];
9395 enum insn_code icode = d->icode;
9396 int signature = d->signature;
9397 enum machine_mode tmode = VOIDmode;
9398 int nop = 0, i;
9399 rtx op[4];
9400 rtx pat = 0;
9402 if (signature_args[signature][0])
9404 if (ignore)
9405 return 0;
9407 tmode = insn_data[icode].operand[0].mode;
9408 if (! target
9409 || GET_MODE (target) != tmode
9410 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9411 target = gen_reg_rtx (tmode);
9412 op[nop++] = target;
9414 else
9415 target = 0;
9417 for (i = 1; i <= 3; i++, nop++)
9419 tree arg;
9420 enum machine_mode opmode, argmode;
9421 tree optype;
9423 if (! signature_args[signature][i])
9424 break;
9425 arg = TREE_VALUE (arglist);
9426 if (arg == error_mark_node)
9427 return const0_rtx;
9428 arglist = TREE_CHAIN (arglist);
9429 if (signature_args[signature][i] & 8)
9431 opmode = ptr_mode;
9432 optype = ptr_type_node;
9434 else
9436 opmode = insn_data[icode].operand[nop].mode;
9437 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
9439 argmode = TYPE_MODE (TREE_TYPE (arg));
9440 if (argmode != opmode)
9441 arg = build1 (NOP_EXPR, optype, arg);
9442 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
9443 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
9444 op[nop] = copy_to_mode_reg (opmode, op[nop]);
9447 switch (nop)
9449 case 1:
9450 pat = (*insn_data[d->icode].genfun) (op[0]);
9451 break;
9452 case 2:
9453 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
9454 break;
9455 case 3:
9456 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
9457 break;
9458 case 4:
9459 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
9460 break;
9461 default:
9462 gcc_unreachable ();
9464 if (! pat)
9465 return 0;
9466 emit_insn (pat);
9467 return target;
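/* Emit insns that apply the scalar SFmode operation CODE to each element
   of the V2SFmode source OP1, storing the result in OP0.  One unary_sf_op
   insn is emitted per element, selected by the trailing operands.  */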
9470 void
9471 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
9473 rtx sel0 = const0_rtx;
9474 rtx sel1 = const1_rtx;
9475 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
9476 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
9478 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
9479 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
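/* Likewise for a binary SFmode operation CODE on the V2SFmode operands
   OP1 and OP2, storing the result in OP0; one binary_sf_op insn is
   emitted per element.  */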
9482 void
9483 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
9485 rtx sel0 = const0_rtx;
9486 rtx sel1 = const1_rtx;
9487 rtx (*fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
9488 = gen_binary_sf_op;
9489 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
9491 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
9492 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
9495 /* Return true if a mode change from FROM to TO is invalid for registers
9496 in class CLASS. */
9497 bool
9498 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9499 enum reg_class class)
9501 /* We want to enable the use of SUBREGs as a means to
9502 VEC_SELECT a single element of a vector. */
9503 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
9504 return (reg_classes_intersect_p (GENERAL_REGS, class));
9506 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
9508 if (TARGET_LITTLE_ENDIAN)
9510 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
9511 return reg_classes_intersect_p (DF_REGS, class);
9513 else
9515 if (GET_MODE_SIZE (from) < 8)
9516 return reg_classes_intersect_p (DF_HI_REGS, class);
9519 return 0;
9523 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
9524 that label is used. */
9526 void
9527 sh_mark_label (rtx address, int nuses)
9529 if (GOTOFF_P (address))
9531 /* Extract the label or symbol. */
9532 address = XEXP (address, 0);
9533 if (GET_CODE (address) == PLUS)
9534 address = XEXP (address, 0);
9535 address = XVECEXP (address, 0, 0);
9537 if (GET_CODE (address) == LABEL_REF
9538 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
9539 LABEL_NUSES (XEXP (address, 0)) += nuses;
9542 /* Compute extra cost of moving data between one register class
9543 and another. */
9545 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
9546 uses this information. Hence, the general register <-> floating point
9547 register information here is not used for SFmode. */
9550 sh_register_move_cost (enum machine_mode mode,
9551 enum reg_class srcclass, enum reg_class dstclass)
9553 if (dstclass == T_REGS || dstclass == PR_REGS)
9554 return 10;
9556 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
9557 return 4;
9559 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
9560 && REGCLASS_HAS_FP_REG (srcclass)
9561 && REGCLASS_HAS_FP_REG (dstclass))
9562 return 4;
9564 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
9565 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
9567 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
9568 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
9569 return 9;
9571 if ((REGCLASS_HAS_FP_REG (dstclass)
9572 && REGCLASS_HAS_GENERAL_REG (srcclass))
9573 || (REGCLASS_HAS_GENERAL_REG (dstclass)
9574 && REGCLASS_HAS_FP_REG (srcclass)))
9575 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
9576 * ((GET_MODE_SIZE (mode) + 7) / 8U));
9578 if ((dstclass == FPUL_REGS
9579 && REGCLASS_HAS_GENERAL_REG (srcclass))
9580 || (srcclass == FPUL_REGS
9581 && REGCLASS_HAS_GENERAL_REG (dstclass)))
9582 return 5;
9584 if ((dstclass == FPUL_REGS
9585 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
9586 || (srcclass == FPUL_REGS
9587 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
9588 return 7;
9590 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9591 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9592 return 20;
9594 /* ??? ptabs faults on (value & 0x3) == 0x3 */
9595 if (TARGET_SHMEDIA
9596 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
9598 if (sh_gettrcost >= 0)
9599 return sh_gettrcost;
9600 else if (!TARGET_PT_FIXED)
9601 return 100;
9604 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9605 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9606 return 4;
9608 if (TARGET_SHMEDIA
9609 || (TARGET_FMOVD
9610 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
9611 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
9612 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
9614 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
9617 static rtx emit_load_ptr (rtx, rtx);
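/* Emit a load of the ptr_mode value at ADDR into REG, sign-extending it
   to Pmode when Pmode is wider than ptr_mode.  */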
9619 static rtx
9620 emit_load_ptr (rtx reg, rtx addr)
9622 rtx mem = gen_const_mem (ptr_mode, addr);
9624 if (Pmode != ptr_mode)
9625 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
9626 return emit_move_insn (reg, mem);
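/* Output the assembler code for a thunk: adjust the incoming "this"
   pointer by DELTA and, if VCALL_OFFSET is nonzero, by a further offset
   loaded from the vtable, then tail-call FUNCTION.  */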
9629 static void
9630 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
9631 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
9632 tree function)
9634 CUMULATIVE_ARGS cum;
9635 int structure_value_byref = 0;
9636 rtx this, this_value, sibcall, insns, funexp;
9637 tree funtype = TREE_TYPE (function);
9638 int simple_add = CONST_OK_FOR_ADD (delta);
9639 int did_load = 0;
9640 rtx scratch0, scratch1, scratch2;
9641 unsigned i;
9643 reload_completed = 1;
9644 epilogue_completed = 1;
9645 no_new_pseudos = 1;
9646 current_function_uses_only_leaf_regs = 1;
9647 reset_block_changes ();
9649 emit_note (NOTE_INSN_PROLOGUE_END);
9651 /* Find the "this" pointer. We have such a wide range of ABIs for the
9652 SH that it's best to do this completely machine independently.
9653 "this" is passed as first argument, unless a structure return pointer
9654 comes first, in which case "this" comes second. */
9655 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
9656 #ifndef PCC_STATIC_STRUCT_RETURN
9657 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
9658 structure_value_byref = 1;
9659 #endif /* not PCC_STATIC_STRUCT_RETURN */
9660 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
9662 tree ptype = build_pointer_type (TREE_TYPE (funtype));
9664 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
9666 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
9668 /* For SHcompact, we only have r0 for a scratch register: r1 is the
9669 static chain pointer (even if you can't have nested virtual functions
9670 right now, someone might implement them sometime), and the rest of the
9671 registers are used for argument passing, are callee-saved, or reserved. */
9672 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
9673 -ffixed-reg has been used. */
9674 if (! call_used_regs[0] || fixed_regs[0])
9675 error ("r0 needs to be available as a call-clobbered register");
9676 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
9677 if (! TARGET_SH5)
9679 if (call_used_regs[1] && ! fixed_regs[1])
9680 scratch1 = gen_rtx_REG (ptr_mode, 1);
9681 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
9682 to the location where struct values are to be returned. */
9683 if (call_used_regs[3] && ! fixed_regs[3])
9684 scratch2 = gen_rtx_REG (Pmode, 3);
9686 else if (TARGET_SHMEDIA)
9688 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
9689 if (i != REGNO (scratch0) &&
9690 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
9692 scratch1 = gen_rtx_REG (ptr_mode, i);
9693 break;
9695 if (scratch1 == scratch0)
9696 error ("Need a second call-clobbered general purpose register");
9697 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
9698 if (call_used_regs[i] && ! fixed_regs[i])
9700 scratch2 = gen_rtx_REG (Pmode, i);
9701 break;
9703 if (scratch2 == scratch0)
9704 error ("Need a call-clobbered target register");
9707 this_value = plus_constant (this, delta);
9708 if (vcall_offset
9709 && (simple_add || scratch0 != scratch1)
9710 && strict_memory_address_p (ptr_mode, this_value))
9712 emit_load_ptr (scratch0, this_value);
9713 did_load = 1;
9716 if (!delta)
9717 ; /* Do nothing. */
9718 else if (simple_add)
9719 emit_move_insn (this, this_value);
9720 else
9722 emit_move_insn (scratch1, GEN_INT (delta));
9723 emit_insn (gen_add2_insn (this, scratch1));
9726 if (vcall_offset)
9728 rtx offset_addr;
9730 if (!did_load)
9731 emit_load_ptr (scratch0, this);
9733 offset_addr = plus_constant (scratch0, vcall_offset);
9734 if (strict_memory_address_p (ptr_mode, offset_addr))
9735 ; /* Do nothing. */
9736 else if (! TARGET_SH5 && scratch0 != scratch1)
9738 /* scratch0 != scratch1, and we have indexed loads. Get a better
9739 schedule by loading the offset into r1 and using an indexed
9740 load - then the load of r1 can issue before the load from
9741 (this + delta) finishes. */
9742 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9743 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
9745 else if (CONST_OK_FOR_ADD (vcall_offset))
9747 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
9748 offset_addr = scratch0;
9750 else if (scratch0 != scratch1)
9752 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9753 emit_insn (gen_add2_insn (scratch0, scratch1));
9754 offset_addr = scratch0;
9756 else
9757 gcc_unreachable (); /* FIXME */
9758 emit_load_ptr (scratch0, offset_addr);
9760 if (Pmode != ptr_mode)
9761 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
9762 emit_insn (gen_add2_insn (this, scratch0));
9765 /* Generate a tail call to the target function. */
9766 if (! TREE_USED (function))
9768 assemble_external (function);
9769 TREE_USED (function) = 1;
9771 funexp = XEXP (DECL_RTL (function), 0);
9772 /* If the function is overridden, so is the thunk, hence we don't
9773 need GOT addressing even if this is a public symbol. */
9774 #if 0
9775 if (TARGET_SH1 && ! flag_weak)
9776 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
9777 else
9778 #endif
9779 if (TARGET_SH2 && flag_pic)
9781 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
9782 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
9784 else
9786 if (TARGET_SHMEDIA && flag_pic)
9788 funexp = gen_sym2PIC (funexp);
9789 PUT_MODE (funexp, Pmode);
9791 emit_move_insn (scratch2, funexp);
9792 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
9793 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
9795 sibcall = emit_call_insn (sibcall);
9796 SIBLING_CALL_P (sibcall) = 1;
9797 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
9798 emit_barrier ();
9800 /* Run just enough of rest_of_compilation to do scheduling and get
9801 the insns emitted. Note that use_thunk calls
9802 assemble_start_function and assemble_end_function. */
9804 insn_locators_initialize ();
9805 insns = get_insns ();
9807 if (optimize > 0)
9809 /* Initialize the bitmap obstacks. */
9810 bitmap_obstack_initialize (NULL);
9811 bitmap_obstack_initialize (&reg_obstack);
9812 if (! cfun->cfg)
9813 init_flow ();
9814 rtl_register_cfg_hooks ();
9815 init_rtl_bb_info (ENTRY_BLOCK_PTR);
9816 init_rtl_bb_info (EXIT_BLOCK_PTR);
9817 ENTRY_BLOCK_PTR->flags |= BB_RTL;
9818 EXIT_BLOCK_PTR->flags |= BB_RTL;
9819 find_basic_blocks (insns);
9821 if (flag_schedule_insns_after_reload)
9823 life_analysis (dump_file, PROP_FINAL);
9825 split_all_insns (1);
9827 schedule_insns (dump_file);
9829 /* We must split jmp insn in PIC case. */
9830 else if (flag_pic)
9831 split_all_insns_noflow ();
9834 sh_reorg ();
9836 if (optimize > 0 && flag_delayed_branch)
9837 dbr_schedule (insns, dump_file);
9839 shorten_branches (insns);
9840 final_start_function (insns, file, 1);
9841 final (insns, file, 1);
9842 final_end_function ();
9844 if (optimize > 0)
9846 /* Release all memory allocated by flow. */
9847 free_basic_block_vars ();
9849 /* Release the bitmap obstacks. */
9850 bitmap_obstack_release (&reg_obstack);
9851 bitmap_obstack_release (NULL);
9854 reload_completed = 0;
9855 epilogue_completed = 0;
9856 no_new_pseudos = 0;
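/* Return an rtx holding the address of the function named NAME, of kind
   KIND.  When compiling PIC, SFUNC_GOT symbols are loaded through the GOT
   and SFUNC_STATIC symbols via a GOTOFF relocation; if TARGET is nonnull
   the address may be loaded into it and TARGET is returned.  */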
9860 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
9862 rtx sym;
9864 /* If this is not an ordinary function, the name usually comes from a
9865 string literal or an sprintf buffer. Make sure we use the same
9866 string consistently, so that cse will be able to unify address loads. */
9867 if (kind != FUNCTION_ORDINARY)
9868 name = IDENTIFIER_POINTER (get_identifier (name));
9869 sym = gen_rtx_SYMBOL_REF (Pmode, name);
9870 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
9871 if (flag_pic)
9872 switch (kind)
9874 case FUNCTION_ORDINARY:
9875 break;
9876 case SFUNC_GOT:
9878 rtx reg = target ? target : gen_reg_rtx (Pmode);
9880 emit_insn (gen_symGOT2reg (reg, sym));
9881 sym = reg;
9882 break;
9884 case SFUNC_STATIC:
9886 /* ??? To allow cse to work, we use GOTOFF relocations.
9887 We could add combiner patterns to transform this into
9888 straight pc-relative calls with sym2PIC / bsrf when
9889 label load and function call are still 1:1 and in the
9890 same basic block during combine. */
9891 rtx reg = target ? target : gen_reg_rtx (Pmode);
9893 emit_insn (gen_symGOTOFF2reg (reg, sym));
9894 sym = reg;
9895 break;
9898 if (target && sym != target)
9900 emit_move_insn (target, sym);
9901 return target;
9903 return sym;
9906 /* Return the number of the first general purpose register in set S, or -1 if none is set. */
9907 static int
9908 scavenge_reg (HARD_REG_SET *s)
9910 int r;
9911 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
9912 if (TEST_HARD_REG_BIT (*s, r))
9913 return r;
9914 return -1;
9918 sh_get_pr_initial_val (void)
9920 rtx val;
9922 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
9923 PR register on SHcompact, because it might be clobbered by the prologue.
9924 We check first if that is known to be the case. */
9925 if (TARGET_SHCOMPACT
9926 && ((current_function_args_info.call_cookie
9927 & ~ CALL_COOKIE_RET_TRAMP (1))
9928 || current_function_has_nonlocal_label))
9929 return gen_frame_mem (SImode, return_address_pointer_rtx);
9931 /* If we haven't finished rtl generation, there might be a nonlocal label
9932 that we haven't seen yet.
9933 ??? get_hard_reg_initial_val fails if it is called while no_new_pseudos
9934 is set, unless it has been called before for the same register. And even
9935 then, we end up in trouble if we didn't use the register in the same
9936 basic block before. So call get_hard_reg_initial_val now and wrap it
9937 in an unspec if we might need to replace it. */
9938 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
9939 combine can put the pseudo returned by get_hard_reg_initial_val into
9940 instructions that need a general purpose register, which will fail to
9941 be recognized when the pseudo becomes allocated to PR. */
9943 = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
9944 if (TARGET_SH1)
9945 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
9946 return val;
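/* Try to expand a CODE comparison of the T register (sh_compare_op0)
   against the constant sh_compare_op1 into TARGET.  Return 1 on success,
   0 if the operands do not have the required form.  */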
9950 sh_expand_t_scc (enum rtx_code code, rtx target)
9952 rtx result = target;
9953 HOST_WIDE_INT val;
9955 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
9956 || GET_CODE (sh_compare_op1) != CONST_INT)
9957 return 0;
9958 if (GET_CODE (result) != REG)
9959 result = gen_reg_rtx (SImode);
9960 val = INTVAL (sh_compare_op1);
9961 if ((code == EQ && val == 1) || (code == NE && val == 0))
9962 emit_insn (gen_movt (result));
9963 else if ((code == EQ && val == 0) || (code == NE && val == 1))
9965 emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
9966 emit_insn (gen_subc (result, result, result));
9967 emit_insn (gen_addsi3 (result, result, const1_rtx));
9969 else if (code == EQ || code == NE)
9970 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
9971 else
9972 return 0;
9973 if (result != target)
9974 emit_move_insn (target, result);
9975 return 1;
9978 /* INSN is an sfunc; return the rtx that describes the address used. */
9979 static rtx
9980 extract_sfunc_addr (rtx insn)
9982 rtx pattern, part = NULL_RTX;
9983 int len, i;
9985 pattern = PATTERN (insn);
9986 len = XVECLEN (pattern, 0);
9987 for (i = 0; i < len; i++)
9989 part = XVECEXP (pattern, 0, i);
9990 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
9991 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
9992 return XEXP (part, 0);
9994 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
9995 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
9998 /* Verify that the register in use_sfunc_addr still agrees with the address
9999 used in the sfunc. This prevents fill_slots_from_thread from changing
10000 use_sfunc_addr.
10001 INSN is the use_sfunc_addr instruction, and REG is the register it
10002 guards. */
10004 check_use_sfunc_addr (rtx insn, rtx reg)
10006 /* Search for the sfunc. It should really come right after INSN. */
10007 while ((insn = NEXT_INSN (insn)))
10009 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
10010 break;
10011 if (! INSN_P (insn))
10012 continue;
10014 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
10015 insn = XVECEXP (PATTERN (insn), 0, 0);
10016 if (GET_CODE (PATTERN (insn)) != PARALLEL
10017 || get_attr_type (insn) != TYPE_SFUNC)
10018 continue;
10019 return rtx_equal_p (extract_sfunc_addr (insn), reg);
10021 gcc_unreachable ();
10024 /* This function returns a constant rtx that represents 2**15 / pi in
10025 SFmode. It's used to scale SFmode angles, in radians, to a
10026 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
10027 maps to 0x10000). */
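/* For example, an angle of pi/2 radians scales to
   (pi/2) * (2**15 / pi) = 0x4000, one quarter of a full circle.  */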
10029 static GTY(()) rtx sh_fsca_sf2int_rtx;
10032 sh_fsca_sf2int (void)
10034 if (! sh_fsca_sf2int_rtx)
10036 REAL_VALUE_TYPE rv;
10038 real_from_string (&rv, "10430.378350470453");
10039 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
10042 return sh_fsca_sf2int_rtx;
10045 /* This function returns a constant rtx that represents 2**15 / pi in
10046 DFmode. It's used to scale DFmode angles, in radians, to a
10047 fixed-point signed 16.16-bit fraction of a full circle (i.e., 2*pi
10048 maps to 0x10000). */
10050 static GTY(()) rtx sh_fsca_df2int_rtx;
10053 sh_fsca_df2int (void)
10055 if (! sh_fsca_df2int_rtx)
10057 REAL_VALUE_TYPE rv;
10059 real_from_string (&rv, "10430.378350470453");
10060 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
10063 return sh_fsca_df2int_rtx;
10066 /* This function returns a constant rtx that represents pi / 2**15 in
10067 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
10068 of a full circle back to an SFmode value (i.e., 0x10000 maps to
10069 2*pi). */
10071 static GTY(()) rtx sh_fsca_int2sf_rtx;
10074 sh_fsca_int2sf (void)
10076 if (! sh_fsca_int2sf_rtx)
10078 REAL_VALUE_TYPE rv;
10080 real_from_string (&rv, "9.587379924285257e-5");
10081 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
10084 return sh_fsca_int2sf_rtx;
10087 /* Initialize the CUMULATIVE_ARGS structure. */
10089 void
10090 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
10091 tree fntype,
10092 rtx libname ATTRIBUTE_UNUSED,
10093 tree fndecl,
10094 signed int n_named_args,
10095 enum machine_mode mode)
10097 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
10098 pcum->free_single_fp_reg = 0;
10099 pcum->stack_regs = 0;
10100 pcum->byref_regs = 0;
10101 pcum->byref = 0;
10102 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
10104 /* XXX - Should we check TARGET_HITACHI here ??? */
10105 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
10107 if (fntype)
10109 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
10110 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
10111 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
10112 pcum->arg_count [(int) SH_ARG_INT]
10113 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
10115 pcum->call_cookie
10116 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10117 && pcum->arg_count [(int) SH_ARG_INT] == 0
10118 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
10119 ? int_size_in_bytes (TREE_TYPE (fntype))
10120 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
10121 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
10122 == FIRST_RET_REG));
10124 else
10126 pcum->arg_count [(int) SH_ARG_INT] = 0;
10127 pcum->prototype_p = FALSE;
10128 if (mode != VOIDmode)
10130 pcum->call_cookie =
10131 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10132 && GET_MODE_SIZE (mode) > 4
10133 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
10135 /* If the default ABI is the Renesas ABI then all library
10136 calls must assume that the library will be using the
10137 Renesas ABI. So if the function would return its result
10138 in memory then we must force the address of this memory
10139 block onto the stack. Ideally we would like to call
10140 targetm.calls.return_in_memory() here but we do not have
10141 the TYPE or the FNDECL available so we synthesize the
10142 contents of that function as best we can. */
10143 pcum->force_mem =
10144 (TARGET_DEFAULT & MASK_HITACHI)
10145 && (mode == BLKmode
10146 || (GET_MODE_SIZE (mode) > 4
10147 && !(mode == DFmode
10148 && TARGET_FPU_DOUBLE)));
10150 else
10152 pcum->call_cookie = 0;
10153 pcum->force_mem = FALSE;
10158 /* Determine if two hard register sets intersect.
10159 Return 1 if they do. */
10161 static int
10162 hard_regs_intersect_p (HARD_REG_SET *a, HARD_REG_SET *b)
10164 HARD_REG_SET c;
10165 COPY_HARD_REG_SET (c, *a);
10166 AND_HARD_REG_SET (c, *b);
10167 GO_IF_HARD_REG_SUBSET (c, reg_class_contents[(int) NO_REGS], lose);
10168 return 1;
10169 lose:
10170 return 0;
10173 #ifdef TARGET_ADJUST_UNROLL_MAX
10174 static int
10175 sh_adjust_unroll_max (struct loop * loop, int insn_count,
10176 int max_unrolled_insns, int strength_reduce_p,
10177 int unroll_type)
10179 /* This doesn't work in 4.0 because the old unroller & loop.h are gone. */
10180 if (TARGET_ADJUST_UNROLL && TARGET_SHMEDIA)
10182 /* Throttle back loop unrolling so that the costs of using more
10183 targets than the eight target registers we have don't outweigh
10184 the benefits of unrolling. */
10185 rtx insn;
10186 int n_labels = 0, n_calls = 0, n_exit_dest = 0, n_inner_loops = -1;
10187 int n_barriers = 0;
10188 rtx dest;
10189 int i;
10190 rtx exit_dest[8];
10191 int threshold;
10192 int unroll_benefit = 0, mem_latency = 0;
10193 int base_cost, best_cost, cost;
10194 int factor, best_factor;
10195 int n_dest;
10196 unsigned max_iterations = 32767;
10197 int n_iterations;
10198 int need_precond = 0, precond = 0;
10199 basic_block * bbs = get_loop_body (loop);
10200 struct niter_desc *desc;
10202 /* Assume that all labels inside the loop are used from inside the
10203 loop. If the loop has multiple entry points, it is unlikely to
10204 be unrolled anyway.
10205 Also assume that all calls are to different functions. That is
10206 somewhat pessimistic, but if you have lots of calls, unrolling the
10207 loop is not likely to gain you much in the first place. */
10208 i = loop->num_nodes - 1;
10209 for (insn = BB_HEAD (bbs[i]); ; )
10211 if (GET_CODE (insn) == CODE_LABEL)
10212 n_labels++;
10213 else if (GET_CODE (insn) == CALL_INSN)
10214 n_calls++;
10215 else if (GET_CODE (insn) == NOTE
10216 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
10217 n_inner_loops++;
10218 else if (GET_CODE (insn) == BARRIER)
10219 n_barriers++;
10220 if (insn != BB_END (bbs[i]))
10221 insn = NEXT_INSN (insn);
10222 else if (--i >= 0)
10223 insn = BB_HEAD (bbs[i]);
10224 else
10225 break;
10227 free (bbs);
10228 /* One label for the loop top is normal, and it won't be duplicated by
10229 unrolling. */
10230 if (n_labels <= 1)
10231 return max_unrolled_insns;
10232 if (n_inner_loops > 0)
10233 return 0;
10234 for (dest = loop->exit_labels; dest && n_exit_dest < 8;
10235 dest = LABEL_NEXTREF (dest))
10237 for (i = n_exit_dest - 1;
10238 i >= 0 && XEXP (dest, 0) != XEXP (exit_dest[i], 0); i--);
10239 if (i < 0)
10240 exit_dest[n_exit_dest++] = dest;
10242 /* If the loop top and call and exit destinations are enough to fill up
10243 the target registers, we're unlikely to do any more damage by
10244 unrolling. */
10245 if (n_calls + n_exit_dest >= 7)
10246 return max_unrolled_insns;
10248 /* ??? In the new loop unroller, there is no longer any strength
10249 reduction information available. Thus, when it comes to unrolling,
10250 we know the cost of everything, but we know the value of nothing. */
10251 #if 0
10252 if (strength_reduce_p
10253 && (unroll_type == LPT_UNROLL_RUNTIME
10254 || unroll_type == LPT_UNROLL_CONSTANT
10255 || unroll_type == LPT_PEEL_COMPLETELY))
10257 struct loop_ivs *ivs = LOOP_IVS (loop);
10258 struct iv_class *bl;
10260 /* We'll save one compare-and-branch in each loop body copy
10261 but the last one. */
10262 unroll_benefit = 1;
10263 /* Assess the benefit of removing biv & giv updates. */
10264 for (bl = ivs->list; bl; bl = bl->next)
10266 rtx increment = biv_total_increment (bl);
10267 struct induction *v;
10269 if (increment && GET_CODE (increment) == CONST_INT)
10271 unroll_benefit++;
10272 for (v = bl->giv; v; v = v->next_iv)
10274 if (! v->ignore && v->same == 0
10275 && GET_CODE (v->mult_val) == CONST_INT)
10276 unroll_benefit++;
10277 /* If this giv uses an array, try to determine
10278 a maximum iteration count from the size of the
10279 array. This need not be correct all the time,
10280 but should not be too far off the mark too often. */
10281 while (v->giv_type == DEST_ADDR)
10283 rtx mem = PATTERN (v->insn);
10284 tree mem_expr, type, size_tree;
10286 if (GET_CODE (SET_SRC (mem)) == MEM)
10287 mem = SET_SRC (mem);
10288 else if (GET_CODE (SET_DEST (mem)) == MEM)
10289 mem = SET_DEST (mem);
10290 else
10291 break;
10292 mem_expr = MEM_EXPR (mem);
10293 if (! mem_expr)
10294 break;
10295 type = TREE_TYPE (mem_expr);
10296 if (TREE_CODE (type) != ARRAY_TYPE
10297 || ! TYPE_SIZE (type) || ! TYPE_SIZE_UNIT (type))
10298 break;
10299 size_tree = fold_build2 (TRUNC_DIV_EXPR,
10300 bitsizetype,
10301 TYPE_SIZE (type),
10302 TYPE_SIZE_UNIT (type));
10303 if (TREE_CODE (size_tree) == INTEGER_CST
10304 && ! TREE_INT_CST_HIGH (size_tree)
10305 && TREE_INT_CST_LOW (size_tree) < max_iterations)
10306 max_iterations = TREE_INT_CST_LOW (size_tree);
10307 break;
10313 #else /* 0 */
10314 /* Assume there is at least some benefit. */
10315 unroll_benefit = 1;
10316 #endif /* 0 */
10318 desc = get_simple_loop_desc (loop);
10319 n_iterations = desc->const_iter ? desc->niter : 0;
10320 max_iterations
10321 = max_iterations < desc->niter_max ? max_iterations : desc->niter_max;
10323 if (! strength_reduce_p || ! n_iterations)
10324 need_precond = 1;
10325 if (! n_iterations)
10327 n_iterations
10328 = max_iterations < 3 ? max_iterations : max_iterations * 3 / 4;
10329 if (! n_iterations)
10330 return 0;
10332 #if 0 /* ??? See above - missing induction variable information. */
10333 while (unroll_benefit > 1) /* no loop */
10335 /* We include the benefit of biv/ giv updates. Check if some or
10336 all of these updates are likely to fit into a scheduling
10337 bubble of a load.
10338 We check for the following case:
10339 - All the insns leading to the first JUMP_INSN are in a strict
10340 dependency chain.
10341 - there is at least one memory reference in them.
10343 When we find such a pattern, we assume that we can hide as many
10344 updates as the total of the load latency is, if we have an
10345 unroll factor of at least two. We might or might not also do
10346 this without unrolling, so rather than considering this as an
10347 extra unroll benefit, discount it in the unroll benefits of unroll
10348 factors higher than two. */
10350 rtx set, last_set;
10352 insn = next_active_insn (loop->start);
10353 last_set = single_set (insn);
10354 if (! last_set)
10355 break;
10356 if (GET_CODE (SET_SRC (last_set)) == MEM)
10357 mem_latency += 2;
10358 for (insn = NEXT_INSN (insn); insn != end; insn = NEXT_INSN (insn))
10360 if (! INSN_P (insn))
10361 continue;
10362 if (GET_CODE (insn) == JUMP_INSN)
10363 break;
10364 if (! reg_referenced_p (SET_DEST (last_set), PATTERN (insn)))
10366 /* Check if this is a to-be-reduced giv insn. */
10367 struct loop_ivs *ivs = LOOP_IVS (loop);
10368 struct iv_class *bl;
10369 struct induction *v;
10370 for (bl = ivs->list; bl; bl = bl->next)
10372 if (bl->biv->insn == insn)
10373 goto is_biv;
10374 for (v = bl->giv; v; v = v->next_iv)
10375 if (v->insn == insn)
10376 goto is_giv;
10378 mem_latency--;
10379 is_biv:
10380 is_giv:
10381 continue;
10383 set = single_set (insn);
10384 if (! set)
10385 continue;
10386 if (GET_CODE (SET_SRC (set)) == MEM)
10387 mem_latency += 2;
10388 last_set = set;
10390 if (mem_latency < 0)
10391 mem_latency = 0;
10392 else if (mem_latency > unroll_benefit - 1)
10393 mem_latency = unroll_benefit - 1;
10394 break;
10396 #endif /* 0 */
10397 if (n_labels + (unroll_benefit + n_labels * 8) / n_iterations
10398 <= unroll_benefit)
10399 return max_unrolled_insns;
10401 n_dest = n_labels + n_calls + n_exit_dest;
10402 base_cost = n_dest <= 8 ? 0 : n_dest - 7;
10403 best_cost = 0;
10404 best_factor = 1;
10405 if (n_barriers * 2 > n_labels - 1)
10406 n_barriers = (n_labels - 1) / 2;
10407 for (factor = 2; factor <= 8; factor++)
10409 /* Bump up preconditioning cost for each power of two. */
10410 if (! (factor & (factor-1)))
10411 precond += 4;
10412 /* When preconditioning, only powers of two will be considered. */
10413 else if (need_precond)
10414 continue;
10415 n_dest = ((unroll_type != LPT_PEEL_COMPLETELY)
10416 + (n_labels - 1) * factor + n_calls + n_exit_dest
10417 - (n_barriers * factor >> 1)
10418 + need_precond);
10419 cost
10420 = ((n_dest <= 8 ? 0 : n_dest - 7)
10421 - base_cost * factor
10422 - ((factor > 2 ? unroll_benefit - mem_latency : unroll_benefit)
10423 * (factor - (unroll_type != LPT_PEEL_COMPLETELY)))
10424 + ((unroll_benefit + 1 + (n_labels - 1) * factor)
10425 / n_iterations));
10426 if (need_precond)
10427 cost += (precond + unroll_benefit * factor / 2) / n_iterations;
10428 if (cost < best_cost)
10430 best_cost = cost;
10431 best_factor = factor;
10434 threshold = best_factor * insn_count;
10435 if (max_unrolled_insns > threshold)
10436 max_unrolled_insns = threshold;
10438 return max_unrolled_insns;
10440 #endif /* TARGET_ADJUST_UNROLL_MAX */
10442 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
10443 not enter into CONST_DOUBLE for the replace.
10445 Note that copying is not done so X must not be shared unless all copies
10446 are to be modified.
10448 This is like replace_rtx, except that we operate on N_REPLACEMENTS
10449 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
10450 replacements[n*2+1] - and that we take mode changes into account.
10452 If a replacement is ambiguous, return NULL_RTX.
10454 If MODIFY is zero, don't modify any rtl in place,
10455 just return zero or nonzero for failure / success. */
10458 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
10460 int i, j;
10461 const char *fmt;
10463 /* The following prevents infinite looping when we change a MEM inside
10464 a CONST_DOUBLE into the same CONST_DOUBLE. */
10465 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
10466 return x;
10468 for (i = n_replacements - 1; i >= 0 ; i--)
10469 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
10470 return replacements[i*2+1];
10472 /* Allow this function to make replacements in EXPR_LISTs. */
10473 if (x == 0)
10474 return 0;
10476 if (GET_CODE (x) == SUBREG)
10478 rtx new = replace_n_hard_rtx (SUBREG_REG (x), replacements,
10479 n_replacements, modify);
10481 if (GET_CODE (new) == CONST_INT)
10483 x = simplify_subreg (GET_MODE (x), new,
10484 GET_MODE (SUBREG_REG (x)),
10485 SUBREG_BYTE (x));
10486 if (! x)
10487 abort ();
10489 else if (modify)
10490 SUBREG_REG (x) = new;
10492 return x;
10494 else if (GET_CODE (x) == REG)
10496 unsigned regno = REGNO (x);
10497 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
10498 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
10499 rtx result = NULL_RTX;
10501 for (i = n_replacements - 1; i >= 0; i--)
10503 rtx from = replacements[i*2];
10504 rtx to = replacements[i*2+1];
10505 unsigned from_regno, from_nregs, to_regno, new_regno;
10507 if (GET_CODE (from) != REG)
10508 continue;
10509 from_regno = REGNO (from);
10510 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
10511 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
10512 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
10514 if (regno < from_regno
10515 || regno + nregs > from_regno + nregs
10516 || GET_CODE (to) != REG
10517 || result)
10518 return NULL_RTX;
10519 to_regno = REGNO (to);
10520 if (to_regno < FIRST_PSEUDO_REGISTER)
10522 new_regno = regno + to_regno - from_regno;
10523 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
10524 != nregs)
10525 return NULL_RTX;
10526 result = gen_rtx_REG (GET_MODE (x), new_regno);
10528 else if (GET_MODE (x) <= GET_MODE (to))
10529 result = gen_lowpart_common (GET_MODE (x), to);
10530 else
10531 result = gen_lowpart_SUBREG (GET_MODE (x), to);
10534 return result ? result : x;
10536 else if (GET_CODE (x) == ZERO_EXTEND)
10538 rtx new = replace_n_hard_rtx (XEXP (x, 0), replacements,
10539 n_replacements, modify);
10541 if (GET_CODE (new) == CONST_INT)
10543 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
10544 new, GET_MODE (XEXP (x, 0)));
10545 if (! x)
10546 abort ();
10548 else if (modify)
10549 XEXP (x, 0) = new;
10551 return x;
10554 fmt = GET_RTX_FORMAT (GET_CODE (x));
10555 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10557 rtx new;
10559 if (fmt[i] == 'e')
10561 new = replace_n_hard_rtx (XEXP (x, i), replacements,
10562 n_replacements, modify);
10563 if (!new)
10564 return NULL_RTX;
10565 if (modify)
10566 XEXP (x, i) = new;
10568 else if (fmt[i] == 'E')
10569 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10571 new = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
10572 n_replacements, modify);
10573 if (!new)
10574 return NULL_RTX;
10575 if (modify)
10576 XVECEXP (x, i, j) = new;
10580 return x;
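/* Return an rtx for the value of X truncated to MODE.  An existing
   ZERO_EXTEND or SIGN_EXTEND of a value at least as wide as MODE is
   looked through; a narrower inner value is re-extended with the original
   code, but only if NEED_SIGN_EXT is zero or that code is SIGN_EXTEND.  */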
10584 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
10586 enum rtx_code code = TRUNCATE;
10588 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
10590 rtx inner = XEXP (x, 0);
10591 enum machine_mode inner_mode = GET_MODE (inner);
10593 if (inner_mode == mode)
10594 return inner;
10595 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
10596 x = inner;
10597 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
10598 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
10600 code = GET_CODE (x);
10601 x = inner;
10604 return gen_rtx_fmt_e (code, mode, x);
10607 /* Called via for_each_rtx after reload, to clean up truncates of
10608 registers that span multiple actual hard registers. */
10610 shmedia_cleanup_truncate (rtx *p, void *n_changes)
10612 rtx x = *p, reg;
10614 if (GET_CODE (x) != TRUNCATE)
10615 return 0;
10616 reg = XEXP (x, 0);
10617 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && GET_CODE (reg) == REG)
10619 enum machine_mode reg_mode = GET_MODE (reg);
10620 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
10621 subreg_lowpart_offset (DImode, reg_mode));
10622 *(int*) n_changes += 1;
10623 return -1;
10625 return 0;
10628 /* Loads and stores depend on the highpart of the address. However,
10629 set_attr_alternative does not give well-defined results before reload,
10630 so we must look at the rtl ourselves to see if any of the feeding
10631 registers is used in a memref. */
10633 /* Called by sh_contains_memref_p via for_each_rtx. */
10634 static int
10635 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
10637 return (GET_CODE (*loc) == MEM);
10640 /* Return nonzero iff INSN contains a MEM. */
10642 sh_contains_memref_p (rtx insn)
10644 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
10647 /* FNADDR is the MEM expression from a call expander. Return an address
10648 to use in an SHmedia insn pattern. */
10650 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
10652 int is_sym;
10654 fnaddr = XEXP (fnaddr, 0);
10655 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
10656 if (flag_pic && is_sym)
10658 if (! SYMBOL_REF_LOCAL_P (fnaddr))
10660 rtx reg = gen_reg_rtx (Pmode);
10662 /* We must not use GOTPLT for sibcalls, because PIC_REG
10663 must be restored before the PLT code gets to run. */
10664 if (is_sibcall)
10665 emit_insn (gen_symGOT2reg (reg, fnaddr));
10666 else
10667 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
10668 fnaddr = reg;
10670 else
10672 fnaddr = gen_sym2PIC (fnaddr);
10673 PUT_MODE (fnaddr, Pmode);
10676 /* If ptabs might trap, make this visible to the rest of the compiler.
10677 We generally assume that symbols pertain to valid locations, but
10678 it is possible to generate invalid symbols with asm or linker tricks.
10679 In a list of functions where each returns its successor, an invalid
10680 symbol might denote an empty list. */
10681 if (!TARGET_PT_FIXED
10682 && (!is_sym || TARGET_INVALID_SYMBOLS)
10683 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
10685 rtx tr = gen_reg_rtx (PDImode);
10687 emit_insn (gen_ptabs (tr, fnaddr));
10688 fnaddr = tr;
10690 else if (! target_reg_operand (fnaddr, Pmode))
10691 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
10692 return fnaddr;
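/* Used to implement the secondary reload target hook: return the register
   class needed as an intermediate when copying X in MODE to or from a
   register of class CLASS (IN_P is true for an input reload), or NO_REGS
   if no intermediate class is required; SRI->icode may instead be set so
   that the reload is done by a special insn pattern.  */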
10695 enum reg_class
10696 sh_secondary_reload (bool in_p, rtx x, enum reg_class class,
10697 enum machine_mode mode, secondary_reload_info *sri)
10699 if (in_p)
10701 if (REGCLASS_HAS_FP_REG (class)
10702 && ! TARGET_SHMEDIA
10703 && immediate_operand ((x), mode)
10704 && ! ((fp_zero_operand (x) || fp_one_operand (x))
10705 && mode == SFmode && fldi_ok ()))
10706 switch (mode)
10708 case SFmode:
10709 sri->icode = CODE_FOR_reload_insf__frn;
10710 return NO_REGS;
10711 case DFmode:
10712 sri->icode = CODE_FOR_reload_indf__frn;
10713 return NO_REGS;
10714 case SImode:
10715 /* ??? If we knew that we are in the appropriate mode -
10716 single precision - we could use a reload pattern directly. */
10717 return FPUL_REGS;
10718 default:
10719 abort ();
10721 if (class == FPUL_REGS
10722 && ((GET_CODE (x) == REG
10723 && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
10724 || REGNO (x) == T_REG))
10725 || GET_CODE (x) == PLUS))
10726 return GENERAL_REGS;
10727 if (class == FPUL_REGS && immediate_operand (x, mode))
10729 if (GET_CODE (x) == CONST_INT && CONST_OK_FOR_I08 (INTVAL (x)))
10730 return GENERAL_REGS;
10731 sri->icode = CODE_FOR_reload_insi__i_fpul;
10732 return NO_REGS;
10734 if (class == FPSCR_REGS
10735 && ((GET_CODE (x) == REG && REGNO (x) >= FIRST_PSEUDO_REGISTER)
10736 || (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == PLUS)))
10737 return GENERAL_REGS;
10738 if (REGCLASS_HAS_FP_REG (class)
10739 && TARGET_SHMEDIA
10740 && immediate_operand (x, mode)
10741 && x != CONST0_RTX (GET_MODE (x))
10742 && GET_MODE (x) != V4SFmode)
10743 return GENERAL_REGS;
10744 if ((mode == QImode || mode == HImode)
10745 && TARGET_SHMEDIA && inqhi_operand (x, mode))
10747 sri->icode = ((mode == QImode)
10748 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
10749 return NO_REGS;
10751 if (TARGET_SHMEDIA && class == GENERAL_REGS
10752 && (GET_CODE (x) == LABEL_REF || PIC_DIRECT_ADDR_P (x)))
10753 return TARGET_REGS;
10754 } /* end of input-only processing. */
10756 if (((REGCLASS_HAS_FP_REG (class)
10757 && (GET_CODE (x) == REG
10758 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
10759 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
10760 && TARGET_FMOVD))))
10761 || (REGCLASS_HAS_GENERAL_REG (class)
10762 && GET_CODE (x) == REG
10763 && FP_REGISTER_P (REGNO (x))))
10764 && ! TARGET_SHMEDIA
10765 && (mode == SFmode || mode == SImode))
10766 return FPUL_REGS;
10767 if ((class == FPUL_REGS
10768 || (REGCLASS_HAS_FP_REG (class)
10769 && ! TARGET_SHMEDIA && mode == SImode))
10770 && (GET_CODE (x) == MEM
10771 || (GET_CODE (x) == REG
10772 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
10773 || REGNO (x) == T_REG
10774 || system_reg_operand (x, VOIDmode)))))
10776 if (class == FPUL_REGS)
10777 return GENERAL_REGS;
10778 return FPUL_REGS;
10780 if ((class == TARGET_REGS
10781 || (TARGET_SHMEDIA && class == SIBCALL_REGS))
10782 && !EXTRA_CONSTRAINT_Csy (x)
10783 && (GET_CODE (x) != REG || ! GENERAL_REGISTER_P (REGNO (x))))
10784 return GENERAL_REGS;
10785 if ((class == MAC_REGS || class == PR_REGS)
10786 && GET_CODE (x) == REG && ! GENERAL_REGISTER_P (REGNO (x))
10787 && class != REGNO_REG_CLASS (REGNO (x)))
10788 return GENERAL_REGS;
10789 if (class != GENERAL_REGS && GET_CODE (x) == REG
10790 && TARGET_REGISTER_P (REGNO (x)))
10791 return GENERAL_REGS;
10792 return NO_REGS;
10795 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
10797 /* This defines the storage for the variable part of a -mboard= option.
10798 It is only required when using the sh-superh-elf target. */
10799 #ifdef _SUPERH_H
10800 const char * boardtype = "7750p2";
10801 const char * osruntime = "bare";
10802 #endif
10804 #include "gt-sh.h"