[official-gcc.git] / gcc / config / sh / sh.c
1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002,
3 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
4 Contributed by Steve Chamberlain (sac@cygnus.com).
5 Improved by Jim Wilson (wilson@cygnus.com).
7 This file is part of GCC.
9 GCC is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
14 GCC is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING. If not, write to
21 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
22 Boston, MA 02110-1301, USA. */
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "insn-config.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "flags.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "function.h"
35 #include "regs.h"
36 #include "hard-reg-set.h"
37 #include "output.h"
38 #include "insn-attr.h"
39 #include "toplev.h"
40 #include "recog.h"
41 #include "c-pragma.h"
42 #include "integrate.h"
43 #include "dwarf2.h"
44 #include "tm_p.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "real.h"
48 #include "langhooks.h"
49 #include "basic-block.h"
50 #include "cfglayout.h"
51 #include "intl.h"
52 #include "sched-int.h"
53 #include "ggc.h"
54 #include "tree-gimple.h"
55 #include "cfgloop.h"
56 #include "alloc-pool.h"
59 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
61 #define MSW (TARGET_LITTLE_ENDIAN ? 1 : 0)
62 #define LSW (TARGET_LITTLE_ENDIAN ? 0 : 1)
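/* Illustrative reading of the two macros above: word 0 of a multi-word
   value is the least significant word on a little-endian target and the
   most significant word on a big-endian one, so the byte offset of the
   LSW of a DImode memory operand is 4 * LSW, i.e. 0 when
   TARGET_LITTLE_ENDIAN and 4 otherwise.  */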
64 /* These are some macros to abstract register modes. */
65 #define CONST_OK_FOR_ADD(size) \
66 (TARGET_SHMEDIA ? CONST_OK_FOR_I10 (size) : CONST_OK_FOR_I08 (size))
67 #define GEN_MOV (*(TARGET_SHMEDIA64 ? gen_movdi : gen_movsi))
68 #define GEN_ADD3 (*(TARGET_SHMEDIA64 ? gen_adddi3 : gen_addsi3))
69 #define GEN_SUB3 (*(TARGET_SHMEDIA64 ? gen_subdi3 : gen_subsi3))
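/* A minimal usage sketch (the operands here are hypothetical, not taken
   from this file): something like

     emit_insn (GEN_ADD3 (reg, reg, GEN_INT (-8)));

   emits an adddi3 on SHmedia64 and an addsi3 on every other subtarget,
   so callers can adjust pointer-sized values without checking the ABI.  */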
71 /* Set to 1 by expand_prologue() when the function is an interrupt handler. */
72 int current_function_interrupt;
74 tree sh_deferred_function_attributes;
75 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
77 /* Global variables for machine-dependent things. */
 79 /* The CPU we are scheduling for. */
80 enum processor_type sh_cpu;
82 /* Definitions used in ready queue reordering for first scheduling pass. */
84 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
85 static short *regmode_weight[2];
87 /* Total SFmode and SImode weights of scheduled insns. */
88 static int curr_regmode_pressure[2];
90 /* If true, skip cycles for Q -> R movement. */
91 static int skip_cycles = 0;
93 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
94 and returned from sh_reorder2. */
95 static short cached_can_issue_more;
97 /* Saved operands from the last compare to use when we generate an scc
98 or bcc insn. */
100 rtx sh_compare_op0;
101 rtx sh_compare_op1;
103 /* Provides the class number of the smallest class containing
 104 a given reg number. */
106 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
108 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
109 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
110 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
111 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
112 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
113 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
114 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
115 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
116 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
117 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
118 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
119 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
120 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
121 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
122 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
125 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
126 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
127 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
128 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
129 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
130 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
131 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
132 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
133 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
134 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
135 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
136 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
137 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
138 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
141 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
142 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
143 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
144 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
145 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
146 GENERAL_REGS, GENERAL_REGS,
149 char sh_register_names[FIRST_PSEUDO_REGISTER] \
150 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
152 char sh_additional_register_names[ADDREGNAMES_SIZE] \
153 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
154 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
156 /* Provide reg_class from a letter such as appears in the machine
157 description. *: target independently reserved letter.
158 reg_class_from_letter['e' - 'a'] is set to NO_REGS for TARGET_FMOVD. */
160 enum reg_class reg_class_from_letter[] =
162 /* a */ ALL_REGS, /* b */ TARGET_REGS, /* c */ FPSCR_REGS, /* d */ DF_REGS,
163 /* e */ FP_REGS, /* f */ FP_REGS, /* g **/ NO_REGS, /* h */ NO_REGS,
164 /* i **/ NO_REGS, /* j */ NO_REGS, /* k */ SIBCALL_REGS, /* l */ PR_REGS,
165 /* m **/ NO_REGS, /* n **/ NO_REGS, /* o **/ NO_REGS, /* p **/ NO_REGS,
166 /* q */ NO_REGS, /* r **/ NO_REGS, /* s **/ NO_REGS, /* t */ T_REGS,
167 /* u */ NO_REGS, /* v */ NO_REGS, /* w */ FP0_REGS, /* x */ MAC_REGS,
168 /* y */ FPUL_REGS, /* z */ R0_REGS
171 int assembler_dialect;
173 static bool shmedia_space_reserved_for_target_registers;
175 static bool sh_handle_option (size_t, const char *, int);
176 static void split_branches (rtx);
177 static int branch_dest (rtx);
178 static void force_into (rtx, rtx);
179 static void print_slot (rtx);
180 static rtx add_constant (rtx, enum machine_mode, rtx);
181 static void dump_table (rtx, rtx);
182 static int hi_const (rtx);
183 static int broken_move (rtx);
184 static int mova_p (rtx);
185 static rtx find_barrier (int, rtx, rtx);
186 static int noncall_uses_reg (rtx, rtx, rtx *);
187 static rtx gen_block_redirect (rtx, int, int);
188 static void sh_reorg (void);
189 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *);
190 static rtx frame_insn (rtx);
191 static rtx push (int);
192 static void pop (int);
193 static void push_regs (HARD_REG_SET *, int);
194 static int calc_live_regs (HARD_REG_SET *);
195 static void mark_use (rtx, rtx *);
196 static HOST_WIDE_INT rounded_frame_size (int);
197 static rtx mark_constant_pool_use (rtx);
198 const struct attribute_spec sh_attribute_table[];
199 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree, int, bool *);
200 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
201 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
202 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
203 static void sh_output_function_epilogue (FILE *, HOST_WIDE_INT);
204 static void sh_insert_attributes (tree, tree *);
205 static const char *sh_check_pch_target_flags (int);
206 static int sh_adjust_cost (rtx, rtx, rtx, int);
207 static int sh_issue_rate (void);
208 static int sh_dfa_new_cycle (FILE *, int, rtx, int, int, int *sort_p);
209 static short find_set_regmode_weight (rtx, enum machine_mode);
210 static short find_insn_regmode_weight (rtx, enum machine_mode);
211 static void find_regmode_weight (basic_block, enum machine_mode);
212 static void sh_md_init_global (FILE *, int, int);
213 static void sh_md_finish_global (FILE *, int);
214 static int rank_for_reorder (const void *, const void *);
215 static void swap_reorder (rtx *, int);
216 static void ready_reorder (rtx *, int);
217 static short high_pressure (enum machine_mode);
218 static int sh_reorder (FILE *, int, rtx *, int *, int);
219 static int sh_reorder2 (FILE *, int, rtx *, int *, int);
220 static void sh_md_init (FILE *, int, int);
221 static int sh_variable_issue (FILE *, int, rtx, int);
223 static bool sh_function_ok_for_sibcall (tree, tree);
225 static bool sh_cannot_modify_jumps_p (void);
226 static int sh_target_reg_class (void);
227 static bool sh_optimize_target_register_callee_saved (bool);
228 static bool sh_ms_bitfield_layout_p (tree);
230 static void sh_init_builtins (void);
231 static void sh_media_init_builtins (void);
232 static rtx sh_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
233 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
234 static void sh_file_start (void);
235 static int flow_dependent_p (rtx, rtx);
236 static void flow_dependent_p_1 (rtx, rtx, void *);
237 static int shiftcosts (rtx);
238 static int andcosts (rtx);
239 static int addsubcosts (rtx);
240 static int multcosts (rtx);
241 static bool unspec_caller_rtx_p (rtx);
242 static bool sh_cannot_copy_insn_p (rtx);
243 static bool sh_rtx_costs (rtx, int, int, int *);
244 static int sh_address_cost (rtx);
245 #ifdef TARGET_ADJUST_UNROLL_MAX
246 static int sh_adjust_unroll_max (struct loop *, int, int, int, int);
247 #endif
248 static int sh_pr_n_sets (void);
249 static rtx sh_allocate_initial_value (rtx);
250 static int shmedia_target_regs_stack_space (HARD_REG_SET *);
251 static int shmedia_reserve_space_for_target_registers_p (int, HARD_REG_SET *);
252 static int shmedia_target_regs_stack_adjust (HARD_REG_SET *);
253 static int scavenge_reg (HARD_REG_SET *s);
254 struct save_schedule_s;
255 static struct save_entry_s *sh5_schedule_saves (HARD_REG_SET *,
256 struct save_schedule_s *, int);
258 static rtx sh_struct_value_rtx (tree, int);
259 static bool sh_return_in_memory (tree, tree);
260 static rtx sh_builtin_saveregs (void);
261 static void sh_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode, tree, int *, int);
262 static bool sh_strict_argument_naming (CUMULATIVE_ARGS *);
263 static bool sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *);
264 static tree sh_build_builtin_va_list (void);
265 static tree sh_gimplify_va_arg_expr (tree, tree, tree *, tree *);
266 static bool sh_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
267 tree, bool);
268 static bool sh_callee_copies (CUMULATIVE_ARGS *, enum machine_mode,
269 tree, bool);
270 static int sh_arg_partial_bytes (CUMULATIVE_ARGS *, enum machine_mode,
271 tree, bool);
272 static int sh_dwarf_calling_convention (tree);
273 static int hard_regs_intersect_p (HARD_REG_SET *, HARD_REG_SET *);
276 /* Initialize the GCC target structure. */
277 #undef TARGET_ATTRIBUTE_TABLE
278 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
280 /* The next two are used for debug info when compiling with -gdwarf. */
281 #undef TARGET_ASM_UNALIGNED_HI_OP
282 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
283 #undef TARGET_ASM_UNALIGNED_SI_OP
284 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
286 /* These are NULLed out on non-SH5 in OVERRIDE_OPTIONS. */
287 #undef TARGET_ASM_UNALIGNED_DI_OP
288 #define TARGET_ASM_UNALIGNED_DI_OP "\t.uaquad\t"
289 #undef TARGET_ASM_ALIGNED_DI_OP
290 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
292 #undef TARGET_ASM_FUNCTION_EPILOGUE
293 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
295 #undef TARGET_ASM_OUTPUT_MI_THUNK
296 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
298 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
299 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_tree_hwi_hwi_tree_true
301 #undef TARGET_ASM_FILE_START
302 #define TARGET_ASM_FILE_START sh_file_start
303 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
304 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
306 #undef TARGET_DEFAULT_TARGET_FLAGS
307 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
308 #undef TARGET_HANDLE_OPTION
309 #define TARGET_HANDLE_OPTION sh_handle_option
311 #undef TARGET_INSERT_ATTRIBUTES
312 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
314 #undef TARGET_SCHED_ADJUST_COST
315 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
317 #undef TARGET_SCHED_ISSUE_RATE
318 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
 320 /* The next 5 hooks have been implemented to re-enable sched1. With the
 321 help of these macros we limit the movement of insns in sched1 to
322 reduce the register pressure. The overall idea is to keep count of SImode
323 and SFmode regs required by already scheduled insns. When these counts
 324 cross some threshold values, we give priority to insns that free registers.
325 The insn that frees registers is most likely to be the insn with lowest
 326 LUID (original insn order), but such an insn might be sitting in the stalled
 327 queue (Q) instead of the ready queue (R). To solve this, we skip
 328 up to a maximum of 8 cycles so that such insns may move from Q -> R.
 330 The hooks are described below:
 332 TARGET_SCHED_INIT_GLOBAL: A new target hook added to the generic
 333 scheduler; it is called inside the sched_init function just after the
 334 find_insn_reg_weights call. It is used to calculate the SImode
 335 and SFmode weights of the insns of each basic block, much like what
336 find_insn_reg_weights does.
337 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
339 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
340 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
341 (Q)->(R).
343 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
 344 high, reorder the ready queue so that the insn with lowest LUID will be
345 issued next.
347 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
348 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
350 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
351 can be returned from TARGET_SCHED_REORDER2.
353 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
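/* A rough sketch of the decision the pressure-sensitive hooks make; the
   threshold names below are placeholders for illustration, not necessarily
   the identifiers used further down in this file:

     if (CURR_REGMODE_PRESSURE (SImode) > SIMODE_THRESHOLD
         || CURR_REGMODE_PRESSURE (SFmode) > SFMODE_THRESHOLD)
       reorder the ready queue by increasing LUID and let
       TARGET_SCHED_DFA_NEW_CYCLE skip up to 8 cycles.  */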
355 #undef TARGET_SCHED_DFA_NEW_CYCLE
356 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
358 #undef TARGET_SCHED_INIT_GLOBAL
359 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
361 #undef TARGET_SCHED_FINISH_GLOBAL
362 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
364 #undef TARGET_SCHED_VARIABLE_ISSUE
365 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
367 #undef TARGET_SCHED_REORDER
368 #define TARGET_SCHED_REORDER sh_reorder
370 #undef TARGET_SCHED_REORDER2
371 #define TARGET_SCHED_REORDER2 sh_reorder2
373 #undef TARGET_SCHED_INIT
374 #define TARGET_SCHED_INIT sh_md_init
376 #undef TARGET_CANNOT_MODIFY_JUMPS_P
377 #define TARGET_CANNOT_MODIFY_JUMPS_P sh_cannot_modify_jumps_p
378 #undef TARGET_BRANCH_TARGET_REGISTER_CLASS
379 #define TARGET_BRANCH_TARGET_REGISTER_CLASS sh_target_reg_class
380 #undef TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED
381 #define TARGET_BRANCH_TARGET_REGISTER_CALLEE_SAVED \
382 sh_optimize_target_register_callee_saved
384 #undef TARGET_MS_BITFIELD_LAYOUT_P
385 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
387 #undef TARGET_INIT_BUILTINS
388 #define TARGET_INIT_BUILTINS sh_init_builtins
389 #undef TARGET_EXPAND_BUILTIN
390 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
392 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
393 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
395 #undef TARGET_CANNOT_COPY_INSN_P
396 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
397 #undef TARGET_RTX_COSTS
398 #define TARGET_RTX_COSTS sh_rtx_costs
399 #undef TARGET_ADDRESS_COST
400 #define TARGET_ADDRESS_COST sh_address_cost
401 #undef TARGET_ALLOCATE_INITIAL_VALUE
402 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
404 #undef TARGET_MACHINE_DEPENDENT_REORG
405 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
407 #ifdef HAVE_AS_TLS
408 #undef TARGET_HAVE_TLS
409 #define TARGET_HAVE_TLS true
410 #endif
412 #undef TARGET_PROMOTE_PROTOTYPES
413 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
414 #undef TARGET_PROMOTE_FUNCTION_ARGS
415 #define TARGET_PROMOTE_FUNCTION_ARGS sh_promote_prototypes
416 #undef TARGET_PROMOTE_FUNCTION_RETURN
417 #define TARGET_PROMOTE_FUNCTION_RETURN sh_promote_prototypes
419 #undef TARGET_STRUCT_VALUE_RTX
420 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
421 #undef TARGET_RETURN_IN_MEMORY
422 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
424 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
425 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
426 #undef TARGET_SETUP_INCOMING_VARARGS
427 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
428 #undef TARGET_STRICT_ARGUMENT_NAMING
429 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
430 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
431 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
432 #undef TARGET_MUST_PASS_IN_STACK
433 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
434 #undef TARGET_PASS_BY_REFERENCE
435 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
436 #undef TARGET_CALLEE_COPIES
437 #define TARGET_CALLEE_COPIES sh_callee_copies
438 #undef TARGET_ARG_PARTIAL_BYTES
439 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
441 #undef TARGET_BUILD_BUILTIN_VA_LIST
442 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
443 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
444 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
446 #undef TARGET_VECTOR_MODE_SUPPORTED_P
447 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
449 #undef TARGET_CHECK_PCH_TARGET_FLAGS
450 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
452 #undef TARGET_DWARF_CALLING_CONVENTION
453 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
455 /* Return regmode weight for insn. */
456 #define INSN_REGMODE_WEIGHT(INSN, MODE) regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
458 /* Return current register pressure for regmode. */
459 #define CURR_REGMODE_PRESSURE(MODE) curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
461 #ifdef SYMBIAN
463 #undef TARGET_ENCODE_SECTION_INFO
464 #define TARGET_ENCODE_SECTION_INFO sh_symbian_encode_section_info
465 #undef TARGET_STRIP_NAME_ENCODING
466 #define TARGET_STRIP_NAME_ENCODING sh_symbian_strip_name_encoding
467 #undef TARGET_CXX_IMPORT_EXPORT_CLASS
468 #define TARGET_CXX_IMPORT_EXPORT_CLASS symbian_import_export_class
470 #endif /* SYMBIAN */
472 #ifdef TARGET_ADJUST_UNROLL_MAX
473 #undef TARGET_ADJUST_UNROLL_MAX
474 #define TARGET_ADJUST_UNROLL_MAX sh_adjust_unroll_max
475 #endif
477 #undef TARGET_SECONDARY_RELOAD
478 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
480 struct gcc_target targetm = TARGET_INITIALIZER;
482 /* Implement TARGET_HANDLE_OPTION. */
484 static bool
485 sh_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED,
486 int value ATTRIBUTE_UNUSED)
488 switch (code)
490 case OPT_m1:
491 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH1;
492 return true;
494 case OPT_m2:
495 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2;
496 return true;
498 case OPT_m2a:
499 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A;
500 return true;
502 case OPT_m2a_nofpu:
503 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_NOFPU;
504 return true;
506 case OPT_m2a_single:
507 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE;
508 return true;
510 case OPT_m2a_single_only:
511 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2A_SINGLE_ONLY;
512 return true;
514 case OPT_m2e:
515 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH2E;
516 return true;
518 case OPT_m3:
519 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3;
520 return true;
522 case OPT_m3e:
523 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH3E;
524 return true;
526 case OPT_m4:
527 case OPT_m4_100:
528 case OPT_m4_200:
529 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4;
530 return true;
532 case OPT_m4_nofpu:
533 case OPT_m4_400:
534 case OPT_m4_500:
535 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_NOFPU;
536 return true;
538 case OPT_m4_single:
539 case OPT_m4_100_single:
540 case OPT_m4_200_single:
541 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE;
542 return true;
544 case OPT_m4_single_only:
545 case OPT_m4_100_single_only:
546 case OPT_m4_200_single_only:
547 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4_SINGLE_ONLY;
548 return true;
550 case OPT_m4a:
551 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A;
552 return true;
554 case OPT_m4a_nofpu:
555 case OPT_m4al:
556 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_NOFPU;
557 return true;
559 case OPT_m4a_single:
560 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE;
561 return true;
563 case OPT_m4a_single_only:
564 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH4A_SINGLE_ONLY;
565 return true;
567 case OPT_m5_32media:
568 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA;
569 return true;
571 case OPT_m5_32media_nofpu:
572 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_32MEDIA_NOFPU;
573 return true;
575 case OPT_m5_64media:
576 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA;
577 return true;
579 case OPT_m5_64media_nofpu:
580 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_64MEDIA_NOFPU;
581 return true;
583 case OPT_m5_compact:
584 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT;
585 return true;
587 case OPT_m5_compact_nofpu:
588 target_flags = (target_flags & ~MASK_ARCH) | SELECT_SH5_COMPACT_NOFPU;
589 return true;
591 default:
592 return true;
596 /* Print the operand address in x to the stream. */
598 void
599 print_operand_address (FILE *stream, rtx x)
601 switch (GET_CODE (x))
603 case REG:
604 case SUBREG:
605 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
606 break;
608 case PLUS:
610 rtx base = XEXP (x, 0);
611 rtx index = XEXP (x, 1);
613 switch (GET_CODE (index))
615 case CONST_INT:
616 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
617 reg_names[true_regnum (base)]);
618 break;
620 case REG:
621 case SUBREG:
623 int base_num = true_regnum (base);
624 int index_num = true_regnum (index);
626 fprintf (stream, "@(r0,%s)",
627 reg_names[MAX (base_num, index_num)]);
628 break;
631 default:
632 gcc_unreachable ();
635 break;
637 case PRE_DEC:
638 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
639 break;
641 case POST_INC:
642 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
643 break;
645 default:
646 x = mark_constant_pool_use (x);
647 output_addr_const (stream, x);
648 break;
652 /* Print operand x (an rtx) in assembler syntax to file stream
653 according to modifier code.
655 '.' print a .s if insn needs delay slot
656 ',' print LOCAL_LABEL_PREFIX
657 '@' print trap, rte or rts depending upon pragma interruptness
658 '#' output a nop if there is nothing to put in the delay slot
659 ''' print likelihood suffix (/u for unlikely).
660 '>' print branch target if -fverbose-asm
661 'O' print a constant without the #
662 'R' print the LSW of a dp value - changes if in little endian
663 'S' print the MSW of a dp value - changes if in little endian
664 'T' print the next word of a dp value - same as 'R' in big endian mode.
665 'M' print an `x' if `m' will print `base,index'.
666 'N' print 'r63' if the operand is (const_int 0).
667 'd' print a V2SF reg as dN instead of fpN.
668 'm' print a pair `base,offset' or `base,index', for LD and ST.
669 'U' Likewise for {LD,ST}{HI,LO}.
670 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
671 'o' output an operator. */
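/* Worked example of the modifiers above, for a hypothetical DImode
   operand 0 held in the register pair r4/r5: "%S0" prints the register
   holding the most significant word and "%R0" the one holding the least
   significant word, so a template such as "mov #-1,%S0" (used in
   output_movedouble below) sign-fills the high word of the pair.  */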
673 void
674 print_operand (FILE *stream, rtx x, int code)
676 int regno;
677 enum machine_mode mode;
679 switch (code)
681 tree trapa_attr;
683 case '.':
684 if (final_sequence
685 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
686 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
687 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
688 break;
689 case ',':
690 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
691 break;
692 case '@':
693 trapa_attr = lookup_attribute ("trap_exit",
694 DECL_ATTRIBUTES (current_function_decl));
695 if (trapa_attr)
696 fprintf (stream, "trapa #%ld",
697 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
698 else if (sh_cfun_interrupt_handler_p ())
699 fprintf (stream, "rte");
700 else
701 fprintf (stream, "rts");
702 break;
703 case '#':
704 /* Output a nop if there's nothing in the delay slot. */
705 if (dbr_sequence_length () == 0)
706 fprintf (stream, "\n\tnop");
707 break;
708 case '\'':
710 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
712 if (note && INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
713 fputs ("/u", stream);
714 break;
716 case '>':
717 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
719 fputs ("\t! target: ", stream);
720 output_addr_const (stream, JUMP_LABEL (current_output_insn));
722 break;
723 case 'O':
724 x = mark_constant_pool_use (x);
725 output_addr_const (stream, x);
726 break;
727 /* N.B.: %R / %S / %T adjust memory addresses by four.
728 For SHMEDIA, that means they can be used to access the first and
729 second 32 bit part of a 64 bit (or larger) value that
730 might be held in floating point registers or memory.
731 While they can be used to access 64 bit parts of a larger value
732 held in general purpose registers, that won't work with memory -
733 neither for fp registers, since the frxx names are used. */
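/* Illustration of the memory case handled below, with a hypothetical
   operand: for a DFmode value at @r4 on a little-endian target, %R0
   prints "@r4" (offset 4 * LSW = 0) and %S0 prints "@(4,r4)"
   (offset 4 * MSW = 4); on a big-endian target the two offsets swap.  */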
734 case 'R':
735 if (REG_P (x) || GET_CODE (x) == SUBREG)
737 regno = true_regnum (x);
738 regno += FP_REGISTER_P (regno) ? 1 : LSW;
739 fputs (reg_names[regno], (stream));
741 else if (MEM_P (x))
743 x = adjust_address (x, SImode, 4 * LSW);
744 print_operand_address (stream, XEXP (x, 0));
746 else
748 rtx sub = NULL_RTX;
750 mode = GET_MODE (x);
751 if (mode == VOIDmode)
752 mode = DImode;
753 if (GET_MODE_SIZE (mode) >= 8)
754 sub = simplify_subreg (SImode, x, mode, 4 * LSW);
755 if (sub)
756 print_operand (stream, sub, 0);
757 else
758 output_operand_lossage ("invalid operand to %%R");
760 break;
761 case 'S':
762 if (REG_P (x) || GET_CODE (x) == SUBREG)
764 regno = true_regnum (x);
765 regno += FP_REGISTER_P (regno) ? 0 : MSW;
766 fputs (reg_names[regno], (stream));
768 else if (MEM_P (x))
770 x = adjust_address (x, SImode, 4 * MSW);
771 print_operand_address (stream, XEXP (x, 0));
773 else
775 rtx sub = NULL_RTX;
777 mode = GET_MODE (x);
778 if (mode == VOIDmode)
779 mode = DImode;
780 if (GET_MODE_SIZE (mode) >= 8)
781 sub = simplify_subreg (SImode, x, mode, 4 * MSW);
782 if (sub)
783 print_operand (stream, sub, 0);
784 else
785 output_operand_lossage ("invalid operand to %%S");
787 break;
788 case 'T':
789 /* Next word of a double. */
790 switch (GET_CODE (x))
792 case REG:
793 fputs (reg_names[REGNO (x) + 1], (stream));
794 break;
795 case MEM:
796 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
797 && GET_CODE (XEXP (x, 0)) != POST_INC)
798 x = adjust_address (x, SImode, 4);
799 print_operand_address (stream, XEXP (x, 0));
800 break;
801 default:
802 break;
804 break;
805 case 'o':
806 switch (GET_CODE (x))
808 case PLUS: fputs ("add", stream); break;
809 case MINUS: fputs ("sub", stream); break;
810 case MULT: fputs ("mul", stream); break;
811 case DIV: fputs ("div", stream); break;
812 case EQ: fputs ("eq", stream); break;
813 case NE: fputs ("ne", stream); break;
814 case GT: case LT: fputs ("gt", stream); break;
815 case GE: case LE: fputs ("ge", stream); break;
816 case GTU: case LTU: fputs ("gtu", stream); break;
817 case GEU: case LEU: fputs ("geu", stream); break;
818 default:
819 break;
821 break;
822 case 'M':
823 if (GET_CODE (x) == MEM
824 && GET_CODE (XEXP (x, 0)) == PLUS
825 && (GET_CODE (XEXP (XEXP (x, 0), 1)) == REG
826 || GET_CODE (XEXP (XEXP (x, 0), 1)) == SUBREG))
827 fputc ('x', stream);
828 break;
830 case 'm':
831 gcc_assert (GET_CODE (x) == MEM);
832 x = XEXP (x, 0);
833 /* Fall through. */
834 case 'U':
835 switch (GET_CODE (x))
837 case REG:
838 case SUBREG:
839 print_operand (stream, x, 0);
840 fputs (", 0", stream);
841 break;
843 case PLUS:
844 print_operand (stream, XEXP (x, 0), 0);
845 fputs (", ", stream);
846 print_operand (stream, XEXP (x, 1), 0);
847 break;
849 default:
850 gcc_unreachable ();
852 break;
854 case 'd':
855 gcc_assert (GET_CODE (x) == REG && GET_MODE (x) == V2SFmode);
857 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
858 break;
860 case 'N':
861 if (x == CONST0_RTX (GET_MODE (x)))
863 fprintf ((stream), "r63");
864 break;
866 goto default_output;
867 case 'u':
868 if (GET_CODE (x) == CONST_INT)
870 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
871 break;
873 /* Fall through. */
875 default_output:
876 default:
877 regno = 0;
878 mode = GET_MODE (x);
880 switch (GET_CODE (x))
882 case TRUNCATE:
884 rtx inner = XEXP (x, 0);
885 int offset = 0;
886 enum machine_mode inner_mode;
888 /* We might see SUBREGs with vector mode registers inside. */
889 if (GET_CODE (inner) == SUBREG
890 && (GET_MODE_SIZE (GET_MODE (inner))
891 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
892 && subreg_lowpart_p (inner))
893 inner = SUBREG_REG (inner);
894 if (GET_CODE (inner) == CONST_INT)
896 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
897 goto default_output;
899 inner_mode = GET_MODE (inner);
900 if (GET_CODE (inner) == SUBREG
901 && (GET_MODE_SIZE (GET_MODE (inner))
902 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
903 && GET_CODE (SUBREG_REG (inner)) == REG)
905 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
906 GET_MODE (SUBREG_REG (inner)),
907 SUBREG_BYTE (inner),
908 GET_MODE (inner));
909 inner = SUBREG_REG (inner);
911 if (GET_CODE (inner) != REG || GET_MODE_SIZE (inner_mode) > 8)
912 abort ();
913 /* Floating point register pairs are always big endian;
914 general purpose registers are 64 bit wide. */
915 regno = REGNO (inner);
916 regno = (HARD_REGNO_NREGS (regno, inner_mode)
917 - HARD_REGNO_NREGS (regno, mode))
918 + offset;
919 x = inner;
920 goto reg;
922 case SIGN_EXTEND:
923 x = XEXP (x, 0);
924 goto reg;
925 /* FIXME: We need this on SHmedia32 because reload generates
926 some sign-extended HI or QI loads into DImode registers
927 but, because Pmode is SImode, the address ends up with a
928 subreg:SI of the DImode register. Maybe reload should be
929 fixed so as to apply alter_subreg to such loads? */
930 case IF_THEN_ELSE:
931 gcc_assert (trapping_target_operand (x, VOIDmode));
932 x = XEXP (XEXP (x, 2), 0);
933 goto default_output;
934 case SUBREG:
935 gcc_assert (SUBREG_BYTE (x) == 0
936 && GET_CODE (SUBREG_REG (x)) == REG);
938 x = SUBREG_REG (x);
939 /* Fall through. */
941 reg:
942 case REG:
943 regno += REGNO (x);
944 if (FP_REGISTER_P (regno)
945 && mode == V16SFmode)
946 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
947 else if (FP_REGISTER_P (REGNO (x))
948 && mode == V4SFmode)
949 fprintf ((stream), "fv%s", reg_names[regno] + 2);
950 else if (GET_CODE (x) == REG
951 && mode == V2SFmode)
952 fprintf ((stream), "fp%s", reg_names[regno] + 2);
953 else if (FP_REGISTER_P (REGNO (x))
954 && GET_MODE_SIZE (mode) > 4)
955 fprintf ((stream), "d%s", reg_names[regno] + 1);
956 else
957 fputs (reg_names[regno], (stream));
958 break;
960 case MEM:
961 output_address (XEXP (x, 0));
962 break;
964 case CONST:
965 if (TARGET_SHMEDIA
966 && (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
967 || GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
968 && (GET_MODE (XEXP (x, 0)) == DImode
969 || GET_MODE (XEXP (x, 0)) == SImode)
970 && GET_CODE (XEXP (XEXP (x, 0), 0)) == TRUNCATE
971 && GET_MODE (XEXP (XEXP (x, 0), 0)) == HImode)
973 rtx val = XEXP (XEXP (XEXP (x, 0), 0), 0);
974 rtx val2 = val;
975 bool nested_expr = false;
977 fputc ('(', stream);
978 if (GET_CODE (val) == ASHIFTRT)
980 fputc ('(', stream);
981 val2 = XEXP (val, 0);
983 if (GET_CODE (val2) == CONST
984 || GET_RTX_CLASS (GET_CODE (val2)) != RTX_OBJ)
986 fputc ('(', stream);
987 nested_expr = true;
989 output_addr_const (stream, val2);
990 if (nested_expr)
991 fputc (')', stream);
992 if (GET_CODE (val) == ASHIFTRT)
994 fputs (" >> ", stream);
995 output_addr_const (stream, XEXP (val, 1));
996 fputc (')', stream);
998 fputs (" & 65535)", stream);
999 break;
1002 /* Fall through. */
1003 default:
1004 if (TARGET_SH1)
1005 fputc ('#', stream);
1006 output_addr_const (stream, x);
1007 break;
1009 break;
1013 /* Like force_operand, but guarantees that VALUE ends up in TARGET. */
1014 static void
1015 force_into (rtx value, rtx target)
1017 value = force_operand (value, target);
1018 if (! rtx_equal_p (value, target))
1019 emit_insn (gen_move_insn (target, value));
1022 /* Emit code to perform a block move. Choose the best method.
1024 OPERANDS[0] is the destination.
1025 OPERANDS[1] is the source.
1026 OPERANDS[2] is the size.
1027 OPERANDS[3] is the alignment safe to use. */
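/* A small worked case of the dispatch below, for illustration only:
   a constant 12 byte copy with 4 byte alignment on TARGET_HARD_SH4 loads
   the destination address into r4 and the source address into r5 and
   calls the __movmemSI12_i4 support routine; a non-constant length or an
   alignment below 4 bytes (outside the movua.l path) simply returns 0 so
   the caller can fall back to a generic expansion.  */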
1030 expand_block_move (rtx *operands)
1032 int align = INTVAL (operands[3]);
1033 int constp = (GET_CODE (operands[2]) == CONST_INT);
1034 int bytes = (constp ? INTVAL (operands[2]) : 0);
1036 if (! constp)
1037 return 0;
1039 /* If we could use mov.l to move words and dest is word-aligned, we
1040 can use movua.l for loads and still generate a relatively short
1041 and efficient sequence. */
1042 if (TARGET_SH4A_ARCH && align < 4
1043 && MEM_ALIGN (operands[0]) >= 32
1044 && can_move_by_pieces (bytes, 32))
1046 rtx dest = copy_rtx (operands[0]);
1047 rtx src = copy_rtx (operands[1]);
1048 /* We could use different pseudos for each copied word, but
1049 since movua can only load into r0, it's kind of
1050 pointless. */
1051 rtx temp = gen_reg_rtx (SImode);
1052 rtx src_addr = copy_addr_to_reg (XEXP (src, 0));
1053 int copied = 0;
1055 while (copied + 4 <= bytes)
1057 rtx to = adjust_address (dest, SImode, copied);
1058 rtx from = adjust_automodify_address (src, SImode, src_addr, copied);
1060 emit_insn (gen_movua (temp, from));
1061 emit_move_insn (src_addr, plus_constant (src_addr, 4));
1062 emit_move_insn (to, temp);
1063 copied += 4;
1066 if (copied < bytes)
1067 move_by_pieces (adjust_address (dest, BLKmode, copied),
1068 adjust_automodify_address (src, BLKmode,
1069 src_addr, copied),
1070 bytes - copied, align, 0);
1072 return 1;
1075 /* If it isn't a constant number of bytes, or if it doesn't have 4 byte
1076 alignment, or if it isn't a multiple of 4 bytes, then fail. */
1077 if (align < 4 || (bytes % 4 != 0))
1078 return 0;
1080 if (TARGET_HARD_SH4)
1082 if (bytes < 12)
1083 return 0;
1084 else if (bytes == 12)
1086 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1087 rtx r4 = gen_rtx_REG (SImode, 4);
1088 rtx r5 = gen_rtx_REG (SImode, 5);
1090 function_symbol (func_addr_rtx, "__movmemSI12_i4", SFUNC_STATIC);
1091 force_into (XEXP (operands[0], 0), r4);
1092 force_into (XEXP (operands[1], 0), r5);
1093 emit_insn (gen_block_move_real_i4 (func_addr_rtx));
1094 return 1;
1096 else if (! TARGET_SMALLCODE)
1098 const char *entry_name;
1099 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1100 int dwords;
1101 rtx r4 = gen_rtx_REG (SImode, 4);
1102 rtx r5 = gen_rtx_REG (SImode, 5);
1103 rtx r6 = gen_rtx_REG (SImode, 6);
1105 entry_name = (bytes & 4 ? "__movmem_i4_odd" : "__movmem_i4_even");
1106 function_symbol (func_addr_rtx, entry_name, SFUNC_STATIC);
1107 force_into (XEXP (operands[0], 0), r4);
1108 force_into (XEXP (operands[1], 0), r5);
1110 dwords = bytes >> 3;
1111 emit_insn (gen_move_insn (r6, GEN_INT (dwords - 1)));
1112 emit_insn (gen_block_lump_real_i4 (func_addr_rtx));
1113 return 1;
1115 else
1116 return 0;
1118 if (bytes < 64)
1120 char entry[30];
1121 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1122 rtx r4 = gen_rtx_REG (SImode, 4);
1123 rtx r5 = gen_rtx_REG (SImode, 5);
1125 sprintf (entry, "__movmemSI%d", bytes);
1126 function_symbol (func_addr_rtx, entry, SFUNC_STATIC);
1127 force_into (XEXP (operands[0], 0), r4);
1128 force_into (XEXP (operands[1], 0), r5);
1129 emit_insn (gen_block_move_real (func_addr_rtx));
1130 return 1;
 1133 /* This is the same number of bytes as a memcpy call, but to a different,
1134 less common function name, so this will occasionally use more space. */
1135 if (! TARGET_SMALLCODE)
1137 rtx func_addr_rtx = gen_reg_rtx (Pmode);
1138 int final_switch, while_loop;
1139 rtx r4 = gen_rtx_REG (SImode, 4);
1140 rtx r5 = gen_rtx_REG (SImode, 5);
1141 rtx r6 = gen_rtx_REG (SImode, 6);
1143 function_symbol (func_addr_rtx, "__movmem", SFUNC_STATIC);
1144 force_into (XEXP (operands[0], 0), r4);
1145 force_into (XEXP (operands[1], 0), r5);
 1147 /* r6 controls the size of the move; it is decremented by 16
 1148 for each 64 bytes moved. The negative value left over is then used
 1149 as an index into a list of move instructions. E.g., a 72 byte move
1150 would be set up with size(r6) = 14, for one iteration through the
1151 big while loop, and a switch of -2 for the last part. */
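/* Another worked example of the same computation: a 200 byte move has
   bytes / 4 == 50 words, so final_switch = 16 - (50 % 16) = 14 and
   while_loop = (50 / 16 - 1) * 16 = 32, giving r6 = 46: three trips
   through the 64 byte loop (46 -> 30 -> 14 -> -2) and a final switch of
   -2 for the remaining 8 bytes, just like the 72 byte case above.  */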
1153 final_switch = 16 - ((bytes / 4) % 16);
1154 while_loop = ((bytes / 4) / 16 - 1) * 16;
1155 emit_insn (gen_move_insn (r6, GEN_INT (while_loop + final_switch)));
1156 emit_insn (gen_block_lump_real (func_addr_rtx));
1157 return 1;
1160 return 0;
1163 /* Prepare operands for a move define_expand; specifically, one of the
1164 operands must be in a register. */
1167 prepare_move_operands (rtx operands[], enum machine_mode mode)
1169 if ((mode == SImode || mode == DImode)
1170 && flag_pic
1171 && ! ((mode == Pmode || mode == ptr_mode)
1172 && tls_symbolic_operand (operands[1], Pmode) != 0))
1174 rtx temp;
1175 if (SYMBOLIC_CONST_P (operands[1]))
1177 if (GET_CODE (operands[0]) == MEM)
1178 operands[1] = force_reg (Pmode, operands[1]);
1179 else if (TARGET_SHMEDIA
1180 && GET_CODE (operands[1]) == LABEL_REF
1181 && target_reg_operand (operands[0], mode))
1182 /* It's ok. */;
1183 else
1185 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
1186 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1189 else if (GET_CODE (operands[1]) == CONST
1190 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1191 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1193 temp = no_new_pseudos ? operands[0] : gen_reg_rtx (Pmode);
1194 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1195 mode, temp);
1196 operands[1] = expand_binop (mode, add_optab, temp,
1197 XEXP (XEXP (operands[1], 0), 1),
1198 no_new_pseudos ? temp
1199 : gen_reg_rtx (Pmode),
1200 0, OPTAB_LIB_WIDEN);
1204 if (! reload_in_progress && ! reload_completed)
1206 /* Copy the source to a register if both operands aren't registers. */
1207 if (! register_operand (operands[0], mode)
1208 && ! sh_register_operand (operands[1], mode))
1209 operands[1] = copy_to_mode_reg (mode, operands[1]);
1211 if (GET_CODE (operands[0]) == MEM && ! memory_operand (operands[0], mode))
 1213 /* This is like change_address_1 (operands[0], mode, 0, 1),
1214 except that we can't use that function because it is static. */
1215 rtx new = change_address (operands[0], mode, 0);
1216 MEM_COPY_ATTRIBUTES (new, operands[0]);
1217 operands[0] = new;
1220 /* This case can happen while generating code to move the result
1221 of a library call to the target. Reject `st r0,@(rX,rY)' because
1222 reload will fail to find a spill register for rX, since r0 is already
1223 being used for the source. */
1224 else if (TARGET_SH1
1225 && refers_to_regno_p (R0_REG, R0_REG + 1, operands[1], (rtx *)0)
1226 && GET_CODE (operands[0]) == MEM
1227 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1228 && GET_CODE (XEXP (XEXP (operands[0], 0), 1)) == REG)
1229 operands[1] = copy_to_mode_reg (mode, operands[1]);
1232 if (mode == Pmode || mode == ptr_mode)
1234 rtx op0, op1, opc;
1235 enum tls_model tls_kind;
1237 op0 = operands[0];
1238 op1 = operands[1];
1239 if (GET_CODE (op1) == CONST
1240 && GET_CODE (XEXP (op1, 0)) == PLUS
1241 && tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode))
1243 opc = XEXP (XEXP (op1, 0), 1);
1244 op1 = XEXP (XEXP (op1, 0), 0);
1246 else
1247 opc = NULL_RTX;
1249 if ((tls_kind = tls_symbolic_operand (op1, Pmode)))
1251 rtx tga_op1, tga_ret, tmp, tmp2;
1253 switch (tls_kind)
1255 case TLS_MODEL_GLOBAL_DYNAMIC:
1256 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1257 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1258 op1 = tga_ret;
1259 break;
1261 case TLS_MODEL_LOCAL_DYNAMIC:
1262 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1263 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1265 tmp = gen_reg_rtx (Pmode);
1266 emit_move_insn (tmp, tga_ret);
1268 if (register_operand (op0, Pmode))
1269 tmp2 = op0;
1270 else
1271 tmp2 = gen_reg_rtx (Pmode);
1273 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1274 op1 = tmp2;
1275 break;
1277 case TLS_MODEL_INITIAL_EXEC:
1278 if (! flag_pic)
1280 /* Don't schedule insns for getting GOT address when
1281 the first scheduling is enabled, to avoid spill
1282 failures for R0. */
1283 if (flag_schedule_insns)
1284 emit_insn (gen_blockage ());
1285 emit_insn (gen_GOTaddr2picreg ());
1286 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode,
1287 PIC_REG)));
1288 if (flag_schedule_insns)
1289 emit_insn (gen_blockage ());
1291 tga_op1 = no_new_pseudos ? op0 : gen_reg_rtx (Pmode);
1292 tmp = gen_sym2GOTTPOFF (op1);
1293 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1294 op1 = tga_op1;
1295 break;
1297 case TLS_MODEL_LOCAL_EXEC:
1298 tmp2 = gen_reg_rtx (Pmode);
1299 emit_insn (gen_load_gbr (tmp2));
1300 tmp = gen_reg_rtx (Pmode);
1301 emit_insn (gen_symTPOFF2reg (tmp, op1));
1303 if (register_operand (op0, Pmode))
1304 op1 = op0;
1305 else
1306 op1 = gen_reg_rtx (Pmode);
1308 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1309 break;
1311 default:
1312 gcc_unreachable ();
1314 if (opc)
1315 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1316 operands[1] = op1;
1320 return 0;
1323 /* Prepare the operands for an scc instruction; make sure that the
1324 compare has been done. */
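/* For illustration: SH has no "less than" compare instructions (the
   hardware provides cmp/eq, cmp/gt, cmp/ge, cmp/hi and cmp/hs), so a
   requested LT/LE/LTU/LEU is rewritten below as the mirrored
   GT/GE/GTU/GEU with the two compare operands swapped, and the scc
   result is then read out of the T bit.  */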
1326 prepare_scc_operands (enum rtx_code code)
1328 rtx t_reg = gen_rtx_REG (SImode, T_REG);
1329 enum rtx_code oldcode = code;
1330 enum machine_mode mode;
1332 /* First need a compare insn. */
1333 switch (code)
1335 case NE:
1336 /* It isn't possible to handle this case. */
1337 gcc_unreachable ();
1338 case LT:
1339 code = GT;
1340 break;
1341 case LE:
1342 code = GE;
1343 break;
1344 case LTU:
1345 code = GTU;
1346 break;
1347 case LEU:
1348 code = GEU;
1349 break;
1350 default:
1351 break;
1353 if (code != oldcode)
1355 rtx tmp = sh_compare_op0;
1356 sh_compare_op0 = sh_compare_op1;
1357 sh_compare_op1 = tmp;
1360 mode = GET_MODE (sh_compare_op0);
1361 if (mode == VOIDmode)
1362 mode = GET_MODE (sh_compare_op1);
1364 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1365 if ((code != EQ && code != NE
1366 && (sh_compare_op1 != const0_rtx
1367 || code == GTU || code == GEU || code == LTU || code == LEU))
1368 || (mode == DImode && sh_compare_op1 != const0_rtx)
1369 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1370 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1372 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1373 (mode == SFmode ? emit_sf_insn : emit_df_insn)
1374 (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2,
1375 gen_rtx_SET (VOIDmode, t_reg,
1376 gen_rtx_fmt_ee (code, SImode,
1377 sh_compare_op0, sh_compare_op1)),
1378 gen_rtx_USE (VOIDmode, get_fpscr_rtx ()))));
1379 else
1380 emit_insn (gen_rtx_SET (VOIDmode, t_reg,
1381 gen_rtx_fmt_ee (code, SImode,
1382 sh_compare_op0, sh_compare_op1)));
1384 return t_reg;
1387 /* Called from the md file, set up the operands of a compare instruction. */
1389 void
1390 from_compare (rtx *operands, int code)
1392 enum machine_mode mode = GET_MODE (sh_compare_op0);
1393 rtx insn;
1394 if (mode == VOIDmode)
1395 mode = GET_MODE (sh_compare_op1);
1396 if (code != EQ
1397 || mode == DImode
1398 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1400 /* Force args into regs, since we can't use constants here. */
1401 sh_compare_op0 = force_reg (mode, sh_compare_op0);
1402 if (sh_compare_op1 != const0_rtx
1403 || code == GTU || code == GEU
1404 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
1405 sh_compare_op1 = force_reg (mode, sh_compare_op1);
1407 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT && code == GE)
1409 from_compare (operands, GT);
1410 insn = gen_ieee_ccmpeqsf_t (sh_compare_op0, sh_compare_op1);
1412 else
1413 insn = gen_rtx_SET (VOIDmode,
1414 gen_rtx_REG (SImode, T_REG),
1415 gen_rtx_fmt_ee (code, SImode,
1416 sh_compare_op0, sh_compare_op1));
1417 if ((TARGET_SH4 || TARGET_SH2A) && GET_MODE_CLASS (mode) == MODE_FLOAT)
1419 insn = gen_rtx_PARALLEL (VOIDmode,
1420 gen_rtvec (2, insn,
1421 gen_rtx_USE (VOIDmode, get_fpscr_rtx ())));
1422 (mode == SFmode ? emit_sf_insn : emit_df_insn) (insn);
1424 else
1425 emit_insn (insn);
1428 /* Functions to output assembly code. */
1430 /* Return a sequence of instructions to perform DI or DF move.
1432 Since the SH cannot move a DI or DF in one instruction, we have
1433 to take care when we see overlapping source and dest registers. */
1435 const char *
1436 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
1437 enum machine_mode mode)
1439 rtx dst = operands[0];
1440 rtx src = operands[1];
1442 if (GET_CODE (dst) == MEM
1443 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
1444 return "mov.l %T1,%0\n\tmov.l %1,%0";
1446 if (register_operand (dst, mode)
1447 && register_operand (src, mode))
1449 if (REGNO (src) == MACH_REG)
1450 return "sts mach,%S0\n\tsts macl,%R0";
1452 /* When mov.d r1,r2 do r2->r3 then r1->r2;
1453 when mov.d r1,r0 do r1->r0 then r2->r1. */
1455 if (REGNO (src) + 1 == REGNO (dst))
1456 return "mov %T1,%T0\n\tmov %1,%0";
1457 else
1458 return "mov %1,%0\n\tmov %T1,%T0";
1460 else if (GET_CODE (src) == CONST_INT)
1462 if (INTVAL (src) < 0)
1463 output_asm_insn ("mov #-1,%S0", operands);
1464 else
1465 output_asm_insn ("mov #0,%S0", operands);
1467 return "mov %1,%R0";
1469 else if (GET_CODE (src) == MEM)
1471 int ptrreg = -1;
1472 int dreg = REGNO (dst);
1473 rtx inside = XEXP (src, 0);
1475 switch (GET_CODE (inside))
1477 case REG:
1478 ptrreg = REGNO (inside);
1479 break;
1481 case SUBREG:
1482 ptrreg = subreg_regno (inside);
1483 break;
1485 case PLUS:
1486 ptrreg = REGNO (XEXP (inside, 0));
 1487 /* ??? An r0+REG address shouldn't be possible here, because it isn't
1488 an offsettable address. Unfortunately, offsettable addresses use
1489 QImode to check the offset, and a QImode offsettable address
1490 requires r0 for the other operand, which is not currently
1491 supported, so we can't use the 'o' constraint.
1492 Thus we must check for and handle r0+REG addresses here.
1493 We punt for now, since this is likely very rare. */
1494 gcc_assert (GET_CODE (XEXP (inside, 1)) != REG);
1495 break;
1497 case LABEL_REF:
1498 return "mov.l %1,%0\n\tmov.l %1+4,%T0";
1499 case POST_INC:
1500 return "mov.l %1,%0\n\tmov.l %1,%T0";
1501 default:
1502 gcc_unreachable ();
1505 /* Work out the safe way to copy. Copy into the second half first. */
1506 if (dreg == ptrreg)
1507 return "mov.l %T1,%T0\n\tmov.l %1,%0";
1510 return "mov.l %1,%0\n\tmov.l %T1,%T0";
1513 /* Print an instruction which would have gone into a delay slot after
1514 another instruction, but couldn't because the other instruction expanded
1515 into a sequence where putting the slot insn at the end wouldn't work. */
1517 static void
1518 print_slot (rtx insn)
1520 final_scan_insn (XVECEXP (insn, 0, 1), asm_out_file, optimize, 1, NULL);
1522 INSN_DELETED_P (XVECEXP (insn, 0, 1)) = 1;
1525 const char *
1526 output_far_jump (rtx insn, rtx op)
1528 struct { rtx lab, reg, op; } this;
1529 rtx braf_base_lab = NULL_RTX;
1530 const char *jump;
1531 int far;
1532 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
1533 rtx prev;
1535 this.lab = gen_label_rtx ();
1537 if (TARGET_SH2
1538 && offset >= -32764
1539 && offset - get_attr_length (insn) <= 32766)
1541 far = 0;
1542 jump = "mov.w %O0,%1; braf %1";
1544 else
1546 far = 1;
1547 if (flag_pic)
1549 if (TARGET_SH2)
1550 jump = "mov.l %O0,%1; braf %1";
1551 else
1552 jump = "mov.l r0,@-r15; mova %O0,r0; mov.l @r0,%1; add r0,%1; mov.l @r15+,r0; jmp @%1";
1554 else
1555 jump = "mov.l %O0,%1; jmp @%1";
1557 /* If we have a scratch register available, use it. */
1558 if (GET_CODE ((prev = prev_nonnote_insn (insn))) == INSN
1559 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
1561 this.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
1562 if (REGNO (this.reg) == R0_REG && flag_pic && ! TARGET_SH2)
1563 jump = "mov.l r1,@-r15; mova %O0,r0; mov.l @r0,r1; add r1,r0; mov.l @r15+,r1; jmp @%1";
1564 output_asm_insn (jump, &this.lab);
1565 if (dbr_sequence_length ())
1566 print_slot (final_sequence);
1567 else
1568 output_asm_insn ("nop", 0);
1570 else
1572 /* Output the delay slot insn first if any. */
1573 if (dbr_sequence_length ())
1574 print_slot (final_sequence);
1576 this.reg = gen_rtx_REG (SImode, 13);
1577 /* We must keep the stack aligned to 8-byte boundaries on SH5.
1578 Fortunately, MACL is fixed and call-clobbered, and we never
1579 need its value across jumps, so save r13 in it instead of in
1580 the stack. */
1581 if (TARGET_SH5)
1582 output_asm_insn ("lds r13, macl", 0);
1583 else
1584 output_asm_insn ("mov.l r13,@-r15", 0);
1585 output_asm_insn (jump, &this.lab);
1586 if (TARGET_SH5)
1587 output_asm_insn ("sts macl, r13", 0);
1588 else
1589 output_asm_insn ("mov.l @r15+,r13", 0);
1591 if (far && flag_pic && TARGET_SH2)
1593 braf_base_lab = gen_label_rtx ();
1594 (*targetm.asm_out.internal_label) (asm_out_file, "L",
1595 CODE_LABEL_NUMBER (braf_base_lab));
1597 if (far)
1598 output_asm_insn (".align 2", 0);
1599 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this.lab));
1600 this.op = op;
1601 if (far && flag_pic)
1603 if (TARGET_SH2)
1604 this.lab = braf_base_lab;
1605 output_asm_insn (".long %O2-%O0", &this.lab);
1607 else
1608 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this.lab);
1609 return "";
1612 /* Local label counter, used for constants in the pool and inside
1613 pattern branches. */
1615 static int lf = 100;
1617 /* Output code for ordinary branches. */
1619 const char *
1620 output_branch (int logic, rtx insn, rtx *operands)
1622 switch (get_attr_length (insn))
1624 case 6:
1625 /* This can happen if filling the delay slot has caused a forward
1626 branch to exceed its range (we could reverse it, but only
1627 when we know we won't overextend other branches; this should
1628 best be handled by relaxation).
 1629 It can also happen when other condbranches hoist delay slot insns
1630 from their destination, thus leading to code size increase.
1631 But the branch will still be in the range -4092..+4098 bytes. */
1633 if (! TARGET_RELAX)
1635 int label = lf++;
1636 /* The call to print_slot will clobber the operands. */
1637 rtx op0 = operands[0];
1639 /* If the instruction in the delay slot is annulled (true), then
1640 there is no delay slot where we can put it now. The only safe
1641 place for it is after the label. final will do that by default. */
1643 if (final_sequence
1644 && ! INSN_ANNULLED_BRANCH_P (XVECEXP (final_sequence, 0, 0))
1645 && get_attr_length (XVECEXP (final_sequence, 0, 1)))
1647 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
1648 ASSEMBLER_DIALECT ? "/" : ".", label);
1649 print_slot (final_sequence);
1651 else
1652 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
1654 output_asm_insn ("bra\t%l0", &op0);
1655 fprintf (asm_out_file, "\tnop\n");
1656 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1658 return "";
1660 /* When relaxing, handle this like a short branch. The linker
1661 will fix it up if it still doesn't fit after relaxation. */
1662 case 2:
1663 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
1665 /* These are for SH2e, in which we have to account for the
1666 extra nop because of the hardware bug in annulled branches. */
1667 case 8:
1668 if (! TARGET_RELAX)
1670 int label = lf++;
1672 gcc_assert (!final_sequence
1673 || !(INSN_ANNULLED_BRANCH_P
1674 (XVECEXP (final_sequence, 0, 0))));
1675 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
1676 logic ? "f" : "t",
1677 ASSEMBLER_DIALECT ? "/" : ".", label);
1678 fprintf (asm_out_file, "\tnop\n");
1679 output_asm_insn ("bra\t%l0", operands);
1680 fprintf (asm_out_file, "\tnop\n");
1681 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
1683 return "";
1685 /* When relaxing, fall through. */
1686 case 4:
1688 char buffer[10];
1690 sprintf (buffer, "b%s%ss\t%%l0",
1691 logic ? "t" : "f",
1692 ASSEMBLER_DIALECT ? "/" : ".");
1693 output_asm_insn (buffer, &operands[0]);
1694 return "nop";
1697 default:
1698 /* There should be no longer branches now - that would
1699 indicate that something has destroyed the branches set
1700 up in machine_dependent_reorg. */
1701 gcc_unreachable ();
1705 const char *
1706 output_branchy_insn (enum rtx_code code, const char *template,
1707 rtx insn, rtx *operands)
1709 rtx next_insn = NEXT_INSN (insn);
1711 if (next_insn && GET_CODE (next_insn) == JUMP_INSN && condjump_p (next_insn))
1713 rtx src = SET_SRC (PATTERN (next_insn));
1714 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
1716 /* Following branch not taken */
1717 operands[9] = gen_label_rtx ();
1718 emit_label_after (operands[9], next_insn);
1719 INSN_ADDRESSES_NEW (operands[9],
1720 INSN_ADDRESSES (INSN_UID (next_insn))
1721 + get_attr_length (next_insn));
1722 return template;
1724 else
1726 int offset = (branch_dest (next_insn)
1727 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
1728 if (offset >= -252 && offset <= 258)
1730 if (GET_CODE (src) == IF_THEN_ELSE)
1731 /* branch_true */
1732 src = XEXP (src, 1);
1733 operands[9] = src;
1734 return template;
1738 operands[9] = gen_label_rtx ();
1739 emit_label_after (operands[9], insn);
1740 INSN_ADDRESSES_NEW (operands[9],
1741 INSN_ADDRESSES (INSN_UID (insn))
1742 + get_attr_length (insn));
1743 return template;
1746 const char *
1747 output_ieee_ccmpeq (rtx insn, rtx *operands)
1749 return output_branchy_insn (NE, "bt\t%l9\n\tfcmp/eq\t%1,%0",
1750 insn, operands);
1753 /* Output the start of the assembler file. */
1755 static void
1756 sh_file_start (void)
1758 default_file_start ();
1760 #ifdef SYMBIAN
1761 /* Declare the .directive section before it is used. */
1762 fputs ("\t.section .directive, \"SM\", @progbits, 1\n", asm_out_file);
1763 fputs ("\t.asciz \"#<SYMEDIT>#\\n\"\n", asm_out_file);
1764 #endif
1766 if (TARGET_ELF)
1767 /* We need to show the text section with the proper
1768 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
1769 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
1770 will complain. We can teach GAS specifically about the
1771 default attributes for our choice of text section, but
1772 then we would have to change GAS again if/when we change
1773 the text section name. */
1774 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
1775 else
1776 /* Switch to the data section so that the coffsem symbol
1777 isn't in the text section. */
1778 switch_to_section (data_section);
1780 if (TARGET_LITTLE_ENDIAN)
1781 fputs ("\t.little\n", asm_out_file);
1783 if (!TARGET_ELF)
1785 if (TARGET_SHCOMPACT)
1786 fputs ("\t.mode\tSHcompact\n", asm_out_file);
1787 else if (TARGET_SHMEDIA)
1788 fprintf (asm_out_file, "\t.mode\tSHmedia\n\t.abi\t%i\n",
1789 TARGET_SHMEDIA64 ? 64 : 32);
1793 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
1795 static bool
1796 unspec_caller_rtx_p (rtx pat)
1798 switch (GET_CODE (pat))
1800 case CONST:
1801 return unspec_caller_rtx_p (XEXP (pat, 0));
1802 case PLUS:
1803 case MINUS:
1804 if (unspec_caller_rtx_p (XEXP (pat, 0)))
1805 return true;
1806 return unspec_caller_rtx_p (XEXP (pat, 1));
1807 case UNSPEC:
1808 if (XINT (pat, 1) == UNSPEC_CALLER)
1809 return true;
1810 default:
1811 break;
1814 return false;
1817 /* Indicate that INSN cannot be duplicated. This is true for insn
1818 that generates a unique label. */
1820 static bool
1821 sh_cannot_copy_insn_p (rtx insn)
1823 rtx pat;
1825 if (!reload_completed || !flag_pic)
1826 return false;
1828 if (GET_CODE (insn) != INSN)
1829 return false;
1830 if (asm_noperands (insn) >= 0)
1831 return false;
1833 pat = PATTERN (insn);
1834 if (GET_CODE (pat) != SET)
1835 return false;
1836 pat = SET_SRC (pat);
1838 if (unspec_caller_rtx_p (pat))
1839 return true;
1841 return false;
1844 /* Actual number of instructions used to make a shift by N. */
1845 static const char ashiftrt_insns[] =
1846 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
1848 /* Left shift and logical right shift are the same. */
1849 static const char shift_insns[] =
1850 { 0,1,1,2,2,3,3,4,1,2,2,3,3,4,3,3,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1852 /* Individual shift amounts needed to get the above length sequences.
1853 One bit right shifts clobber the T bit, so when possible, put one bit
1854 shifts in the middle of the sequence, so the ends are eligible for
1855 branch delay slots. */
1856 static const short shift_amounts[32][5] = {
1857 {0}, {1}, {2}, {2, 1},
1858 {2, 2}, {2, 1, 2}, {2, 2, 2}, {2, 2, 1, 2},
1859 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1860 {8, 2, 2}, {8, 2, 1, 2}, {8, -2, 8}, {8, -1, 8},
1861 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1862 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1863 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1864 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
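/* A worked example (illustrative, derived from the tables above): a
   constant left shift by 13 uses shift_amounts[13] = {8, 2, 1, 2}, four
   single-shift insns whose amounts sum to 13, matching
   shift_insns[13] == 4; the lone one-bit shift sits in the middle of the
   sequence so the first and last insns stay eligible for delay slots.  */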
1866 /* Likewise, but for shift amounts < 16, up to three highmost bits
1867 might be clobbered. This is typically used when combined with some
1868 kind of sign or zero extension. */
1870 static const char ext_shift_insns[] =
1871 { 0,1,1,2,2,3,2,2,1,2,2,3,3,3,2,2,1,2,2,3,3,4,3,3,2,3,3,4,4,4,3,3};
1873 static const short ext_shift_amounts[32][4] = {
1874 {0}, {1}, {2}, {2, 1},
1875 {2, 2}, {2, 1, 2}, {8, -2}, {8, -1},
1876 {8}, {8, 1}, {8, 2}, {8, 1, 2},
1877 {8, 2, 2}, {16, -2, -1}, {16, -2}, {16, -1},
1878 {16}, {16, 1}, {16, 2}, {16, 1, 2},
1879 {16, 2, 2}, {16, 2, 1, 2}, {16, -2, 8}, {16, -1, 8},
1880 {16, 8}, {16, 1, 8}, {16, 8, 2}, {16, 8, 1, 2},
1881 {16, 8, 2, 2}, {16, -1, -2, 16}, {16, -2, 16}, {16, -1, 16}};
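/* A worked example (illustrative): ext_shift_amounts[6] = {8, -2}, i.e.
   shift left by 8 and then, since the amount is negative, logically right
   by 2, a net left shift of 6 in ext_shift_insns[6] == 2 insns instead of
   shift_insns[6] == 3; the two highmost bits this clobbers are among the
   bits the caller has said do not matter.  */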
1883 /* Assuming we have a value that has been sign-extended by at least one bit,
1884 can we use the ext_shift_amounts with the last shift turned to an arithmetic shift
1885 to shift it by N without data loss, and quicker than by other means? */
1886 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
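/* ((n) | 8) == 15 holds exactly for n == 7 and n == 15, whose entries
   above are {8, -1} and {16, -1}; in just those cases the trailing
   one-bit right shift can be turned into an arithmetic shift as the
   comment above describes.  */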
1888 /* This is used in length attributes in sh.md to help compute the length
1889 of arbitrary constant shift instructions. */
1892 shift_insns_rtx (rtx insn)
1894 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
1895 int shift_count = INTVAL (XEXP (set_src, 1));
1896 enum rtx_code shift_code = GET_CODE (set_src);
1898 switch (shift_code)
1900 case ASHIFTRT:
1901 return ashiftrt_insns[shift_count];
1902 case LSHIFTRT:
1903 case ASHIFT:
1904 return shift_insns[shift_count];
1905 default:
1906 gcc_unreachable ();
1910 /* Return the cost of a shift. */
1912 static inline int
1913 shiftcosts (rtx x)
1915 int value;
1917 if (TARGET_SHMEDIA)
1918 return 1;
1920 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
1922 if (GET_MODE (x) == DImode
1923 && GET_CODE (XEXP (x, 1)) == CONST_INT
1924 && INTVAL (XEXP (x, 1)) == 1)
1925 return 2;
1927 /* Everything else is invalid, because there is no pattern for it. */
1928 return 10000;
1930 /* If shifting by a non-constant amount, this will be expensive. */
1931 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1932 return SH_DYNAMIC_SHIFT_COST;
1934 value = INTVAL (XEXP (x, 1));
1936 /* Otherwise, return the true cost in instructions. */
1937 if (GET_CODE (x) == ASHIFTRT)
1939 int cost = ashiftrt_insns[value];
1940 /* If SH3, then we put the constant in a reg and use shad. */
1941 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
1942 cost = 1 + SH_DYNAMIC_SHIFT_COST;
1943 return cost;
1945 else
1946 return shift_insns[value];
1949 /* Return the cost of an AND operation. */
1951 static inline int
1952 andcosts (rtx x)
1954 int i;
1956 /* Anding with a register is a single-cycle `and' instruction. */
1957 if (GET_CODE (XEXP (x, 1)) != CONST_INT)
1958 return 1;
1960 i = INTVAL (XEXP (x, 1));
1962 if (TARGET_SHMEDIA)
1964 if (GET_CODE (XEXP (x, 1)) == CONST_INT
1965 && (CONST_OK_FOR_I10 (INTVAL (XEXP (x, 1)))
1966 || CONST_OK_FOR_J16 (INTVAL (XEXP (x, 1)))))
1967 return 1;
1968 else
1969 return 1 + rtx_cost (XEXP (x, 1), AND);
1972 /* These constants are single cycle extu.[bw] instructions. */
1973 if (i == 0xff || i == 0xffff)
1974 return 1;
1975 /* Constants that can be used in an and immediate instruction in a single
1976 cycle, but this requires r0, so make it a little more expensive. */
1977 if (CONST_OK_FOR_K08 (i))
1978 return 2;
1979 /* Constants that can be loaded with a mov immediate and an and.
1980 This case is probably unnecessary. */
1981 if (CONST_OK_FOR_I08 (i))
1982 return 2;
1983 /* Any other constant requires a 2 cycle pc-relative load plus an and.
1984 This case is probably unnecessary. */
1985 return 3;
1988 /* Return the cost of an addition or a subtraction. */
1990 static inline int
1991 addsubcosts (rtx x)
1993 /* Adding a register is a single cycle insn. */
1994 if (GET_CODE (XEXP (x, 1)) == REG
1995 || GET_CODE (XEXP (x, 1)) == SUBREG)
1996 return 1;
1998 /* Likewise for small constants. */
1999 if (GET_CODE (XEXP (x, 1)) == CONST_INT
2000 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
2001 return 1;
2003 if (TARGET_SHMEDIA)
2004 switch (GET_CODE (XEXP (x, 1)))
2006 case CONST:
2007 case LABEL_REF:
2008 case SYMBOL_REF:
2009 return TARGET_SHMEDIA64 ? 5 : 3;
2011 case CONST_INT:
2012 if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1))))
2013 return 2;
2014 else if (CONST_OK_FOR_I16 (INTVAL (XEXP (x, 1)) >> 16))
2015 return 3;
2016 else if (CONST_OK_FOR_I16 ((INTVAL (XEXP (x, 1)) >> 16) >> 16))
2017 return 4;
2019 /* Fall through. */
2020 default:
2021 return 5;
2024 /* Any other constant requires a 2 cycle pc-relative load plus an
2025 addition. */
2026 return 3;
2029 /* Return the cost of a multiply. */
2030 static inline int
2031 multcosts (rtx x ATTRIBUTE_UNUSED)
2033 if (sh_multcost >= 0)
2034 return sh_multcost;
2035 if (TARGET_SHMEDIA)
2036 /* ??? We have a mul insn, but it has a latency of three, and doesn't
2037 accept constants. Ideally, we would use a cost of one or two and
2038 add the cost of the operand, but disregard the latter when inside loops
2039 and loop invariant code motion is still to follow.
2040 Using a multiply first and splitting it later if it's a loss
2041 doesn't work because of different sign / zero extension semantics
2042 of multiplies vs. shifts. */
2043 return TARGET_SMALLCODE ? 2 : 3;
2045 if (TARGET_SH2)
2047 /* We have a mul insn, so we can never take more than the mul and the
2048 read of the mac reg, but count more because of the latency and extra
2049 reg usage. */
2050 if (TARGET_SMALLCODE)
2051 return 2;
2052 return 3;
2055 /* If we're aiming at small code, then just count the number of
2056 insns in a multiply call sequence. */
2057 if (TARGET_SMALLCODE)
2058 return 5;
2060 /* Otherwise count all the insns in the routine we'd be calling too. */
2061 return 20;
2064 /* Compute a (partial) cost for rtx X. Return true if the complete
2065 cost has been computed, and false if subexpressions should be
2066 scanned. In either case, *TOTAL contains the cost result. */
2068 static bool
2069 sh_rtx_costs (rtx x, int code, int outer_code, int *total)
2071 switch (code)
2073 case CONST_INT:
2074 if (TARGET_SHMEDIA)
2076 if (INTVAL (x) == 0)
2077 *total = 0;
2078 else if (outer_code == AND && and_operand ((x), DImode))
2079 *total = 0;
2080 else if ((outer_code == IOR || outer_code == XOR
2081 || outer_code == PLUS)
2082 && CONST_OK_FOR_I10 (INTVAL (x)))
2083 *total = 0;
2084 else if (CONST_OK_FOR_I16 (INTVAL (x)))
2085 *total = COSTS_N_INSNS (outer_code != SET);
2086 else if (CONST_OK_FOR_I16 (INTVAL (x) >> 16))
2087 *total = COSTS_N_INSNS ((outer_code != SET) + 1);
2088 else if (CONST_OK_FOR_I16 ((INTVAL (x) >> 16) >> 16))
2089 *total = COSTS_N_INSNS ((outer_code != SET) + 2);
2090 else
2091 *total = COSTS_N_INSNS ((outer_code != SET) + 3);
2092 return true;
2094 if (CONST_OK_FOR_I08 (INTVAL (x)))
2095 *total = 0;
2096 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
2097 && CONST_OK_FOR_K08 (INTVAL (x)))
2098 *total = 1;
2099 else
2100 *total = 8;
2101 return true;
2103 case CONST:
2104 case LABEL_REF:
2105 case SYMBOL_REF:
2106 if (TARGET_SHMEDIA64)
2107 *total = COSTS_N_INSNS (4);
2108 else if (TARGET_SHMEDIA32)
2109 *total = COSTS_N_INSNS (2);
2110 else
2111 *total = 5;
2112 return true;
2114 case CONST_DOUBLE:
2115 if (TARGET_SHMEDIA)
2116 *total = COSTS_N_INSNS (4);
2117 else
2118 *total = 10;
2119 return true;
2120 case CONST_VECTOR:
2121 if (x == CONST0_RTX (GET_MODE (x)))
2122 *total = 0;
2123 else if (sh_1el_vec (x, VOIDmode))
2124 *total = outer_code != SET;
2125 if (sh_rep_vec (x, VOIDmode))
2126 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2127 + (outer_code != SET));
2128 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2129 return true;
2131 case PLUS:
2132 case MINUS:
2133 *total = COSTS_N_INSNS (addsubcosts (x));
2134 return true;
2136 case AND:
2137 *total = COSTS_N_INSNS (andcosts (x));
2138 return true;
2140 case MULT:
2141 *total = COSTS_N_INSNS (multcosts (x));
2142 return true;
2144 case ASHIFT:
2145 case ASHIFTRT:
2146 case LSHIFTRT:
2147 *total = COSTS_N_INSNS (shiftcosts (x));
2148 return true;
2150 case DIV:
2151 case UDIV:
2152 case MOD:
2153 case UMOD:
2154 *total = COSTS_N_INSNS (20);
2155 return true;
2157 case PARALLEL:
2158 if (sh_1el_vec (x, VOIDmode))
2159 *total = outer_code != SET;
2160 if (sh_rep_vec (x, VOIDmode))
2161 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
2162 + (outer_code != SET));
2163 *total = COSTS_N_INSNS (3) + (outer_code != SET);
2164 return true;
2166 case FLOAT:
2167 case FIX:
2168 *total = 100;
2169 return true;
2171 default:
2172 return false;
2176 /* Compute the cost of an address. For the SH, all valid addresses are
2177 the same cost. Use a slightly higher cost for reg + reg addressing,
2178 since it increases pressure on r0. */
2180 static int
2181 sh_address_cost (rtx X)
2183 return (GET_CODE (X) == PLUS
2184 && ! CONSTANT_P (XEXP (X, 1))
2185 && ! TARGET_SHMEDIA ? 1 : 0);
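/* For instance, a (plus (reg) (reg)) address costs 1 on non-SHmedia
   targets, while a plain register or a register plus constant
   displacement costs 0.  */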
2188 /* Code to expand a shift. */
2190 void
2191 gen_ashift (int type, int n, rtx reg)
2193 /* Negative values here come from the shift_amounts array. */
2194 if (n < 0)
2196 if (type == ASHIFT)
2197 type = LSHIFTRT;
2198 else
2199 type = ASHIFT;
2200 n = -n;
2203 switch (type)
2205 case ASHIFTRT:
2206 emit_insn (gen_ashrsi3_k (reg, reg, GEN_INT (n)));
2207 break;
2208 case LSHIFTRT:
2209 if (n == 1)
2210 emit_insn (gen_lshrsi3_m (reg, reg, GEN_INT (n)));
2211 else
2212 emit_insn (gen_lshrsi3_k (reg, reg, GEN_INT (n)));
2213 break;
2214 case ASHIFT:
2215 emit_insn (gen_ashlsi3_std (reg, reg, GEN_INT (n)));
2216 break;
2220 /* Same for HImode */
2222 void
2223 gen_ashift_hi (int type, int n, rtx reg)
2225 /* Negative values here come from the shift_amounts array. */
2226 if (n < 0)
2228 if (type == ASHIFT)
2229 type = LSHIFTRT;
2230 else
2231 type = ASHIFT;
2232 n = -n;
2235 switch (type)
2237 case ASHIFTRT:
2238 case LSHIFTRT:
2239 /* We don't have HImode right shift operations because using the
2240 ordinary 32 bit shift instructions for that doesn't generate proper
2241 zero/sign extension.
2242 gen_ashift_hi is only called in contexts where we know that the
2243 sign extension works out correctly. */
2245 int offset = 0;
2246 if (GET_CODE (reg) == SUBREG)
2248 offset = SUBREG_BYTE (reg);
2249 reg = SUBREG_REG (reg);
2251 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
2252 break;
2254 case ASHIFT:
2255 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
2256 break;
2260 /* Output RTL to split a constant shift into its component SH constant
2261 shift instructions. */
2263 void
2264 gen_shifty_op (int code, rtx *operands)
2266 int value = INTVAL (operands[2]);
2267 int max, i;
2269 /* Truncate the shift count in case it is out of bounds. */
2270 value = value & 0x1f;
2272 if (value == 31)
2274 if (code == LSHIFTRT)
2276 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
2277 emit_insn (gen_movt (operands[0]));
2278 return;
2280 else if (code == ASHIFT)
2282 /* There is a two instruction sequence for 31 bit left shifts,
2283 but it requires r0. */
2284 if (GET_CODE (operands[0]) == REG && REGNO (operands[0]) == 0)
2286 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
2287 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
2288 return;
2292 else if (value == 0)
2294 /* This can happen even when optimizing, if there were subregs before
2295 reload. Don't output a nop here, as this is never optimized away;
2296 use a no-op move instead. */
2297 emit_insn (gen_rtx_SET (VOIDmode, operands[0], operands[0]));
2298 return;
2301 max = shift_insns[value];
2302 for (i = 0; i < max; i++)
2303 gen_ashift (code, shift_amounts[value][i], operands[0]);
2306 /* Same as above, but optimized for values where the topmost bits don't
2307 matter. */
2309 void
2310 gen_shifty_hi_op (int code, rtx *operands)
2312 int value = INTVAL (operands[2]);
2313 int max, i;
2314 void (*gen_fun) (int, int, rtx);
2316 /* This operation is used by and_shl for SImode values with a few
2317 high bits known to be cleared. */
2318 value &= 31;
2319 if (value == 0)
2321 emit_insn (gen_nop ());
2322 return;
2325 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
2326 if (code == ASHIFT)
2328 max = ext_shift_insns[value];
2329 for (i = 0; i < max; i++)
2330 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2332 else
2333 /* When shifting right, emit the shifts in reverse order, so that
2334 solitary negative values come first. */
2335 for (i = ext_shift_insns[value] - 1; i >= 0; i--)
2336 gen_fun (code, ext_shift_amounts[value][i], operands[0]);
2339 /* Output RTL for an arithmetic right shift. */
2341 /* ??? Rewrite to use super-optimizer sequences. */
2344 expand_ashiftrt (rtx *operands)
2346 rtx wrk;
2347 char func[18];
2348 int value;
2350 if (TARGET_SH3)
2352 if (GET_CODE (operands[2]) != CONST_INT)
2354 rtx count = copy_to_mode_reg (SImode, operands[2]);
2355 emit_insn (gen_negsi2 (count, count));
2356 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2357 return 1;
2359 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
2360 > 1 + SH_DYNAMIC_SHIFT_COST)
2362 rtx count
2363 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
2364 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
2365 return 1;
2368 if (GET_CODE (operands[2]) != CONST_INT)
2369 return 0;
2371 value = INTVAL (operands[2]) & 31;
2373 if (value == 31)
2375 /* If we are called from abs expansion, arrange things so that
2376 we can use a single MT instruction that doesn't clobber the source,
2377 if LICM can hoist out the load of the constant zero. */
2378 if (currently_expanding_to_rtl)
2380 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
2381 operands[1]));
2382 emit_insn (gen_mov_neg_si_t (operands[0]));
2383 return 1;
2385 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
2386 return 1;
2388 else if (value >= 16 && value <= 19)
2390 wrk = gen_reg_rtx (SImode);
2391 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
2392 value -= 16;
2393 while (value--)
2394 gen_ashift (ASHIFTRT, 1, wrk);
2395 emit_move_insn (operands[0], wrk);
2396 return 1;
2398 /* Expand a short sequence inline, longer call a magic routine. */
2399 else if (value <= 5)
2401 wrk = gen_reg_rtx (SImode);
2402 emit_move_insn (wrk, operands[1]);
2403 while (value--)
2404 gen_ashift (ASHIFTRT, 1, wrk);
2405 emit_move_insn (operands[0], wrk);
2406 return 1;
2409 wrk = gen_reg_rtx (Pmode);
2411 /* Load the value into an arg reg and call a helper. */
2412 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
2413 sprintf (func, "__ashiftrt_r4_%d", value);
2414 function_symbol (wrk, func, SFUNC_STATIC);
2415 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk));
2416 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
2417 return 1;
2421 sh_dynamicalize_shift_p (rtx count)
2423 return shift_insns[INTVAL (count)] > 1 + SH_DYNAMIC_SHIFT_COST;
2426 /* Try to find a good way to implement the combiner pattern
2427 [(set (match_operand:SI 0 "register_operand" "r")
2428 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2429 (match_operand:SI 2 "const_int_operand" "n"))
2430 (match_operand:SI 3 "const_int_operand" "n"))) .
2431 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
2432 return 0 for simple right / left or left/right shift combination.
2433 return 1 for a combination of shifts with zero_extend.
2434 return 2 for a combination of shifts with an AND that needs r0.
2435 return 3 for a combination of shifts with an AND that needs an extra
2436 scratch register, when the three highmost bits of the AND mask are clear.
2437 return 4 for a combination of shifts with an AND that needs an extra
2438 scratch register, when any of the three highmost bits of the AND mask
2439 is set.
2440 If ATTRP is set, store an initial right shift width in ATTRP[0],
2441 and the instruction length in ATTRP[1] . These values are not valid
2442 when returning 0.
2443 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
2444 shift_amounts for the last shift value that is to be used before the
2445 sign extend. */
2447 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
2449 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
2450 int left = INTVAL (left_rtx), right;
2451 int best = 0;
2452 int cost, best_cost = 10000;
2453 int best_right = 0, best_len = 0;
2454 int i;
2455 int can_ext;
2457 if (left < 0 || left > 31)
2458 return 0;
2459 if (GET_CODE (mask_rtx) == CONST_INT)
2460 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
2461 else
2462 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
2463 /* Can this be expressed as a right shift / left shift pair? */
2464 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
2465 right = exact_log2 (lsb);
2466 mask2 = ~(mask + lsb - 1);
2467 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
2468 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
2469 if (! mask2)
2470 best_cost = shift_insns[right] + shift_insns[right + left];
2471 /* mask has no trailing zeroes <==> ! right */
2472 else if (! right && mask2 == ~(lsb2 - 1))
2474 int late_right = exact_log2 (lsb2);
2475 best_cost = shift_insns[left + late_right] + shift_insns[late_right];
2477 /* Try to use zero extend. */
2478 if (mask2 == ~(lsb2 - 1))
2480 int width, first;
2482 for (width = 8; width <= 16; width += 8)
2484 /* Can we zero-extend right away? */
2485 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
2487 cost
2488 = 1 + ext_shift_insns[right] + ext_shift_insns[left + right];
2489 if (cost < best_cost)
2491 best = 1;
2492 best_cost = cost;
2493 best_right = right;
2494 best_len = cost;
2495 if (attrp)
2496 attrp[2] = -1;
2498 continue;
2500 /* ??? Could try to put zero extend into initial right shift,
2501 or even shift a bit left before the right shift. */
2502 /* Determine value of first part of left shift, to get to the
2503 zero extend cut-off point. */
2504 first = width - exact_log2 (lsb2) + right;
2505 if (first >= 0 && right + left - first >= 0)
2507 cost = ext_shift_insns[right] + ext_shift_insns[first] + 1
2508 + ext_shift_insns[right + left - first];
2509 if (cost < best_cost)
2511 best = 1;
2512 best_cost = cost;
2513 best_right = right;
2514 best_len = cost;
2515 if (attrp)
2516 attrp[2] = first;
2521 /* Try to use r0 AND pattern */
2522 for (i = 0; i <= 2; i++)
2524 if (i > right)
2525 break;
2526 if (! CONST_OK_FOR_K08 (mask >> i))
2527 continue;
2528 cost = (i != 0) + 2 + ext_shift_insns[left + i];
2529 if (cost < best_cost)
2531 best = 2;
2532 best_cost = cost;
2533 best_right = i;
2534 best_len = cost - 1;
2537 /* Try to use a scratch register to hold the AND operand. */
2538 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
2539 for (i = 0; i <= 2; i++)
2541 if (i > right)
2542 break;
2543 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
2544 + (can_ext ? ext_shift_insns : shift_insns)[left + i];
2545 if (cost < best_cost)
2547 best = 4 - can_ext;
2548 best_cost = cost;
2549 best_right = i;
2550 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
2554 if (attrp)
2556 attrp[0] = best_right;
2557 attrp[1] = best_len;
2559 return best;
2562 /* This is used in length attributes of the unnamed instructions
2563 corresponding to shl_and_kind return values of 1 and 2. */
2565 shl_and_length (rtx insn)
2567 rtx set_src, left_rtx, mask_rtx;
2568 int attributes[3];
2570 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2571 left_rtx = XEXP (XEXP (set_src, 0), 1);
2572 mask_rtx = XEXP (set_src, 1);
2573 shl_and_kind (left_rtx, mask_rtx, attributes);
2574 return attributes[1];
2577 /* This is used in length attribute of the and_shl_scratch instruction. */
2580 shl_and_scr_length (rtx insn)
2582 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2583 int len = shift_insns[INTVAL (XEXP (set_src, 1))];
2584 rtx op = XEXP (set_src, 0);
2585 len += shift_insns[INTVAL (XEXP (op, 1))] + 1;
2586 op = XEXP (XEXP (op, 0), 0);
2587 return len + shift_insns[INTVAL (XEXP (op, 1))];
2590 /* Generate rtl for instructions for which shl_and_kind advised a particular
2591 method of generating them, i.e. returned zero. */
2594 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
2596 int attributes[3];
2597 unsigned HOST_WIDE_INT mask;
2598 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
2599 int right, total_shift;
2600 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
2602 right = attributes[0];
2603 total_shift = INTVAL (left_rtx) + right;
2604 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
2605 switch (kind)
2607 default:
2608 return -1;
2609 case 1:
2611 int first = attributes[2];
2612 rtx operands[3];
2614 if (first < 0)
2616 emit_insn ((mask << right) <= 0xff
2617 ? gen_zero_extendqisi2 (dest,
2618 gen_lowpart (QImode, source))
2619 : gen_zero_extendhisi2 (dest,
2620 gen_lowpart (HImode, source)));
2621 source = dest;
2623 if (source != dest)
2624 emit_insn (gen_movsi (dest, source));
2625 operands[0] = dest;
2626 if (right)
2628 operands[2] = GEN_INT (right);
2629 gen_shifty_hi_op (LSHIFTRT, operands);
2631 if (first > 0)
2633 operands[2] = GEN_INT (first);
2634 gen_shifty_hi_op (ASHIFT, operands);
2635 total_shift -= first;
2636 mask <<= first;
2638 if (first >= 0)
2639 emit_insn (mask <= 0xff
2640 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
2641 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2642 if (total_shift > 0)
2644 operands[2] = GEN_INT (total_shift);
2645 gen_shifty_hi_op (ASHIFT, operands);
2647 break;
2649 case 4:
2650 shift_gen_fun = gen_shifty_op;
2651 case 3:
2652 /* If the topmost bit that matters is set, set the topmost bits
2653 that don't matter. This way, we might be able to get a shorter
2654 signed constant. */
2655 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
2656 mask |= (HOST_WIDE_INT) ~0 << (31 - total_shift);
2657 case 2:
2658 /* Don't expand fine-grained when combining, because that will
2659 make the pattern fail. */
2660 if (currently_expanding_to_rtl
2661 || reload_in_progress || reload_completed)
2663 rtx operands[3];
2665 /* Cases 3 and 4 should be handled by this split
2666 only while combining */
2667 gcc_assert (kind <= 2);
2668 if (right)
2670 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
2671 source = dest;
2673 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
2674 if (total_shift)
2676 operands[0] = dest;
2677 operands[1] = dest;
2678 operands[2] = GEN_INT (total_shift);
2679 shift_gen_fun (ASHIFT, operands);
2681 break;
2683 else
2685 int neg = 0;
2686 if (kind != 4 && total_shift < 16)
2688 neg = -ext_shift_amounts[total_shift][1];
2689 if (neg > 0)
2690 neg -= ext_shift_amounts[total_shift][2];
2691 else
2692 neg = 0;
2694 emit_insn (gen_and_shl_scratch (dest, source,
2695 GEN_INT (right),
2696 GEN_INT (mask),
2697 GEN_INT (total_shift + neg),
2698 GEN_INT (neg)));
2699 emit_insn (gen_movsi (dest, dest));
2700 break;
2703 return 0;
2706 /* Try to find a good way to implement the combiner pattern
2707 [(set (match_operand:SI 0 "register_operand" "=r")
2708 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
2709 (match_operand:SI 2 "const_int_operand" "n"))
2710 (match_operand:SI 3 "const_int_operand" "n")
2711 (const_int 0)))
2712 (clobber (reg:SI T_REG))]
2713 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
2714 return 0 for simple left / right shift combination.
2715 return 1 for left shift / 8 bit sign extend / left shift.
2716 return 2 for left shift / 16 bit sign extend / left shift.
2717 return 3 for left shift / 8 bit sign extend / shift / sign extend.
2718 return 4 for left shift / 16 bit sign extend / shift / sign extend.
2719 return 5 for left shift / 16 bit sign extend / right shift
2720 return 6 for < 8 bit sign extend / left shift.
2721 return 7 for < 8 bit sign extend / left shift / single right shift.
2722 If COSTP is nonzero, assign the calculated cost to *COSTP. */
2725 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
2727 int left, size, insize, ext;
2728 int cost = 0, best_cost;
2729 int kind;
2731 left = INTVAL (left_rtx);
2732 size = INTVAL (size_rtx);
2733 insize = size - left;
2734 gcc_assert (insize > 0);
2735 /* Default to left / right shift. */
2736 kind = 0;
2737 best_cost = shift_insns[32 - insize] + ashiftrt_insns[32 - size];
2738 if (size <= 16)
2740 /* 16 bit shift / sign extend / 16 bit shift */
2741 cost = shift_insns[16 - insize] + 1 + ashiftrt_insns[16 - size];
2742 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
2743 below, by alternative 3 or something even better. */
2744 if (cost < best_cost)
2746 kind = 5;
2747 best_cost = cost;
2750 /* Try a plain sign extend between two shifts. */
2751 for (ext = 16; ext >= insize; ext -= 8)
2753 if (ext <= size)
2755 cost = ext_shift_insns[ext - insize] + 1 + shift_insns[size - ext];
2756 if (cost < best_cost)
2758 kind = ext / (unsigned) 8;
2759 best_cost = cost;
2762 /* Check if we can do a sloppy shift with a final signed shift
2763 restoring the sign. */
2764 if (EXT_SHIFT_SIGNED (size - ext))
2765 cost = ext_shift_insns[ext - insize] + ext_shift_insns[size - ext] + 1;
2766 /* If not, maybe it's still cheaper to do the second shift sloppy,
2767 and do a final sign extend? */
2768 else if (size <= 16)
2769 cost = ext_shift_insns[ext - insize] + 1
2770 + ext_shift_insns[size > ext ? size - ext : ext - size] + 1;
2771 else
2772 continue;
2773 if (cost < best_cost)
2775 kind = ext / (unsigned) 8 + 2;
2776 best_cost = cost;
2779 /* Check if we can sign extend in r0 */
2780 if (insize < 8)
2782 cost = 3 + shift_insns[left];
2783 if (cost < best_cost)
2785 kind = 6;
2786 best_cost = cost;
2788 /* Try the same with a final signed shift. */
2789 if (left < 31)
2791 cost = 3 + ext_shift_insns[left + 1] + 1;
2792 if (cost < best_cost)
2794 kind = 7;
2795 best_cost = cost;
2799 if (TARGET_SH3)
2801 /* Try to use a dynamic shift. */
2802 cost = shift_insns[32 - insize] + 1 + SH_DYNAMIC_SHIFT_COST;
2803 if (cost < best_cost)
2805 kind = 0;
2806 best_cost = cost;
2809 if (costp)
2810 *costp = cost;
2811 return kind;
2814 /* Function to be used in the length attribute of the instructions
2815 implementing this pattern. */
2818 shl_sext_length (rtx insn)
2820 rtx set_src, left_rtx, size_rtx;
2821 int cost;
2823 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2824 left_rtx = XEXP (XEXP (set_src, 0), 1);
2825 size_rtx = XEXP (set_src, 1);
2826 shl_sext_kind (left_rtx, size_rtx, &cost);
2827 return cost;
2830 /* Generate rtl for this pattern */
2833 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
2835 int kind;
2836 int left, size, insize, cost;
2837 rtx operands[3];
2839 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
2840 left = INTVAL (left_rtx);
2841 size = INTVAL (size_rtx);
2842 insize = size - left;
2843 switch (kind)
2845 case 1:
2846 case 2:
2847 case 3:
2848 case 4:
2850 int ext = kind & 1 ? 8 : 16;
2851 int shift2 = size - ext;
2853 /* Don't expand fine-grained when combining, because that will
2854 make the pattern fail. */
2855 if (! currently_expanding_to_rtl
2856 && ! reload_in_progress && ! reload_completed)
2858 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2859 emit_insn (gen_movsi (dest, source));
2860 break;
2862 if (dest != source)
2863 emit_insn (gen_movsi (dest, source));
2864 operands[0] = dest;
2865 if (ext - insize)
2867 operands[2] = GEN_INT (ext - insize);
2868 gen_shifty_hi_op (ASHIFT, operands);
2870 emit_insn (kind & 1
2871 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2872 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2873 if (kind <= 2)
2875 if (shift2)
2877 operands[2] = GEN_INT (shift2);
2878 gen_shifty_op (ASHIFT, operands);
2881 else
2883 if (shift2 > 0)
2885 if (EXT_SHIFT_SIGNED (shift2))
2887 operands[2] = GEN_INT (shift2 + 1);
2888 gen_shifty_op (ASHIFT, operands);
2889 operands[2] = const1_rtx;
2890 gen_shifty_op (ASHIFTRT, operands);
2891 break;
2893 operands[2] = GEN_INT (shift2);
2894 gen_shifty_hi_op (ASHIFT, operands);
2896 else if (shift2)
2898 operands[2] = GEN_INT (-shift2);
2899 gen_shifty_hi_op (LSHIFTRT, operands);
2901 emit_insn (size <= 8
2902 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
2903 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2905 break;
2907 case 5:
2909 int i = 16 - size;
2910 if (! currently_expanding_to_rtl
2911 && ! reload_in_progress && ! reload_completed)
2912 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2913 else
2915 operands[0] = dest;
2916 operands[2] = GEN_INT (16 - insize);
2917 gen_shifty_hi_op (ASHIFT, operands);
2918 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
2920 /* Don't use gen_ashrsi3 because it generates new pseudos. */
2921 while (--i >= 0)
2922 gen_ashift (ASHIFTRT, 1, dest);
2923 break;
2925 case 6:
2926 case 7:
2927 /* Don't expand fine-grained when combining, because that will
2928 make the pattern fail. */
2929 if (! currently_expanding_to_rtl
2930 && ! reload_in_progress && ! reload_completed)
2932 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
2933 emit_insn (gen_movsi (dest, source));
2934 break;
2936 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
2937 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
2938 emit_insn (gen_addsi3 (dest, dest, GEN_INT (-1 << (insize - 1))));
2939 operands[0] = dest;
2940 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
2941 gen_shifty_op (ASHIFT, operands);
2942 if (kind == 7)
2943 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
2944 break;
2945 default:
2946 return -1;
2948 return 0;
2951 /* Prefix a symbol_ref name with "datalabel". */
2954 gen_datalabel_ref (rtx sym)
2956 const char *str;
2958 if (GET_CODE (sym) == LABEL_REF)
2959 return gen_rtx_CONST (GET_MODE (sym),
2960 gen_rtx_UNSPEC (GET_MODE (sym),
2961 gen_rtvec (1, sym),
2962 UNSPEC_DATALABEL));
2964 gcc_assert (GET_CODE (sym) == SYMBOL_REF);
2966 str = XSTR (sym, 0);
2967 /* Share all SYMBOL_REF strings with the same value - that is important
2968 for cse. */
2969 str = IDENTIFIER_POINTER (get_identifier (str));
2970 XSTR (sym, 0) = str;
2972 return sym;
2976 static alloc_pool label_ref_list_pool;
2978 typedef struct label_ref_list_d
2980 rtx label;
2981 struct label_ref_list_d *next;
2982 } *label_ref_list_t;
2984 /* The SH cannot load a large constant into a register; constants have to
2985 come from a pc relative load. The reference of a pc relative load
2986 instruction must be less than 1k in front of the instruction. This
2987 means that we often have to dump a constant inside a function, and
2988 generate code to branch around it.
2990 It is important to minimize this, since the branches will slow things
2991 down and make things bigger.
2993 Worst case code looks like:
2995 mov.l L1,rn
2996 bra L2
2998 align
2999 L1: .long value
3003 mov.l L3,rn
3004 bra L4
3006 align
3007 L3: .long value
3011 We fix this by performing a scan before scheduling, which notices which
3012 instructions need to have their operands fetched from the constant table
3013 and builds the table.
3015 The algorithm is:
3017 scan, find an instruction which needs a pcrel move. Look forward, find the
3018 last barrier which is within MAX_COUNT bytes of the requirement.
3019 If there isn't one, make one. Process all the instructions between
3020 the find and the barrier.
3022 In the above example, we can tell that L3 is within 1k of L1, so
3023 the first move can be shrunk from the 3 insn+constant sequence into
3024 just 1 insn, and the constant moved to L3 to make:
3026 mov.l L1,rn
3028 mov.l L3,rn
3029 bra L4
3031 align
3032 L3:.long value
3033 L4:.long value
3035 Then the second move becomes the target for the shortening process. */
3037 typedef struct
3039 rtx value; /* Value in table. */
3040 rtx label; /* Label of value. */
3041 label_ref_list_t wend; /* End of window. */
3042 enum machine_mode mode; /* Mode of value. */
3044 /* True if this constant is accessed as part of a post-increment
3045 sequence. Note that HImode constants are never accessed in this way. */
3046 bool part_of_sequence_p;
3047 } pool_node;
3049 /* The maximum number of constants that can fit into one pool, since
3050 constants in the range 0..510 are at least 2 bytes long, and in the
3051 range from there to 1018 at least 4 bytes. */
3053 #define MAX_POOL_SIZE 372
3054 static pool_node pool_vector[MAX_POOL_SIZE];
3055 static int pool_size;
3056 static rtx pool_window_label;
3057 static int pool_window_last;
3059 static int max_labelno_before_reorg;
3061 /* ??? If we need a constant in HImode which is the truncated value of a
3062 constant we need in SImode, we could combine the two entries thus saving
3063 two bytes. Is this common enough to be worth the effort of implementing
3064 it? */
3066 /* ??? This stuff should be done at the same time that we shorten branches.
3067 As it is now, we must assume that all branches are the maximum size, and
3068 this causes us to almost always output constant pools sooner than
3069 necessary. */
3071 /* Add a constant to the pool and return its label. */
3073 static rtx
3074 add_constant (rtx x, enum machine_mode mode, rtx last_value)
3076 int i;
3077 rtx lab, new;
3078 label_ref_list_t ref, newref;
3080 /* First see if we've already got it. */
3081 for (i = 0; i < pool_size; i++)
3083 if (x->code == pool_vector[i].value->code
3084 && mode == pool_vector[i].mode)
3086 if (x->code == CODE_LABEL)
3088 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
3089 continue;
3091 if (rtx_equal_p (x, pool_vector[i].value))
3093 lab = new = 0;
3094 if (! last_value
3095 || ! i
3096 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
3098 new = gen_label_rtx ();
3099 LABEL_REFS (new) = pool_vector[i].label;
3100 pool_vector[i].label = lab = new;
3102 if (lab && pool_window_label)
3104 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3105 newref->label = pool_window_label;
3106 ref = pool_vector[pool_window_last].wend;
3107 newref->next = ref;
3108 pool_vector[pool_window_last].wend = newref;
3110 if (new)
3111 pool_window_label = new;
3112 pool_window_last = i;
3113 return lab;
3118 /* Need a new one. */
3119 pool_vector[pool_size].value = x;
3120 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
3122 lab = 0;
3123 pool_vector[pool_size - 1].part_of_sequence_p = true;
3125 else
3126 lab = gen_label_rtx ();
3127 pool_vector[pool_size].mode = mode;
3128 pool_vector[pool_size].label = lab;
3129 pool_vector[pool_size].wend = NULL;
3130 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
3131 if (lab && pool_window_label)
3133 newref = (label_ref_list_t) pool_alloc (label_ref_list_pool);
3134 newref->label = pool_window_label;
3135 ref = pool_vector[pool_window_last].wend;
3136 newref->next = ref;
3137 pool_vector[pool_window_last].wend = newref;
3139 if (lab)
3140 pool_window_label = lab;
3141 pool_window_last = pool_size;
3142 pool_size++;
3143 return lab;
3146 /* Output the literal table. START, if nonzero, is the first instruction
3147 this table is needed for, and also indicates that there is at least one
3148 casesi_worker_2 instruction; we have to emit the operand3 labels from
3149 these insns at a 4-byte aligned position. BARRIER is the barrier
3150 after which we are to place the table. */
3152 static void
3153 dump_table (rtx start, rtx barrier)
3155 rtx scan = barrier;
3156 int i;
3157 int need_align = 1;
3158 rtx lab;
3159 label_ref_list_t ref;
3160 int have_df = 0;
3162 /* Do two passes, first time dump out the HI sized constants. */
3164 for (i = 0; i < pool_size; i++)
3166 pool_node *p = &pool_vector[i];
3168 if (p->mode == HImode)
3170 if (need_align)
3172 scan = emit_insn_after (gen_align_2 (), scan);
3173 need_align = 0;
3175 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3176 scan = emit_label_after (lab, scan);
3177 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
3178 scan);
3179 for (ref = p->wend; ref; ref = ref->next)
3181 lab = ref->label;
3182 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3185 else if (p->mode == DFmode)
3186 have_df = 1;
3189 need_align = 1;
3191 if (start)
3193 scan = emit_insn_after (gen_align_4 (), scan);
3194 need_align = 0;
3195 for (; start != barrier; start = NEXT_INSN (start))
3196 if (GET_CODE (start) == INSN
3197 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
3199 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
3200 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
3202 scan = emit_label_after (lab, scan);
3205 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
3207 rtx align_insn = NULL_RTX;
3209 scan = emit_label_after (gen_label_rtx (), scan);
3210 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3211 need_align = 0;
3213 for (i = 0; i < pool_size; i++)
3215 pool_node *p = &pool_vector[i];
3217 switch (p->mode)
3219 case HImode:
3220 break;
3221 case SImode:
3222 case SFmode:
3223 if (align_insn && !p->part_of_sequence_p)
3225 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3226 emit_label_before (lab, align_insn);
3227 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
3228 align_insn);
3229 for (ref = p->wend; ref; ref = ref->next)
3231 lab = ref->label;
3232 emit_insn_before (gen_consttable_window_end (lab),
3233 align_insn);
3235 delete_insn (align_insn);
3236 align_insn = NULL_RTX;
3237 continue;
3239 else
3241 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3242 scan = emit_label_after (lab, scan);
3243 scan = emit_insn_after (gen_consttable_4 (p->value,
3244 const0_rtx), scan);
3245 need_align = ! need_align;
3247 break;
3248 case DFmode:
3249 if (need_align)
3251 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
3252 align_insn = scan;
3253 need_align = 0;
3255 case DImode:
3256 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3257 scan = emit_label_after (lab, scan);
3258 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3259 scan);
3260 break;
3261 default:
3262 gcc_unreachable ();
3265 if (p->mode != HImode)
3267 for (ref = p->wend; ref; ref = ref->next)
3269 lab = ref->label;
3270 scan = emit_insn_after (gen_consttable_window_end (lab),
3271 scan);
3276 pool_size = 0;
3279 for (i = 0; i < pool_size; i++)
3281 pool_node *p = &pool_vector[i];
3283 switch (p->mode)
3285 case HImode:
3286 break;
3287 case SImode:
3288 case SFmode:
3289 if (need_align)
3291 need_align = 0;
3292 scan = emit_label_after (gen_label_rtx (), scan);
3293 scan = emit_insn_after (gen_align_4 (), scan);
3295 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3296 scan = emit_label_after (lab, scan);
3297 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
3298 scan);
3299 break;
3300 case DFmode:
3301 case DImode:
3302 if (need_align)
3304 need_align = 0;
3305 scan = emit_label_after (gen_label_rtx (), scan);
3306 scan = emit_insn_after (gen_align_4 (), scan);
3308 for (lab = p->label; lab; lab = LABEL_REFS (lab))
3309 scan = emit_label_after (lab, scan);
3310 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
3311 scan);
3312 break;
3313 default:
3314 gcc_unreachable ();
3317 if (p->mode != HImode)
3319 for (ref = p->wend; ref; ref = ref->next)
3321 lab = ref->label;
3322 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
3327 scan = emit_insn_after (gen_consttable_end (), scan);
3328 scan = emit_barrier_after (scan);
3329 pool_size = 0;
3330 pool_window_label = NULL_RTX;
3331 pool_window_last = 0;
3334 /* Return nonzero if constant would be an ok source for a
3335 mov.w instead of a mov.l. */
3337 static int
3338 hi_const (rtx src)
3340 return (GET_CODE (src) == CONST_INT
3341 && INTVAL (src) >= -32768
3342 && INTVAL (src) <= 32767);
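/* The [-32768, 32767] window is presumably what a mov.w, which
   sign-extends its 16-bit operand, can materialize.  */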
3345 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
3347 /* Nonzero if the insn is a move instruction which needs to be fixed. */
3349 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
3350 CONST_DOUBLE input value is CONST_OK_FOR_I08. For an SFmode move, we don't
3351 need to fix it if the input value is CONST_OK_FOR_I08. */
3353 static int
3354 broken_move (rtx insn)
3356 if (GET_CODE (insn) == INSN)
3358 rtx pat = PATTERN (insn);
3359 if (GET_CODE (pat) == PARALLEL)
3360 pat = XVECEXP (pat, 0, 0);
3361 if (GET_CODE (pat) == SET
3362 /* We can load any 8 bit value if we don't care what the high
3363 order bits end up as. */
3364 && GET_MODE (SET_DEST (pat)) != QImode
3365 && (CONSTANT_P (SET_SRC (pat))
3366 /* Match mova_const. */
3367 || (GET_CODE (SET_SRC (pat)) == UNSPEC
3368 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
3369 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
3370 && ! (TARGET_SH2E
3371 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
3372 && (fp_zero_operand (SET_SRC (pat))
3373 || fp_one_operand (SET_SRC (pat)))
3374 /* ??? If this is a -m4 or -m4-single compilation, in general
3375 we don't know the current setting of fpscr, so disable fldi.
3376 There is an exception if this was a register-register move
3377 before reload - and hence it was ascertained that we have
3378 single precision setting - and in a post-reload optimization
3379 we changed this to do a constant load. In that case
3380 we don't have an r0 clobber, hence we must use fldi. */
3381 && (! TARGET_SH4 || TARGET_FMOVD
3382 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
3383 == SCRATCH))
3384 && GET_CODE (SET_DEST (pat)) == REG
3385 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
3386 && ! (TARGET_SH2A
3387 && GET_MODE (SET_DEST (pat)) == SImode
3388 && GET_CODE (SET_SRC (pat)) == CONST_INT
3389 && CONST_OK_FOR_I20 (INTVAL (SET_SRC (pat))))
3390 && (GET_CODE (SET_SRC (pat)) != CONST_INT
3391 || ! CONST_OK_FOR_I08 (INTVAL (SET_SRC (pat)))))
3392 return 1;
3395 return 0;
3398 static int
3399 mova_p (rtx insn)
3401 return (GET_CODE (insn) == INSN
3402 && GET_CODE (PATTERN (insn)) == SET
3403 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
3404 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
3405 /* Don't match mova_const. */
3406 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
3409 /* Fix up a mova from a switch that went out of range. */
3410 static void
3411 fixup_mova (rtx mova)
3413 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
3414 if (! flag_pic)
3416 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
3417 INSN_CODE (mova) = -1;
3419 else
3421 rtx worker = mova;
3422 rtx lab = gen_label_rtx ();
3423 rtx wpat, wpat0, wpat1, wsrc, diff;
3427 worker = NEXT_INSN (worker);
3428 gcc_assert (worker
3429 && GET_CODE (worker) != CODE_LABEL
3430 && GET_CODE (worker) != JUMP_INSN);
3431 } while (GET_CODE (worker) == NOTE
3432 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
3433 wpat = PATTERN (worker);
3434 wpat0 = XVECEXP (wpat, 0, 0);
3435 wpat1 = XVECEXP (wpat, 0, 1);
3436 wsrc = SET_SRC (wpat0);
3437 PATTERN (worker) = (gen_casesi_worker_2
3438 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
3439 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
3440 XEXP (wpat1, 0)));
3441 INSN_CODE (worker) = -1;
3442 diff = gen_rtx_MINUS (Pmode, XVECEXP (SET_SRC (PATTERN (mova)), 0, 0),
3443 gen_rtx_LABEL_REF (Pmode, lab));
3444 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, diff), UNSPEC_PIC);
3445 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
3446 INSN_CODE (mova) = -1;
3450 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
3451 *num_mova, and check if the new mova is not nested within the first one.
3452 Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
3453 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
3454 static int
3455 untangle_mova (int *num_mova, rtx *first_mova, rtx new_mova)
3457 int n_addr = 0; /* Initialization to shut up spurious warning. */
3458 int f_target, n_target = 0; /* Likewise. */
3460 if (optimize)
3462 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
3463 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
3464 if (n_addr > n_target || n_addr + 1022 < n_target)
3466 /* Change the mova into a load.
3467 broken_move will then return true for it. */
3468 fixup_mova (new_mova);
3469 return 1;
3472 if (!(*num_mova)++)
3474 *first_mova = new_mova;
3475 return 2;
3477 if (!optimize
3478 || ((f_target
3479 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
3480 >= n_target))
3481 return -1;
3483 (*num_mova)--;
3484 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
3485 > n_target - n_addr)
3487 fixup_mova (*first_mova);
3488 return 0;
3490 else
3492 fixup_mova (new_mova);
3493 return 1;
3497 /* Find the last barrier from insn FROM which is close enough to hold the
3498 constant pool. If we can't find one, then create one near the end of
3499 the range. */
3501 static rtx
3502 find_barrier (int num_mova, rtx mova, rtx from)
3504 int count_si = 0;
3505 int count_hi = 0;
3506 int found_hi = 0;
3507 int found_si = 0;
3508 int found_di = 0;
3509 int hi_align = 2;
3510 int si_align = 2;
3511 int leading_mova = num_mova;
3512 rtx barrier_before_mova = 0, found_barrier = 0, good_barrier = 0;
3513 int si_limit;
3514 int hi_limit;
3516 /* For HImode: range is 510, add 4 because pc counts from address of
3517 second instruction after this one, subtract 2 for the jump instruction
3518 that we may need to emit before the table, subtract 2 for the instruction
3519 that fills the jump delay slot (in very rare cases, reorg will take an
3520 instruction from after the constant pool or will leave the delay slot
3521 empty). This gives 510.
3522 For SImode: range is 1020, add 4 because pc counts from address of
3523 second instruction after this one, subtract 2 in case pc is 2 byte
3524 aligned, subtract 2 for the jump instruction that we may need to emit
3525 before the table, subtract 2 for the instruction that fills the jump
3526 delay slot. This gives 1018. */
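/* Spelled out: hi_limit = 510 + 4 - 2 - 2 = 510 and
   si_limit = 1020 + 4 - 2 - 2 - 2 = 1018, matching the assignments
   just below.  */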
3528 /* The branch will always be shortened now that the reference address for
3529 forward branches is the successor address, thus we need no longer make
3530 adjustments to the [sh]i_limit for -O0. */
3532 si_limit = 1018;
3533 hi_limit = 510;
3535 while (from && count_si < si_limit && count_hi < hi_limit)
3537 int inc = get_attr_length (from);
3538 int new_align = 1;
3540 /* If this is a label that existed at the time of the compute_alignments
3541 call, determine the alignment. N.B. When find_barrier recurses for
3542 an out-of-reach mova, we might see labels at the start of previously
3543 inserted constant tables. */
3544 if (GET_CODE (from) == CODE_LABEL
3545 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
3547 if (optimize)
3548 new_align = 1 << label_to_alignment (from);
3549 else if (GET_CODE (prev_nonnote_insn (from)) == BARRIER)
3550 new_align = 1 << barrier_align (from);
3551 else
3552 new_align = 1;
3553 inc = 0;
3555 /* In case we are scanning a constant table because of recursion, check
3556 for explicit alignments. If the table is long, we might be forced
3557 to emit the new table in front of it; the length of the alignment
3558 might be the last straw. */
3559 else if (GET_CODE (from) == INSN
3560 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
3561 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
3562 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
3563 /* When we find the end of a constant table, paste the new constant
3564 at the end. That is better than putting it in front because
3565 this way, we don't need extra alignment for adding a 4-byte-aligned
3566 mov(a) label to a 2/4 or 8/4 byte aligned table. */
3567 else if (GET_CODE (from) == INSN
3568 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
3569 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
3570 return from;
3572 if (GET_CODE (from) == BARRIER)
3575 found_barrier = from;
3577 /* If we are at the end of the function, or in front of an alignment
3578 instruction, we need not insert an extra alignment. We prefer
3579 this kind of barrier. */
3580 if (barrier_align (from) > 2)
3581 good_barrier = from;
3584 if (broken_move (from))
3586 rtx pat, src, dst;
3587 enum machine_mode mode;
3589 pat = PATTERN (from);
3590 if (GET_CODE (pat) == PARALLEL)
3591 pat = XVECEXP (pat, 0, 0);
3592 src = SET_SRC (pat);
3593 dst = SET_DEST (pat);
3594 mode = GET_MODE (dst);
3596 /* We must explicitly check the mode, because sometimes the
3597 front end will generate code to load unsigned constants into
3598 HImode targets without properly sign extending them. */
3599 if (mode == HImode
3600 || (mode == SImode && hi_const (src) && REGNO (dst) != FPUL_REG))
3602 found_hi += 2;
3603 /* We put the short constants before the long constants, so
3604 we must count the length of short constants in the range
3605 for the long constants. */
3606 /* ??? This isn't optimal, but is easy to do. */
3607 si_limit -= 2;
3609 else
3611 /* We dump DF/DI constants before SF/SI ones, because
3612 the limit is the same, but the alignment requirements
3613 are higher. We may waste up to 4 additional bytes
3614 for alignment, and the DF/DI constant may have
3615 another SF/SI constant placed before it. */
3616 if (TARGET_SHCOMPACT
3617 && ! found_di
3618 && (mode == DFmode || mode == DImode))
3620 found_di = 1;
3621 si_limit -= 8;
3623 while (si_align > 2 && found_si + si_align - 2 > count_si)
3624 si_align >>= 1;
3625 if (found_si > count_si)
3626 count_si = found_si;
3627 found_si += GET_MODE_SIZE (mode);
3628 if (num_mova)
3629 si_limit -= GET_MODE_SIZE (mode);
3633 if (mova_p (from))
3635 switch (untangle_mova (&num_mova, &mova, from))
3637 case 0: return find_barrier (0, 0, mova);
3638 case 2:
3640 leading_mova = 0;
3641 barrier_before_mova
3642 = good_barrier ? good_barrier : found_barrier;
3644 default: break;
3646 if (found_si > count_si)
3647 count_si = found_si;
3649 else if (GET_CODE (from) == JUMP_INSN
3650 && (GET_CODE (PATTERN (from)) == ADDR_VEC
3651 || GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC))
3653 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
3654 || (num_mova
3655 && (prev_nonnote_insn (from)
3656 == XEXP (MOVA_LABELREF (mova), 0))))
3657 num_mova--;
3658 if (barrier_align (next_real_insn (from)) == align_jumps_log)
3660 /* We have just passed the barrier in front of the
3661 ADDR_DIFF_VEC, which is stored in found_barrier. Since
3662 the ADDR_DIFF_VEC is accessed as data, just like our pool
3663 constants, this is a good opportunity to accommodate what
3664 we have gathered so far.
3665 If we waited any longer, we could end up at a barrier in
3666 front of code, which gives worse cache usage for separated
3667 instruction / data caches. */
3668 good_barrier = found_barrier;
3669 break;
3671 else
3673 rtx body = PATTERN (from);
3674 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
3677 /* For the SH1, we generate alignments even after jumps-around-jumps. */
3678 else if (GET_CODE (from) == JUMP_INSN
3679 && ! TARGET_SH2
3680 && ! TARGET_SMALLCODE)
3681 new_align = 4;
3683 if (found_si)
3685 count_si += inc;
3686 if (new_align > si_align)
3688 si_limit -= (count_si - 1) & (new_align - si_align);
3689 si_align = new_align;
3691 count_si = (count_si + new_align - 1) & -new_align;
3693 if (found_hi)
3695 count_hi += inc;
3696 if (new_align > hi_align)
3698 hi_limit -= (count_hi - 1) & (new_align - hi_align);
3699 hi_align = new_align;
3701 count_hi = (count_hi + new_align - 1) & -new_align;
3703 from = NEXT_INSN (from);
3706 if (num_mova)
3708 if (leading_mova)
3710 /* Try as we might, the leading mova is out of range. Change
3711 it into a load (which will become a pcload) and retry. */
3712 fixup_mova (mova);
3713 return find_barrier (0, 0, mova);
3715 else
3717 /* Insert the constant pool table before the mova instruction,
3718 to prevent the mova label reference from going out of range. */
3719 from = mova;
3720 good_barrier = found_barrier = barrier_before_mova;
3724 if (found_barrier)
3726 if (good_barrier && next_real_insn (found_barrier))
3727 found_barrier = good_barrier;
3729 else
3731 /* We didn't find a barrier in time to dump our stuff,
3732 so we'll make one. */
3733 rtx label = gen_label_rtx ();
3735 /* If we exceeded the range, then we must back up over the last
3736 instruction we looked at. Otherwise, we just need to undo the
3737 NEXT_INSN at the end of the loop. */
3738 if (count_hi > hi_limit || count_si > si_limit)
3739 from = PREV_INSN (PREV_INSN (from));
3740 else
3741 from = PREV_INSN (from);
3743 /* Walk back to be just before any jump or label.
3744 Putting it before a label reduces the number of times the branch
3745 around the constant pool table will be hit. Putting it before
3746 a jump makes it more likely that the bra delay slot will be
3747 filled. */
3748 while (GET_CODE (from) == JUMP_INSN || GET_CODE (from) == NOTE
3749 || GET_CODE (from) == CODE_LABEL)
3750 from = PREV_INSN (from);
3752 from = emit_jump_insn_after (gen_jump (label), from);
3753 JUMP_LABEL (from) = label;
3754 LABEL_NUSES (label) = 1;
3755 found_barrier = emit_barrier_after (from);
3756 emit_label_after (label, found_barrier);
3759 return found_barrier;
3762 /* If the instruction INSN is implemented by a special function, and we can
3763 positively find the register that is used to call the sfunc, and this
3764 register is not used anywhere else in this instruction - except as the
3765 destination of a set, return this register; else, return 0. */
3767 sfunc_uses_reg (rtx insn)
3769 int i;
3770 rtx pattern, part, reg_part, reg;
3772 if (GET_CODE (insn) != INSN)
3773 return 0;
3774 pattern = PATTERN (insn);
3775 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
3776 return 0;
3778 for (reg_part = 0, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3780 part = XVECEXP (pattern, 0, i);
3781 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
3782 reg_part = part;
3784 if (! reg_part)
3785 return 0;
3786 reg = XEXP (reg_part, 0);
3787 for (i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
3789 part = XVECEXP (pattern, 0, i);
3790 if (part == reg_part || GET_CODE (part) == CLOBBER)
3791 continue;
3792 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
3793 && GET_CODE (SET_DEST (part)) == REG)
3794 ? SET_SRC (part) : part)))
3795 return 0;
3797 return reg;
3800 /* See if the only way in which INSN uses REG is by calling it, or by
3801 setting it while calling it. Set *SET to a SET rtx if the register
3802 is set by INSN. */
3804 static int
3805 noncall_uses_reg (rtx reg, rtx insn, rtx *set)
3807 rtx pattern, reg2;
3809 *set = NULL_RTX;
3811 reg2 = sfunc_uses_reg (insn);
3812 if (reg2 && REGNO (reg2) == REGNO (reg))
3814 pattern = single_set (insn);
3815 if (pattern
3816 && GET_CODE (SET_DEST (pattern)) == REG
3817 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3818 *set = pattern;
3819 return 0;
3821 if (GET_CODE (insn) != CALL_INSN)
3823 /* We don't use rtx_equal_p because we don't care if the mode is
3824 different. */
3825 pattern = single_set (insn);
3826 if (pattern
3827 && GET_CODE (SET_DEST (pattern)) == REG
3828 && REGNO (reg) == REGNO (SET_DEST (pattern)))
3830 rtx par, part;
3831 int i;
3833 *set = pattern;
3834 par = PATTERN (insn);
3835 if (GET_CODE (par) == PARALLEL)
3836 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
3838 part = XVECEXP (par, 0, i);
3839 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
3840 return 1;
3842 return reg_mentioned_p (reg, SET_SRC (pattern));
3845 return 1;
3848 pattern = PATTERN (insn);
3850 if (GET_CODE (pattern) == PARALLEL)
3852 int i;
3854 for (i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
3855 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
3856 return 1;
3857 pattern = XVECEXP (pattern, 0, 0);
3860 if (GET_CODE (pattern) == SET)
3862 if (reg_mentioned_p (reg, SET_DEST (pattern)))
3864 /* We don't use rtx_equal_p, because we don't care if the
3865 mode is different. */
3866 if (GET_CODE (SET_DEST (pattern)) != REG
3867 || REGNO (reg) != REGNO (SET_DEST (pattern)))
3868 return 1;
3870 *set = pattern;
3873 pattern = SET_SRC (pattern);
3876 if (GET_CODE (pattern) != CALL
3877 || GET_CODE (XEXP (pattern, 0)) != MEM
3878 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
3879 return 1;
3881 return 0;
3884 /* Given X, a pattern of an insn or a part of it, return a mask of used
3885 general registers. Bits 0..15 mean that the respective registers
3886 are used as inputs in the instruction. Bits 16..31 mean that the
3887 registers 0..15, respectively, are used as outputs, or are clobbered.
3888 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
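/* A rough worked example: for (set (reg:SI 1) (reg:SI 2)) the result is
   regs_used (r2, 0) | regs_used (r1, 16) == (1 << 2) | (1 << (1 + 16)),
   i.e. r2 is recorded as an input and r1 as an output.  */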
3890 regs_used (rtx x, int is_dest)
3892 enum rtx_code code;
3893 const char *fmt;
3894 int i, used = 0;
3896 if (! x)
3897 return used;
3898 code = GET_CODE (x);
3899 switch (code)
3901 case REG:
3902 if (REGNO (x) < 16)
3903 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3904 << (REGNO (x) + is_dest));
3905 return 0;
3906 case SUBREG:
3908 rtx y = SUBREG_REG (x);
3910 if (GET_CODE (y) != REG)
3911 break;
3912 if (REGNO (y) < 16)
3913 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
3914 << (REGNO (y) +
3915 subreg_regno_offset (REGNO (y),
3916 GET_MODE (y),
3917 SUBREG_BYTE (x),
3918 GET_MODE (x)) + is_dest));
3919 return 0;
3921 case SET:
3922 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
3923 case RETURN:
3924 /* If there was a return value, it must have been indicated with USE. */
3925 return 0x00ffff00;
3926 case CLOBBER:
3927 is_dest = 1;
3928 break;
3929 case MEM:
3930 is_dest = 0;
3931 break;
3932 case CALL:
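      /* The constant presumably encodes r4..r7 (the argument registers) as
	 inputs in the low half of the mask and r0..r7 as clobbered outputs
	 in the high half.  */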
3933 used |= 0x00ff00f0;
3934 break;
3935 default:
3936 break;
3939 fmt = GET_RTX_FORMAT (code);
3941 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
3943 if (fmt[i] == 'E')
3945 register int j;
3946 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
3947 used |= regs_used (XVECEXP (x, i, j), is_dest);
3949 else if (fmt[i] == 'e')
3950 used |= regs_used (XEXP (x, i), is_dest);
3952 return used;
3955 /* Create an instruction that prevents redirection of a conditional branch
3956 to the destination of the JUMP with address ADDR.
3957 If the branch needs to be implemented as an indirect jump, try to find
3958 a scratch register for it.
3959 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
3960 If any preceding insn that doesn't fit into a delay slot is good enough,
3961 pass 1. Pass 2 if a definite blocking insn is needed.
3962 -1 is used internally to avoid deep recursion.
3963 If a blocking instruction is made or recognized, return it. */
3965 static rtx
3966 gen_block_redirect (rtx jump, int addr, int need_block)
3968 int dead = 0;
3969 rtx prev = prev_nonnote_insn (jump);
3970 rtx dest;
3972 /* First, check if we already have an instruction that satisfies our need. */
3973 if (prev && GET_CODE (prev) == INSN && ! INSN_DELETED_P (prev))
3975 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
3976 return prev;
3977 if (GET_CODE (PATTERN (prev)) == USE
3978 || GET_CODE (PATTERN (prev)) == CLOBBER
3979 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
3980 prev = jump;
3981 else if ((need_block &= ~1) < 0)
3982 return prev;
3983 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
3984 need_block = 0;
3986 if (GET_CODE (PATTERN (jump)) == RETURN)
3988 if (! need_block)
3989 return prev;
3990 /* Reorg even does nasty things with return insns that cause branches
3991 to go out of range - see find_end_label and callers. */
3992 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
3994 /* We can't use JUMP_LABEL here because it might be undefined
3995 when not optimizing. */
3996 dest = XEXP (SET_SRC (PATTERN (jump)), 0);
3997 /* If the branch is out of range, try to find a scratch register for it. */
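  /* The test below is the usual unsigned trick for a signed range check:
     with d = destination address - ADDR, evaluating d + 4092 as unsigned
     and comparing it against 4092 + 4098 rejects everything with d < -4092
     or d > 4098, i.e. displacements beyond roughly +/-4 KiB.  */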
3998 if (optimize
3999 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4000 > 4092 + 4098))
4002 rtx scan;
4003 /* Don't look for the stack pointer as a scratch register,
4004 it would cause trouble if an interrupt occurred. */
4005 unsigned try = 0x7fff, used;
4006 int jump_left = flag_expensive_optimizations + 1;
4008 /* It is likely that the most recent eligible instruction is wanted for
4009 the delay slot. Therefore, find out which registers it uses, and
4010 try to avoid using them. */
4012 for (scan = jump; (scan = PREV_INSN (scan)); )
4014 enum rtx_code code;
4016 if (INSN_DELETED_P (scan))
4017 continue;
4018 code = GET_CODE (scan);
4019 if (code == CODE_LABEL || code == JUMP_INSN)
4020 break;
4021 if (code == INSN
4022 && GET_CODE (PATTERN (scan)) != USE
4023 && GET_CODE (PATTERN (scan)) != CLOBBER
4024 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
4026 try &= ~regs_used (PATTERN (scan), 0);
4027 break;
4030 for (used = dead = 0, scan = JUMP_LABEL (jump);
4031 (scan = NEXT_INSN (scan)); )
4033 enum rtx_code code;
4035 if (INSN_DELETED_P (scan))
4036 continue;
4037 code = GET_CODE (scan);
4038 if (INSN_P (scan))
4040 used |= regs_used (PATTERN (scan), 0);
4041 if (code == CALL_INSN)
4042 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
4043 dead |= (used >> 16) & ~used;
4044 if (dead & try)
4046 dead &= try;
4047 break;
4049 if (code == JUMP_INSN)
4051 if (jump_left-- && simplejump_p (scan))
4052 scan = JUMP_LABEL (scan);
4053 else
4054 break;
4058 /* Mask out the stack pointer again, in case it was
4059 the only 'free' register we have found. */
4060 dead &= 0x7fff;
4062 /* If the immediate destination is still in range, check for possible
4063 threading with a jump beyond the delay slot insn.
4064 Don't check if we are called recursively; the jump has been or will be
4065 checked in a different invocation in that case. */
4067 else if (optimize && need_block >= 0)
4069 rtx next = next_active_insn (next_active_insn (dest));
4070 if (next && GET_CODE (next) == JUMP_INSN
4071 && GET_CODE (PATTERN (next)) == SET
4072 && recog_memoized (next) == CODE_FOR_jump_compact)
4074 dest = JUMP_LABEL (next);
4075 if (dest
4076 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
4077 > 4092 + 4098))
4078 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
4082 if (dead)
4084 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
4086 /* It would be nice if we could convert the jump into an indirect
4087 jump / far branch right now, thus exposing all constituent
4088 instructions to further optimization. However, reorg uses
4089 simplejump_p to determine if there is an unconditional jump where
4090 it should try to schedule instructions from the target of the
4091 branch; simplejump_p fails for indirect jumps even if they have
4092 a JUMP_LABEL. */
4093 rtx insn = emit_insn_before (gen_indirect_jump_scratch
4094 (reg, GEN_INT (INSN_UID (JUMP_LABEL (jump))))
4095 , jump);
4096 /* ??? We would like this to have the scope of the jump, but that
4097 scope will change when a delay slot insn of an inner scope is added.
4098 Hence, after delay slot scheduling, we'll have to expect
4099 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
4100 the jump. */
4102 INSN_LOCATOR (insn) = INSN_LOCATOR (jump);
4103 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
4104 return insn;
4106 else if (need_block)
4107 /* We can't use JUMP_LABEL here because it might be undefined
4108 when not optimizing. */
4109 return emit_insn_before (gen_block_branch_redirect
4110 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))))
4111 , jump);
4112 return prev;
4115 #define CONDJUMP_MIN -252
4116 #define CONDJUMP_MAX 262
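/* These limits presumably reflect the reach of a conditional branch: an
   8-bit signed displacement counted in 2-byte instructions and measured
   from a point just past the branch, which yields the asymmetric window
   used by split_branches below.  */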
4117 struct far_branch
4119 /* A label (to be placed) in front of the jump
4120 that jumps to our ultimate destination. */
4121 rtx near_label;
4122 /* Where we are going to insert it if we cannot move the jump any farther,
4123 or the jump itself if we have picked up an existing jump. */
4124 rtx insert_place;
4125 /* The ultimate destination. */
4126 rtx far_label;
4127 struct far_branch *prev;
4128 /* If the branch has already been created, its address;
4129 else the address of its first prospective user. */
4130 int address;
4133 static void gen_far_branch (struct far_branch *);
4134 enum mdep_reorg_phase_e mdep_reorg_phase;
4135 static void
4136 gen_far_branch (struct far_branch *bp)
4138 rtx insn = bp->insert_place;
4139 rtx jump;
4140 rtx label = gen_label_rtx ();
4141 int ok;
4143 emit_label_after (label, insn);
4144 if (bp->far_label)
4146 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
4147 LABEL_NUSES (bp->far_label)++;
4149 else
4150 jump = emit_jump_insn_after (gen_return (), insn);
4151 /* Emit a barrier so that reorg knows that any following instructions
4152 are not reachable via a fall-through path.
4153 But don't do this when not optimizing, since we wouldn't suppress the
4154 alignment for the barrier then, and could end up with out-of-range
4155 pc-relative loads. */
4156 if (optimize)
4157 emit_barrier_after (jump);
4158 emit_label_after (bp->near_label, insn);
4159 JUMP_LABEL (jump) = bp->far_label;
4160 ok = invert_jump (insn, label, 1);
4161 gcc_assert (ok);
4163 /* If we are branching around a jump (rather than a return), prevent
4164 reorg from using an insn from the jump target as the delay slot insn -
4165 when reorg did this, it pessimized code (we'd rather hide the delay slot)
4166 and it could cause branches to go out of range. */
4167 if (bp->far_label)
4168 (emit_insn_after
4169 (gen_stuff_delay_slot
4170 (GEN_INT (INSN_UID (XEXP (SET_SRC (PATTERN (jump)), 0))),
4171 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
4172 insn));
4173 /* Prevent reorg from undoing our splits. */
4174 gen_block_redirect (jump, bp->address += 2, 2);
4177 /* Fix up ADDR_DIFF_VECs. */
4178 void
4179 fixup_addr_diff_vecs (rtx first)
4181 rtx insn;
4183 for (insn = first; insn; insn = NEXT_INSN (insn))
4185 rtx vec_lab, pat, prev, prevpat, x, braf_label;
4187 if (GET_CODE (insn) != JUMP_INSN
4188 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
4189 continue;
4190 pat = PATTERN (insn);
4191 vec_lab = XEXP (XEXP (pat, 0), 0);
4193 /* Search for the matching casesi_jump_2. */
4194 for (prev = vec_lab; ; prev = PREV_INSN (prev))
4196 if (GET_CODE (prev) != JUMP_INSN)
4197 continue;
4198 prevpat = PATTERN (prev);
4199 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
4200 continue;
4201 x = XVECEXP (prevpat, 0, 1);
4202 if (GET_CODE (x) != USE)
4203 continue;
4204 x = XEXP (x, 0);
4205 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
4206 break;
4208 /* FIXME: This is a bug in the optimizer, but it seems harmless
4209 to just avoid panicking. */
4210 if (!prev)
4211 continue;
4213 /* Emit the reference label of the braf where it belongs, right after
4214 the casesi_jump_2 (i.e. braf). */
4215 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
4216 emit_label_after (braf_label, prev);
4218 /* Fix up the ADDR_DIFF_VEC to be relative
4219 to the reference address of the braf. */
4220 XEXP (XEXP (pat, 0), 0) = braf_label;
4224 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
4225 a barrier. Return the base 2 logarithm of the desired alignment. */
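/* E.g. a return value of 2 requests 2^2 = 4-byte alignment, and 0 requests
   no extra alignment at all.  */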
4227 barrier_align (rtx barrier_or_label)
4229 rtx next = next_real_insn (barrier_or_label), pat, prev;
4230 int slot, credit, jump_to_next = 0;
4232 if (! next)
4233 return 0;
4235 pat = PATTERN (next);
4237 if (GET_CODE (pat) == ADDR_DIFF_VEC)
4238 return 2;
4240 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
4241 /* This is a barrier in front of a constant table. */
4242 return 0;
4244 prev = prev_real_insn (barrier_or_label);
4245 if (GET_CODE (PATTERN (prev)) == ADDR_DIFF_VEC)
4247 pat = PATTERN (prev);
4248 /* If this is a very small table, we want to keep the alignment after
4249 the table to the minimum for proper code alignment. */
4250 return ((TARGET_SMALLCODE
4251 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
4252 <= (unsigned) 1 << (CACHE_LOG - 2)))
4253 ? 1 << TARGET_SHMEDIA : align_jumps_log);
4256 if (TARGET_SMALLCODE)
4257 return 0;
4259 if (! TARGET_SH2 || ! optimize)
4260 return align_jumps_log;
4262 /* When fixing up pcloads, a constant table might be inserted just before
4263 the basic block that ends with the barrier. Thus, we can't trust the
4264 instruction lengths before that. */
4265 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
4267 /* Check if there is an immediately preceding branch to the insn beyond
4268 the barrier. We must weigh the cost of discarding useful information
4269 from the current cache line when executing this branch and there is
4270 an alignment, against that of fetching unneeded insns in front of the
4271 branch target when there is no alignment. */
4273 /* There are two delay_slot cases to consider. One is the simple case
4274 where the preceding branch is to the insn beyond the barrier (simple
4275 delay slot filling), and the other is where the preceding branch has
4276 a delay slot that is a duplicate of the insn after the barrier
4277 (fill_eager_delay_slots) and the branch is to the insn after the insn
4278 after the barrier. */
4280 /* PREV is presumed to be the JUMP_INSN for the barrier under
4281 investigation. Skip to the insn before it. */
4282 prev = prev_real_insn (prev);
4284 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
4285 credit >= 0 && prev && GET_CODE (prev) == INSN;
4286 prev = prev_real_insn (prev))
4288 jump_to_next = 0;
4289 if (GET_CODE (PATTERN (prev)) == USE
4290 || GET_CODE (PATTERN (prev)) == CLOBBER)
4291 continue;
4292 if (GET_CODE (PATTERN (prev)) == SEQUENCE)
4294 prev = XVECEXP (PATTERN (prev), 0, 1);
4295 if (INSN_UID (prev) == INSN_UID (next))
4297 /* Delay slot was filled with insn at jump target. */
4298 jump_to_next = 1;
4299 continue;
4303 if (slot &&
4304 get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
4305 slot = 0;
4306 credit -= get_attr_length (prev);
4308 if (prev
4309 && GET_CODE (prev) == JUMP_INSN
4310 && JUMP_LABEL (prev))
4312 rtx x;
4313 if (jump_to_next
4314 || next_real_insn (JUMP_LABEL (prev)) == next
4315 /* If relax_delay_slots() decides NEXT was redundant
4316 with some previous instruction, it will have
4317 redirected PREV's jump to the following insn. */
4318 || JUMP_LABEL (prev) == next_nonnote_insn (next)
4319 /* There is no upper bound on redundant instructions
4320 that might have been skipped, but we must not put an
4321 alignment where none had been before. */
4322 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
4323 (INSN_P (x)
4324 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
4325 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
4326 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
4328 rtx pat = PATTERN (prev);
4329 if (GET_CODE (pat) == PARALLEL)
4330 pat = XVECEXP (pat, 0, 0);
4331 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
4332 return 0;
4337 return align_jumps_log;
4340 /* If we are inside a phony loop, almost any kind of label can turn up as the
4341 first one in the loop. Aligning a braf label causes incorrect switch
4342 destination addresses; we can detect braf labels because they are
4343 followed by a BARRIER.
4344 Applying loop alignment to small constant or switch tables is a waste
4345 of space, so we suppress this too. */
4347 sh_loop_align (rtx label)
4349 rtx next = label;
4352 next = next_nonnote_insn (next);
4353 while (next && GET_CODE (next) == CODE_LABEL);
4355 if (! next
4356 || ! INSN_P (next)
4357 || GET_CODE (PATTERN (next)) == ADDR_DIFF_VEC
4358 || recog_memoized (next) == CODE_FOR_consttable_2)
4359 return 0;
4361 return align_loops_log;
4364 /* Do a final pass over the function, just before delayed branch
4365 scheduling. */
4367 static void
4368 sh_reorg (void)
4370 rtx first, insn, mova = NULL_RTX;
4371 int num_mova;
4372 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
4373 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
4375 first = get_insns ();
4376 max_labelno_before_reorg = max_label_num ();
4378 /* We must split call insns before introducing `mova's. If we're
4379 optimizing, they'll have already been split. Otherwise, make
4380 sure we don't split them too late. */
4381 if (! optimize)
4382 split_all_insns_noflow ();
4384 if (TARGET_SHMEDIA)
4385 return;
4387 /* If relaxing, generate pseudo-ops to associate function calls with
4388 the symbols they call. It does no harm to not generate these
4389 pseudo-ops. However, when we can generate them, it enables the
4390 linker to potentially relax the jsr to a bsr, and eliminate the
4391 register load and, possibly, the constant pool entry. */
4393 mdep_reorg_phase = SH_INSERT_USES_LABELS;
4394 if (TARGET_RELAX)
4396 /* Remove all REG_LABEL notes. We want to use them for our own
4397 purposes. This works because none of the remaining passes
4398 need to look at them.
4400 ??? But it may break in the future. We should use a machine
4401 dependent REG_NOTE, or some other approach entirely. */
4402 for (insn = first; insn; insn = NEXT_INSN (insn))
4404 if (INSN_P (insn))
4406 rtx note;
4408 while ((note = find_reg_note (insn, REG_LABEL, NULL_RTX)) != 0)
4409 remove_note (insn, note);
4413 for (insn = first; insn; insn = NEXT_INSN (insn))
4415 rtx pattern, reg, link, set, scan, dies, label;
4416 int rescan = 0, foundinsn = 0;
4418 if (GET_CODE (insn) == CALL_INSN)
4420 pattern = PATTERN (insn);
4422 if (GET_CODE (pattern) == PARALLEL)
4423 pattern = XVECEXP (pattern, 0, 0);
4424 if (GET_CODE (pattern) == SET)
4425 pattern = SET_SRC (pattern);
4427 if (GET_CODE (pattern) != CALL
4428 || GET_CODE (XEXP (pattern, 0)) != MEM)
4429 continue;
4431 reg = XEXP (XEXP (pattern, 0), 0);
4433 else
4435 reg = sfunc_uses_reg (insn);
4436 if (! reg)
4437 continue;
4440 if (GET_CODE (reg) != REG)
4441 continue;
4443 /* This is a function call via REG. If the only uses of REG
4444 between the time that it is set and the time that it dies
4445 are in function calls, then we can associate all the
4446 function calls with the setting of REG. */
4448 for (link = LOG_LINKS (insn); link; link = XEXP (link, 1))
4450 if (REG_NOTE_KIND (link) != 0)
4451 continue;
4452 set = single_set (XEXP (link, 0));
4453 if (set && rtx_equal_p (reg, SET_DEST (set)))
4455 link = XEXP (link, 0);
4456 break;
4460 if (! link)
4462 /* ??? Sometimes global register allocation will have
4463 deleted the insn pointed to by LOG_LINKS. Try
4464 scanning backward to find where the register is set. */
4465 for (scan = PREV_INSN (insn);
4466 scan && GET_CODE (scan) != CODE_LABEL;
4467 scan = PREV_INSN (scan))
4469 if (! INSN_P (scan))
4470 continue;
4472 if (! reg_mentioned_p (reg, scan))
4473 continue;
4475 if (noncall_uses_reg (reg, scan, &set))
4476 break;
4478 if (set)
4480 link = scan;
4481 break;
4486 if (! link)
4487 continue;
4489 /* The register is set at LINK. */
4491 /* We can only optimize the function call if the register is
4492 being set to a symbol. In theory, we could sometimes
4493 optimize calls to a constant location, but the assembler
4494 and linker do not support that at present. */
4495 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
4496 && GET_CODE (SET_SRC (set)) != LABEL_REF)
4497 continue;
4499 /* Scan forward from LINK to the place where REG dies, and
4500 make sure that the only insns which use REG are
4501 themselves function calls. */
4503 /* ??? This doesn't work for call targets that were allocated
4504 by reload, since there may not be a REG_DEAD note for the
4505 register. */
4507 dies = NULL_RTX;
4508 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
4510 rtx scanset;
4512 /* Don't try to trace forward past a CODE_LABEL if we haven't
4513 seen INSN yet. Ordinarily, we will only find the setting insn
4514 in LOG_LINKS if it is in the same basic block. However,
4515 cross-jumping can insert code labels in between the load and
4516 the call, and can result in situations where a single call
4517 insn may have two targets depending on where we came from. */
4519 if (GET_CODE (scan) == CODE_LABEL && ! foundinsn)
4520 break;
4522 if (! INSN_P (scan))
4523 continue;
4525 /* Don't try to trace forward past a JUMP. To optimize
4526 safely, we would have to check that all the
4527 instructions at the jump destination did not use REG. */
4529 if (GET_CODE (scan) == JUMP_INSN)
4530 break;
4532 if (! reg_mentioned_p (reg, scan))
4533 continue;
4535 if (noncall_uses_reg (reg, scan, &scanset))
4536 break;
4538 if (scan == insn)
4539 foundinsn = 1;
4541 if (scan != insn
4542 && (GET_CODE (scan) == CALL_INSN || sfunc_uses_reg (scan)))
4544 /* There is a function call to this register other
4545 than the one we are checking. If we optimize
4546 this call, we need to rescan again below. */
4547 rescan = 1;
4550 /* ??? We shouldn't have to worry about SCANSET here.
4551 We should just be able to check for a REG_DEAD note
4552 on a function call. However, the REG_DEAD notes are
4553 apparently not dependable around libcalls; c-torture
4554 execute/920501-2 is a test case. If SCANSET is set,
4555 then this insn sets the register, so it must have
4556 died earlier. Unfortunately, this will only handle
4557 the cases in which the register is, in fact, set in a
4558 later insn. */
4560 /* ??? We shouldn't have to use FOUNDINSN here.
4561 However, the LOG_LINKS fields are apparently not
4562 entirely reliable around libcalls;
4563 newlib/libm/math/e_pow.c is a test case. Sometimes
4564 an insn will appear in LOG_LINKS even though it is
4565 not the most recent insn which sets the register. */
4567 if (foundinsn
4568 && (scanset
4569 || find_reg_note (scan, REG_DEAD, reg)))
4571 dies = scan;
4572 break;
4576 if (! dies)
4578 /* Either there was a branch, or some insn used REG
4579 other than as a function call address. */
4580 continue;
4583 /* Create a code label, and put it in a REG_LABEL note on
4584 the insn which sets the register, and on each call insn
4585 which uses the register. In final_prescan_insn we look
4586 for the REG_LABEL notes, and output the appropriate label
4587 or pseudo-op. */
4589 label = gen_label_rtx ();
4590 REG_NOTES (link) = gen_rtx_INSN_LIST (REG_LABEL, label,
4591 REG_NOTES (link));
4592 REG_NOTES (insn) = gen_rtx_INSN_LIST (REG_LABEL, label,
4593 REG_NOTES (insn));
4594 if (rescan)
4596 scan = link;
4599 rtx reg2;
4601 scan = NEXT_INSN (scan);
4602 if (scan != insn
4603 && ((GET_CODE (scan) == CALL_INSN
4604 && reg_mentioned_p (reg, scan))
4605 || ((reg2 = sfunc_uses_reg (scan))
4606 && REGNO (reg2) == REGNO (reg))))
4607 REG_NOTES (scan)
4608 = gen_rtx_INSN_LIST (REG_LABEL, label, REG_NOTES (scan));
4610 while (scan != dies);
4615 if (TARGET_SH2)
4616 fixup_addr_diff_vecs (first);
4618 if (optimize)
4620 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
4621 shorten_branches (first);
4624 /* Scan the function looking for move instructions which have to be
4625 changed to pc-relative loads and insert the literal tables. */
4626 label_ref_list_pool = create_alloc_pool ("label references list",
4627 sizeof (struct label_ref_list_d),
4628 30);
4629 mdep_reorg_phase = SH_FIXUP_PCLOAD;
4630 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
4632 if (mova_p (insn))
4634 /* ??? basic block reordering can move a switch table dispatch
4635 below the switch table. Check if that has happened.
4636 We only have the addresses available when optimizing; but then,
4637 this check shouldn't be needed when not optimizing. */
4638 if (!untangle_mova (&num_mova, &mova, insn))
4640 insn = mova;
4641 num_mova = 0;
4644 else if (GET_CODE (insn) == JUMP_INSN
4645 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
4646 && num_mova
4647 /* ??? loop invariant motion can also move a mova out of a
4648 loop. Since loop does this code motion anyway, maybe we
4649 should wrap UNSPEC_MOVA into a CONST, so that reload can
4650 move it back. */
4651 && ((num_mova > 1
4652 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
4653 || (prev_nonnote_insn (insn)
4654 == XEXP (MOVA_LABELREF (mova), 0))))
4656 rtx scan;
4657 int total;
4659 num_mova--;
4661 /* Some code might have been inserted between the mova and
4662 its ADDR_DIFF_VEC. Check if the mova is still in range. */
4663 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
4664 total += get_attr_length (scan);
4666 /* The range of mova is 1020; add 4 because the PC counts from the address
4667 of the second instruction after this one, and subtract 2 in case the PC
4668 is 2-byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
4669 cancels out with alignment effects of the mova itself. */
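	      /* That is, the usable distance is 1020 + 4 - 2 = 1022 bytes,
		 the limit tested below.  */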
4670 if (total > 1022)
4672 /* Change the mova into a load, and restart scanning
4673 there. broken_move will then return true for mova. */
4674 fixup_mova (mova);
4675 insn = mova;
4678 if (broken_move (insn)
4679 || (GET_CODE (insn) == INSN
4680 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
4682 rtx scan;
4683 /* Scan ahead looking for a barrier to stick the constant table
4684 behind. */
4685 rtx barrier = find_barrier (num_mova, mova, insn);
4686 rtx last_float_move = NULL_RTX, last_float = 0, *last_float_addr = NULL;
4687 int need_aligned_label = 0;
4689 if (num_mova && ! mova_p (mova))
4691 /* find_barrier had to change the first mova into a
4692 pcload; thus, we have to start with this new pcload. */
4693 insn = mova;
4694 num_mova = 0;
4696 /* Now find all the moves between the points and modify them. */
4697 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
4699 if (GET_CODE (scan) == CODE_LABEL)
4700 last_float = 0;
4701 if (GET_CODE (scan) == INSN
4702 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
4703 need_aligned_label = 1;
4704 if (broken_move (scan))
4706 rtx *patp = &PATTERN (scan), pat = *patp;
4707 rtx src, dst;
4708 rtx lab;
4709 rtx newsrc;
4710 enum machine_mode mode;
4712 if (GET_CODE (pat) == PARALLEL)
4713 patp = &XVECEXP (pat, 0, 0), pat = *patp;
4714 src = SET_SRC (pat);
4715 dst = SET_DEST (pat);
4716 mode = GET_MODE (dst);
4718 if (mode == SImode && hi_const (src)
4719 && REGNO (dst) != FPUL_REG)
4721 int offset = 0;
4723 mode = HImode;
4724 while (GET_CODE (dst) == SUBREG)
4726 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
4727 GET_MODE (SUBREG_REG (dst)),
4728 SUBREG_BYTE (dst),
4729 GET_MODE (dst));
4730 dst = SUBREG_REG (dst);
4732 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
4734 if (GET_CODE (dst) == REG && FP_ANY_REGISTER_P (REGNO (dst)))
4736 /* This must be an insn that clobbers r0. */
4737 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
4738 XVECLEN (PATTERN (scan), 0)
4739 - 1);
4740 rtx clobber = *clobberp;
4742 gcc_assert (GET_CODE (clobber) == CLOBBER
4743 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
4745 if (last_float
4746 && reg_set_between_p (r0_rtx, last_float_move, scan))
4747 last_float = 0;
4748 if (last_float
4749 && TARGET_SHCOMPACT
4750 && GET_MODE_SIZE (mode) != 4
4751 && GET_MODE_SIZE (GET_MODE (last_float)) == 4)
4752 last_float = 0;
4753 lab = add_constant (src, mode, last_float);
4754 if (lab)
4755 emit_insn_before (gen_mova (lab), scan);
4756 else
4758 /* There will be a REG_UNUSED note for r0 on
4759 LAST_FLOAT_MOVE; we have to change it to REG_INC,
4760 otherwise reorg:mark_target_live_regs will not
4761 consider r0 to be used, and we could end up with a delay
4762 slot insn in front of SCAN that clobbers r0. */
4763 rtx note
4764 = find_regno_note (last_float_move, REG_UNUSED, 0);
4766 /* If we are not optimizing, then there may not be
4767 a note. */
4768 if (note)
4769 PUT_MODE (note, REG_INC);
4771 *last_float_addr = r0_inc_rtx;
4773 last_float_move = scan;
4774 last_float = src;
4775 newsrc = gen_const_mem (mode,
4776 (((TARGET_SH4 && ! TARGET_FMOVD)
4777 || REGNO (dst) == FPUL_REG)
4778 ? r0_inc_rtx
4779 : r0_rtx));
4780 last_float_addr = &XEXP (newsrc, 0);
4782 /* Remove the clobber of r0. */
4783 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
4784 gen_rtx_SCRATCH (Pmode));
4786 /* This is a mova needing a label. Create it. */
4787 else if (GET_CODE (src) == UNSPEC
4788 && XINT (src, 1) == UNSPEC_MOVA
4789 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
4791 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
4792 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4793 newsrc = gen_rtx_UNSPEC (SImode,
4794 gen_rtvec (1, newsrc),
4795 UNSPEC_MOVA);
4797 else
4799 lab = add_constant (src, mode, 0);
4800 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
4801 newsrc = gen_const_mem (mode, newsrc);
4803 *patp = gen_rtx_SET (VOIDmode, dst, newsrc);
4804 INSN_CODE (scan) = -1;
4807 dump_table (need_aligned_label ? insn : 0, barrier);
4808 insn = barrier;
4811 free_alloc_pool (label_ref_list_pool);
4812 for (insn = first; insn; insn = NEXT_INSN (insn))
4813 PUT_MODE (insn, VOIDmode);
4815 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
4816 INSN_ADDRESSES_FREE ();
4817 split_branches (first);
4819 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
4820 also has an effect on the register that holds the address of the sfunc.
4821 Insert an extra dummy insn in front of each sfunc that pretends to
4822 use this register. */
4823 if (flag_delayed_branch)
4825 for (insn = first; insn; insn = NEXT_INSN (insn))
4827 rtx reg = sfunc_uses_reg (insn);
4829 if (! reg)
4830 continue;
4831 emit_insn_before (gen_use_sfunc_addr (reg), insn);
4834 #if 0
4835 /* fpscr is not actually a user variable, but we pretend it is for the
4836 sake of the previous optimization passes, since we want it handled like
4837 one. However, we don't have any debugging information for it, so turn
4838 it into a non-user variable now. */
4839 if (TARGET_SH4)
4840 REG_USERVAR_P (get_fpscr_rtx ()) = 0;
4841 #endif
4842 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
4846 get_dest_uid (rtx label, int max_uid)
4848 rtx dest = next_real_insn (label);
4849 int dest_uid;
4850 if (! dest)
4851 /* This can happen for an undefined label. */
4852 return 0;
4853 dest_uid = INSN_UID (dest);
4854 /* If this is a newly created branch redirection blocking instruction,
4855 we cannot index the branch_uid or insn_addresses arrays with its
4856 uid. But then, we won't need to, because the actual destination is
4857 the following branch. */
4858 while (dest_uid >= max_uid)
4860 dest = NEXT_INSN (dest);
4861 dest_uid = INSN_UID (dest);
4863 if (GET_CODE (dest) == JUMP_INSN && GET_CODE (PATTERN (dest)) == RETURN)
4864 return 0;
4865 return dest_uid;
4868 /* Split condbranches that are out of range. Also add clobbers for
4869 scratch registers that are needed in far jumps.
4870 We do this before delay slot scheduling, so that it can take our
4871 newly created instructions into account. It also allows us to
4872 find branches with common targets more easily. */
4874 static void
4875 split_branches (rtx first)
4877 rtx insn;
4878 struct far_branch **uid_branch, *far_branch_list = 0;
4879 int max_uid = get_max_uid ();
4880 int ok;
4882 /* Find out which branches are out of range. */
4883 shorten_branches (first);
4885 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
4886 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
4888 for (insn = first; insn; insn = NEXT_INSN (insn))
4889 if (! INSN_P (insn))
4890 continue;
4891 else if (INSN_DELETED_P (insn))
4893 /* Shorten_branches would split this instruction again,
4894 so transform it into a note. */
4895 PUT_CODE (insn, NOTE);
4896 NOTE_LINE_NUMBER (insn) = NOTE_INSN_DELETED;
4897 NOTE_SOURCE_FILE (insn) = 0;
4899 else if (GET_CODE (insn) == JUMP_INSN
4900 /* Don't mess with ADDR_DIFF_VEC */
4901 && (GET_CODE (PATTERN (insn)) == SET
4902 || GET_CODE (PATTERN (insn)) == RETURN))
4904 enum attr_type type = get_attr_type (insn);
4905 if (type == TYPE_CBRANCH)
4907 rtx next, beyond;
4909 if (get_attr_length (insn) > 4)
4911 rtx src = SET_SRC (PATTERN (insn));
4912 rtx olabel = XEXP (XEXP (src, 1), 0);
4913 int addr = INSN_ADDRESSES (INSN_UID (insn));
4914 rtx label = 0;
4915 int dest_uid = get_dest_uid (olabel, max_uid);
4916 struct far_branch *bp = uid_branch[dest_uid];
4918 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
4919 the label if the LABEL_NUSES count drops to zero. There is
4920 always a jump_optimize pass that sets these values, but it
4921 proceeds to delete unreferenced code, and then if not
4922 optimizing, to un-delete the deleted instructions, thus
4923 leaving labels with use counts that are too low. */
4924 if (! optimize)
4926 JUMP_LABEL (insn) = olabel;
4927 LABEL_NUSES (olabel)++;
4929 if (! bp)
4931 bp = (struct far_branch *) alloca (sizeof *bp);
4932 uid_branch[dest_uid] = bp;
4933 bp->prev = far_branch_list;
4934 far_branch_list = bp;
4935 bp->far_label
4936 = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
4937 LABEL_NUSES (bp->far_label)++;
4939 else
4941 label = bp->near_label;
4942 if (! label && bp->address - addr >= CONDJUMP_MIN)
4944 rtx block = bp->insert_place;
4946 if (GET_CODE (PATTERN (block)) == RETURN)
4947 block = PREV_INSN (block);
4948 else
4949 block = gen_block_redirect (block,
4950 bp->address, 2);
4951 label = emit_label_after (gen_label_rtx (),
4952 PREV_INSN (block));
4953 bp->near_label = label;
4955 else if (label && ! NEXT_INSN (label))
4957 if (addr + 2 - bp->address <= CONDJUMP_MAX)
4958 bp->insert_place = insn;
4959 else
4960 gen_far_branch (bp);
4963 if (! label
4964 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
4966 bp->near_label = label = gen_label_rtx ();
4967 bp->insert_place = insn;
4968 bp->address = addr;
4970 ok = redirect_jump (insn, label, 1);
4971 gcc_assert (ok);
4973 else
4975 /* get_attr_length (insn) == 2 */
4976 /* Check if we have a pattern where reorg wants to redirect
4977 the branch to a label from an unconditional branch that
4978 is too far away. */
4979 /* We can't use JUMP_LABEL here because it might be undefined
4980 when not optimizing. */
4981 /* A syntax error might cause beyond to be NULL_RTX. */
4982 beyond
4983 = next_active_insn (XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
4984 0));
4986 if (beyond
4987 && (GET_CODE (beyond) == JUMP_INSN
4988 || ((beyond = next_active_insn (beyond))
4989 && GET_CODE (beyond) == JUMP_INSN))
4990 && GET_CODE (PATTERN (beyond)) == SET
4991 && recog_memoized (beyond) == CODE_FOR_jump_compact
4992 && ((INSN_ADDRESSES
4993 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
4994 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
4995 > 252 + 258 + 2))
4996 gen_block_redirect (beyond,
4997 INSN_ADDRESSES (INSN_UID (beyond)), 1);
5000 next = next_active_insn (insn);
5002 if ((GET_CODE (next) == JUMP_INSN
5003 || ((next = next_active_insn (next))
5004 && GET_CODE (next) == JUMP_INSN))
5005 && GET_CODE (PATTERN (next)) == SET
5006 && recog_memoized (next) == CODE_FOR_jump_compact
5007 && ((INSN_ADDRESSES
5008 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
5009 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
5010 > 252 + 258 + 2))
5011 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
5013 else if (type == TYPE_JUMP || type == TYPE_RETURN)
5015 int addr = INSN_ADDRESSES (INSN_UID (insn));
5016 rtx far_label = 0;
5017 int dest_uid = 0;
5018 struct far_branch *bp;
5020 if (type == TYPE_JUMP)
5022 far_label = XEXP (SET_SRC (PATTERN (insn)), 0);
5023 dest_uid = get_dest_uid (far_label, max_uid);
5024 if (! dest_uid)
5026 /* Parse errors can lead to labels outside
5027 the insn stream. */
5028 if (! NEXT_INSN (far_label))
5029 continue;
5031 if (! optimize)
5033 JUMP_LABEL (insn) = far_label;
5034 LABEL_NUSES (far_label)++;
5036 redirect_jump (insn, NULL_RTX, 1);
5037 far_label = 0;
5040 bp = uid_branch[dest_uid];
5041 if (! bp)
5043 bp = (struct far_branch *) alloca (sizeof *bp);
5044 uid_branch[dest_uid] = bp;
5045 bp->prev = far_branch_list;
5046 far_branch_list = bp;
5047 bp->near_label = 0;
5048 bp->far_label = far_label;
5049 if (far_label)
5050 LABEL_NUSES (far_label)++;
5052 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
5053 if (addr - bp->address <= CONDJUMP_MAX)
5054 emit_label_after (bp->near_label, PREV_INSN (insn));
5055 else
5057 gen_far_branch (bp);
5058 bp->near_label = 0;
5060 else
5061 bp->near_label = 0;
5062 bp->address = addr;
5063 bp->insert_place = insn;
5064 if (! far_label)
5065 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
5066 else
5067 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
5070 /* Generate all pending far branches,
5071 and free our references to the far labels. */
5072 while (far_branch_list)
5074 if (far_branch_list->near_label
5075 && ! NEXT_INSN (far_branch_list->near_label))
5076 gen_far_branch (far_branch_list);
5077 if (optimize
5078 && far_branch_list->far_label
5079 && ! --LABEL_NUSES (far_branch_list->far_label))
5080 delete_insn (far_branch_list->far_label);
5081 far_branch_list = far_branch_list->prev;
5084 /* Instruction length information is no longer valid due to the new
5085 instructions that have been generated. */
5086 init_insn_lengths ();
5089 /* Dump out instruction addresses, which is useful for debugging the
5090 constant pool table stuff.
5092 If relaxing, output the label and pseudo-ops used to link together
5093 calls and the instructions which set the registers. */
5095 /* ??? The addresses printed by this routine for insns are nonsense for
5096 insns which are inside of a sequence where none of the inner insns have
5097 variable length. This is because the second pass of shorten_branches
5098 does not bother to update them. */
5100 void
5101 final_prescan_insn (rtx insn, rtx *opvec ATTRIBUTE_UNUSED,
5102 int noperands ATTRIBUTE_UNUSED)
5104 if (TARGET_DUMPISIZE)
5105 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
5107 if (TARGET_RELAX)
5109 rtx note;
5111 note = find_reg_note (insn, REG_LABEL, NULL_RTX);
5112 if (note)
5114 rtx pattern;
5116 pattern = PATTERN (insn);
5117 if (GET_CODE (pattern) == PARALLEL)
5118 pattern = XVECEXP (pattern, 0, 0);
5119 switch (GET_CODE (pattern))
5121 case SET:
5122 if (GET_CODE (SET_SRC (pattern)) != CALL
5123 && get_attr_type (insn) != TYPE_SFUNC)
5125 targetm.asm_out.internal_label
5126 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
5127 break;
5129 /* else FALLTHROUGH */
5130 case CALL:
5131 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
5132 CODE_LABEL_NUMBER (XEXP (note, 0)));
5133 break;
5135 default:
5136 gcc_unreachable ();
5142 /* Dump out any constants accumulated in the final pass. These will
5143 only be labels. */
5145 const char *
5146 output_jump_label_table (void)
5148 int i;
5150 if (pool_size)
5152 fprintf (asm_out_file, "\t.align 2\n");
5153 for (i = 0; i < pool_size; i++)
5155 pool_node *p = &pool_vector[i];
5157 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5158 CODE_LABEL_NUMBER (p->label));
5159 output_asm_insn (".long %O0", &p->value);
5161 pool_size = 0;
5164 return "";
5167 /* A full frame looks like:
5169 arg-5
5170 arg-4
5171 [ if current_function_anonymous_args
5172 arg-3
5173 arg-2
5174 arg-1
5175 arg-0 ]
5176 saved-fp
5177 saved-r10
5178 saved-r11
5179 saved-r12
5180 saved-pr
5181 local-n
5183 local-1
5184 local-0 <- fp points here. */
5186 /* Number of bytes pushed for anonymous args, used to pass information
5187 between expand_prologue and expand_epilogue. */
5189 /* Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
5190 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
5191 for an epilogue and a negative value means that it's for a sibcall
5192 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
5193 all the registers that are about to be restored, and hence dead. */
5195 static void
5196 output_stack_adjust (int size, rtx reg, int epilogue_p,
5197 HARD_REG_SET *live_regs_mask)
5199 rtx (*emit_fn) (rtx) = epilogue_p ? &emit_insn : &frame_insn;
5200 if (size)
5202 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5204 /* This test is bogus, as output_stack_adjust is used to re-align the
5205 stack. */
5206 #if 0
5207 gcc_assert (!(size % align));
5208 #endif
5210 if (CONST_OK_FOR_ADD (size))
5211 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
5212 /* Try to do it with two partial adjustments; however, we must make
5213 sure that the stack is properly aligned at all times, in case
5214 an interrupt occurs between the two partial adjustments. */
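      /* A sketch of the idea, assuming 4-byte stack alignment and an add
	 immediate limited to small constants: a 200-byte adjustment is
	 split into (200 / 2 & -4) = 100 followed by 200 - 100 = 100, and
	 the stack pointer stays a multiple of 4 after either step.  */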
5215 else if (CONST_OK_FOR_ADD (size / 2 & -align)
5216 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
5218 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
5219 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
5221 else
5223 rtx const_reg;
5224 rtx insn;
5225 int temp = epilogue_p ? 7 : (TARGET_SH5 ? 0 : 1);
5226 int i;
5228 /* If TEMP is invalid, we could temporarily save a general
5229 register to MACL. However, there is currently no need
5230 to handle this case, so just die when we see it. */
5231 if (epilogue_p < 0
5232 || current_function_interrupt
5233 || ! call_really_used_regs[temp] || fixed_regs[temp])
5234 temp = -1;
5235 if (temp < 0 && ! current_function_interrupt
5236 && (TARGET_SHMEDIA || epilogue_p >= 0))
5238 HARD_REG_SET temps;
5239 COPY_HARD_REG_SET (temps, call_used_reg_set);
5240 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
5241 if (epilogue_p > 0)
5243 int nreg = 0;
5244 if (current_function_return_rtx)
5246 enum machine_mode mode;
5247 mode = GET_MODE (current_function_return_rtx);
5248 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
5249 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
5251 for (i = 0; i < nreg; i++)
5252 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
5253 if (current_function_calls_eh_return)
5255 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
5256 for (i = 0; i <= 3; i++)
5257 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
5260 if (TARGET_SHMEDIA && epilogue_p < 0)
5261 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
5262 CLEAR_HARD_REG_BIT (temps, i);
5263 if (epilogue_p <= 0)
5265 for (i = FIRST_PARM_REG;
5266 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
5267 CLEAR_HARD_REG_BIT (temps, i);
5268 if (cfun->static_chain_decl != NULL)
5269 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
5271 temp = scavenge_reg (&temps);
5273 if (temp < 0 && live_regs_mask)
5274 temp = scavenge_reg (live_regs_mask);
5275 if (temp < 0)
5277 rtx adj_reg, tmp_reg, mem;
5279 /* If we reached here, the most likely case is the (sibcall)
5280 epilogue for non-SHmedia. Emit a special push/pop sequence
5281 for such a case as a last resort. This looks lengthy, but it
5282 should not be a problem because it seems to be very
5283 rare. */
5285 gcc_assert (!TARGET_SHMEDIA && epilogue_p);
5288 /* ??? There is still the slight possibility that r4 or
5289 r5 have been reserved as fixed registers or assigned
5290 as global registers, and they change during an
5291 interrupt. There are possible ways to handle this:
5293 - If we are adjusting the frame pointer (r14), we can do
5294 with a single temp register and an ordinary push / pop
5295 on the stack.
5296 - Grab any call-used or call-saved registers (i.e. not
5297 fixed or globals) for the temps we need. We might
5298 also grab r14 if we are adjusting the stack pointer.
5299 If we can't find enough available registers, issue
5300 a diagnostic and die - the user must have reserved
5301 way too many registers.
5302 But since all this is rather unlikely to happen and
5303 would require extra testing, we just die if r4 / r5
5304 are not available. */
5305 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
5306 && !global_regs[4] && !global_regs[5]);
5308 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
5309 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
5310 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
5311 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
5312 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
5313 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5314 emit_move_insn (mem, tmp_reg);
5315 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
5316 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
5317 emit_move_insn (mem, tmp_reg);
5318 emit_move_insn (reg, adj_reg);
5319 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5320 emit_move_insn (adj_reg, mem);
5321 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
5322 emit_move_insn (tmp_reg, mem);
5323 return;
5325 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
5327 /* If SIZE is negative, subtract the positive value.
5328 This sometimes allows a constant pool entry to be shared
5329 between prologue and epilogue code. */
5330 if (size < 0)
5332 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
5333 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
5335 else
5337 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
5338 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
5340 if (! epilogue_p)
5341 REG_NOTES (insn)
5342 = (gen_rtx_EXPR_LIST
5343 (REG_FRAME_RELATED_EXPR,
5344 gen_rtx_SET (VOIDmode, reg,
5345 gen_rtx_PLUS (SImode, reg, GEN_INT (size))),
5346 REG_NOTES (insn)));
5351 static rtx
5352 frame_insn (rtx x)
5354 x = emit_insn (x);
5355 RTX_FRAME_RELATED_P (x) = 1;
5356 return x;
5359 /* Output RTL to push register RN onto the stack. */
5361 static rtx
5362 push (int rn)
5364 rtx x;
5365 if (rn == FPUL_REG)
5366 x = gen_push_fpul ();
5367 else if (rn == FPSCR_REG)
5368 x = gen_push_fpscr ();
5369 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5370 && FP_OR_XD_REGISTER_P (rn))
5372 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5373 return NULL_RTX;
5374 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
5376 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5377 x = gen_push_e (gen_rtx_REG (SFmode, rn));
5378 else
5379 x = gen_push (gen_rtx_REG (SImode, rn));
5381 x = frame_insn (x);
5382 REG_NOTES (x)
5383 = gen_rtx_EXPR_LIST (REG_INC,
5384 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5385 return x;
5388 /* Output RTL to pop register RN from the stack. */
5390 static void
5391 pop (int rn)
5393 rtx x;
5394 if (rn == FPUL_REG)
5395 x = gen_pop_fpul ();
5396 else if (rn == FPSCR_REG)
5397 x = gen_pop_fpscr ();
5398 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && ! TARGET_FPU_SINGLE
5399 && FP_OR_XD_REGISTER_P (rn))
5401 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
5402 return;
5403 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
5405 else if (TARGET_SH2E && FP_REGISTER_P (rn))
5406 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
5407 else
5408 x = gen_pop (gen_rtx_REG (SImode, rn));
5410 x = emit_insn (x);
5411 REG_NOTES (x)
5412 = gen_rtx_EXPR_LIST (REG_INC,
5413 gen_rtx_REG (SImode, STACK_POINTER_REGNUM), 0);
5416 /* Generate code to push the regs specified in the mask. */
5418 static void
5419 push_regs (HARD_REG_SET *mask, int interrupt_handler)
5421 int i;
5422 int skip_fpscr = 0;
5424 /* Push PR last; this gives better latencies after the prologue, and
5425 candidates for the return delay slot when there are no general
5426 registers pushed. */
5427 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
5429 /* If this is an interrupt handler, and the SZ bit varies,
5430 and we have to push any floating point register, we need
5431 to switch to the correct precision first. */
5432 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
5433 && hard_regs_intersect_p (mask, &reg_class_contents[DF_REGS]))
5435 HARD_REG_SET unsaved;
5437 push (FPSCR_REG);
5438 COMPL_HARD_REG_SET (unsaved, *mask);
5439 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
5440 skip_fpscr = 1;
5442 if (i != PR_REG
5443 && (i != FPSCR_REG || ! skip_fpscr)
5444 && TEST_HARD_REG_BIT (*mask, i))
5445 push (i);
5447 if (TEST_HARD_REG_BIT (*mask, PR_REG))
5448 push (PR_REG);
5451 /* Calculate how much extra space is needed to save all callee-saved
5452 target registers.
5453 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5455 static int
5456 shmedia_target_regs_stack_space (HARD_REG_SET *live_regs_mask)
5458 int reg;
5459 int stack_space = 0;
5460 int interrupt_handler = sh_cfun_interrupt_handler_p ();
5462 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5463 if ((! call_really_used_regs[reg] || interrupt_handler)
5464 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5465 /* Leave space to save this target register on the stack,
5466 in case target register allocation wants to use it. */
5467 stack_space += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5468 return stack_space;
5471 /* Decide whether we should reserve space for callee-save target registers,
5472 in case target register allocation wants to use them. REGS_SAVED is
5473 the space, in bytes, that is already required for register saves.
5474 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5476 static int
5477 shmedia_reserve_space_for_target_registers_p (int regs_saved,
5478 HARD_REG_SET *live_regs_mask)
5480 if (optimize_size)
5481 return 0;
5482 return shmedia_target_regs_stack_space (live_regs_mask) <= regs_saved;
5485 /* Decide how much space to reserve for callee-save target registers
5486 in case target register allocation wants to use them.
5487 LIVE_REGS_MASK is the register mask calculated by calc_live_regs. */
5489 static int
5490 shmedia_target_regs_stack_adjust (HARD_REG_SET *live_regs_mask)
5492 if (shmedia_space_reserved_for_target_registers)
5493 return shmedia_target_regs_stack_space (live_regs_mask);
5494 else
5495 return 0;
5498 /* Work out the registers which need to be saved, both as a mask and a
5499 count of saved words. Return the count.
5501 If doing a pragma interrupt function, then push all regs used by the
5502 function, and if we call another function (we can tell by looking at PR),
5503 make sure that all the regs it clobbers are safe too. */
5505 static int
5506 calc_live_regs (HARD_REG_SET *live_regs_mask)
5508 unsigned int reg;
5509 int count;
5510 tree attrs;
5511 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
5512 bool nosave_low_regs;
5513 int pr_live, has_call;
5515 attrs = DECL_ATTRIBUTES (current_function_decl);
5516 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
5517 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
5518 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
5519 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
5521 CLEAR_HARD_REG_SET (*live_regs_mask);
5522 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && interrupt_handler
5523 && regs_ever_live[FPSCR_REG])
5524 target_flags &= ~MASK_FPU_SINGLE;
5525 /* If switching to double mode lets us avoid a lot of saves, do that. */
5526 else if ((TARGET_SH4 || TARGET_SH2A_DOUBLE) && TARGET_FMOVD && TARGET_FPU_SINGLE)
5527 for (count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
5528 if (regs_ever_live[reg] && regs_ever_live[reg+1]
5529 && (! call_really_used_regs[reg]
5530 || interrupt_handler)
5531 && ++count > 2)
5533 target_flags &= ~MASK_FPU_SINGLE;
5534 break;
5536 /* PR_MEDIA_REG is a general purpose register, thus global_alloc already
5537 knows how to use it. That means the pseudo originally allocated for
5538 the initial value can become the PR_MEDIA_REG hard register, as seen for
5539 execute/20010122-1.c:test9. */
5540 if (TARGET_SHMEDIA)
5541 /* ??? this function is called from initial_elimination_offset, hence we
5542 can't use the result of sh_media_register_for_return here. */
5543 pr_live = sh_pr_n_sets ();
5544 else
5546 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
5547 pr_live = (pr_initial
5548 ? (GET_CODE (pr_initial) != REG
5549 || REGNO (pr_initial) != (PR_REG))
5550 : regs_ever_live[PR_REG]);
5551 /* For SHcompact, if not optimizing, we end up with a memory reference
5552 using the return address pointer for __builtin_return_address even
5553 though there is no actual need to put the PR register on the stack. */
5554 pr_live |= regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM];
5556 /* Force PR to be live if the prologue has to call the SHmedia
5557 argument decoder or register saver. */
5558 if (TARGET_SHCOMPACT
5559 && ((current_function_args_info.call_cookie
5560 & ~ CALL_COOKIE_RET_TRAMP (1))
5561 || current_function_has_nonlocal_label))
5562 pr_live = 1;
5563 has_call = TARGET_SHMEDIA ? ! leaf_function_p () : pr_live;
5564 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
5566 if (reg == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG)
5567 ? pr_live
5568 : interrupt_handler
5569 ? (/* Need to save all the regs ever live. */
5570 (regs_ever_live[reg]
5571 || (call_really_used_regs[reg]
5572 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
5573 || reg == PIC_OFFSET_TABLE_REGNUM)
5574 && has_call)
5575 || (TARGET_SHMEDIA && has_call
5576 && REGISTER_NATURAL_MODE (reg) == SImode
5577 && (GENERAL_REGISTER_P (reg) || TARGET_REGISTER_P (reg))))
5578 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
5579 && reg != RETURN_ADDRESS_POINTER_REGNUM
5580 && reg != T_REG && reg != GBR_REG
5581 /* Push fpscr only on targets which have an FPU. */
5582 && (reg != FPSCR_REG || TARGET_FPU_ANY))
5583 : (/* Only push those regs which are used and need to be saved. */
5584 (TARGET_SHCOMPACT
5585 && flag_pic
5586 && current_function_args_info.call_cookie
5587 && reg == PIC_OFFSET_TABLE_REGNUM)
5588 || (regs_ever_live[reg]
5589 && (!call_really_used_regs[reg]
5590 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
5591 || (current_function_calls_eh_return
5592 && (reg == EH_RETURN_DATA_REGNO (0)
5593 || reg == EH_RETURN_DATA_REGNO (1)
5594 || reg == EH_RETURN_DATA_REGNO (2)
5595 || reg == EH_RETURN_DATA_REGNO (3)))
5596 || ((reg == MACL_REG || reg == MACH_REG)
5597 && regs_ever_live[reg]
5598 && sh_cfun_attr_renesas_p ())
5601 SET_HARD_REG_BIT (*live_regs_mask, reg);
5602 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5604 if ((TARGET_SH4 || TARGET_SH2A_DOUBLE || TARGET_SH5) && TARGET_FMOVD
5605 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
5607 if (FP_REGISTER_P (reg))
5609 if (! TARGET_FPU_SINGLE && ! regs_ever_live[reg ^ 1])
5611 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
5612 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
5615 else if (XD_REGISTER_P (reg))
5617 /* Must switch to double mode to access these registers. */
5618 target_flags &= ~MASK_FPU_SINGLE;
5622 if (nosave_low_regs && reg == R8_REG)
5623 break;
5625 /* If we have a target register optimization pass after prologue / epilogue
5626 threading, we need to assume all target registers will be live even if
5627 they aren't now. */
5628 if (flag_branch_target_load_optimize2
5629 && TARGET_SAVE_ALL_TARGET_REGS
5630 && shmedia_space_reserved_for_target_registers)
5631 for (reg = LAST_TARGET_REG; reg >= FIRST_TARGET_REG; reg--)
5632 if ((! call_really_used_regs[reg] || interrupt_handler)
5633 && ! TEST_HARD_REG_BIT (*live_regs_mask, reg))
5635 SET_HARD_REG_BIT (*live_regs_mask, reg);
5636 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
5638 /* If this is an interrupt handler, we don't have any call-clobbered
5639 registers we can conveniently use for target register save/restore.
5640 Make sure we save at least one general purpose register when we need
5641 to save target registers. */
5642 if (interrupt_handler
5643 && hard_regs_intersect_p (live_regs_mask,
5644 &reg_class_contents[TARGET_REGS])
5645 && ! hard_regs_intersect_p (live_regs_mask,
5646 &reg_class_contents[GENERAL_REGS]))
5648 SET_HARD_REG_BIT (*live_regs_mask, R0_REG);
5649 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (R0_REG));
5652 return count;
5655 /* Code to generate prologue and epilogue sequences */
5657 /* PUSHED is the number of bytes that are being pushed on the
5658 stack for register saves. Return the frame size, padded
5659 appropriately so that the stack stays properly aligned. */
5660 static HOST_WIDE_INT
5661 rounded_frame_size (int pushed)
5663 HOST_WIDE_INT size = get_frame_size ();
5664 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
5666 return ((size + pushed + align - 1) & -align) - pushed;
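  /* For instance, with 4-byte stack alignment, get_frame_size () == 10 and
     PUSHED == 4 this returns ((10 + 4 + 3) & -4) - 4 = 12, so the frame plus
     the pushed registers together occupy an aligned 16 bytes.  */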
5669 /* Choose a call-clobbered target-branch register that remains
5670 unchanged along the whole function. We set it up as the return
5671 value in the prologue. */
5673 sh_media_register_for_return (void)
5675 int regno;
5676 int tr0_used;
5678 if (! current_function_is_leaf)
5679 return -1;
5680 if (lookup_attribute ("interrupt_handler",
5681 DECL_ATTRIBUTES (current_function_decl)))
5682 return -1;
5683 if (sh_cfun_interrupt_handler_p ())
5684 return -1;
5686 tr0_used = flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM];
5688 for (regno = FIRST_TARGET_REG + tr0_used; regno <= LAST_TARGET_REG; regno++)
5689 if (call_really_used_regs[regno] && ! regs_ever_live[regno])
5690 return regno;
5692 return -1;
5695 /* The maximum registers we need to save are:
5696 - 62 general purpose registers (r15 is stack pointer, r63 is zero)
5697 - 32 floating point registers (for each pair, we save none,
5698 one single precision value, or a double precision value).
5699 - 8 target registers
5700 - add 1 entry for a delimiter. */
5701 #define MAX_SAVED_REGS (62+32+8)
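/* That is, 62 + 32 + 8 == 102 register entries; the start and end delimiter
   entries are accounted for separately by the "+ 2" in save_schedule below.  */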
5703 typedef struct save_entry_s
5705 unsigned char reg;
5706 unsigned char mode;
5707 short offset;
5708 } save_entry;
5710 #define MAX_TEMPS 4
5712 /* There will be a delimiter entry with VOIDmode both at the start and the
5713 end of a filled-in schedule. The end delimiter has the offset of the
5714 save with the smallest (i.e. most negative) offset. */
5715 typedef struct save_schedule_s
5717 save_entry entries[MAX_SAVED_REGS + 2];
5718 int temps[MAX_TEMPS+1];
5719 } save_schedule;
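/* Sketch of a filled-in schedule (register numbers and offsets are
   hypothetical):

     entries[0]: reg -1, VOIDmode, offset == offset_base    (start delimiter)
     entries[1]: reg 28, DImode,   offset == offset_base - 8
     entries[2]: reg 17, DImode,   offset == offset_base - 16
     ...
     entries[N]: reg -1, VOIDmode, offset == smallest offset (end delimiter)

   The temps[] array holds candidate scratch registers and is terminated
   by -1.  */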
5721 /* Fill in SCHEDULE according to LIVE_REGS_MASK. If RESTORE is nonzero,
5722 use reverse order. Returns the last entry written to (not counting
5723 the delimiter). OFFSET_BASE is a number to be added to all offset
5724 entries. */
5726 static save_entry *
5727 sh5_schedule_saves (HARD_REG_SET *live_regs_mask, save_schedule *schedule,
5728 int offset_base)
5730 int align, i;
5731 save_entry *entry = schedule->entries;
5732 int tmpx = 0;
5733 int offset;
5735 if (! current_function_interrupt)
5736 for (i = FIRST_GENERAL_REG; tmpx < MAX_TEMPS && i <= LAST_GENERAL_REG; i++)
5737 if (call_really_used_regs[i] && ! fixed_regs[i] && i != PR_MEDIA_REG
5738 && ! FUNCTION_ARG_REGNO_P (i)
5739 && i != FIRST_RET_REG
5740 && ! (cfun->static_chain_decl != NULL && i == STATIC_CHAIN_REGNUM)
5741 && ! (current_function_calls_eh_return
5742 && (i == EH_RETURN_STACKADJ_REGNO
5743 || ((unsigned) i >= EH_RETURN_DATA_REGNO (0)
5744 && (unsigned) i <= EH_RETURN_DATA_REGNO (3)))))
5745 schedule->temps[tmpx++] = i;
5746 entry->reg = -1;
5747 entry->mode = VOIDmode;
5748 entry->offset = offset_base;
5749 entry++;
5750 /* We loop twice: first, we save 8-byte aligned registers in the
5751 higher addresses, which are known to be aligned. Then, we
5752 proceed to saving 32-bit registers that don't need 8-byte
5753 alignment.
5754 If this is an interrupt function, all registers that need saving
5755 need to be saved in full. Moreover, we need to postpone saving
5756 target registers until we have saved some general purpose registers
5757 that we can then use as scratch registers. */
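/* In the loop below, the first pass (align == 1) picks the saves whose size
   is a multiple of the stack boundary (8 bytes here, i.e. DImode / DFmode),
   and the second pass (align == 0) places the remaining 4-byte saves below
   them.  */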
5758 offset = offset_base;
5759 for (align = 1; align >= 0; align--)
5761 for (i = FIRST_PSEUDO_REGISTER - 1; i >= 0; i--)
5762 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5764 enum machine_mode mode = REGISTER_NATURAL_MODE (i);
5765 int reg = i;
5767 if (current_function_interrupt)
5769 if (TARGET_REGISTER_P (i))
5770 continue;
5771 if (GENERAL_REGISTER_P (i))
5772 mode = DImode;
5774 if (mode == SFmode && (i % 2) == 1
5775 && ! TARGET_FPU_SINGLE && FP_REGISTER_P (i)
5776 && (TEST_HARD_REG_BIT (*live_regs_mask, (i ^ 1))))
5778 mode = DFmode;
5779 i--;
5780 reg--;
5783 /* If we're doing the aligned pass and this is not aligned,
5784 or we're doing the unaligned pass and this is aligned,
5785 skip it. */
5786 if ((GET_MODE_SIZE (mode) % (STACK_BOUNDARY / BITS_PER_UNIT) == 0)
5787 != align)
5788 continue;
5790 if (current_function_interrupt
5791 && GENERAL_REGISTER_P (i)
5792 && tmpx < MAX_TEMPS)
5793 schedule->temps[tmpx++] = i;
5795 offset -= GET_MODE_SIZE (mode);
5796 entry->reg = i;
5797 entry->mode = mode;
5798 entry->offset = offset;
5799 entry++;
5801 if (align && current_function_interrupt)
5802 for (i = LAST_TARGET_REG; i >= FIRST_TARGET_REG; i--)
5803 if (TEST_HARD_REG_BIT (*live_regs_mask, i))
5805 offset -= GET_MODE_SIZE (DImode);
5806 entry->reg = i;
5807 entry->mode = DImode;
5808 entry->offset = offset;
5809 entry++;
5812 entry->reg = -1;
5813 entry->mode = VOIDmode;
5814 entry->offset = offset;
5815 schedule->temps[tmpx] = -1;
5816 return entry - 1;
5819 void
5820 sh_expand_prologue (void)
5822 HARD_REG_SET live_regs_mask;
5823 int d, i;
5824 int d_rounding = 0;
5825 int save_flags = target_flags;
5826 int pretend_args;
5827 tree sp_switch_attr
5828 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
5830 current_function_interrupt = sh_cfun_interrupt_handler_p ();
5832 /* We have pretend args if we had an object sent partially in registers
5833 and partially on the stack, e.g. a large structure. */
5834 pretend_args = current_function_pretend_args_size;
5835 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
5836 && (NPARM_REGS(SImode)
5837 > current_function_args_info.arg_count[(int) SH_ARG_INT]))
5838 pretend_args = 0;
5839 output_stack_adjust (-pretend_args
5840 - current_function_args_info.stack_regs * 8,
5841 stack_pointer_rtx, 0, NULL);
5843 if (TARGET_SHCOMPACT && flag_pic && current_function_args_info.call_cookie)
5844 /* We're going to use the PIC register to load the address of the
5845 incoming-argument decoder and/or of the return trampoline from
5846 the GOT, so make sure the PIC register is preserved and
5847 initialized. */
5848 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
5850 if (TARGET_SHCOMPACT
5851 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
5853 int reg;
5855 /* First, make all registers with incoming arguments that will
5856 be pushed onto the stack live, so that register renaming
5857 doesn't overwrite them. */
5858 for (reg = 0; reg < NPARM_REGS (SImode); reg++)
5859 if (CALL_COOKIE_STACKSEQ_GET (current_function_args_info.call_cookie)
5860 >= NPARM_REGS (SImode) - reg)
5861 for (; reg < NPARM_REGS (SImode); reg++)
5862 emit_insn (gen_shcompact_preserve_incoming_args
5863 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5864 else if (CALL_COOKIE_INT_REG_GET
5865 (current_function_args_info.call_cookie, reg) == 1)
5866 emit_insn (gen_shcompact_preserve_incoming_args
5867 (gen_rtx_REG (SImode, FIRST_PARM_REG + reg)));
5869 emit_move_insn (gen_rtx_REG (Pmode, MACL_REG),
5870 stack_pointer_rtx);
5871 emit_move_insn (gen_rtx_REG (SImode, R0_REG),
5872 GEN_INT (current_function_args_info.call_cookie));
5873 emit_move_insn (gen_rtx_REG (SImode, MACH_REG),
5874 gen_rtx_REG (SImode, R0_REG));
5876 else if (TARGET_SHMEDIA)
5878 int tr = sh_media_register_for_return ();
5880 if (tr >= 0)
5882 rtx insn = emit_move_insn (gen_rtx_REG (DImode, tr),
5883 gen_rtx_REG (DImode, PR_MEDIA_REG));
5885 /* ??? We should suppress saving pr when we don't need it, but this
5886 is tricky because of builtin_return_address. */
5888 /* If this function only exits with sibcalls, this copy
5889 will be flagged as dead. */
5890 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
5891 const0_rtx,
5892 REG_NOTES (insn));
5896 /* Emit the code for SETUP_VARARGS. */
5897 if (current_function_stdarg)
5899 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
5901 /* Push arg regs as if they'd been provided by the caller on the stack. */
5902 for (i = 0; i < NPARM_REGS(SImode); i++)
5904 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
5905 rtx insn;
5907 if (i >= (NPARM_REGS(SImode)
5908 - current_function_args_info.arg_count[(int) SH_ARG_INT]
5910 break;
5911 insn = push (rn);
5912 RTX_FRAME_RELATED_P (insn) = 0;
5917 /* If we're supposed to switch stacks at function entry, do so now. */
5918 if (sp_switch_attr)
5920 /* The argument specifies a variable holding the address of the
5921 stack the interrupt function should switch to/from at entry/exit. */
5922 const char *s
5923 = ggc_strdup (TREE_STRING_POINTER (TREE_VALUE (sp_switch_attr)));
5924 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
5926 emit_insn (gen_sp_switch_1 (sp_switch));
5929 d = calc_live_regs (&live_regs_mask);
5930 /* ??? Maybe we could save some switching if we can move a mode switch
5931 that already happens to be at the function start into the prologue. */
5932 if (target_flags != save_flags && ! current_function_interrupt)
5933 emit_insn (gen_toggle_sz ());
5935 if (TARGET_SH5)
5937 int offset_base, offset;
5938 rtx r0 = NULL_RTX;
5939 int offset_in_r0 = -1;
5940 int sp_in_r0 = 0;
5941 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
5942 int total_size, save_size;
5943 save_schedule schedule;
5944 save_entry *entry;
5945 int *tmp_pnt;
5947 if (call_really_used_regs[R0_REG] && ! fixed_regs[R0_REG]
5948 && ! current_function_interrupt)
5949 r0 = gen_rtx_REG (Pmode, R0_REG);
5951 /* D is the actual number of bytes that we need for saving registers,
5952 however, in initial_elimination_offset we have committed to using
5953 an additional TREGS_SPACE amount of bytes - in order to keep both
5954 addresses to arguments supplied by the caller and local variables
5955 valid, we must keep this gap. Place it between the incoming
5956 arguments and the actually saved registers in a bid to optimize
5957 locality of reference. */
5958 total_size = d + tregs_space;
5959 total_size += rounded_frame_size (total_size);
5960 save_size = total_size - rounded_frame_size (d);
5961 if (save_size % (STACK_BOUNDARY / BITS_PER_UNIT))
5962 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
5963 - save_size % (STACK_BOUNDARY / BITS_PER_UNIT));
5965 /* If adjusting the stack in a single step costs nothing extra, do so.
5966 I.e. either if a single addi is enough, or we need a movi anyway,
5967 and we don't exceed the maximum offset range (the test for the
5968 latter is conservative for simplicity). */
5969 if (TARGET_SHMEDIA
5970 && (CONST_OK_FOR_I10 (-total_size)
5971 || (! CONST_OK_FOR_I10 (-(save_size + d_rounding))
5972 && total_size <= 2044)))
5973 d_rounding = total_size - save_size;
5975 offset_base = d + d_rounding;
5977 output_stack_adjust (-(save_size + d_rounding), stack_pointer_rtx,
5978 0, NULL);
5980 sh5_schedule_saves (&live_regs_mask, &schedule, offset_base);
5981 tmp_pnt = schedule.temps;
5982 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
5984 enum machine_mode mode = entry->mode;
5985 unsigned int reg = entry->reg;
5986 rtx reg_rtx, mem_rtx, pre_dec = NULL_RTX;
5987 rtx orig_reg_rtx;
5989 offset = entry->offset;
5991 reg_rtx = gen_rtx_REG (mode, reg);
5993 mem_rtx = gen_frame_mem (mode,
5994 gen_rtx_PLUS (Pmode,
5995 stack_pointer_rtx,
5996 GEN_INT (offset)));
5998 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_pre_dec);
6000 gcc_assert (r0);
6001 mem_rtx = NULL_RTX;
6003 try_pre_dec:
6005 if (HAVE_PRE_DECREMENT
6006 && (offset_in_r0 - offset == GET_MODE_SIZE (mode)
6007 || mem_rtx == NULL_RTX
6008 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6010 pre_dec = gen_frame_mem (mode, gen_rtx_PRE_DEC (Pmode, r0));
6012 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (pre_dec, 0),
6013 pre_dec_ok);
6015 pre_dec = NULL_RTX;
6017 break;
6019 pre_dec_ok:
6020 mem_rtx = NULL_RTX;
6021 offset += GET_MODE_SIZE (mode);
6023 while (0);
6025 if (mem_rtx != NULL_RTX)
6026 goto addr_ok;
6028 if (offset_in_r0 == -1)
6030 emit_move_insn (r0, GEN_INT (offset));
6031 offset_in_r0 = offset;
6033 else if (offset != offset_in_r0)
6035 emit_move_insn (r0,
6036 gen_rtx_PLUS
6037 (Pmode, r0,
6038 GEN_INT (offset - offset_in_r0)));
6039 offset_in_r0 += offset - offset_in_r0;
6042 if (pre_dec != NULL_RTX)
6044 if (! sp_in_r0)
6046 emit_move_insn (r0,
6047 gen_rtx_PLUS
6048 (Pmode, r0, stack_pointer_rtx));
6049 sp_in_r0 = 1;
6052 offset -= GET_MODE_SIZE (mode);
6053 offset_in_r0 -= GET_MODE_SIZE (mode);
6055 mem_rtx = pre_dec;
6057 else if (sp_in_r0)
6058 mem_rtx = gen_frame_mem (mode, r0);
6059 else
6060 mem_rtx = gen_frame_mem (mode,
6061 gen_rtx_PLUS (Pmode,
6062 stack_pointer_rtx,
6063 r0));
6065 /* We must not use an r0-based address for target-branch
6066 registers or for special registers without pre-dec
6067 memory addresses, since we store their values in r0
6068 first. */
6069 gcc_assert (!TARGET_REGISTER_P (reg)
6070 && ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6071 || mem_rtx == pre_dec));
6073 addr_ok:
6074 orig_reg_rtx = reg_rtx;
6075 if (TARGET_REGISTER_P (reg)
6076 || ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6077 && mem_rtx != pre_dec))
6079 rtx tmp_reg = gen_rtx_REG (GET_MODE (reg_rtx), *tmp_pnt);
6081 emit_move_insn (tmp_reg, reg_rtx);
6083 if (REGNO (tmp_reg) == R0_REG)
6085 offset_in_r0 = -1;
6086 sp_in_r0 = 0;
6087 gcc_assert (!refers_to_regno_p
6088 (R0_REG, R0_REG+1, mem_rtx, (rtx *) 0));
6091 if (*++tmp_pnt <= 0)
6092 tmp_pnt = schedule.temps;
6094 reg_rtx = tmp_reg;
6097 rtx insn;
6099 /* Mark as interesting for the DWARF CFI generator.  */
6100 insn = emit_move_insn (mem_rtx, reg_rtx);
6101 RTX_FRAME_RELATED_P (insn) = 1;
6102 /* If we use an intermediate register for the save, we can't
6103 describe this exactly in cfi as a copy of the to-be-saved
6104 register into the temporary register and then the temporary
6105 register on the stack, because the temporary register can
6106 have a different natural size than the to-be-saved register.
6107 Thus, we gloss over the intermediate copy and pretend we do
6108 a direct save from the to-be-saved register. */
6109 if (REGNO (reg_rtx) != reg)
6111 rtx set, note_rtx;
6113 set = gen_rtx_SET (VOIDmode, mem_rtx, orig_reg_rtx);
6114 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6115 REG_NOTES (insn));
6116 REG_NOTES (insn) = note_rtx;
6119 if (TARGET_SHCOMPACT && (offset_in_r0 != -1))
6121 rtx reg_rtx = gen_rtx_REG (mode, reg);
6122 rtx set, note_rtx;
6123 rtx mem_rtx = gen_frame_mem (mode,
6124 gen_rtx_PLUS (Pmode,
6125 stack_pointer_rtx,
6126 GEN_INT (offset)));
6128 set = gen_rtx_SET (VOIDmode, mem_rtx, reg_rtx);
6129 note_rtx = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, set,
6130 REG_NOTES (insn));
6131 REG_NOTES (insn) = note_rtx;
6136 gcc_assert (entry->offset == d_rounding);
6138 else
6139 push_regs (&live_regs_mask, current_function_interrupt);
6141 if (flag_pic && regs_ever_live[PIC_OFFSET_TABLE_REGNUM])
6143 rtx insn = get_last_insn ();
6144 rtx last = emit_insn (gen_GOTaddr2picreg ());
6146 /* Mark these insns as possibly dead. Sometimes, flow2 may
6147 delete all uses of the PIC register. In this case, let it
6148 delete the initialization too. */
6151 insn = NEXT_INSN (insn);
6153 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6154 const0_rtx,
6155 REG_NOTES (insn));
6157 while (insn != last);
6160 if (SHMEDIA_REGS_STACK_ADJUST ())
6162 /* This must NOT go through the PLT, otherwise mach and macl
6163 may be clobbered. */
6164 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6165 (TARGET_FPU_ANY
6166 ? "__GCC_push_shmedia_regs"
6167 : "__GCC_push_shmedia_regs_nofpu"), SFUNC_GOT);
6168 emit_insn (gen_shmedia_save_restore_regs_compact
6169 (GEN_INT (-SHMEDIA_REGS_STACK_ADJUST ())));
6172 if (target_flags != save_flags && ! current_function_interrupt)
6174 rtx insn = emit_insn (gen_toggle_sz ());
6176 /* If we're lucky, a mode switch in the function body will
6177 overwrite fpscr, turning this insn dead. Tell flow this
6178 insn is ok to delete. */
6179 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6180 const0_rtx,
6181 REG_NOTES (insn));
6184 target_flags = save_flags;
6186 output_stack_adjust (-rounded_frame_size (d) + d_rounding,
6187 stack_pointer_rtx, 0, NULL);
6189 if (frame_pointer_needed)
6190 frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
6192 if (TARGET_SHCOMPACT
6193 && (current_function_args_info.call_cookie & ~ CALL_COOKIE_RET_TRAMP(1)))
6195 /* This must NOT go through the PLT, otherwise mach and macl
6196 may be clobbered. */
6197 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6198 "__GCC_shcompact_incoming_args", SFUNC_GOT);
6199 emit_insn (gen_shcompact_incoming_args ());
6203 void
6204 sh_expand_epilogue (bool sibcall_p)
6206 HARD_REG_SET live_regs_mask;
6207 int d, i;
6208 int d_rounding = 0;
6210 int save_flags = target_flags;
6211 int frame_size, save_size;
6212 int fpscr_deferred = 0;
6213 int e = sibcall_p ? -1 : 1;
6215 d = calc_live_regs (&live_regs_mask);
6217 save_size = d;
6218 frame_size = rounded_frame_size (d);
6220 if (TARGET_SH5)
6222 int tregs_space = shmedia_target_regs_stack_adjust (&live_regs_mask);
6223 int total_size;
6224 if (d % (STACK_BOUNDARY / BITS_PER_UNIT))
6225 d_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
6226 - d % (STACK_BOUNDARY / BITS_PER_UNIT));
6228 total_size = d + tregs_space;
6229 total_size += rounded_frame_size (total_size);
6230 save_size = total_size - frame_size;
6232 /* If adjusting the stack in a single step costs nothing extra, do so.
6233 I.e. either if a single addi is enough, or we need a movi anyway,
6234 and we don't exceed the maximum offset range (the test for the
6235 latter is conservative for simplicity). */
6236 if (TARGET_SHMEDIA
6237 && ! frame_pointer_needed
6238 && (CONST_OK_FOR_I10 (total_size)
6239 || (! CONST_OK_FOR_I10 (save_size + d_rounding)
6240 && total_size <= 2044)))
6241 d_rounding = frame_size;
6243 frame_size -= d_rounding;
6246 if (frame_pointer_needed)
6248 /* We must avoid scheduling the epilogue with previous basic blocks
6249 when exception handling is enabled. See PR/18032. */
6250 if (flag_exceptions)
6251 emit_insn (gen_blockage ());
6252 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
6253 &live_regs_mask);
6255 /* We must avoid moving the stack pointer adjustment past code
6256 which reads from the local frame, else an interrupt could
6257 occur after the SP adjustment and clobber data in the local
6258 frame. */
6259 emit_insn (gen_blockage ());
6260 emit_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
6262 else if (frame_size)
6264 /* We must avoid moving the stack pointer adjustment past code
6265 which reads from the local frame, else an interrupt could
6266 occur after the SP adjustment and clobber data in the local
6267 frame. */
6268 emit_insn (gen_blockage ());
6269 output_stack_adjust (frame_size, stack_pointer_rtx, e, &live_regs_mask);
6272 if (SHMEDIA_REGS_STACK_ADJUST ())
6274 function_symbol (gen_rtx_REG (Pmode, R0_REG),
6275 (TARGET_FPU_ANY
6276 ? "__GCC_pop_shmedia_regs"
6277 : "__GCC_pop_shmedia_regs_nofpu"), SFUNC_GOT);
6278 /* This must NOT go through the PLT, otherwise mach and macl
6279 may be clobbered. */
6280 emit_insn (gen_shmedia_save_restore_regs_compact
6281 (GEN_INT (SHMEDIA_REGS_STACK_ADJUST ())));
6284 /* Pop all the registers. */
6286 if (target_flags != save_flags && ! current_function_interrupt)
6287 emit_insn (gen_toggle_sz ());
6288 if (TARGET_SH5)
6290 int offset_base, offset;
6291 int offset_in_r0 = -1;
6292 int sp_in_r0 = 0;
6293 rtx r0 = gen_rtx_REG (Pmode, R0_REG);
6294 save_schedule schedule;
6295 save_entry *entry;
6296 int *tmp_pnt;
6298 entry = sh5_schedule_saves (&live_regs_mask, &schedule, d_rounding);
6299 offset_base = -entry[1].offset + d_rounding;
6300 tmp_pnt = schedule.temps;
6301 for (; entry->mode != VOIDmode; entry--)
6303 enum machine_mode mode = entry->mode;
6304 int reg = entry->reg;
6305 rtx reg_rtx, mem_rtx, post_inc = NULL_RTX, insn;
6307 offset = offset_base + entry->offset;
6308 reg_rtx = gen_rtx_REG (mode, reg);
6310 mem_rtx = gen_frame_mem (mode,
6311 gen_rtx_PLUS (Pmode,
6312 stack_pointer_rtx,
6313 GEN_INT (offset)));
6315 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (mem_rtx, 0), try_post_inc);
6317 mem_rtx = NULL_RTX;
6319 try_post_inc:
6321 if (HAVE_POST_INCREMENT
6322 && (offset == offset_in_r0
6323 || (offset + GET_MODE_SIZE (mode) != d + d_rounding
6324 && mem_rtx == NULL_RTX)
6325 || reg == PR_REG || SPECIAL_REGISTER_P (reg)))
6327 post_inc = gen_frame_mem (mode, gen_rtx_POST_INC (Pmode, r0));
6329 GO_IF_LEGITIMATE_ADDRESS (mode, XEXP (post_inc, 0),
6330 post_inc_ok);
6332 post_inc = NULL_RTX;
6334 break;
6336 post_inc_ok:
6337 mem_rtx = NULL_RTX;
6339 while (0);
6341 if (mem_rtx != NULL_RTX)
6342 goto addr_ok;
6344 if (offset_in_r0 == -1)
6346 emit_move_insn (r0, GEN_INT (offset));
6347 offset_in_r0 = offset;
6349 else if (offset != offset_in_r0)
6351 emit_move_insn (r0,
6352 gen_rtx_PLUS
6353 (Pmode, r0,
6354 GEN_INT (offset - offset_in_r0)));
6355 offset_in_r0 += offset - offset_in_r0;
6358 if (post_inc != NULL_RTX)
6360 if (! sp_in_r0)
6362 emit_move_insn (r0,
6363 gen_rtx_PLUS
6364 (Pmode, r0, stack_pointer_rtx));
6365 sp_in_r0 = 1;
6368 mem_rtx = post_inc;
6370 offset_in_r0 += GET_MODE_SIZE (mode);
6372 else if (sp_in_r0)
6373 mem_rtx = gen_frame_mem (mode, r0);
6374 else
6375 mem_rtx = gen_frame_mem (mode,
6376 gen_rtx_PLUS (Pmode,
6377 stack_pointer_rtx,
6378 r0));
6380 gcc_assert ((reg != PR_REG && !SPECIAL_REGISTER_P (reg))
6381 || mem_rtx == post_inc);
6383 addr_ok:
6384 if ((reg == PR_REG || SPECIAL_REGISTER_P (reg))
6385 && mem_rtx != post_inc)
6387 insn = emit_move_insn (r0, mem_rtx);
6388 mem_rtx = r0;
6390 else if (TARGET_REGISTER_P (reg))
6392 rtx tmp_reg = gen_rtx_REG (mode, *tmp_pnt);
6394 /* Give the scheduler a bit of freedom by using up to
6395 MAX_TEMPS registers in a round-robin fashion. */
6396 insn = emit_move_insn (tmp_reg, mem_rtx);
6397 mem_rtx = tmp_reg;
6398 if (*++tmp_pnt < 0)
6399 tmp_pnt = schedule.temps;
6402 insn = emit_move_insn (reg_rtx, mem_rtx);
6403 if (reg == PR_MEDIA_REG && sh_media_register_for_return () >= 0)
6404 /* This is dead, unless we return with a sibcall. */
6405 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD,
6406 const0_rtx,
6407 REG_NOTES (insn));
6410 gcc_assert (entry->offset + offset_base == d + d_rounding);
6412 else /* ! TARGET_SH5 */
6414 save_size = 0;
6415 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6416 pop (PR_REG);
6417 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
6419 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
6421 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
6422 && hard_regs_intersect_p (&live_regs_mask,
6423 &reg_class_contents[DF_REGS]))
6424 fpscr_deferred = 1;
6425 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j))
6426 pop (j);
6427 if (j == FIRST_FP_REG && fpscr_deferred)
6428 pop (FPSCR_REG);
6432 if (target_flags != save_flags && ! current_function_interrupt)
6433 emit_insn (gen_toggle_sz ());
6434 target_flags = save_flags;
6436 output_stack_adjust (current_function_pretend_args_size
6437 + save_size + d_rounding
6438 + current_function_args_info.stack_regs * 8,
6439 stack_pointer_rtx, e, NULL);
6441 if (current_function_calls_eh_return)
6442 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
6443 EH_RETURN_STACKADJ_RTX));
6445 /* Switch back to the normal stack if necessary. */
6446 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
6447 emit_insn (gen_sp_switch_2 ());
6449 /* Tell flow the insn that pops PR isn't dead. */
6450 /* PR_REG will never be live in SHmedia mode, and we don't need to
6451 USE PR_MEDIA_REG, since it will be explicitly copied to TR0_REG
6452 by the return pattern. */
6453 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
6454 emit_insn (gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, PR_REG)));
6457 static int sh_need_epilogue_known = 0;
6460 sh_need_epilogue (void)
6462 if (! sh_need_epilogue_known)
6464 rtx epilogue;
6466 start_sequence ();
6467 sh_expand_epilogue (0);
6468 epilogue = get_insns ();
6469 end_sequence ();
6470 sh_need_epilogue_known = (epilogue == NULL ? -1 : 1);
6472 return sh_need_epilogue_known > 0;
6475 /* Emit code to change the current function's return address to RA.
6476 TEMP is available as a scratch register, if needed. */
6478 void
6479 sh_set_return_address (rtx ra, rtx tmp)
6481 HARD_REG_SET live_regs_mask;
6482 int d;
6483 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
6484 int pr_offset;
6486 d = calc_live_regs (&live_regs_mask);
6488 /* If pr_reg isn't live, we can set it (or the register given by
6489 sh_media_register_for_return) directly. */
6490 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
6492 rtx rr;
6494 if (TARGET_SHMEDIA)
6496 int rr_regno = sh_media_register_for_return ();
6498 if (rr_regno < 0)
6499 rr_regno = pr_reg;
6501 rr = gen_rtx_REG (DImode, rr_regno);
6503 else
6504 rr = gen_rtx_REG (SImode, pr_reg);
6506 emit_insn (GEN_MOV (rr, ra));
6507 /* Tell flow the register for return isn't dead. */
6508 emit_insn (gen_rtx_USE (VOIDmode, rr));
6509 return;
6512 if (TARGET_SH5)
6514 int offset;
6515 save_schedule schedule;
6516 save_entry *entry;
6518 entry = sh5_schedule_saves (&live_regs_mask, &schedule, 0);
6519 offset = entry[1].offset;
6520 for (; entry->mode != VOIDmode; entry--)
6521 if (entry->reg == pr_reg)
6522 goto found;
6524 /* We can't find the pr register. */
6525 gcc_unreachable ();
6527 found:
6528 offset = entry->offset - offset;
6529 pr_offset = (rounded_frame_size (d) + offset
6530 + SHMEDIA_REGS_STACK_ADJUST ());
6532 else
6533 pr_offset = rounded_frame_size (d);
6535 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
6536 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
6538 tmp = gen_frame_mem (Pmode, tmp);
6539 emit_insn (GEN_MOV (tmp, ra));
6542 /* Clear variables at function end. */
6544 static void
6545 sh_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6546 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6548 sh_need_epilogue_known = 0;
6551 static rtx
6552 sh_builtin_saveregs (void)
6554 /* First unnamed integer register. */
6555 int first_intreg = current_function_args_info.arg_count[(int) SH_ARG_INT];
6556 /* Number of integer registers we need to save. */
6557 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
6558 /* First unnamed SFmode float reg.  */
6559 int first_floatreg = current_function_args_info.arg_count[(int) SH_ARG_FLOAT];
6560 /* Number of SFmode float regs to save. */
6561 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
6562 rtx regbuf, fpregs;
6563 int bufsize, regno;
6564 HOST_WIDE_INT alias_set;
6566 if (TARGET_SH5)
6568 if (n_intregs)
6570 int pushregs = n_intregs;
6572 while (pushregs < NPARM_REGS (SImode) - 1
6573 && (CALL_COOKIE_INT_REG_GET
6574 (current_function_args_info.call_cookie,
6575 NPARM_REGS (SImode) - pushregs)
6576 == 1))
6578 current_function_args_info.call_cookie
6579 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
6580 - pushregs, 1);
6581 pushregs++;
6584 if (pushregs == NPARM_REGS (SImode))
6585 current_function_args_info.call_cookie
6586 |= (CALL_COOKIE_INT_REG (0, 1)
6587 | CALL_COOKIE_STACKSEQ (pushregs - 1));
6588 else
6589 current_function_args_info.call_cookie
6590 |= CALL_COOKIE_STACKSEQ (pushregs);
6592 current_function_pretend_args_size += 8 * n_intregs;
6594 if (TARGET_SHCOMPACT)
6595 return const0_rtx;
6598 if (! TARGET_SH2E && ! TARGET_SH4 && ! TARGET_SH5)
6600 error ("__builtin_saveregs not supported by this subtarget");
6601 return const0_rtx;
6604 if (TARGET_SHMEDIA)
6605 n_floatregs = 0;
6607 /* Allocate block of memory for the regs. */
6608 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
6609 Or can assign_stack_local accept a 0 SIZE argument? */
6610 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
6612 if (TARGET_SHMEDIA)
6613 regbuf = gen_frame_mem (BLKmode, gen_rtx_REG (Pmode, ARG_POINTER_REGNUM));
6614 else if (n_floatregs & 1)
6616 rtx addr;
6618 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6619 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
6620 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
6621 regbuf = change_address (regbuf, BLKmode, addr);
6623 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
6625 rtx addr, mask;
6627 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
6628 addr = copy_to_mode_reg (Pmode, plus_constant (XEXP (regbuf, 0), 4));
6629 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
6630 emit_insn (gen_andsi3 (addr, addr, mask));
6631 regbuf = change_address (regbuf, BLKmode, addr);
6633 else
6634 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
6635 alias_set = get_varargs_alias_set ();
6636 set_mem_alias_set (regbuf, alias_set);
6638 /* Save int args.
6639 This is optimized to only save the regs that are necessary. Explicitly
6640 named args need not be saved. */
6641 if (n_intregs > 0)
6642 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
6643 adjust_address (regbuf, BLKmode,
6644 n_floatregs * UNITS_PER_WORD),
6645 n_intregs);
6647 if (TARGET_SHMEDIA)
6648 /* Return the address of the regbuf. */
6649 return XEXP (regbuf, 0);
6651 /* Save float args.
6652 This is optimized to only save the regs that are necessary. Explicitly
6653 named args need not be saved.
6654 We explicitly build a pointer to the buffer because it halves the insn
6655 count when not optimizing (otherwise the pointer is built for each reg
6656 saved).
6657 We emit the moves in reverse order so that we can use predecrement. */
6659 fpregs = copy_to_mode_reg (Pmode,
6660 plus_constant (XEXP (regbuf, 0),
6661 n_floatregs * UNITS_PER_WORD));
6662 if (TARGET_SH4 || TARGET_SH2A_DOUBLE)
6664 rtx mem;
6665 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
6667 emit_insn (gen_addsi3 (fpregs, fpregs,
6668 GEN_INT (-2 * UNITS_PER_WORD)));
6669 mem = change_address (regbuf, DFmode, fpregs);
6670 emit_move_insn (mem,
6671 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
6673 regno = first_floatreg;
6674 if (regno & 1)
6676 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6677 mem = change_address (regbuf, SFmode, fpregs);
6678 emit_move_insn (mem,
6679 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno
6680 - (TARGET_LITTLE_ENDIAN != 0)));
6683 else
6684 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
6686 rtx mem;
6688 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
6689 mem = change_address (regbuf, SFmode, fpregs);
6690 emit_move_insn (mem,
6691 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
6694 /* Return the address of the regbuf. */
6695 return XEXP (regbuf, 0);
6698 /* Define the `__builtin_va_list' type for the ABI. */
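/* Sketched as a C declaration, the record built below amounts to:

     struct {
       void *__va_next_o,  *__va_next_o_limit;
       void *__va_next_fp, *__va_next_fp_limit;
       void *__va_next_stack;
     };

   (field names taken from the code below; layout is left to layout_type).  */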
6700 static tree
6701 sh_build_builtin_va_list (void)
6703 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6704 tree record;
6706 if (TARGET_SH5 || (! TARGET_SH2E && ! TARGET_SH4)
6707 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6708 return ptr_type_node;
6710 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
6712 f_next_o = build_decl (FIELD_DECL, get_identifier ("__va_next_o"),
6713 ptr_type_node);
6714 f_next_o_limit = build_decl (FIELD_DECL,
6715 get_identifier ("__va_next_o_limit"),
6716 ptr_type_node);
6717 f_next_fp = build_decl (FIELD_DECL, get_identifier ("__va_next_fp"),
6718 ptr_type_node);
6719 f_next_fp_limit = build_decl (FIELD_DECL,
6720 get_identifier ("__va_next_fp_limit"),
6721 ptr_type_node);
6722 f_next_stack = build_decl (FIELD_DECL, get_identifier ("__va_next_stack"),
6723 ptr_type_node);
6725 DECL_FIELD_CONTEXT (f_next_o) = record;
6726 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
6727 DECL_FIELD_CONTEXT (f_next_fp) = record;
6728 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
6729 DECL_FIELD_CONTEXT (f_next_stack) = record;
6731 TYPE_FIELDS (record) = f_next_o;
6732 TREE_CHAIN (f_next_o) = f_next_o_limit;
6733 TREE_CHAIN (f_next_o_limit) = f_next_fp;
6734 TREE_CHAIN (f_next_fp) = f_next_fp_limit;
6735 TREE_CHAIN (f_next_fp_limit) = f_next_stack;
6737 layout_type (record);
6739 return record;
6742 /* Implement `va_start' for varargs and stdarg. */
6744 void
6745 sh_va_start (tree valist, rtx nextarg)
6747 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6748 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6749 tree t, u;
6750 int nfp, nint;
6752 if (TARGET_SH5)
6754 expand_builtin_saveregs ();
6755 std_expand_builtin_va_start (valist, nextarg);
6756 return;
6759 if ((! TARGET_SH2E && ! TARGET_SH4)
6760 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
6762 std_expand_builtin_va_start (valist, nextarg);
6763 return;
6766 f_next_o = TYPE_FIELDS (va_list_type_node);
6767 f_next_o_limit = TREE_CHAIN (f_next_o);
6768 f_next_fp = TREE_CHAIN (f_next_o_limit);
6769 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6770 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6772 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6773 NULL_TREE);
6774 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6775 valist, f_next_o_limit, NULL_TREE);
6776 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
6777 NULL_TREE);
6778 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6779 valist, f_next_fp_limit, NULL_TREE);
6780 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
6781 valist, f_next_stack, NULL_TREE);
6783 /* Call __builtin_saveregs. */
6784 u = make_tree (ptr_type_node, expand_builtin_saveregs ());
6785 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
6786 TREE_SIDE_EFFECTS (t) = 1;
6787 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6789 nfp = current_function_args_info.arg_count[SH_ARG_FLOAT];
6790 if (nfp < 8)
6791 nfp = 8 - nfp;
6792 else
6793 nfp = 0;
6794 u = fold_build2 (PLUS_EXPR, ptr_type_node, u,
6795 build_int_cst (NULL_TREE, UNITS_PER_WORD * nfp));
6796 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
6797 TREE_SIDE_EFFECTS (t) = 1;
6798 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6800 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
6801 TREE_SIDE_EFFECTS (t) = 1;
6802 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6804 nint = current_function_args_info.arg_count[SH_ARG_INT];
6805 if (nint < 4)
6806 nint = 4 - nint;
6807 else
6808 nint = 0;
6809 u = fold_build2 (PLUS_EXPR, ptr_type_node, u,
6810 build_int_cst (NULL_TREE, UNITS_PER_WORD * nint));
6811 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
6812 TREE_SIDE_EFFECTS (t) = 1;
6813 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6815 u = make_tree (ptr_type_node, nextarg);
6816 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
6817 TREE_SIDE_EFFECTS (t) = 1;
6818 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
6821 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
6822 member, return it. */
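/* E.g. for struct { int :0; double d; } the member D is returned, while for
   struct { int a; int b; } the result is NULL_TREE.  */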
6823 static tree
6824 find_sole_member (tree type)
6826 tree field, member = NULL_TREE;
6828 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
6830 if (TREE_CODE (field) != FIELD_DECL)
6831 continue;
6832 if (!DECL_SIZE (field))
6833 return NULL_TREE;
6834 if (integer_zerop (DECL_SIZE (field)))
6835 continue;
6836 if (member)
6837 return NULL_TREE;
6838 member = field;
6840 return member;
6842 /* Implement `va_arg'. */
6844 static tree
6845 sh_gimplify_va_arg_expr (tree valist, tree type, tree *pre_p,
6846 tree *post_p ATTRIBUTE_UNUSED)
6848 HOST_WIDE_INT size, rsize;
6849 tree tmp, pptr_type_node;
6850 tree addr, lab_over = NULL, result = NULL;
6851 int pass_by_ref = targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
6852 tree eff_type;
6854 if (pass_by_ref)
6855 type = build_pointer_type (type);
6857 size = int_size_in_bytes (type);
6858 rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
6859 pptr_type_node = build_pointer_type (ptr_type_node);
6861 if (! TARGET_SH5 && (TARGET_SH2E || TARGET_SH4)
6862 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
6864 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
6865 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
6866 int pass_as_float;
6867 tree lab_false;
6868 tree member;
6870 f_next_o = TYPE_FIELDS (va_list_type_node);
6871 f_next_o_limit = TREE_CHAIN (f_next_o);
6872 f_next_fp = TREE_CHAIN (f_next_o_limit);
6873 f_next_fp_limit = TREE_CHAIN (f_next_fp);
6874 f_next_stack = TREE_CHAIN (f_next_fp_limit);
6876 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
6877 NULL_TREE);
6878 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
6879 valist, f_next_o_limit, NULL_TREE);
6880 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
6881 valist, f_next_fp, NULL_TREE);
6882 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
6883 valist, f_next_fp_limit, NULL_TREE);
6884 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
6885 valist, f_next_stack, NULL_TREE);
6887 /* Structures with a single member with a distinct mode are passed
6888 like their member. This is relevant if the latter has a REAL_TYPE
6889 or COMPLEX_TYPE type. */
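/* For instance, a struct whose only nonzero-sized member is a double is
   treated here like a plain double when choosing between the FP and
   integer register save areas.  */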
6890 eff_type = type;
6891 while (TREE_CODE (eff_type) == RECORD_TYPE
6892 && (member = find_sole_member (eff_type))
6893 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
6894 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
6895 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
6897 tree field_type = TREE_TYPE (member);
6899 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
6900 eff_type = field_type;
6901 else
6903 gcc_assert ((TYPE_ALIGN (eff_type)
6904 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
6905 || (TYPE_ALIGN (eff_type)
6906 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
6907 break;
6911 if (TARGET_SH4)
6913 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
6914 || (TREE_CODE (eff_type) == COMPLEX_TYPE
6915 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
6916 && size <= 16));
6918 else
6920 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
6923 addr = create_tmp_var (pptr_type_node, NULL);
6924 lab_false = create_artificial_label ();
6925 lab_over = create_artificial_label ();
6927 valist = build1 (INDIRECT_REF, ptr_type_node, addr);
6929 if (pass_as_float)
6931 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp), NULL);
6932 tree cmp;
6933 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
6935 tmp = build1 (ADDR_EXPR, pptr_type_node, next_fp);
6936 tmp = build2 (MODIFY_EXPR, void_type_node, addr, tmp);
6937 gimplify_and_add (tmp, pre_p);
6939 tmp = build2 (MODIFY_EXPR, ptr_type_node, next_fp_tmp, valist);
6940 gimplify_and_add (tmp, pre_p);
6941 tmp = next_fp_limit;
6942 if (size > 4 && !is_double)
6943 tmp = build2 (PLUS_EXPR, TREE_TYPE (tmp), tmp,
6944 fold_convert (TREE_TYPE (tmp), size_int (4 - size)));
6945 tmp = build2 (GE_EXPR, boolean_type_node, next_fp_tmp, tmp);
6946 cmp = build3 (COND_EXPR, void_type_node, tmp,
6947 build1 (GOTO_EXPR, void_type_node, lab_false),
6948 NULL_TREE);
6949 if (!is_double)
6950 gimplify_and_add (cmp, pre_p);
6952 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
6953 || (is_double || size == 16))
6955 tmp = fold_convert (ptr_type_node, size_int (UNITS_PER_WORD));
6956 tmp = build2 (BIT_AND_EXPR, ptr_type_node, next_fp_tmp, tmp);
6957 tmp = build2 (PLUS_EXPR, ptr_type_node, next_fp_tmp, tmp);
6958 tmp = build2 (MODIFY_EXPR, ptr_type_node, next_fp_tmp, tmp);
6959 gimplify_and_add (tmp, pre_p);
6961 if (is_double)
6962 gimplify_and_add (cmp, pre_p);
6964 #ifdef FUNCTION_ARG_SCmode_WART
6965 if (TYPE_MODE (eff_type) == SCmode
6966 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
6968 tree subtype = TREE_TYPE (eff_type);
6969 tree real, imag;
6971 imag
6972 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
6973 imag = get_initialized_tmp_var (imag, pre_p, NULL);
6975 real
6976 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
6977 real = get_initialized_tmp_var (real, pre_p, NULL);
6979 result = build2 (COMPLEX_EXPR, type, real, imag);
6980 result = get_initialized_tmp_var (result, pre_p, NULL);
6982 #endif /* FUNCTION_ARG_SCmode_WART */
6984 tmp = build1 (GOTO_EXPR, void_type_node, lab_over);
6985 gimplify_and_add (tmp, pre_p);
6987 tmp = build1 (LABEL_EXPR, void_type_node, lab_false);
6988 gimplify_and_add (tmp, pre_p);
6990 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
6991 tmp = build2 (MODIFY_EXPR, void_type_node, addr, tmp);
6992 gimplify_and_add (tmp, pre_p);
6993 tmp = build2 (MODIFY_EXPR, ptr_type_node, next_fp_tmp, valist);
6994 gimplify_and_add (tmp, pre_p);
6996 tmp = build2 (MODIFY_EXPR, ptr_type_node, valist, next_fp_tmp);
6997 gimplify_and_add (tmp, post_p);
6998 valist = next_fp_tmp;
7000 else
7002 tmp = fold_convert (ptr_type_node, size_int (rsize));
7003 tmp = build2 (PLUS_EXPR, ptr_type_node, next_o, tmp);
7004 tmp = build2 (GT_EXPR, boolean_type_node, tmp, next_o_limit);
7005 tmp = build3 (COND_EXPR, void_type_node, tmp,
7006 build1 (GOTO_EXPR, void_type_node, lab_false),
7007 NULL_TREE);
7008 gimplify_and_add (tmp, pre_p);
7010 tmp = build1 (ADDR_EXPR, pptr_type_node, next_o);
7011 tmp = build2 (MODIFY_EXPR, void_type_node, addr, tmp);
7012 gimplify_and_add (tmp, pre_p);
7014 tmp = build1 (GOTO_EXPR, void_type_node, lab_over);
7015 gimplify_and_add (tmp, pre_p);
7017 tmp = build1 (LABEL_EXPR, void_type_node, lab_false);
7018 gimplify_and_add (tmp, pre_p);
7020 if (size > 4 && ! TARGET_SH4)
7022 tmp = build2 (MODIFY_EXPR, ptr_type_node, next_o, next_o_limit);
7023 gimplify_and_add (tmp, pre_p);
7026 tmp = build1 (ADDR_EXPR, pptr_type_node, next_stack);
7027 tmp = build2 (MODIFY_EXPR, void_type_node, addr, tmp);
7028 gimplify_and_add (tmp, pre_p);
7031 if (!result)
7033 tmp = build1 (LABEL_EXPR, void_type_node, lab_over);
7034 gimplify_and_add (tmp, pre_p);
7038 /* ??? In va-sh.h, there had been code to make values larger than
7039 size 8 indirect. This does not match the FUNCTION_ARG macros. */
7041 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
7042 if (result)
7044 tmp = build2 (MODIFY_EXPR, void_type_node, result, tmp);
7045 gimplify_and_add (tmp, pre_p);
7047 tmp = build1 (LABEL_EXPR, void_type_node, lab_over);
7048 gimplify_and_add (tmp, pre_p);
7050 else
7051 result = tmp;
7053 if (pass_by_ref)
7054 result = build_va_arg_indirect_ref (result);
7056 return result;
7059 bool
7060 sh_promote_prototypes (tree type)
7062 if (TARGET_HITACHI)
7063 return 0;
7064 if (! type)
7065 return 1;
7066 return ! sh_attr_renesas_p (type);
7069 /* Whether an argument must be passed by reference. On SHcompact, we
7070 pretend arguments wider than 32 bits that would have been passed in
7071 registers are passed by reference, so that an SHmedia trampoline
7072 loads them into the full 64-bit registers. */
7074 static int
7075 shcompact_byref (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7076 tree type, bool named)
7078 unsigned HOST_WIDE_INT size;
7080 if (type)
7081 size = int_size_in_bytes (type);
7082 else
7083 size = GET_MODE_SIZE (mode);
7085 if (cum->arg_count[SH_ARG_INT] < NPARM_REGS (SImode)
7086 && (!named
7087 || GET_SH_ARG_CLASS (mode) == SH_ARG_INT
7088 || (GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT
7089 && cum->arg_count[SH_ARG_FLOAT] >= NPARM_REGS (SFmode)))
7090 && size > 4
7091 && !SHCOMPACT_FORCE_ON_STACK (mode, type)
7092 && !SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7093 return size;
7094 else
7095 return 0;
7098 static bool
7099 sh_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7100 tree type, bool named)
7102 if (targetm.calls.must_pass_in_stack (mode, type))
7103 return true;
7105 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
7106 wants to know about pass-by-reference semantics for incoming
7107 arguments. */
7108 if (! cum)
7109 return false;
7111 if (TARGET_SHCOMPACT)
7113 cum->byref = shcompact_byref (cum, mode, type, named);
7114 return cum->byref != 0;
7117 return false;
7120 static bool
7121 sh_callee_copies (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7122 tree type, bool named ATTRIBUTE_UNUSED)
7124 /* ??? How can it possibly be correct to return true only on the
7125 caller side of the equation? Is there someplace else in the
7126 sh backend that's magically producing the copies? */
7127 return (cum->outgoing
7128 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
7129 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
7132 static int
7133 sh_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
7134 tree type, bool named ATTRIBUTE_UNUSED)
7136 int words = 0;
7138 if (!TARGET_SH5
7139 && PASS_IN_REG_P (*cum, mode, type)
7140 && !(TARGET_SH4 || TARGET_SH2A_DOUBLE)
7141 && (ROUND_REG (*cum, mode)
7142 + (mode != BLKmode
7143 ? ROUND_ADVANCE (GET_MODE_SIZE (mode))
7144 : ROUND_ADVANCE (int_size_in_bytes (type)))
7145 > NPARM_REGS (mode)))
7146 words = NPARM_REGS (mode) - ROUND_REG (*cum, mode);
7148 else if (!TARGET_SHCOMPACT
7149 && SH5_WOULD_BE_PARTIAL_NREGS (*cum, mode, type, named))
7150 words = NPARM_REGS (SImode) - cum->arg_count[SH_ARG_INT];
7152 return words * UNITS_PER_WORD;
7156 /* Define where to put the arguments to a function.
7157 Value is zero to push the argument on the stack,
7158 or a hard register in which to store the argument.
7160 MODE is the argument's machine mode.
7161 TYPE is the data type of the argument (as a tree).
7162 This is null for libcalls where that information may
7163 not be available.
7164 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7165 the preceding args and about the function being called.
7166 NAMED is nonzero if this argument is a named parameter
7167 (otherwise it is an extra parameter matching an ellipsis).
7169 On SH the first args are normally in registers
7170 and the rest are pushed. Any arg that starts within the first
7171 NPARM_REGS words is at least partially passed in a register unless
7172 its data type forbids. */
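/* As a rough illustration of the non-SH5 convention: the first four argument
   words go in r4..r7, and on SH4 hard-float targets SFmode/DFmode arguments
   go in fr4..fr11 (DFmode values using register pairs); anything that does
   not fit is pushed on the stack.  */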
7176 sh_function_arg (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7177 tree type, int named)
7179 if (! TARGET_SH5 && mode == VOIDmode)
7180 return GEN_INT (ca->renesas_abi ? 1 : 0);
7182 if (! TARGET_SH5
7183 && PASS_IN_REG_P (*ca, mode, type)
7184 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
7186 int regno;
7188 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
7189 && (! FUNCTION_ARG_SCmode_WART || (ROUND_REG (*ca, mode) & 1)))
7191 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
7192 gen_rtx_REG (SFmode,
7193 BASE_ARG_REG (mode)
7194 + (ROUND_REG (*ca, mode) ^ 1)),
7195 const0_rtx);
7196 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
7197 gen_rtx_REG (SFmode,
7198 BASE_ARG_REG (mode)
7199 + ((ROUND_REG (*ca, mode) + 1) ^ 1)),
7200 GEN_INT (4));
7201 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
7204 /* If the alignment of a DF value causes an SF register to be
7205 skipped, we will use that skipped register for the next SF
7206 value. */
7207 if ((TARGET_HITACHI || ca->renesas_abi)
7208 && ca->free_single_fp_reg
7209 && mode == SFmode)
7210 return gen_rtx_REG (mode, ca->free_single_fp_reg);
7212 regno = (BASE_ARG_REG (mode) + ROUND_REG (*ca, mode))
7213 ^ (mode == SFmode && TARGET_SH4
7214 && TARGET_LITTLE_ENDIAN != 0
7215 && ! TARGET_HITACHI && ! ca->renesas_abi);
7216 return gen_rtx_REG (mode, regno);
7220 if (TARGET_SH5)
7222 if (mode == VOIDmode && TARGET_SHCOMPACT)
7223 return GEN_INT (ca->call_cookie);
7225 /* The following test assumes unnamed arguments are promoted to
7226 DFmode. */
7227 if (mode == SFmode && ca->free_single_fp_reg)
7228 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode, ca->free_single_fp_reg);
7230 if ((GET_SH_ARG_CLASS (mode) == SH_ARG_FLOAT)
7231 && (named || ! ca->prototype_p)
7232 && ca->arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (SFmode))
7234 if (! ca->prototype_p && TARGET_SHMEDIA)
7235 return SH5_PROTOTYPELESS_FLOAT_ARG (*ca, mode);
7237 return SH5_PROTOTYPED_FLOAT_ARG (*ca, mode,
7238 FIRST_FP_PARM_REG
7239 + ca->arg_count[(int) SH_ARG_FLOAT]);
7242 if (ca->arg_count[(int) SH_ARG_INT] < NPARM_REGS (SImode)
7243 && (! TARGET_SHCOMPACT
7244 || (! SHCOMPACT_FORCE_ON_STACK (mode, type)
7245 && ! SH5_WOULD_BE_PARTIAL_NREGS (*ca, mode,
7246 type, named))))
7248 return gen_rtx_REG (mode, (FIRST_PARM_REG
7249 + ca->arg_count[(int) SH_ARG_INT]));
7252 return 0;
7255 return 0;
7258 /* Update the data in CUM to advance over an argument
7259 of mode MODE and data type TYPE.
7260 (TYPE is null for libcalls where that information may not be
7261 available.) */
7263 void
7264 sh_function_arg_advance (CUMULATIVE_ARGS *ca, enum machine_mode mode,
7265 tree type, int named)
7267 if (ca->force_mem)
7268 ca->force_mem = 0;
7269 else if (TARGET_SH5)
7271 tree type2 = (ca->byref && type
7272 ? TREE_TYPE (type)
7273 : type);
7274 enum machine_mode mode2 = (ca->byref && type
7275 ? TYPE_MODE (type2)
7276 : mode);
7277 int dwords = ((ca->byref
7278 ? ca->byref
7279 : mode2 == BLKmode
7280 ? int_size_in_bytes (type2)
7281 : GET_MODE_SIZE (mode2)) + 7) / 8;
7282 int numregs = MIN (dwords, NPARM_REGS (SImode)
7283 - ca->arg_count[(int) SH_ARG_INT]);
7285 if (numregs)
7287 ca->arg_count[(int) SH_ARG_INT] += numregs;
7288 if (TARGET_SHCOMPACT
7289 && SHCOMPACT_FORCE_ON_STACK (mode2, type2))
7291 ca->call_cookie
7292 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7293 - numregs, 1);
7294 /* N.B. We want this also for outgoing. */
7295 ca->stack_regs += numregs;
7297 else if (ca->byref)
7299 if (! ca->outgoing)
7300 ca->stack_regs += numregs;
7301 ca->byref_regs += numregs;
7302 ca->byref = 0;
7304 ca->call_cookie
7305 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7306 - numregs, 2);
7307 while (--numregs);
7308 ca->call_cookie
7309 |= CALL_COOKIE_INT_REG (ca->arg_count[(int) SH_ARG_INT]
7310 - 1, 1);
7312 else if (dwords > numregs)
7314 int pushregs = numregs;
7316 if (TARGET_SHCOMPACT)
7317 ca->stack_regs += numregs;
7318 while (pushregs < NPARM_REGS (SImode) - 1
7319 && (CALL_COOKIE_INT_REG_GET
7320 (ca->call_cookie,
7321 NPARM_REGS (SImode) - pushregs)
7322 == 1))
7324 ca->call_cookie
7325 &= ~ CALL_COOKIE_INT_REG (NPARM_REGS (SImode)
7326 - pushregs, 1);
7327 pushregs++;
7329 if (numregs == NPARM_REGS (SImode))
7330 ca->call_cookie
7331 |= CALL_COOKIE_INT_REG (0, 1)
7332 | CALL_COOKIE_STACKSEQ (numregs - 1);
7333 else
7334 ca->call_cookie
7335 |= CALL_COOKIE_STACKSEQ (numregs);
7338 if (GET_SH_ARG_CLASS (mode2) == SH_ARG_FLOAT
7339 && (named || ! ca->prototype_p))
7341 if (mode2 == SFmode && ca->free_single_fp_reg)
7342 ca->free_single_fp_reg = 0;
7343 else if (ca->arg_count[(int) SH_ARG_FLOAT]
7344 < NPARM_REGS (SFmode))
7346 int numfpregs
7347 = MIN ((GET_MODE_SIZE (mode2) + 7) / 8 * 2,
7348 NPARM_REGS (SFmode)
7349 - ca->arg_count[(int) SH_ARG_FLOAT]);
7351 ca->arg_count[(int) SH_ARG_FLOAT] += numfpregs;
7353 if (TARGET_SHCOMPACT && ! ca->prototype_p)
7355 if (ca->outgoing && numregs > 0)
7358 ca->call_cookie
7359 |= (CALL_COOKIE_INT_REG
7360 (ca->arg_count[(int) SH_ARG_INT]
7361 - numregs + ((numfpregs - 2) / 2),
7362 4 + (ca->arg_count[(int) SH_ARG_FLOAT]
7363 - numfpregs) / 2));
7365 while (numfpregs -= 2);
7367 else if (mode2 == SFmode && (named)
7368 && (ca->arg_count[(int) SH_ARG_FLOAT]
7369 < NPARM_REGS (SFmode)))
7370 ca->free_single_fp_reg
7371 = FIRST_FP_PARM_REG - numfpregs
7372 + ca->arg_count[(int) SH_ARG_FLOAT] + 1;
7375 return;
7378 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
7380 /* Note that we've used the skipped register. */
7381 if (mode == SFmode && ca->free_single_fp_reg)
7383 ca->free_single_fp_reg = 0;
7384 return;
7386 /* When we have a DF after an SF, there's an SF register that gets
7387 skipped in order to align the DF value. We note this skipped
7388 register, because the next SF value will use it, and not the
7389 SF that follows the DF. */
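/* E.g. for arguments (float a, double b, float c): A occupies the first
   single FP register, B must start on an aligned register pair and so skips
   one single register, and C then goes into the skipped register rather
   than after B.  */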
7390 if (mode == DFmode
7391 && ROUND_REG (*ca, DFmode) != ROUND_REG (*ca, SFmode))
7393 ca->free_single_fp_reg = (ROUND_REG (*ca, SFmode)
7394 + BASE_ARG_REG (mode));
7398 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
7399 || PASS_IN_REG_P (*ca, mode, type))
7400 (ca->arg_count[(int) GET_SH_ARG_CLASS (mode)]
7401 = (ROUND_REG (*ca, mode)
7402 + (mode == BLKmode
7403 ? ROUND_ADVANCE (int_size_in_bytes (type))
7404 : ROUND_ADVANCE (GET_MODE_SIZE (mode)))));
7407 /* The Renesas calling convention doesn't quite fit into this scheme since
7408 the address is passed like an invisible argument, but one that is always
7409 passed in memory. */
7410 static rtx
7411 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
7413 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7414 return 0;
7415 return gen_rtx_REG (Pmode, 2);
7418 /* Worker function for TARGET_RETURN_IN_MEMORY. */
7420 static bool
7421 sh_return_in_memory (tree type, tree fndecl)
7423 if (TARGET_SH5)
7425 if (TYPE_MODE (type) == BLKmode)
7426 return ((unsigned HOST_WIDE_INT) int_size_in_bytes (type)) > 8;
7427 else
7428 return GET_MODE_SIZE (TYPE_MODE (type)) > 8;
7430 else
7432 return (TYPE_MODE (type) == BLKmode
7433 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
7434 && TREE_CODE (type) == RECORD_TYPE));
7438 /* We actually emit the code in sh_expand_prologue. We used to use
7439 a static variable to flag that we need to emit this code, but that
7440 doesn't work when inlining, or when functions are deferred and then emitted
7441 later. Fortunately, we already have two flags that are part of struct
7442 function that tell if a function uses varargs or stdarg. */
7443 static void
7444 sh_setup_incoming_varargs (CUMULATIVE_ARGS *ca,
7445 enum machine_mode mode,
7446 tree type,
7447 int *pretend_arg_size,
7448 int second_time ATTRIBUTE_UNUSED)
7450 gcc_assert (current_function_stdarg);
7451 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7453 int named_parm_regs, anon_parm_regs;
7455 named_parm_regs = (ROUND_REG (*ca, mode)
7456 + (mode == BLKmode
7457 ? ROUND_ADVANCE (int_size_in_bytes (type))
7458 : ROUND_ADVANCE (GET_MODE_SIZE (mode))));
7459 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
7460 if (anon_parm_regs > 0)
7461 *pretend_arg_size = anon_parm_regs * 4;
7465 static bool
7466 sh_strict_argument_naming (CUMULATIVE_ARGS *ca ATTRIBUTE_UNUSED)
7468 return TARGET_SH5;
7471 static bool
7472 sh_pretend_outgoing_varargs_named (CUMULATIVE_ARGS *ca)
7474 return ! (TARGET_HITACHI || ca->renesas_abi) && ! TARGET_SH5;
7478 /* Define the offset between two registers, one to be eliminated, and
7479 the other its replacement, at the start of a routine. */
7482 initial_elimination_offset (int from, int to)
7484 int regs_saved;
7485 int regs_saved_rounding = 0;
7486 int total_saved_regs_space;
7487 int total_auto_space;
7488 int save_flags = target_flags;
7489 int copy_flags;
7490 HARD_REG_SET live_regs_mask;
7492 shmedia_space_reserved_for_target_registers = false;
7493 regs_saved = calc_live_regs (&live_regs_mask);
7494 regs_saved += SHMEDIA_REGS_STACK_ADJUST ();
7496 if (shmedia_reserve_space_for_target_registers_p (regs_saved, &live_regs_mask))
7498 shmedia_space_reserved_for_target_registers = true;
7499 regs_saved += shmedia_target_regs_stack_adjust (&live_regs_mask);
7502 if (TARGET_SH5 && regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT))
7503 regs_saved_rounding = ((STACK_BOUNDARY / BITS_PER_UNIT)
7504 - regs_saved % (STACK_BOUNDARY / BITS_PER_UNIT));
7506 total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
7507 copy_flags = target_flags;
7508 target_flags = save_flags;
7510 total_saved_regs_space = regs_saved + regs_saved_rounding;
7512 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7513 return total_saved_regs_space + total_auto_space
7514 + current_function_args_info.byref_regs * 8;
7516 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7517 return total_saved_regs_space + total_auto_space
7518 + current_function_args_info.byref_regs * 8;
7520 /* Initial gap between fp and sp is 0. */
7521 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7522 return 0;
7524 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
7525 return rounded_frame_size (0);
7527 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
7528 return rounded_frame_size (0);
7530 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
7531 && (to == HARD_FRAME_POINTER_REGNUM
7532 || to == STACK_POINTER_REGNUM));
7533 if (TARGET_SH5)
7535 int n = total_saved_regs_space;
7536 int pr_reg = TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG;
7537 save_schedule schedule;
7538 save_entry *entry;
7540 n += total_auto_space;
7542 /* If it wasn't saved, there's not much we can do. */
7543 if (! TEST_HARD_REG_BIT (live_regs_mask, pr_reg))
7544 return n;
7546 target_flags = copy_flags;
7548 sh5_schedule_saves (&live_regs_mask, &schedule, n);
7549 for (entry = &schedule.entries[1]; entry->mode != VOIDmode; entry++)
7550 if (entry->reg == pr_reg)
7552 target_flags = save_flags;
7553 return entry->offset;
7555 gcc_unreachable ();
7557 else
7558 return total_auto_space;
7561 /* Insert any deferred function attributes from earlier pragmas. */
7562 static void
7563 sh_insert_attributes (tree node, tree *attributes)
7565 tree attrs;
7567 if (TREE_CODE (node) != FUNCTION_DECL)
7568 return;
7570 /* We are only interested in declarations. */
7571 if (!DECL_P (node))
7572 return;
7574 /* Append the attributes to the deferred attributes. */
7575 *sh_deferred_function_attributes_tail = *attributes;
7576 attrs = sh_deferred_function_attributes;
7577 if (!attrs)
7578 return;
7580 /* Some attributes imply or require the interrupt attribute. */
7581 if (!lookup_attribute ("interrupt_handler", attrs)
7582 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
7584 /* If we have a trapa_handler, but no interrupt_handler attribute,
7585 insert an interrupt_handler attribute. */
7586 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
7587 /* We can't use sh_pr_interrupt here because that's not in the
7588 Java front end. */
7589 attrs
7590 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
7591 /* However, for sp_switch, trap_exit and nosave_low_regs, if the
7592 interrupt attribute is missing, we ignore the attribute and warn. */
7593 else if (lookup_attribute ("sp_switch", attrs)
7594 || lookup_attribute ("trap_exit", attrs)
7595 || lookup_attribute ("nosave_low_regs", attrs))
7597 tree *tail;
7599 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
7601 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
7602 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
7603 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs)))
7604 warning (OPT_Wattributes,
7605 "%qs attribute only applies to interrupt functions",
7606 IDENTIFIER_POINTER (TREE_PURPOSE (attrs)));
7607 else
7609 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
7610 NULL_TREE);
7611 tail = &TREE_CHAIN (*tail);
7614 attrs = *attributes;
7618 /* Install the processed list. */
7619 *attributes = attrs;
7621 /* Clear deferred attributes. */
7622 sh_deferred_function_attributes = NULL_TREE;
7623 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
7625 return;
7628 /* Supported attributes:
7630 interrupt_handler -- specifies this function is an interrupt handler.
7632 trapa_handler -- like above, but don't save all registers.
7634 sp_switch -- specifies an alternate stack for an interrupt handler
7635 to run on.
7637 trap_exit -- use a trapa to exit an interrupt function instead of
7638 an rte instruction.
7640 nosave_low_regs -- don't save r0..r7 in an interrupt handler.
7641 This is useful on the SH3 and upwards,
7642 which have a separate set of low regs for User and Supervisor modes.
7643 This should only be used for the lowest level of interrupts. Higher levels
7644 of interrupts must save the registers in case they themselves are
7645 interrupted.
7647 renesas -- use Renesas calling/layout conventions (functions and
7648 structures).
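/* Illustrative only (not part of this file): one way these attributes
   might be spelled in user code compiled for SH.  The handler names, the
   trap number and the alt_stack variable are invented for the example:

       void *alt_stack;

       void __attribute__ ((interrupt_handler)) timer_isr (void);
       void __attribute__ ((interrupt_handler,
                            sp_switch ("alt_stack"),
                            trap_exit (11))) uart_isr (void);
       int __attribute__ ((renesas)) lib_entry (int x);
*/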
7652 const struct attribute_spec sh_attribute_table[] =
7654 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
7655 { "interrupt_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7656 { "sp_switch", 1, 1, true, false, false, sh_handle_sp_switch_attribute },
7657 { "trap_exit", 1, 1, true, false, false, sh_handle_trap_exit_attribute },
7658 { "renesas", 0, 0, false, true, false, sh_handle_renesas_attribute },
7659 { "trapa_handler", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7660 { "nosave_low_regs", 0, 0, true, false, false, sh_handle_interrupt_handler_attribute },
7661 #ifdef SYMBIAN
7662 /* Symbian support adds three new attributes:
7663 dllexport - for exporting a function/variable that will live in a dll
7664 dllimport - for importing a function/variable from a dll
7666 Microsoft allows multiple declspecs in one __declspec, separating
7667 them with spaces. We do NOT support this. Instead, use __declspec
7668 multiple times. */
7669 { "dllimport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7670 { "dllexport", 0, 0, true, false, false, sh_symbian_handle_dll_attribute },
7671 #endif
7672 { NULL, 0, 0, false, false, false, NULL }
7675 /* Handle an "interrupt_handler" attribute; arguments as in
7676 struct attribute_spec.handler. */
7677 static tree
7678 sh_handle_interrupt_handler_attribute (tree *node, tree name,
7679 tree args ATTRIBUTE_UNUSED,
7680 int flags ATTRIBUTE_UNUSED,
7681 bool *no_add_attrs)
7683 if (TREE_CODE (*node) != FUNCTION_DECL)
7685 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7686 IDENTIFIER_POINTER (name));
7687 *no_add_attrs = true;
7689 else if (TARGET_SHCOMPACT)
7691 error ("attribute interrupt_handler is not compatible with -m5-compact");
7692 *no_add_attrs = true;
7695 return NULL_TREE;
7698 /* Handle an "sp_switch" attribute; arguments as in
7699 struct attribute_spec.handler. */
7700 static tree
7701 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
7702 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7704 if (TREE_CODE (*node) != FUNCTION_DECL)
7706 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7707 IDENTIFIER_POINTER (name));
7708 *no_add_attrs = true;
7710 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
7712 /* The argument must be a constant string. */
7713 warning (OPT_Wattributes, "%qs attribute argument not a string constant",
7714 IDENTIFIER_POINTER (name));
7715 *no_add_attrs = true;
7718 return NULL_TREE;
7721 /* Handle a "trap_exit" attribute; arguments as in
7722 struct attribute_spec.handler. */
7723 static tree
7724 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
7725 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
7727 if (TREE_CODE (*node) != FUNCTION_DECL)
7729 warning (OPT_Wattributes, "%qs attribute only applies to functions",
7730 IDENTIFIER_POINTER (name));
7731 *no_add_attrs = true;
7733 /* The argument specifies a trap number to be used in a trapa instruction
7734 at function exit (instead of an rte instruction). */
7735 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
7737 /* The argument must be a constant integer. */
7738 warning (OPT_Wattributes, "%qs attribute argument not an "
7739 "integer constant", IDENTIFIER_POINTER (name));
7740 *no_add_attrs = true;
7743 return NULL_TREE;
7746 static tree
7747 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
7748 tree name ATTRIBUTE_UNUSED,
7749 tree args ATTRIBUTE_UNUSED,
7750 int flags ATTRIBUTE_UNUSED,
7751 bool *no_add_attrs ATTRIBUTE_UNUSED)
7753 return NULL_TREE;
7756 /* True if __attribute__((renesas)) or -mrenesas. */
7758 sh_attr_renesas_p (tree td)
7760 if (TARGET_HITACHI)
7761 return 1;
7762 if (td == 0)
7763 return 0;
7764 if (DECL_P (td))
7765 td = TREE_TYPE (td);
7766 if (td == error_mark_node)
7767 return 0;
7768 return (lookup_attribute ("renesas", TYPE_ATTRIBUTES (td))
7769 != NULL_TREE);
7772 /* True if __attribute__((renesas)) or -mrenesas, for the current
7773 function. */
7775 sh_cfun_attr_renesas_p (void)
7777 return sh_attr_renesas_p (current_function_decl);
7781 sh_cfun_interrupt_handler_p (void)
7783 return (lookup_attribute ("interrupt_handler",
7784 DECL_ATTRIBUTES (current_function_decl))
7785 != NULL_TREE);
7788 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
7790 static const char *
7791 sh_check_pch_target_flags (int old_flags)
7793 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
7794 | MASK_SH_E | MASK_HARD_SH4
7795 | MASK_FPU_SINGLE | MASK_SH4))
7796 return _("created and used with different architectures / ABIs");
7797 if ((old_flags ^ target_flags) & MASK_HITACHI)
7798 return _("created and used with different ABIs");
7799 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
7800 return _("created and used with different endianness");
7801 return NULL;
7804 /* Predicates used by the templates. */
7806 /* Returns 1 if OP is MACL, MACH or PR. The input must be a REG rtx.
7807 Used only in general_movsrc_operand. */
7810 system_reg_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7812 switch (REGNO (op))
7814 case PR_REG:
7815 case MACL_REG:
7816 case MACH_REG:
7817 return 1;
7819 return 0;
7822 /* Nonzero if OP is a floating point value with value 0.0. */
7825 fp_zero_operand (rtx op)
7827 REAL_VALUE_TYPE r;
7829 if (GET_MODE (op) != SFmode)
7830 return 0;
7832 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7833 return REAL_VALUES_EQUAL (r, dconst0) && ! REAL_VALUE_MINUS_ZERO (r);
7836 /* Nonzero if OP is a floating point value with value 1.0. */
7839 fp_one_operand (rtx op)
7841 REAL_VALUE_TYPE r;
7843 if (GET_MODE (op) != SFmode)
7844 return 0;
7846 REAL_VALUE_FROM_CONST_DOUBLE (r, op);
7847 return REAL_VALUES_EQUAL (r, dconst1);
7850 /* For -m4 and -m4-single-only, mode switching is used. If we are
7851 compiling without -mfmovd, movsf_ie isn't taken into account for
7852 mode switching. We could check in machine_dependent_reorg for
7853 cases where we know we are in single precision mode, but there is no
7854 interface to find that out during reload, so we must avoid
7855 choosing an fldi alternative during reload and thus failing to
7856 allocate a scratch register for the constant loading. */
7858 fldi_ok (void)
7860 return ! TARGET_SH4 || TARGET_FMOVD || reload_completed;
7864 tertiary_reload_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7866 enum rtx_code code = GET_CODE (op);
7867 return code == MEM || (TARGET_SH4 && code == CONST_DOUBLE);
7870 /* Return the TLS type for TLS symbols, 0 for otherwise. */
7872 tls_symbolic_operand (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
7874 if (GET_CODE (op) != SYMBOL_REF)
7875 return 0;
7876 return SYMBOL_REF_TLS_MODEL (op);
7879 /* Return the destination address of a branch. */
7881 static int
7882 branch_dest (rtx branch)
7884 rtx dest = SET_SRC (PATTERN (branch));
7885 int dest_uid;
7887 if (GET_CODE (dest) == IF_THEN_ELSE)
7888 dest = XEXP (dest, 1);
7889 dest = XEXP (dest, 0);
7890 dest_uid = INSN_UID (dest);
7891 return INSN_ADDRESSES (dest_uid);
7894 /* Return nonzero if REG is not used after INSN.
7895 We assume REG is a reload reg, and therefore does
7896 not live past labels. It may live past calls or jumps though. */
7898 reg_unused_after (rtx reg, rtx insn)
7900 enum rtx_code code;
7901 rtx set;
7903 /* If the reg is set by this instruction, then it is safe for our
7904 case. Disregard the case where this is a store to memory, since
7905 we are checking a register used in the store address. */
7906 set = single_set (insn);
7907 if (set && GET_CODE (SET_DEST (set)) != MEM
7908 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7909 return 1;
7911 while ((insn = NEXT_INSN (insn)))
7913 rtx set;
7914 if (!INSN_P (insn))
7915 continue;
7917 code = GET_CODE (insn);
7919 #if 0
7920 /* If this is a label that existed before reload, then the register
7921 is dead here. However, if this is a label added by reorg, then
7922 the register may still be live here. We can't tell the difference,
7923 so we just ignore labels completely. */
7924 if (code == CODE_LABEL)
7925 return 1;
7926 /* else */
7927 #endif
7929 if (code == JUMP_INSN)
7930 return 0;
7932 /* If this is a sequence, we must handle them all at once.
7933 We could have for instance a call that sets the target register,
7934 and an insn in a delay slot that uses the register. In this case,
7935 we must return 0. */
7936 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
7938 int i;
7939 int retval = 0;
7941 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
7943 rtx this_insn = XVECEXP (PATTERN (insn), 0, i);
7944 rtx set = single_set (this_insn);
7946 if (GET_CODE (this_insn) == CALL_INSN)
7947 code = CALL_INSN;
7948 else if (GET_CODE (this_insn) == JUMP_INSN)
7950 if (INSN_ANNULLED_BRANCH_P (this_insn))
7951 return 0;
7952 code = JUMP_INSN;
7955 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7956 return 0;
7957 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7959 if (GET_CODE (SET_DEST (set)) != MEM)
7960 retval = 1;
7961 else
7962 return 0;
7964 if (set == 0
7965 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
7966 return 0;
7968 if (retval == 1)
7969 return 1;
7970 else if (code == JUMP_INSN)
7971 return 0;
7974 set = single_set (insn);
7975 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
7976 return 0;
7977 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
7978 return GET_CODE (SET_DEST (set)) != MEM;
7979 if (set == 0 && reg_overlap_mentioned_p (reg, PATTERN (insn)))
7980 return 0;
7982 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
7983 return 1;
7985 return 1;
7988 #include "ggc.h"
7990 static GTY(()) rtx fpscr_rtx;
7992 get_fpscr_rtx (void)
7994 if (! fpscr_rtx)
7996 fpscr_rtx = gen_rtx_REG (PSImode, FPSCR_REG);
7997 REG_USERVAR_P (fpscr_rtx) = 1;
7998 mark_user_reg (fpscr_rtx);
8000 if (! reload_completed || mdep_reorg_phase != SH_AFTER_MDEP_REORG)
8001 mark_user_reg (fpscr_rtx);
8002 return fpscr_rtx;
8005 static GTY(()) tree fpscr_values;
8007 static void
8008 emit_fpu_switch (rtx scratch, int index)
8010 rtx dst, src;
8012 if (fpscr_values == NULL)
8014 tree t;
8016 t = build_index_type (integer_one_node);
8017 t = build_array_type (integer_type_node, t);
8018 t = build_decl (VAR_DECL, get_identifier ("__fpscr_values"), t);
8019 DECL_ARTIFICIAL (t) = 1;
8020 DECL_IGNORED_P (t) = 1;
8021 DECL_EXTERNAL (t) = 1;
8022 TREE_STATIC (t) = 1;
8023 TREE_PUBLIC (t) = 1;
8024 TREE_USED (t) = 1;
8026 fpscr_values = t;
8029 src = DECL_RTL (fpscr_values);
8030 if (no_new_pseudos)
8032 emit_move_insn (scratch, XEXP (src, 0));
8033 if (index != 0)
8034 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
8035 src = adjust_automodify_address (src, PSImode, scratch, index * 4);
8037 else
8038 src = adjust_address (src, PSImode, index * 4);
8040 dst = get_fpscr_rtx ();
8041 emit_move_insn (dst, src);
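/* Note (an assumption about the runtime, not stated here): __fpscr_values
   is expected to be supplied by the target support library as two 32-bit
   FPSCR images, one per precision mode, so the switch emitted above
   amounts to roughly

       extern unsigned int __fpscr_values[2];
       fpscr = __fpscr_values[index];      -- index selects the FP mode
*/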
8044 void
8045 emit_sf_insn (rtx pat)
8047 emit_insn (pat);
8050 void
8051 emit_df_insn (rtx pat)
8053 emit_insn (pat);
8056 void
8057 expand_sf_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8059 emit_sf_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8062 void
8063 expand_sf_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8065 emit_sf_insn ((*fun) (operands[0], operands[1], operands[2],
8066 get_fpscr_rtx ()));
8069 void
8070 expand_df_unop (rtx (*fun) (rtx, rtx, rtx), rtx *operands)
8072 emit_df_insn ((*fun) (operands[0], operands[1], get_fpscr_rtx ()));
8075 void
8076 expand_df_binop (rtx (*fun) (rtx, rtx, rtx, rtx), rtx *operands)
8078 emit_df_insn ((*fun) (operands[0], operands[1], operands[2],
8079 get_fpscr_rtx ()));
8082 /* ??? gcc does flow analysis strictly after common subexpression
8083 elimination. As a result, common subexpression elimination fails
8084 when there are some intervening statements setting the same register.
8085 If we did nothing about this, this would hurt the precision switching
8086 for SH4 badly. There is some cse after reload, but it is unable to
8087 undo the extra register pressure from the unused instructions, and
8088 it cannot remove auto-increment loads.
8090 A C code example that shows this flow/cse weakness for (at least) SH
8091 and sparc (as of gcc ss-970706) is this:
8093 double
8094 f(double a)
8096 double d;
8097 d = 0.1;
8098 a += d;
8099 d = 1.1;
8100 d = 0.1;
8101 a *= d;
8102 return a;
8105 So we add another pass before common subexpression elimination, to
8106 remove assignments that are dead due to a following assignment in the
8107 same basic block. */
8109 static void
8110 mark_use (rtx x, rtx *reg_set_block)
8112 enum rtx_code code;
8114 if (! x)
8115 return;
8116 code = GET_CODE (x);
8117 switch (code)
8119 case REG:
8121 int regno = REGNO (x);
8122 int nregs = (regno < FIRST_PSEUDO_REGISTER
8123 ? HARD_REGNO_NREGS (regno, GET_MODE (x))
8124 : 1);
8127 reg_set_block[regno + nregs - 1] = 0;
8129 while (--nregs);
8130 break;
8132 case SET:
8134 rtx dest = SET_DEST (x);
8136 if (GET_CODE (dest) == SUBREG)
8137 dest = SUBREG_REG (dest);
8138 if (GET_CODE (dest) != REG)
8139 mark_use (dest, reg_set_block);
8140 mark_use (SET_SRC (x), reg_set_block);
8141 break;
8143 case CLOBBER:
8144 break;
8145 default:
8147 const char *fmt = GET_RTX_FORMAT (code);
8148 int i, j;
8149 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
8151 if (fmt[i] == 'e')
8152 mark_use (XEXP (x, i), reg_set_block);
8153 else if (fmt[i] == 'E')
8154 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8155 mark_use (XVECEXP (x, i, j), reg_set_block);
8157 break;
8162 static rtx get_free_reg (HARD_REG_SET);
8164 /* This function returns a register to use to load the address to load
8165 the fpscr from. Currently it always returns r1 or r7, but when we are
8166 able to use pseudo registers after combine, or have a better mechanism
8167 for choosing a register, it should be done here. */
8168 /* REGS_LIVE is the liveness information for the point for which we
8169 need this allocation. In some bare-bones exit blocks, r1 is live at the
8170 start. We can even have all of r0..r3 being live:
8171 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8172 New insns placed before INSN will clobber the register
8173 we return. If a basic block consists only of setting the return value
8174 register to a pseudo and using that register, the return value is not
8175 live before or after this block, yet we'll insert our insns right in
8176 the middle. */
8178 static rtx
8179 get_free_reg (HARD_REG_SET regs_live)
8181 if (! TEST_HARD_REG_BIT (regs_live, 1))
8182 return gen_rtx_REG (Pmode, 1);
8184 /* Hard reg 1 is live; since this is a SMALL_REGISTER_CLASSES target,
8185 there shouldn't be anything but a jump before the function end. */
8186 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
8187 return gen_rtx_REG (Pmode, 7);
8190 /* This function will set the fpscr from memory.
8191 MODE is the mode we are setting it to. */
8192 void
8193 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8195 enum attr_fp_mode fp_mode = mode;
8196 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
8197 rtx addr_reg = get_free_reg (regs_live);
8199 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
8202 /* Is the given character a logical line separator for the assembler? */
8203 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8204 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C) ((C) == ';')
8205 #endif
8208 sh_insn_length_adjustment (rtx insn)
8210 /* Instructions with unfilled delay slots take up an extra two bytes for
8211 the nop in the delay slot. */
8212 if (((GET_CODE (insn) == INSN
8213 && GET_CODE (PATTERN (insn)) != USE
8214 && GET_CODE (PATTERN (insn)) != CLOBBER)
8215 || GET_CODE (insn) == CALL_INSN
8216 || (GET_CODE (insn) == JUMP_INSN
8217 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8218 && GET_CODE (PATTERN (insn)) != ADDR_VEC))
8219 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE
8220 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8221 return 2;
8223 /* SH2e has a bug that prevents the use of annulled branches, so if
8224 the delay slot is not filled, we'll have to put a NOP in it. */
8225 if (sh_cpu == CPU_SH2E
8226 && GET_CODE (insn) == JUMP_INSN
8227 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC
8228 && GET_CODE (PATTERN (insn)) != ADDR_VEC
8229 && get_attr_type (insn) == TYPE_CBRANCH
8230 && GET_CODE (PATTERN (NEXT_INSN (PREV_INSN (insn)))) != SEQUENCE)
8231 return 2;
8233 /* sh-dsp parallel processing insns take four bytes instead of two. */
8235 if (GET_CODE (insn) == INSN)
8237 int sum = 0;
8238 rtx body = PATTERN (insn);
8239 const char *template;
8240 char c;
8241 int maybe_label = 1;
8243 if (GET_CODE (body) == ASM_INPUT)
8244 template = XSTR (body, 0);
8245 else if (asm_noperands (body) >= 0)
8246 template
8247 = decode_asm_operands (body, NULL, NULL, NULL, NULL);
8248 else
8249 return 0;
8252 int ppi_adjust = 0;
8255 c = *template++;
8256 while (c == ' ' || c == '\t');
8257 /* all sh-dsp parallel-processing insns start with p.
8258 The only non-ppi sh insn starting with p is pref.
8259 The only ppi starting with pr is prnd. */
8260 if ((c == 'p' || c == 'P') && strncasecmp ("re", template, 2))
8261 ppi_adjust = 2;
8262 /* The repeat pseudo-insn expands to three insns, a total of
8263 six bytes in size. */
8264 else if ((c == 'r' || c == 'R')
8265 && ! strncasecmp ("epeat", template, 5))
8266 ppi_adjust = 4;
8267 while (c && c != '\n' && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c))
8269 /* If this is a label, it is obviously not a ppi insn. */
8270 if (c == ':' && maybe_label)
8272 ppi_adjust = 0;
8273 break;
8275 else if (c == '\'' || c == '"')
8276 maybe_label = 0;
8277 c = *template++;
8279 sum += ppi_adjust;
8280 maybe_label = c != ':';
8282 while (c);
8283 return sum;
8285 return 0;
8288 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
8289 isn't protected by a PIC unspec. */
8291 nonpic_symbol_mentioned_p (rtx x)
8293 register const char *fmt;
8294 register int i;
8296 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
8297 || GET_CODE (x) == PC)
8298 return 1;
8300 /* We don't want to look into the possible MEM location of a
8301 CONST_DOUBLE, since we're not going to use it, in general. */
8302 if (GET_CODE (x) == CONST_DOUBLE)
8303 return 0;
8305 if (GET_CODE (x) == UNSPEC
8306 && (XINT (x, 1) == UNSPEC_PIC
8307 || XINT (x, 1) == UNSPEC_GOT
8308 || XINT (x, 1) == UNSPEC_GOTOFF
8309 || XINT (x, 1) == UNSPEC_GOTPLT
8310 || XINT (x, 1) == UNSPEC_GOTTPOFF
8311 || XINT (x, 1) == UNSPEC_DTPOFF
8312 || XINT (x, 1) == UNSPEC_PLT))
8313 return 0;
8315 fmt = GET_RTX_FORMAT (GET_CODE (x));
8316 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8318 if (fmt[i] == 'E')
8320 register int j;
8322 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
8323 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
8324 return 1;
8326 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
8327 return 1;
8330 return 0;
8333 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
8334 @GOTOFF in `reg'. */
8336 legitimize_pic_address (rtx orig, enum machine_mode mode ATTRIBUTE_UNUSED,
8337 rtx reg)
8339 if (tls_symbolic_operand (orig, Pmode))
8340 return orig;
8342 if (GET_CODE (orig) == LABEL_REF
8343 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
8345 if (reg == 0)
8346 reg = gen_reg_rtx (Pmode);
8348 emit_insn (gen_symGOTOFF2reg (reg, orig));
8349 return reg;
8351 else if (GET_CODE (orig) == SYMBOL_REF)
8353 if (reg == 0)
8354 reg = gen_reg_rtx (Pmode);
8356 emit_insn (gen_symGOT2reg (reg, orig));
8357 return reg;
8359 return orig;
8362 /* Mark the use of a constant in the literal table. If the constant
8363 has multiple labels, make it unique. */
8364 static rtx
8365 mark_constant_pool_use (rtx x)
8367 rtx insn, lab, pattern;
8369 if (x == NULL)
8370 return x;
8372 switch (GET_CODE (x))
8374 case LABEL_REF:
8375 x = XEXP (x, 0);
8376 case CODE_LABEL:
8377 break;
8378 default:
8379 return x;
8382 /* Get the first label in the list of labels for the same constant
8383 and delete the other labels in the list. */
8384 lab = x;
8385 for (insn = PREV_INSN (x); insn; insn = PREV_INSN (insn))
8387 if (GET_CODE (insn) != CODE_LABEL
8388 || LABEL_REFS (insn) != NEXT_INSN (insn))
8389 break;
8390 lab = insn;
8393 for (insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
8394 INSN_DELETED_P (insn) = 1;
8396 /* Mark constants in a window. */
8397 for (insn = NEXT_INSN (x); insn; insn = NEXT_INSN (insn))
8399 if (GET_CODE (insn) != INSN)
8400 continue;
8402 pattern = PATTERN (insn);
8403 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
8404 continue;
8406 switch (XINT (pattern, 1))
8408 case UNSPECV_CONST2:
8409 case UNSPECV_CONST4:
8410 case UNSPECV_CONST8:
8411 XVECEXP (pattern, 0, 1) = const1_rtx;
8412 break;
8413 case UNSPECV_WINDOW_END:
8414 if (XVECEXP (pattern, 0, 0) == x)
8415 return lab;
8416 break;
8417 case UNSPECV_CONST_END:
8418 return lab;
8419 default:
8420 break;
8424 return lab;
8427 /* Return true if it's possible to redirect BRANCH1 to the destination
8428 of an unconditional jump BRANCH2. We only want to do this if the
8429 resulting branch will have a short displacement. */
8431 sh_can_redirect_branch (rtx branch1, rtx branch2)
8433 if (flag_expensive_optimizations && simplejump_p (branch2))
8435 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
8436 rtx insn;
8437 int distance;
8439 for (distance = 0, insn = NEXT_INSN (branch1);
8440 insn && distance < 256;
8441 insn = PREV_INSN (insn))
8443 if (insn == dest)
8444 return 1;
8445 else
8446 distance += get_attr_length (insn);
8448 for (distance = 0, insn = NEXT_INSN (branch1);
8449 insn && distance < 256;
8450 insn = NEXT_INSN (insn))
8452 if (insn == dest)
8453 return 1;
8454 else
8455 distance += get_attr_length (insn);
8458 return 0;
8461 /* Return nonzero if register old_reg can be renamed to register new_reg. */
8463 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
8464 unsigned int new_reg)
8466 /* Interrupt functions can only use registers that have already been
8467 saved by the prologue, even if they would normally be
8468 call-clobbered. */
8470 if (sh_cfun_interrupt_handler_p () && !regs_ever_live[new_reg])
8471 return 0;
8473 return 1;
8476 /* Function to update the integer COST
8477 based on the relationship between INSN that is dependent on
8478 DEP_INSN through the dependence LINK. The default is to make no
8479 adjustment to COST. This can be used for example to specify to
8480 the scheduler that an output- or anti-dependence does not incur
8481 the same cost as a data-dependence. The return value should be
8482 the new value for COST. */
8483 static int
8484 sh_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED, rtx dep_insn, int cost)
8486 rtx reg, use_pat;
8488 if (TARGET_SHMEDIA)
8490 /* On SHmedia, if the dependence is an anti-dependence or
8491 output-dependence, there is no cost. */
8492 if (REG_NOTE_KIND (link) != 0)
8494 /* However, dependencies between target register loads and
8495 uses of the register in a subsequent block that are separated
8496 by a conditional branch are not modelled - we have to make do with
8497 the anti-dependency between the target register load and the
8498 conditional branch that ends the current block. */
8499 if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8500 && GET_CODE (PATTERN (dep_insn)) == SET
8501 && (get_attr_type (dep_insn) == TYPE_PT_MEDIA
8502 || get_attr_type (dep_insn) == TYPE_PTABS_MEDIA)
8503 && get_attr_type (insn) == TYPE_CBRANCH_MEDIA)
8505 int orig_cost = cost;
8506 rtx note = find_reg_note (insn, REG_BR_PROB, 0);
8507 rtx target = ((! note
8508 || INTVAL (XEXP (note, 0)) * 2 < REG_BR_PROB_BASE)
8509 ? insn : JUMP_LABEL (insn));
8510 /* On the likely path, the branch costs 1, on the unlikely path,
8511 it costs 3. */
8512 cost--;
8514 target = next_active_insn (target);
8515 while (target && ! flow_dependent_p (target, dep_insn)
8516 && --cost > 0);
8517 /* If two branches are executed in immediate succession, with the
8518 first branch properly predicted, this causes a stall at the
8519 second branch, hence we won't need the target for the
8520 second branch for two cycles after the launch of the first
8521 branch. */
8522 if (cost > orig_cost - 2)
8523 cost = orig_cost - 2;
8525 else
8526 cost = 0;
8529 else if (get_attr_is_mac_media (insn)
8530 && get_attr_is_mac_media (dep_insn))
8531 cost = 1;
8533 else if (! reload_completed
8534 && GET_CODE (PATTERN (insn)) == SET
8535 && GET_CODE (SET_SRC (PATTERN (insn))) == FLOAT
8536 && GET_CODE (PATTERN (dep_insn)) == SET
8537 && fp_arith_reg_operand (SET_SRC (PATTERN (dep_insn)), VOIDmode)
8538 && cost < 4)
8539 cost = 4;
8540 /* Schedule the ptabs for a casesi_jump_media in preference to stuff
8541 that is needed at the target. */
8542 else if (get_attr_type (insn) == TYPE_JUMP_MEDIA
8543 && ! flow_dependent_p (insn, dep_insn))
8544 cost--;
8546 else if (REG_NOTE_KIND (link) == 0)
8548 enum attr_type dep_type, type;
8550 if (recog_memoized (insn) < 0
8551 || recog_memoized (dep_insn) < 0)
8552 return cost;
8554 dep_type = get_attr_type (dep_insn);
8555 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
8556 cost--;
8557 if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
8558 && (type = get_attr_type (insn)) != TYPE_CALL
8559 && type != TYPE_SFUNC)
8560 cost--;
8562 /* The only input for a call that is timing-critical is the
8563 function's address. */
8564 if (GET_CODE(insn) == CALL_INSN)
8566 rtx call = PATTERN (insn);
8568 if (GET_CODE (call) == PARALLEL)
8569 call = XVECEXP (call, 0 ,0);
8570 if (GET_CODE (call) == SET)
8571 call = SET_SRC (call);
8572 if (GET_CODE (call) == CALL && GET_CODE (XEXP (call, 0)) == MEM
8573 /* sibcalli_thunk uses a symbol_ref in an unspec. */
8574 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
8575 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
8576 cost = 0;
8578 /* Likewise, the most timing-critical input for an sfunc call
8579 is the function address. However, sfuncs typically start
8580 using their arguments pretty quickly.
8581 Assume a four cycle delay before they are needed. */
8582 /* All sfunc calls are parallels with at least four components.
8583 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
8584 else if (GET_CODE (PATTERN (insn)) == PARALLEL
8585 && XVECLEN (PATTERN (insn), 0) >= 4
8586 && (reg = sfunc_uses_reg (insn)))
8588 if (! reg_set_p (reg, dep_insn))
8589 cost -= 4;
8591 /* When the preceding instruction loads the shift amount of
8592 the following SHAD/SHLD, the latency of the load is increased
8593 by 1 cycle. */
8594 else if (TARGET_SH4
8595 && get_attr_type (insn) == TYPE_DYN_SHIFT
8596 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
8597 && reg_overlap_mentioned_p (SET_DEST (PATTERN (dep_insn)),
8598 XEXP (SET_SRC (single_set (insn)),
8599 1)))
8600 cost++;
8601 /* When an LS group instruction with a latency of less than
8602 3 cycles is followed by a double-precision floating-point
8603 instruction, FIPR, or FTRV, the latency of the first
8604 instruction is increased to 3 cycles. */
8605 else if (cost < 3
8606 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
8607 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
8608 cost = 3;
8609 /* The lsw register of a double-precision computation is ready one
8610 cycle earlier. */
8611 else if (reload_completed
8612 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
8613 && (use_pat = single_set (insn))
8614 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
8615 SET_SRC (use_pat)))
8616 cost -= 1;
8618 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
8619 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
8620 cost -= 1;
8622 /* An anti-dependence penalty of two applies if the first insn is a double
8623 precision fadd / fsub / fmul. */
8624 else if (REG_NOTE_KIND (link) == REG_DEP_ANTI
8625 && recog_memoized (dep_insn) >= 0
8626 && get_attr_type (dep_insn) == TYPE_DFP_ARITH
8627 /* A lot of alleged anti-flow dependences are fake,
8628 so check this one is real. */
8629 && flow_dependent_p (dep_insn, insn))
8630 cost = 2;
8633 return cost;
8636 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
8637 if DEP_INSN is anti-flow dependent on INSN. */
8638 static int
8639 flow_dependent_p (rtx insn, rtx dep_insn)
8641 rtx tmp = PATTERN (insn);
8643 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
8644 return tmp == NULL_RTX;
8647 /* A helper function for flow_dependent_p called through note_stores. */
8648 static void
8649 flow_dependent_p_1 (rtx x, rtx pat ATTRIBUTE_UNUSED, void *data)
8651 rtx * pinsn = (rtx *) data;
8653 if (*pinsn && reg_referenced_p (x, *pinsn))
8654 *pinsn = NULL_RTX;
8657 /* For use by sh_allocate_initial_value. Note that sh.md contains some
8658 'special function' patterns (type sfunc) that clobber pr, but that
8659 do not look like function calls to leaf_function_p. Hence we must
8660 do this extra check. */
8661 static int
8662 sh_pr_n_sets (void)
8664 return REG_N_SETS (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
8667 /* Return where to allocate pseudo for a given hard register initial
8668 value. */
8669 static rtx
8670 sh_allocate_initial_value (rtx hard_reg)
8672 rtx x;
8674 if (REGNO (hard_reg) == (TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG))
8676 if (current_function_is_leaf
8677 && ! sh_pr_n_sets ()
8678 && ! (TARGET_SHCOMPACT
8679 && ((current_function_args_info.call_cookie
8680 & ~ CALL_COOKIE_RET_TRAMP (1))
8681 || current_function_has_nonlocal_label)))
8682 x = hard_reg;
8683 else
8684 x = gen_frame_mem (Pmode, return_address_pointer_rtx);
8686 else
8687 x = NULL_RTX;
8689 return x;
8692 /* This function returns "2" to indicate dual issue for the SH4
8693 processor. To be used by the DFA pipeline description. */
8694 static int
8695 sh_issue_rate (void)
8697 if (TARGET_SUPERSCALAR)
8698 return 2;
8699 else
8700 return 1;
8703 /* Functions for ready queue reordering for sched1. */
8705 /* Get weight for mode for a set x. */
8706 static short
8707 find_set_regmode_weight (rtx x, enum machine_mode mode)
8709 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
8710 return 1;
8711 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
8713 if (GET_CODE (SET_DEST (x)) == REG)
8715 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
8716 return 1;
8717 else
8718 return 0;
8720 return 1;
8722 return 0;
8725 /* Get regmode weight for insn. */
8726 static short
8727 find_insn_regmode_weight (rtx insn, enum machine_mode mode)
8729 short reg_weight = 0;
8730 rtx x;
8732 /* Increment weight for each register born here. */
8733 x = PATTERN (insn);
8734 reg_weight += find_set_regmode_weight (x, mode);
8735 if (GET_CODE (x) == PARALLEL)
8737 int j;
8738 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
8740 x = XVECEXP (PATTERN (insn), 0, j);
8741 reg_weight += find_set_regmode_weight (x, mode);
8744 /* Decrement weight for each register that dies here. */
8745 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
8747 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
8749 rtx note = XEXP (x, 0);
8750 if (GET_CODE (note) == REG && GET_MODE (note) == mode)
8751 reg_weight--;
8754 return reg_weight;
8757 /* Calculate regmode weights for all insns of a basic block. */
8758 static void
8759 find_regmode_weight (basic_block b, enum machine_mode mode)
8761 rtx insn, next_tail, head, tail;
8763 get_ebb_head_tail (b, b, &head, &tail);
8764 next_tail = NEXT_INSN (tail);
8766 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
8768 /* Handle register life information. */
8769 if (!INSN_P (insn))
8770 continue;
8772 if (mode == SFmode)
8773 INSN_REGMODE_WEIGHT (insn, mode) =
8774 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DFmode);
8775 else if (mode == SImode)
8776 INSN_REGMODE_WEIGHT (insn, mode) =
8777 find_insn_regmode_weight (insn, mode) + 2 * find_insn_regmode_weight (insn, DImode);
8781 /* Comparison function for ready queue sorting. */
8782 static int
8783 rank_for_reorder (const void *x, const void *y)
8785 rtx tmp = *(const rtx *) y;
8786 rtx tmp2 = *(const rtx *) x;
8788 /* The insn in a schedule group should be issued first. */
8789 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
8790 return SCHED_GROUP_P (tmp2) ? 1 : -1;
8792 /* If insns are equally good, sort by INSN_LUID (original insn order). This
8793 minimizes instruction movement, thus minimizing sched's effect on
8794 register pressure. */
8795 return INSN_LUID (tmp) - INSN_LUID (tmp2);
8798 /* Resort the array A in which only element at index N may be out of order. */
8799 static void
8800 swap_reorder (rtx *a, int n)
8802 rtx insn = a[n - 1];
8803 int i = n - 2;
8805 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
8807 a[i + 1] = a[i];
8808 i -= 1;
8810 a[i + 1] = insn;
8813 #define SCHED_REORDER(READY, N_READY) \
8814 do \
8816 if ((N_READY) == 2) \
8817 swap_reorder (READY, N_READY); \
8818 else if ((N_READY) > 2) \
8819 qsort (READY, N_READY, sizeof (rtx), rank_for_reorder); \
8821 while (0)
8823 /* Sort the ready list READY by ascending priority, using the SCHED_REORDER
8824 macro. */
8825 static void
8826 ready_reorder (rtx *ready, int nready)
8828 SCHED_REORDER (ready, nready);
8831 /* Calculate regmode weights for all insns of all basic blocks. */
8832 static void
8833 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
8834 int verbose ATTRIBUTE_UNUSED,
8835 int old_max_uid)
8837 basic_block b;
8839 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
8840 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
8842 FOR_EACH_BB_REVERSE (b)
8844 find_regmode_weight (b, SImode);
8845 find_regmode_weight (b, SFmode);
8848 CURR_REGMODE_PRESSURE (SImode) = 0;
8849 CURR_REGMODE_PRESSURE (SFmode) = 0;
8853 /* Cleanup. */
8854 static void
8855 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
8856 int verbose ATTRIBUTE_UNUSED)
8858 if (regmode_weight[0])
8860 free (regmode_weight[0]);
8861 regmode_weight[0] = NULL;
8863 if (regmode_weight[1])
8865 free (regmode_weight[1]);
8866 regmode_weight[1] = NULL;
8870 /* Cache the can_issue_more so that we can return it from reorder2. Also,
8871 keep count of register pressures on SImode and SFmode. */
8872 static int
8873 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
8874 int sched_verbose ATTRIBUTE_UNUSED,
8875 rtx insn,
8876 int can_issue_more)
8878 if (GET_CODE (PATTERN (insn)) != USE
8879 && GET_CODE (PATTERN (insn)) != CLOBBER)
8880 cached_can_issue_more = can_issue_more - 1;
8881 else
8882 cached_can_issue_more = can_issue_more;
8884 if (reload_completed)
8885 return cached_can_issue_more;
8887 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
8888 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
8890 return cached_can_issue_more;
8893 static void
8894 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
8895 int verbose ATTRIBUTE_UNUSED,
8896 int veclen ATTRIBUTE_UNUSED)
8898 CURR_REGMODE_PRESSURE (SImode) = 0;
8899 CURR_REGMODE_PRESSURE (SFmode) = 0;
8902 /* Some magic numbers. */
8903 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8904 functions that already have high pressure on r0. */
8905 #define R0_MAX_LIFE_REGIONS 2
8906 #define R0_MAX_LIVE_LENGTH 12
8907 /* Register Pressure thresholds for SImode and SFmode registers. */
8908 #define SIMODE_MAX_WEIGHT 5
8909 #define SFMODE_MAX_WEIGHT 10
8911 /* Return true if the pressure is high for MODE. */
8912 static short
8913 high_pressure (enum machine_mode mode)
8915 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
8916 functions that already have high pressure on r0. */
8917 if ((REG_N_SETS (0) - REG_N_DEATHS (0)) >= R0_MAX_LIFE_REGIONS
8918 && REG_LIVE_LENGTH (0) >= R0_MAX_LIVE_LENGTH)
8919 return 1;
8921 if (mode == SFmode)
8922 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
8923 else
8924 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
8927 /* Reorder ready queue if register pressure is high. */
8928 static int
8929 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
8930 int sched_verbose ATTRIBUTE_UNUSED,
8931 rtx *ready,
8932 int *n_readyp,
8933 int clock_var ATTRIBUTE_UNUSED)
8935 if (reload_completed)
8936 return sh_issue_rate ();
8938 if (high_pressure (SFmode) || high_pressure (SImode))
8940 ready_reorder (ready, *n_readyp);
8943 return sh_issue_rate ();
8946 /* Skip cycles if the current register pressure is high. */
8947 static int
8948 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
8949 int sched_verbose ATTRIBUTE_UNUSED,
8950 rtx *ready ATTRIBUTE_UNUSED,
8951 int *n_readyp ATTRIBUTE_UNUSED,
8952 int clock_var ATTRIBUTE_UNUSED)
8954 if (reload_completed)
8955 return cached_can_issue_more;
8957 if (high_pressure(SFmode) || high_pressure (SImode))
8958 skip_cycles = 1;
8960 return cached_can_issue_more;
8963 /* Skip cycles without sorting the ready queue. This will move insns from
8964 Q -> R. If this is the last cycle we are skipping, allow sorting of the ready
8965 queue by sh_reorder. */
8967 /* Generally, skipping this many cycles is sufficient for all insns to move
8968 from Q -> R. */
8969 #define MAX_SKIPS 8
8971 static int
8972 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
8973 int sched_verbose ATTRIBUTE_UNUSED,
8974 rtx insn ATTRIBUTE_UNUSED,
8975 int last_clock_var,
8976 int clock_var,
8977 int *sort_p)
8979 if (reload_completed)
8980 return 0;
8982 if (skip_cycles)
8984 if ((clock_var - last_clock_var) < MAX_SKIPS)
8986 *sort_p = 0;
8987 return 1;
8989 /* If this is the last cycle we are skipping, allow reordering of R. */
8990 if ((clock_var - last_clock_var) == MAX_SKIPS)
8992 *sort_p = 1;
8993 return 1;
8997 skip_cycles = 0;
8999 return 0;
9002 /* SHmedia requires registers for branches, so we can't generate new
9003 branches past reload. */
9004 static bool
9005 sh_cannot_modify_jumps_p (void)
9007 return (TARGET_SHMEDIA && (reload_in_progress || reload_completed));
9010 static int
9011 sh_target_reg_class (void)
9013 return TARGET_SHMEDIA ? TARGET_REGS : NO_REGS;
9016 static bool
9017 sh_optimize_target_register_callee_saved (bool after_prologue_epilogue_gen)
9019 HARD_REG_SET dummy;
9020 rtx insn;
9022 if (! shmedia_space_reserved_for_target_registers)
9023 return 0;
9024 if (after_prologue_epilogue_gen && ! TARGET_SAVE_ALL_TARGET_REGS)
9025 return 0;
9026 if (calc_live_regs (&dummy) >= 6 * 8)
9027 return 1;
9028 /* This is a borderline case. See if we got a nested loop, or a loop
9029 with a call, or with more than 4 labels inside. */
9030 for (insn = get_insns(); insn; insn = NEXT_INSN (insn))
9032 if (GET_CODE (insn) == NOTE
9033 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
9035 int labels = 0;
9039 insn = NEXT_INSN (insn);
9040 if ((GET_CODE (insn) == NOTE
9041 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
9042 || GET_CODE (insn) == CALL_INSN
9043 || (GET_CODE (insn) == CODE_LABEL && ++labels > 4))
9044 return 1;
9046 while (GET_CODE (insn) != NOTE
9047 || NOTE_LINE_NUMBER (insn) != NOTE_INSN_LOOP_END);
9050 return 0;
9053 static bool
9054 sh_ms_bitfield_layout_p (tree record_type ATTRIBUTE_UNUSED)
9056 return (TARGET_SH5 || TARGET_HITACHI || sh_attr_renesas_p (record_type));
9060 On the SH1..SH4, the trampoline looks like
9061 2 0002 D202 mov.l l2,r2
9062 1 0000 D301 mov.l l1,r3
9063 3 0004 422B jmp @r2
9064 4 0006 0009 nop
9065 5 0008 00000000 l1: .long area
9066 6 000c 00000000 l2: .long function
9068 SH5 (compact) uses r1 instead of r3 for the static chain. */
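/* Illustrative only: after sh_initialize_trampoline (below) runs on
   SH1..SH4, the 16-byte trampoline holds four 32-bit words matching the
   listing above:

       word 0: mov.l l2,r2 / mov.l l1,r3   (0xd301d202 LE, 0xd202d301 BE)
       word 1: jmp @r2     / nop           (0x0009422b LE, 0x422b0009 BE)
       word 2: cxt     -- static chain value (l1)
       word 3: fnaddr  -- function address   (l2)
*/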
9071 /* Emit RTL insns to initialize the variable parts of a trampoline.
9072 FNADDR is an RTX for the address of the function's pure code.
9073 CXT is an RTX for the static chain value for the function. */
9075 void
9076 sh_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
9078 rtx tramp_mem = gen_frame_mem (BLKmode, tramp);
9080 if (TARGET_SHMEDIA64)
9082 rtx tramp_templ;
9083 int fixed_len;
9085 rtx movi1 = GEN_INT (0xcc000010);
9086 rtx shori1 = GEN_INT (0xc8000010);
9087 rtx src, dst;
9089 /* The following trampoline works within a +- 128 KB range for cxt:
9090 ptb/u cxt,tr1; movi fnaddr >> 48,r0; shori fnaddr >> 32,r0;
9091 shori fnaddr >> 16,r0; shori fnaddr,r0; ptabs/l r0,tr0
9092 gettr tr1,r1; blink tr0,r63 */
9093 /* Address rounding makes it hard to compute the exact bounds of the
9094 offset for this trampoline, but we have a rather generous offset
9095 range, so frame_offset should do fine as an upper bound. */
9096 if (cxt == virtual_stack_vars_rtx && frame_offset < 0x20000)
9098 /* ??? could optimize this trampoline initialization
9099 by writing DImode words with two insns each. */
9100 rtx mask = force_reg (DImode, GEN_INT (0x3fffc00));
9101 rtx insn = gen_rtx_MINUS (DImode, cxt, tramp);
9102 insn = gen_rtx_ASHIFT (DImode, insn, GEN_INT (10-2));
9103 insn = gen_rtx_AND (DImode, insn, mask);
9104 /* Or in ptb/u .,tr1 pattern */
9105 insn = gen_rtx_IOR (DImode, insn, gen_int_mode (0xec000010, SImode));
9106 insn = force_operand (insn, NULL_RTX);
9107 insn = gen_lowpart (SImode, insn);
9108 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX), insn);
9109 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (38));
9110 insn = gen_rtx_AND (DImode, insn, mask);
9111 insn = force_operand (gen_rtx_IOR (DImode, movi1, insn), NULL_RTX);
9112 insn = gen_lowpart (SImode, insn);
9113 emit_move_insn (adjust_address (tramp_mem, SImode, 4), insn);
9114 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (22));
9115 insn = gen_rtx_AND (DImode, insn, mask);
9116 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9117 insn = gen_lowpart (SImode, insn);
9118 emit_move_insn (adjust_address (tramp_mem, SImode, 8), insn);
9119 insn = gen_rtx_LSHIFTRT (DImode, fnaddr, GEN_INT (6));
9120 insn = gen_rtx_AND (DImode, insn, mask);
9121 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9122 insn = gen_lowpart (SImode, insn);
9123 emit_move_insn (adjust_address (tramp_mem, SImode, 12), insn);
9124 insn = gen_rtx_ASHIFT (DImode, fnaddr, GEN_INT (10));
9125 insn = gen_rtx_AND (DImode, insn, mask);
9126 insn = force_operand (gen_rtx_IOR (DImode, shori1, insn), NULL_RTX);
9127 insn = gen_lowpart (SImode, insn);
9128 emit_move_insn (adjust_address (tramp_mem, SImode, 16), insn);
9129 emit_move_insn (adjust_address (tramp_mem, SImode, 20),
9130 GEN_INT (0x6bf10600));
9131 emit_move_insn (adjust_address (tramp_mem, SImode, 24),
9132 GEN_INT (0x4415fc10));
9133 emit_move_insn (adjust_address (tramp_mem, SImode, 28),
9134 GEN_INT (0x4401fff0));
9135 emit_insn (gen_ic_invalidate_line (tramp));
9136 return;
9138 tramp_templ = gen_rtx_SYMBOL_REF (Pmode,"__GCC_nested_trampoline");
9139 fixed_len = TRAMPOLINE_SIZE - 2 * GET_MODE_SIZE (Pmode);
9141 tramp_templ = gen_datalabel_ref (tramp_templ);
9142 dst = tramp_mem;
9143 src = gen_const_mem (BLKmode, tramp_templ);
9144 set_mem_align (dst, 256);
9145 set_mem_align (src, 64);
9146 emit_block_move (dst, src, GEN_INT (fixed_len), BLOCK_OP_NORMAL);
9148 emit_move_insn (adjust_address (tramp_mem, Pmode, fixed_len), fnaddr);
9149 emit_move_insn (adjust_address (tramp_mem, Pmode,
9150 fixed_len + GET_MODE_SIZE (Pmode)),
9151 cxt);
9152 emit_insn (gen_ic_invalidate_line (tramp));
9153 return;
9155 else if (TARGET_SHMEDIA)
9157 /* movi fnaddr >> 16,r1; shori fnaddr,r1; ptabs/l r1,tr0
9158 movi cxt >> 16,r1; shori cxt,r1; blink tr0,r63 */
9159 rtx quad0 = gen_reg_rtx (DImode), cxtload = gen_reg_rtx (DImode);
9160 rtx quad1 = gen_reg_rtx (DImode), quad2 = gen_reg_rtx (DImode);
9161 /* movi 0,r1: 0xcc000010 shori 0,r1: c8000010 concatenated,
9162 rotated 10 right, and higher 16 bit of every 32 selected. */
9163 rtx movishori
9164 = force_reg (V2HImode, (simplify_gen_subreg
9165 (V2HImode, GEN_INT (0x4330432), SImode, 0)));
9166 rtx ptabs = force_reg (DImode, GEN_INT (0x6bf10600));
9167 rtx blink = force_reg (DImode, GEN_INT (0x4401fff0));
9169 tramp = force_reg (Pmode, tramp);
9170 fnaddr = force_reg (SImode, fnaddr);
9171 cxt = force_reg (SImode, cxt);
9172 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, quad0, 0),
9173 gen_rtx_SUBREG (V2HImode, fnaddr, 0),
9174 movishori));
9175 emit_insn (gen_rotrdi3_mextr (quad0, quad0,
9176 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9177 emit_insn (gen_ashldi3_media (quad0, quad0, const2_rtx));
9178 emit_move_insn (change_address (tramp_mem, DImode, NULL_RTX), quad0);
9179 emit_insn (gen_mshflo_w_x (gen_rtx_SUBREG (V4HImode, cxtload, 0),
9180 gen_rtx_SUBREG (V2HImode, cxt, 0),
9181 movishori));
9182 emit_insn (gen_rotrdi3_mextr (cxtload, cxtload,
9183 GEN_INT (TARGET_LITTLE_ENDIAN ? 24 : 56)));
9184 emit_insn (gen_ashldi3_media (cxtload, cxtload, const2_rtx));
9185 if (TARGET_LITTLE_ENDIAN)
9187 emit_insn (gen_mshflo_l_di (quad1, ptabs, cxtload));
9188 emit_insn (gen_mextr4 (quad2, cxtload, blink));
9190 else
9192 emit_insn (gen_mextr4 (quad1, cxtload, ptabs));
9193 emit_insn (gen_mshflo_l_di (quad2, blink, cxtload));
9195 emit_move_insn (adjust_address (tramp_mem, DImode, 8), quad1);
9196 emit_move_insn (adjust_address (tramp_mem, DImode, 16), quad2);
9197 emit_insn (gen_ic_invalidate_line (tramp));
9198 return;
9200 else if (TARGET_SHCOMPACT)
9202 emit_insn (gen_initialize_trampoline (tramp, cxt, fnaddr));
9203 return;
9205 emit_move_insn (change_address (tramp_mem, SImode, NULL_RTX),
9206 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0xd301d202 : 0xd202d301,
9207 SImode));
9208 emit_move_insn (adjust_address (tramp_mem, SImode, 4),
9209 gen_int_mode (TARGET_LITTLE_ENDIAN ? 0x0009422b : 0x422b0009,
9210 SImode));
9211 emit_move_insn (adjust_address (tramp_mem, SImode, 8), cxt);
9212 emit_move_insn (adjust_address (tramp_mem, SImode, 12), fnaddr);
9213 if (TARGET_HARVARD)
9215 if (TARGET_USERMODE)
9216 emit_library_call (function_symbol (NULL, "__ic_invalidate",
9217 FUNCTION_ORDINARY),
9218 0, VOIDmode, 1, tramp, SImode);
9219 else
9220 emit_insn (gen_ic_invalidate_line (tramp));
9224 /* FIXME: This is overly conservative. A SHcompact function that
9225 receives arguments ``by reference'' will have them stored in its
9226 own stack frame, so it must not pass pointers or references to
9227 these arguments to other functions by means of sibling calls. */
9228 /* If PIC, we cannot make sibling calls to global functions
9229 because the PLT requires r12 to be live. */
9230 static bool
9231 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
9233 return (1
9234 && (! TARGET_SHCOMPACT
9235 || current_function_args_info.stack_regs == 0)
9236 && ! sh_cfun_interrupt_handler_p ()
9237 && (! flag_pic
9238 || (decl && ! TREE_PUBLIC (decl))
9239 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
9242 /* Machine specific built-in functions. */
9244 struct builtin_description
9246 const enum insn_code icode;
9247 const char *const name;
9248 int signature;
9251 /* describe number and signedness of arguments; arg[0] == result
9252 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
9253 /* 9: 64 bit pointer, 10: 32 bit pointer. Worked examples of this encoding are given after the signature_args table below. */
9254 static const char signature_args[][4] =
9256 #define SH_BLTIN_V2SI2 0
9257 { 4, 4 },
9258 #define SH_BLTIN_V4HI2 1
9259 { 4, 4 },
9260 #define SH_BLTIN_V2SI3 2
9261 { 4, 4, 4 },
9262 #define SH_BLTIN_V4HI3 3
9263 { 4, 4, 4 },
9264 #define SH_BLTIN_V8QI3 4
9265 { 4, 4, 4 },
9266 #define SH_BLTIN_MAC_HISI 5
9267 { 1, 4, 4, 1 },
9268 #define SH_BLTIN_SH_HI 6
9269 { 4, 4, 1 },
9270 #define SH_BLTIN_SH_SI 7
9271 { 4, 4, 1 },
9272 #define SH_BLTIN_V4HI2V2SI 8
9273 { 4, 4, 4 },
9274 #define SH_BLTIN_V4HI2V8QI 9
9275 { 4, 4, 4 },
9276 #define SH_BLTIN_SISF 10
9277 { 4, 2 },
9278 #define SH_BLTIN_LDUA_L 11
9279 { 2, 10 },
9280 #define SH_BLTIN_LDUA_Q 12
9281 { 1, 10 },
9282 #define SH_BLTIN_STUA_L 13
9283 { 0, 10, 2 },
9284 #define SH_BLTIN_STUA_Q 14
9285 { 0, 10, 1 },
9286 #define SH_BLTIN_LDUA_L64 15
9287 { 2, 9 },
9288 #define SH_BLTIN_LDUA_Q64 16
9289 { 1, 9 },
9290 #define SH_BLTIN_STUA_L64 17
9291 { 0, 9, 2 },
9292 #define SH_BLTIN_STUA_Q64 18
9293 { 0, 9, 1 },
9294 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
9295 #define SH_BLTIN_2 19
9296 #define SH_BLTIN_SU 19
9297 { 1, 2 },
9298 #define SH_BLTIN_3 20
9299 #define SH_BLTIN_SUS 20
9300 { 2, 2, 1 },
9301 #define SH_BLTIN_PSSV 21
9302 { 0, 8, 2, 2 },
9303 #define SH_BLTIN_XXUU 22
9304 #define SH_BLTIN_UUUU 22
9305 { 1, 1, 1, 1 },
9306 #define SH_BLTIN_PV 23
9307 { 0, 8 },
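/* Worked examples of the signature encoding above (illustration only):
     SH_BLTIN_MAC_HISI { 1, 4, 4, 1 }: unsigned result, two "don't care"
                                       operands, unsigned third operand.
     SH_BLTIN_LDUA_L   { 2, 10 }:      signed result, one 32-bit pointer
                                       operand.
     SH_BLTIN_PSSV     { 0, 8, 2, 2 }: no result; pointer, signed, signed.  */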
9309 /* mcmv: operands considered unsigned. */
9310 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
9311 /* mperm: control value considered unsigned int. */
9312 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
9313 /* mshards_q: returns signed short. */
9314 /* nsb: takes long long arg, returns unsigned char. */
9315 static const struct builtin_description bdesc[] =
9317 { CODE_FOR_absv2si2, "__builtin_absv2si2", SH_BLTIN_V2SI2 },
9318 { CODE_FOR_absv4hi2, "__builtin_absv4hi2", SH_BLTIN_V4HI2 },
9319 { CODE_FOR_addv2si3, "__builtin_addv2si3", SH_BLTIN_V2SI3 },
9320 { CODE_FOR_addv4hi3, "__builtin_addv4hi3", SH_BLTIN_V4HI3 },
9321 { CODE_FOR_ssaddv2si3,"__builtin_ssaddv2si3", SH_BLTIN_V2SI3 },
9322 { CODE_FOR_usaddv8qi3,"__builtin_usaddv8qi3", SH_BLTIN_V8QI3 },
9323 { CODE_FOR_ssaddv4hi3,"__builtin_ssaddv4hi3", SH_BLTIN_V4HI3 },
9324 { CODE_FOR_alloco_i, "__builtin_sh_media_ALLOCO", SH_BLTIN_PV },
9325 { CODE_FOR_negcmpeqv8qi,"__builtin_sh_media_MCMPEQ_B", SH_BLTIN_V8QI3 },
9326 { CODE_FOR_negcmpeqv2si,"__builtin_sh_media_MCMPEQ_L", SH_BLTIN_V2SI3 },
9327 { CODE_FOR_negcmpeqv4hi,"__builtin_sh_media_MCMPEQ_W", SH_BLTIN_V4HI3 },
9328 { CODE_FOR_negcmpgtuv8qi,"__builtin_sh_media_MCMPGT_UB", SH_BLTIN_V8QI3 },
9329 { CODE_FOR_negcmpgtv2si,"__builtin_sh_media_MCMPGT_L", SH_BLTIN_V2SI3 },
9330 { CODE_FOR_negcmpgtv4hi,"__builtin_sh_media_MCMPGT_W", SH_BLTIN_V4HI3 },
9331 { CODE_FOR_mcmv, "__builtin_sh_media_MCMV", SH_BLTIN_UUUU },
9332 { CODE_FOR_mcnvs_lw, "__builtin_sh_media_MCNVS_LW", SH_BLTIN_3 },
9333 { CODE_FOR_mcnvs_wb, "__builtin_sh_media_MCNVS_WB", SH_BLTIN_V4HI2V8QI },
9334 { CODE_FOR_mcnvs_wub, "__builtin_sh_media_MCNVS_WUB", SH_BLTIN_V4HI2V8QI },
9335 { CODE_FOR_mextr1, "__builtin_sh_media_MEXTR1", SH_BLTIN_V8QI3 },
9336 { CODE_FOR_mextr2, "__builtin_sh_media_MEXTR2", SH_BLTIN_V8QI3 },
9337 { CODE_FOR_mextr3, "__builtin_sh_media_MEXTR3", SH_BLTIN_V8QI3 },
9338 { CODE_FOR_mextr4, "__builtin_sh_media_MEXTR4", SH_BLTIN_V8QI3 },
9339 { CODE_FOR_mextr5, "__builtin_sh_media_MEXTR5", SH_BLTIN_V8QI3 },
9340 { CODE_FOR_mextr6, "__builtin_sh_media_MEXTR6", SH_BLTIN_V8QI3 },
9341 { CODE_FOR_mextr7, "__builtin_sh_media_MEXTR7", SH_BLTIN_V8QI3 },
9342 { CODE_FOR_mmacfx_wl, "__builtin_sh_media_MMACFX_WL", SH_BLTIN_MAC_HISI },
9343 { CODE_FOR_mmacnfx_wl,"__builtin_sh_media_MMACNFX_WL", SH_BLTIN_MAC_HISI },
9344 { CODE_FOR_mulv2si3, "__builtin_mulv2si3", SH_BLTIN_V2SI3, },
9345 { CODE_FOR_mulv4hi3, "__builtin_mulv4hi3", SH_BLTIN_V4HI3 },
9346 { CODE_FOR_mmulfx_l, "__builtin_sh_media_MMULFX_L", SH_BLTIN_V2SI3 },
9347 { CODE_FOR_mmulfx_w, "__builtin_sh_media_MMULFX_W", SH_BLTIN_V4HI3 },
9348 { CODE_FOR_mmulfxrp_w,"__builtin_sh_media_MMULFXRP_W", SH_BLTIN_V4HI3 },
9349 { CODE_FOR_mmulhi_wl, "__builtin_sh_media_MMULHI_WL", SH_BLTIN_V4HI2V2SI },
9350 { CODE_FOR_mmullo_wl, "__builtin_sh_media_MMULLO_WL", SH_BLTIN_V4HI2V2SI },
9351 { CODE_FOR_mmulsum_wq,"__builtin_sh_media_MMULSUM_WQ", SH_BLTIN_XXUU },
9352 { CODE_FOR_mperm_w, "__builtin_sh_media_MPERM_W", SH_BLTIN_SH_HI },
9353 { CODE_FOR_msad_ubq, "__builtin_sh_media_MSAD_UBQ", SH_BLTIN_XXUU },
9354 { CODE_FOR_mshalds_l, "__builtin_sh_media_MSHALDS_L", SH_BLTIN_SH_SI },
9355 { CODE_FOR_mshalds_w, "__builtin_sh_media_MSHALDS_W", SH_BLTIN_SH_HI },
9356 { CODE_FOR_ashrv2si3, "__builtin_ashrv2si3", SH_BLTIN_SH_SI },
9357 { CODE_FOR_ashrv4hi3, "__builtin_ashrv4hi3", SH_BLTIN_SH_HI },
9358 { CODE_FOR_mshards_q, "__builtin_sh_media_MSHARDS_Q", SH_BLTIN_SUS },
9359 { CODE_FOR_mshfhi_b, "__builtin_sh_media_MSHFHI_B", SH_BLTIN_V8QI3 },
9360 { CODE_FOR_mshfhi_l, "__builtin_sh_media_MSHFHI_L", SH_BLTIN_V2SI3 },
9361 { CODE_FOR_mshfhi_w, "__builtin_sh_media_MSHFHI_W", SH_BLTIN_V4HI3 },
9362 { CODE_FOR_mshflo_b, "__builtin_sh_media_MSHFLO_B", SH_BLTIN_V8QI3 },
9363 { CODE_FOR_mshflo_l, "__builtin_sh_media_MSHFLO_L", SH_BLTIN_V2SI3 },
9364 { CODE_FOR_mshflo_w, "__builtin_sh_media_MSHFLO_W", SH_BLTIN_V4HI3 },
9365 { CODE_FOR_ashlv2si3, "__builtin_ashlv2si3", SH_BLTIN_SH_SI },
9366 { CODE_FOR_ashlv4hi3, "__builtin_ashlv4hi3", SH_BLTIN_SH_HI },
9367 { CODE_FOR_lshrv2si3, "__builtin_lshrv2si3", SH_BLTIN_SH_SI },
9368 { CODE_FOR_lshrv4hi3, "__builtin_lshrv4hi3", SH_BLTIN_SH_HI },
9369 { CODE_FOR_subv2si3, "__builtin_subv2si3", SH_BLTIN_V2SI3 },
9370 { CODE_FOR_subv4hi3, "__builtin_subv4hi3", SH_BLTIN_V4HI3 },
9371 { CODE_FOR_sssubv2si3,"__builtin_sssubv2si3", SH_BLTIN_V2SI3 },
9372 { CODE_FOR_ussubv8qi3,"__builtin_ussubv8qi3", SH_BLTIN_V8QI3 },
9373 { CODE_FOR_sssubv4hi3,"__builtin_sssubv4hi3", SH_BLTIN_V4HI3 },
9374 { CODE_FOR_fcosa_s, "__builtin_sh_media_FCOSA_S", SH_BLTIN_SISF },
9375 { CODE_FOR_fsina_s, "__builtin_sh_media_FSINA_S", SH_BLTIN_SISF },
9376 { CODE_FOR_fipr, "__builtin_sh_media_FIPR_S", SH_BLTIN_3 },
9377 { CODE_FOR_ftrv, "__builtin_sh_media_FTRV_S", SH_BLTIN_3 },
9378 { CODE_FOR_mac_media, "__builtin_sh_media_FMAC_S", SH_BLTIN_3 },
9379 { CODE_FOR_sqrtdf2, "__builtin_sh_media_FSQRT_D", SH_BLTIN_2 },
9380 { CODE_FOR_sqrtsf2, "__builtin_sh_media_FSQRT_S", SH_BLTIN_2 },
9381 { CODE_FOR_fsrra_s, "__builtin_sh_media_FSRRA_S", SH_BLTIN_2 },
9382 { CODE_FOR_ldhi_l, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L },
9383 { CODE_FOR_ldhi_q, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q },
9384 { CODE_FOR_ldlo_l, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L },
9385 { CODE_FOR_ldlo_q, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q },
9386 { CODE_FOR_sthi_l, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L },
9387 { CODE_FOR_sthi_q, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q },
9388 { CODE_FOR_stlo_l, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L },
9389 { CODE_FOR_stlo_q, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q },
9390 { CODE_FOR_ldhi_l64, "__builtin_sh_media_LDHI_L", SH_BLTIN_LDUA_L64 },
9391 { CODE_FOR_ldhi_q64, "__builtin_sh_media_LDHI_Q", SH_BLTIN_LDUA_Q64 },
9392 { CODE_FOR_ldlo_l64, "__builtin_sh_media_LDLO_L", SH_BLTIN_LDUA_L64 },
9393 { CODE_FOR_ldlo_q64, "__builtin_sh_media_LDLO_Q", SH_BLTIN_LDUA_Q64 },
9394 { CODE_FOR_sthi_l64, "__builtin_sh_media_STHI_L", SH_BLTIN_STUA_L64 },
9395 { CODE_FOR_sthi_q64, "__builtin_sh_media_STHI_Q", SH_BLTIN_STUA_Q64 },
9396 { CODE_FOR_stlo_l64, "__builtin_sh_media_STLO_L", SH_BLTIN_STUA_L64 },
9397 { CODE_FOR_stlo_q64, "__builtin_sh_media_STLO_Q", SH_BLTIN_STUA_Q64 },
9398 { CODE_FOR_nsb, "__builtin_sh_media_NSB", SH_BLTIN_SU },
9399 { CODE_FOR_byterev, "__builtin_sh_media_BYTEREV", SH_BLTIN_2 },
9400 { CODE_FOR_prefetch, "__builtin_sh_media_PREFO", SH_BLTIN_PSSV },
9403 static void
9404 sh_media_init_builtins (void)
9406 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
9407 const struct builtin_description *d;
9409 memset (shared, 0, sizeof shared);
9410 for (d = bdesc; d - bdesc < (int) ARRAY_SIZE (bdesc); d++)
9412 tree type, arg_type = 0;
9413 int signature = d->signature;
9414 int i;
9416 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
9417 type = shared[signature];
9418 else
9420 int has_result = signature_args[signature][0] != 0;
9422 if ((signature_args[signature][1] & 8)
9423 && (((signature_args[signature][1] & 1) && TARGET_SHMEDIA32)
9424 || ((signature_args[signature][1] & 2) && TARGET_SHMEDIA64)))
9425 continue;
9426 if (! TARGET_FPU_ANY
9427 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
9428 continue;
9429 type = void_list_node;
9430 for (i = 3; ; i--)
9432 int arg = signature_args[signature][i];
9433 int opno = i - 1 + has_result;
9435 if (arg & 8)
9436 arg_type = ptr_type_node;
9437 else if (arg)
9438 arg_type = (*lang_hooks.types.type_for_mode)
9439 (insn_data[d->icode].operand[opno].mode,
9440 (arg & 1));
9441 else if (i)
9442 continue;
9443 else
9444 arg_type = void_type_node;
9445 if (i == 0)
9446 break;
9447 type = tree_cons (NULL_TREE, arg_type, type);
9449 type = build_function_type (arg_type, type);
9450 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
9451 shared[signature] = type;
9453 lang_hooks.builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
9454 NULL, NULL_TREE);
9458 /* Implements target hook vector_mode_supported_p. */
9459 bool
9460 sh_vector_mode_supported_p (enum machine_mode mode)
9462 if (TARGET_FPU_ANY
9463 && ((mode == V2SFmode)
9464 || (mode == V4SFmode)
9465 || (mode == V16SFmode)))
9466 return true;
9468 else if (TARGET_SHMEDIA
9469 && ((mode == V8QImode)
9470 || (mode == V2HImode)
9471 || (mode == V4HImode)
9472 || (mode == V2SImode)))
9473 return true;
9475 return false;
9478 /* Implements target hook dwarf_calling_convention. Return an enum
9479 dwarf_calling_convention value. */
9481 sh_dwarf_calling_convention (tree func)
9483 if (sh_attr_renesas_p (func))
9484 return DW_CC_GNU_renesas_sh;
9486 return DW_CC_normal;
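/* Set up the SH built-in functions.  Only SHmedia defines any so far.  */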
9489 static void
9490 sh_init_builtins (void)
9492 if (TARGET_SHMEDIA)
9493 sh_media_init_builtins ();
9496 /* Expand an expression EXP that calls a built-in function,
9497 with result going to TARGET if that's convenient
9498 (and in mode MODE if that's convenient).
9499 SUBTARGET may be used as the target for computing one of EXP's operands.
9500 IGNORE is nonzero if the value is to be ignored. */
9502 static rtx
9503 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
9504 enum machine_mode mode ATTRIBUTE_UNUSED, int ignore)
9506 tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0);
9507 tree arglist = TREE_OPERAND (exp, 1);
9508 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
9509 const struct builtin_description *d = &bdesc[fcode];
9510 enum insn_code icode = d->icode;
9511 int signature = d->signature;
9512 enum machine_mode tmode = VOIDmode;
9513 int nop = 0, i;
9514 rtx op[4];
9515 rtx pat = 0;
9517 if (signature_args[signature][0])
9519 if (ignore)
9520 return 0;
9522 tmode = insn_data[icode].operand[0].mode;
9523 if (! target
9524 || GET_MODE (target) != tmode
9525 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
9526 target = gen_reg_rtx (tmode);
9527 op[nop++] = target;
9529 else
9530 target = 0;
9532 for (i = 1; i <= 3; i++, nop++)
9534 tree arg;
9535 enum machine_mode opmode, argmode;
9536 tree optype;
9538 if (! signature_args[signature][i])
9539 break;
9540 arg = TREE_VALUE (arglist);
9541 if (arg == error_mark_node)
9542 return const0_rtx;
9543 arglist = TREE_CHAIN (arglist);
9544 if (signature_args[signature][i] & 8)
9546 opmode = ptr_mode;
9547 optype = ptr_type_node;
9549 else
9551 opmode = insn_data[icode].operand[nop].mode;
9552 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
9554 argmode = TYPE_MODE (TREE_TYPE (arg));
9555 if (argmode != opmode)
9556 arg = build1 (NOP_EXPR, optype, arg);
9557 op[nop] = expand_expr (arg, NULL_RTX, opmode, 0);
9558 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
9559 op[nop] = copy_to_mode_reg (opmode, op[nop]);
9562 switch (nop)
9564 case 1:
9565 pat = (*insn_data[d->icode].genfun) (op[0]);
9566 break;
9567 case 2:
9568 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
9569 break;
9570 case 3:
9571 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
9572 break;
9573 case 4:
9574 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
9575 break;
9576 default:
9577 gcc_unreachable ();
9579 if (! pat)
9580 return 0;
9581 emit_insn (pat);
9582 return target;
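/* Expand the V2SF unary operation CODE element-wise: for each of the two
   SFmode elements, apply CODE to OP1 and store the result in OP0, using
   the unary_sf_op pattern.  */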
9585 void
9586 sh_expand_unop_v2sf (enum rtx_code code, rtx op0, rtx op1)
9588 rtx sel0 = const0_rtx;
9589 rtx sel1 = const1_rtx;
9590 rtx (*fn) (rtx, rtx, rtx, rtx, rtx) = gen_unary_sf_op;
9591 rtx op = gen_rtx_fmt_e (code, SFmode, op1);
9593 emit_insn ((*fn) (op0, op1, op, sel0, sel0));
9594 emit_insn ((*fn) (op0, op1, op, sel1, sel1));
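/* Likewise for a V2SF binary operation: for each of the two SFmode
   elements, combine OP1 and OP2 with CODE and store the result in OP0,
   using the binary_sf_op pattern.  */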
9597 void
9598 sh_expand_binop_v2sf (enum rtx_code code, rtx op0, rtx op1, rtx op2)
9600 rtx sel0 = const0_rtx;
9601 rtx sel1 = const1_rtx;
9602 rtx (*fn) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx)
9603 = gen_binary_sf_op;
9604 rtx op = gen_rtx_fmt_ee (code, SFmode, op1, op2);
9606 emit_insn ((*fn) (op0, op1, op2, op, sel0, sel0, sel0, sel1));
9607 emit_insn ((*fn) (op0, op1, op2, op, sel1, sel1, sel1, sel0));
9610 /* Return the class of registers for which a mode change from FROM to TO
9611 is invalid. */
9612 bool
9613 sh_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
9614 enum reg_class class)
9616 /* We want to enable the use of SUBREGs as a means to
9617 VEC_SELECT a single element of a vector. */
9618 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
9619 return (reg_classes_intersect_p (GENERAL_REGS, class));
9621 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
9623 if (TARGET_LITTLE_ENDIAN)
9625 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
9626 return reg_classes_intersect_p (DF_REGS, class);
9628 else
9630 if (GET_MODE_SIZE (from) < 8)
9631 return reg_classes_intersect_p (DF_HI_REGS, class);
9634 return 0;
9638 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
9639 that label is used. */
9641 void
9642 sh_mark_label (rtx address, int nuses)
9644 if (GOTOFF_P (address))
9646 /* Extract the label or symbol. */
9647 address = XEXP (address, 0);
9648 if (GET_CODE (address) == PLUS)
9649 address = XEXP (address, 0);
9650 address = XVECEXP (address, 0, 0);
9652 if (GET_CODE (address) == LABEL_REF
9653 && GET_CODE (XEXP (address, 0)) == CODE_LABEL)
9654 LABEL_NUSES (XEXP (address, 0)) += nuses;
9657 /* Compute extra cost of moving data between one register class
9658 and another. */
9660 /* If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
9661 uses this information. Hence, the general register <-> floating point
9662 register information here is not used for SFmode. */
9665 sh_register_move_cost (enum machine_mode mode,
9666 enum reg_class srcclass, enum reg_class dstclass)
9668 if (dstclass == T_REGS || dstclass == PR_REGS)
9669 return 10;
9671 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
9672 return 4;
9674 if (mode == SImode && ! TARGET_SHMEDIA && TARGET_FMOVD
9675 && REGCLASS_HAS_FP_REG (srcclass)
9676 && REGCLASS_HAS_FP_REG (dstclass))
9677 return 4;
9679 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
9680 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
9682 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
9683 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
9684 return 9;
9686 if ((REGCLASS_HAS_FP_REG (dstclass)
9687 && REGCLASS_HAS_GENERAL_REG (srcclass))
9688 || (REGCLASS_HAS_GENERAL_REG (dstclass)
9689 && REGCLASS_HAS_FP_REG (srcclass)))
9690 return ((TARGET_SHMEDIA ? 4 : TARGET_FMOVD ? 8 : 12)
9691 * ((GET_MODE_SIZE (mode) + 7) / 8U));
9693 if ((dstclass == FPUL_REGS
9694 && REGCLASS_HAS_GENERAL_REG (srcclass))
9695 || (srcclass == FPUL_REGS
9696 && REGCLASS_HAS_GENERAL_REG (dstclass)))
9697 return 5;
9699 if ((dstclass == FPUL_REGS
9700 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
9701 || (srcclass == FPUL_REGS
9702 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
9703 return 7;
9705 if ((srcclass == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9706 || ((dstclass) == TARGET_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9707 return 20;
9709 /* ??? ptabs faults on (value & 0x3) == 0x3 */
9710 if (TARGET_SHMEDIA
9711 && ((srcclass) == TARGET_REGS || (srcclass) == SIBCALL_REGS))
9713 if (sh_gettrcost >= 0)
9714 return sh_gettrcost;
9715 else if (!TARGET_PT_FIXED)
9716 return 100;
9719 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
9720 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
9721 return 4;
9723 if (TARGET_SHMEDIA
9724 || (TARGET_FMOVD
9725 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
9726 && ! REGCLASS_HAS_GENERAL_REG (dstclass)))
9727 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
9729 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
9732 static rtx emit_load_ptr (rtx, rtx);
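/* Load the pointer-sized value at address ADDR into REG, sign-extending
   from ptr_mode to Pmode when the two modes differ.  */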
9734 static rtx
9735 emit_load_ptr (rtx reg, rtx addr)
9737 rtx mem = gen_const_mem (ptr_mode, addr);
9739 if (Pmode != ptr_mode)
9740 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
9741 return emit_move_insn (reg, mem);
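/* Output code for a thunk: adjust the incoming `this' pointer by DELTA
   and, if VCALL_OFFSET is nonzero, additionally by the word loaded from
   *(*this + VCALL_OFFSET) after the DELTA adjustment; then tail-call
   FUNCTION.  */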
9744 static void
9745 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
9746 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
9747 tree function)
9749 CUMULATIVE_ARGS cum;
9750 int structure_value_byref = 0;
9751 rtx this, this_value, sibcall, insns, funexp;
9752 tree funtype = TREE_TYPE (function);
9753 int simple_add = CONST_OK_FOR_ADD (delta);
9754 int did_load = 0;
9755 rtx scratch0, scratch1, scratch2;
9756 unsigned i;
9758 reload_completed = 1;
9759 epilogue_completed = 1;
9760 no_new_pseudos = 1;
9761 current_function_uses_only_leaf_regs = 1;
9762 reset_block_changes ();
9764 emit_note (NOTE_INSN_PROLOGUE_END);
9766 /* Find the "this" pointer. We have such a wide range of ABIs for the
9767 SH that it's best to do this completely machine independently.
9768 "this" is passed as first argument, unless a structure return pointer
9769 comes first, in which case "this" comes second. */
9770 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
9771 #ifndef PCC_STATIC_STRUCT_RETURN
9772 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
9773 structure_value_byref = 1;
9774 #endif /* not PCC_STATIC_STRUCT_RETURN */
9775 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
9777 tree ptype = build_pointer_type (TREE_TYPE (funtype));
9779 FUNCTION_ARG_ADVANCE (cum, Pmode, ptype, 1);
9781 this = FUNCTION_ARG (cum, Pmode, ptr_type_node, 1);
9783 /* For SHcompact, we only have r0 for a scratch register: r1 is the
9784 static chain pointer (even if you can't have nested virtual functions
9785 right now, someone might implement them sometime), and the rest of the
9786 registers are used for argument passing, are callee-saved, or reserved. */
9787 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
9788 -ffixed-reg has been used. */
9789 if (! call_used_regs[0] || fixed_regs[0])
9790 error ("r0 needs to be available as a call-clobbered register");
9791 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
9792 if (! TARGET_SH5)
9794 if (call_used_regs[1] && ! fixed_regs[1])
9795 scratch1 = gen_rtx_REG (ptr_mode, 1);
9796 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
9797 pointing where to return struct values. */
9798 if (call_used_regs[3] && ! fixed_regs[3])
9799 scratch2 = gen_rtx_REG (Pmode, 3);
9801 else if (TARGET_SHMEDIA)
9803 for (i = FIRST_GENERAL_REG; i <= LAST_GENERAL_REG; i++)
9804 if (i != REGNO (scratch0) &&
9805 call_used_regs[i] && ! fixed_regs[i] && ! FUNCTION_ARG_REGNO_P (i))
9807 scratch1 = gen_rtx_REG (ptr_mode, i);
9808 break;
9810 if (scratch1 == scratch0)
9811 error ("Need a second call-clobbered general purpose register");
9812 for (i = FIRST_TARGET_REG; i <= LAST_TARGET_REG; i++)
9813 if (call_used_regs[i] && ! fixed_regs[i])
9815 scratch2 = gen_rtx_REG (Pmode, i);
9816 break;
9818 if (scratch2 == scratch0)
9819 error ("Need a call-clobbered target register");
9822 this_value = plus_constant (this, delta);
9823 if (vcall_offset
9824 && (simple_add || scratch0 != scratch1)
9825 && strict_memory_address_p (ptr_mode, this_value))
9827 emit_load_ptr (scratch0, this_value);
9828 did_load = 1;
9831 if (!delta)
9832 ; /* Do nothing. */
9833 else if (simple_add)
9834 emit_move_insn (this, this_value);
9835 else
9837 emit_move_insn (scratch1, GEN_INT (delta));
9838 emit_insn (gen_add2_insn (this, scratch1));
9841 if (vcall_offset)
9843 rtx offset_addr;
9845 if (!did_load)
9846 emit_load_ptr (scratch0, this);
9848 offset_addr = plus_constant (scratch0, vcall_offset);
9849 if (strict_memory_address_p (ptr_mode, offset_addr))
9850 ; /* Do nothing. */
9851 else if (! TARGET_SH5 && scratch0 != scratch1)
9853 /* scratch0 != scratch1, and we have indexed loads. Get better
9854 schedule by loading the offset into r1 and using an indexed
9855 load - then the load of r1 can issue before the load from
9856 (this + delta) finishes. */
9857 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9858 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
9860 else if (CONST_OK_FOR_ADD (vcall_offset))
9862 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
9863 offset_addr = scratch0;
9865 else if (scratch0 != scratch1)
9867 emit_move_insn (scratch1, GEN_INT (vcall_offset));
9868 emit_insn (gen_add2_insn (scratch0, scratch1));
9869 offset_addr = scratch0;
9871 else
9872 gcc_unreachable (); /* FIXME */
9873 emit_load_ptr (scratch0, offset_addr);
9875 if (Pmode != ptr_mode)
9876 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
9877 emit_insn (gen_add2_insn (this, scratch0));
9880 /* Generate a tail call to the target function. */
9881 if (! TREE_USED (function))
9883 assemble_external (function);
9884 TREE_USED (function) = 1;
9886 funexp = XEXP (DECL_RTL (function), 0);
9887 /* If the function is overridden, so is the thunk, hence we don't
9888 need GOT addressing even if this is a public symbol. */
9889 #if 0
9890 if (TARGET_SH1 && ! flag_weak)
9891 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
9892 else
9893 #endif
9894 if (TARGET_SH2 && flag_pic)
9896 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
9897 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
9899 else
9901 if (TARGET_SHMEDIA && flag_pic)
9903 funexp = gen_sym2PIC (funexp);
9904 PUT_MODE (funexp, Pmode);
9906 emit_move_insn (scratch2, funexp);
9907 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
9908 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
9910 sibcall = emit_call_insn (sibcall);
9911 SIBLING_CALL_P (sibcall) = 1;
9912 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this);
9913 emit_barrier ();
9915 /* Run just enough of rest_of_compilation to do scheduling and get
9916 the insns emitted. Note that use_thunk calls
9917 assemble_start_function and assemble_end_function. */
9919 insn_locators_initialize ();
9920 insns = get_insns ();
9922 if (optimize > 0)
9924 /* Initialize the bitmap obstacks. */
9925 bitmap_obstack_initialize (NULL);
9926 bitmap_obstack_initialize (&reg_obstack);
9927 if (! cfun->cfg)
9928 init_flow ();
9929 rtl_register_cfg_hooks ();
9930 init_rtl_bb_info (ENTRY_BLOCK_PTR);
9931 init_rtl_bb_info (EXIT_BLOCK_PTR);
9932 ENTRY_BLOCK_PTR->flags |= BB_RTL;
9933 EXIT_BLOCK_PTR->flags |= BB_RTL;
9934 find_basic_blocks (insns);
9936 if (flag_schedule_insns_after_reload)
9938 life_analysis (PROP_FINAL);
9940 split_all_insns (1);
9942 schedule_insns ();
9944 /* We must split jmp insn in PIC case. */
9945 else if (flag_pic)
9946 split_all_insns_noflow ();
9949 sh_reorg ();
9951 if (optimize > 0 && flag_delayed_branch)
9952 dbr_schedule (insns);
9954 shorten_branches (insns);
9955 final_start_function (insns, file, 1);
9956 final (insns, file, 1);
9957 final_end_function ();
9959 if (optimize > 0)
9961 /* Release all memory allocated by flow. */
9962 free_basic_block_vars ();
9964 /* Release the bitmap obstacks. */
9965 bitmap_obstack_release (&reg_obstack);
9966 bitmap_obstack_release (NULL);
9969 reload_completed = 0;
9970 epilogue_completed = 0;
9971 no_new_pseudos = 0;
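/* Return a SYMBOL_REF (possibly loaded into TARGET) for the function NAME
   of kind KIND.  When generating PIC, special-function (sfunc) addresses
   are loaded through the GOT for SFUNC_GOT and as GOTOFF values for
   SFUNC_STATIC.  */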
9975 function_symbol (rtx target, const char *name, enum sh_function_kind kind)
9977 rtx sym;
9979 /* If this is not an ordinary function, the name usually comes from a
9980 string literal or an sprintf buffer. Make sure we use the same
9981 string consistently, so that cse will be able to unify address loads. */
9982 if (kind != FUNCTION_ORDINARY)
9983 name = IDENTIFIER_POINTER (get_identifier (name));
9984 sym = gen_rtx_SYMBOL_REF (Pmode, name);
9985 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
9986 if (flag_pic)
9987 switch (kind)
9989 case FUNCTION_ORDINARY:
9990 break;
9991 case SFUNC_GOT:
9993 rtx reg = target ? target : gen_reg_rtx (Pmode);
9995 emit_insn (gen_symGOT2reg (reg, sym));
9996 sym = reg;
9997 break;
9999 case SFUNC_STATIC:
10001 /* ??? To allow cse to work, we use GOTOFF relocations.
10002 we could add combiner patterns to transform this into
10003 straight pc-relative calls with sym2PIC / bsrf when
10004 label load and function call are still 1:1 and in the
10005 same basic block during combine. */
10006 rtx reg = target ? target : gen_reg_rtx (Pmode);
10008 emit_insn (gen_symGOTOFF2reg (reg, sym));
10009 sym = reg;
10010 break;
10013 if (target && sym != target)
10015 emit_move_insn (target, sym);
10016 return target;
10018 return sym;
10021 /* Find the number of a general purpose register in S. */
10022 static int
10023 scavenge_reg (HARD_REG_SET *s)
10025 int r;
10026 for (r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
10027 if (TEST_HARD_REG_BIT (*s, r))
10028 return r;
10029 return -1;
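/* Return an rtx for the value that the PR (return address) register had
   on entry to the current function.  */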
10033 sh_get_pr_initial_val (void)
10035 rtx val;
10037 /* ??? Unfortunately, get_hard_reg_initial_val doesn't always work for the
10038 PR register on SHcompact, because it might be clobbered by the prologue.
10039 We check first if that is known to be the case. */
10040 if (TARGET_SHCOMPACT
10041 && ((current_function_args_info.call_cookie
10042 & ~ CALL_COOKIE_RET_TRAMP (1))
10043 || current_function_has_nonlocal_label))
10044 return gen_frame_mem (SImode, return_address_pointer_rtx);
10046 /* If we haven't finished rtl generation, there might be a nonlocal label
10047 that we haven't seen yet.
10048 ??? get_hard_reg_initial_val fails if it is called while no_new_pseudos
10049 is set, unless it has been called before for the same register. And even
10050 then, we end up in trouble if we didn't use the register in the same
10051 basic block before. So call get_hard_reg_initial_val now and wrap it
10052 in an unspec if we might need to replace it. */
10053 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
10054 combine can put the pseudo returned by get_hard_reg_initial_val into
10055 instructions that need a general purpose register, which will fail to
10056 be recognized when the pseudo becomes allocated to PR. */
10058 val = get_hard_reg_initial_val (Pmode, TARGET_SHMEDIA ? PR_MEDIA_REG : PR_REG);
10059 if (TARGET_SH1)
10060 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
10061 return val;
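/* Try to expand an scc operation whose first operand is the T register.
   CODE is the comparison code and TARGET receives the 0/1 result; the
   operands come from sh_compare_op0 / sh_compare_op1.  Return nonzero
   if the expansion succeeded.  */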
10065 sh_expand_t_scc (enum rtx_code code, rtx target)
10067 rtx result = target;
10068 HOST_WIDE_INT val;
10070 if (GET_CODE (sh_compare_op0) != REG || REGNO (sh_compare_op0) != T_REG
10071 || GET_CODE (sh_compare_op1) != CONST_INT)
10072 return 0;
10073 if (GET_CODE (result) != REG)
10074 result = gen_reg_rtx (SImode);
10075 val = INTVAL (sh_compare_op1);
10076 if ((code == EQ && val == 1) || (code == NE && val == 0))
10077 emit_insn (gen_movt (result));
10078 else if ((code == EQ && val == 0) || (code == NE && val == 1))
10080 emit_insn (gen_rtx_CLOBBER (VOIDmode, result));
10081 emit_insn (gen_subc (result, result, result));
10082 emit_insn (gen_addsi3 (result, result, const1_rtx));
10084 else if (code == EQ || code == NE)
10085 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
10086 else
10087 return 0;
10088 if (result != target)
10089 emit_move_insn (target, result);
10090 return 1;
10093 /* INSN is an sfunc; return the rtx that describes the address used. */
10094 static rtx
10095 extract_sfunc_addr (rtx insn)
10097 rtx pattern, part = NULL_RTX;
10098 int len, i;
10100 pattern = PATTERN (insn);
10101 len = XVECLEN (pattern, 0);
10102 for (i = 0; i < len; i++)
10104 part = XVECEXP (pattern, 0, i);
10105 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
10106 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
10107 return XEXP (part, 0);
10109 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
10110 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
10113 /* Verify that the register in use_sfunc_addr still agrees with the address
10114 used in the sfunc. This prevents fill_slots_from_thread from changing
10115 use_sfunc_addr.
10116 INSN is the use_sfunc_addr instruction, and REG is the register it
10117 guards. */
10119 check_use_sfunc_addr (rtx insn, rtx reg)
10121 /* Search for the sfunc. It should really come right after INSN. */
10122 while ((insn = NEXT_INSN (insn)))
10124 if (GET_CODE (insn) == CODE_LABEL || GET_CODE (insn) == JUMP_INSN)
10125 break;
10126 if (! INSN_P (insn))
10127 continue;
10129 if (GET_CODE (PATTERN (insn)) == SEQUENCE)
10130 insn = XVECEXP (PATTERN (insn), 0, 0);
10131 if (GET_CODE (PATTERN (insn)) != PARALLEL
10132 || get_attr_type (insn) != TYPE_SFUNC)
10133 continue;
10134 return rtx_equal_p (extract_sfunc_addr (insn), reg);
10136 gcc_unreachable ();
10139 /* This function returns a constant rtx that represents 2**15 / pi in
10140 SFmode. It's used to scale SFmode angles, in radians, to a
10141 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
10142 maps to 0x10000. */
10144 static GTY(()) rtx sh_fsca_sf2int_rtx;
10147 sh_fsca_sf2int (void)
10149 if (! sh_fsca_sf2int_rtx)
10151 REAL_VALUE_TYPE rv;
10153 real_from_string (&rv, "10430.378350470453");
10154 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
10157 return sh_fsca_sf2int_rtx;
10160 /* This function returns a constant rtx that represents 2**15 / pi in
10161 DFmode. It's used to scale DFmode angles, in radians, to a
10162 fixed-point signed 16.16-bit fraction of a full circle, i.e., 2*pi
10163 maps to 0x10000. */
10165 static GTY(()) rtx sh_fsca_df2int_rtx;
10168 sh_fsca_df2int (void)
10170 if (! sh_fsca_df2int_rtx)
10172 REAL_VALUE_TYPE rv;
10174 real_from_string (&rv, "10430.378350470453");
10175 sh_fsca_df2int_rtx = const_double_from_real_value (rv, DFmode);
10178 return sh_fsca_df2int_rtx;
10181 /* This function returns a constant rtx that represents pi / 2**15 in
10182 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
10183 of a full circle back to an SFmode value, i.e., 0x10000 maps to
10184 2*pi. */
10186 static GTY(()) rtx sh_fsca_int2sf_rtx;
10189 sh_fsca_int2sf (void)
10191 if (! sh_fsca_int2sf_rtx)
10193 REAL_VALUE_TYPE rv;
10195 real_from_string (&rv, "9.587379924285257e-5");
10196 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
10199 return sh_fsca_int2sf_rtx;
10202 /* Initialize the CUMULATIVE_ARGS structure. */
10204 void
10205 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
10206 tree fntype,
10207 rtx libname ATTRIBUTE_UNUSED,
10208 tree fndecl,
10209 signed int n_named_args,
10210 enum machine_mode mode)
10212 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
10213 pcum->free_single_fp_reg = 0;
10214 pcum->stack_regs = 0;
10215 pcum->byref_regs = 0;
10216 pcum->byref = 0;
10217 pcum->outgoing = (n_named_args == -1) ? 0 : 1;
10219 /* XXX - Should we check TARGET_HITACHI here ??? */
10220 pcum->renesas_abi = sh_attr_renesas_p (fntype) ? 1 : 0;
10222 if (fntype)
10224 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
10225 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
10226 pcum->prototype_p = TYPE_ARG_TYPES (fntype) ? TRUE : FALSE;
10227 pcum->arg_count [(int) SH_ARG_INT]
10228 = TARGET_SH5 && aggregate_value_p (TREE_TYPE (fntype), fndecl);
10230 pcum->call_cookie
10231 = CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10232 && pcum->arg_count [(int) SH_ARG_INT] == 0
10233 && (TYPE_MODE (TREE_TYPE (fntype)) == BLKmode
10234 ? int_size_in_bytes (TREE_TYPE (fntype))
10235 : GET_MODE_SIZE (TYPE_MODE (TREE_TYPE (fntype)))) > 4
10236 && (BASE_RETURN_VALUE_REG (TYPE_MODE (TREE_TYPE (fntype)))
10237 == FIRST_RET_REG));
10239 else
10241 pcum->arg_count [(int) SH_ARG_INT] = 0;
10242 pcum->prototype_p = FALSE;
10243 if (mode != VOIDmode)
10245 pcum->call_cookie =
10246 CALL_COOKIE_RET_TRAMP (TARGET_SHCOMPACT
10247 && GET_MODE_SIZE (mode) > 4
10248 && BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG);
10250 /* If the default ABI is the Renesas ABI then all library
10251 calls must assume that the library will be using the
10252 Renesas ABI. So if the function would return its result
10253 in memory then we must force the address of this memory
10254 block onto the stack. Ideally we would like to call
10255 targetm.calls.return_in_memory() here but we do not have
10256 the TYPE or the FNDECL available so we synthesize the
10257 contents of that function as best we can. */
10258 pcum->force_mem =
10259 (TARGET_DEFAULT & MASK_HITACHI)
10260 && (mode == BLKmode
10261 || (GET_MODE_SIZE (mode) > 4
10262 && !(mode == DFmode
10263 && TARGET_FPU_DOUBLE)));
10265 else
10267 pcum->call_cookie = 0;
10268 pcum->force_mem = FALSE;
10273 /* Determine if two hard register sets intersect.
10274 Return 1 if they do. */
10276 static int
10277 hard_regs_intersect_p (HARD_REG_SET *a, HARD_REG_SET *b)
10279 HARD_REG_SET c;
10280 COPY_HARD_REG_SET (c, *a);
10281 AND_HARD_REG_SET (c, *b);
10282 GO_IF_HARD_REG_SUBSET (c, reg_class_contents[(int) NO_REGS], lose);
10283 return 1;
10284 lose:
10285 return 0;
10288 #ifdef TARGET_ADJUST_UNROLL_MAX
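/* Return an adjusted value of MAX_UNROLLED_INSNS for LOOP, throttling
   unrolling on SHmedia when the duplicated branch targets would not fit
   in the eight target registers.  */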
10289 static int
10290 sh_adjust_unroll_max (struct loop * loop, int insn_count,
10291 int max_unrolled_insns, int strength_reduce_p,
10292 int unroll_type)
10294 /* This doesn't work in 4.0 because the old unroller & loop.h is gone. */
10295 if (TARGET_ADJUST_UNROLL && TARGET_SHMEDIA)
10297 /* Throttle back loop unrolling so that the costs of using more
10298 targets than the eight target register we have don't outweigh
10299 the benefits of unrolling. */
10300 rtx insn;
10301 int n_labels = 0, n_calls = 0, n_exit_dest = 0, n_inner_loops = -1;
10302 int n_barriers = 0;
10303 rtx dest;
10304 int i;
10305 rtx exit_dest[8];
10306 int threshold;
10307 int unroll_benefit = 0, mem_latency = 0;
10308 int base_cost, best_cost, cost;
10309 int factor, best_factor;
10310 int n_dest;
10311 unsigned max_iterations = 32767;
10312 int n_iterations;
10313 int need_precond = 0, precond = 0;
10314 basic_block * bbs = get_loop_body (loop);
10315 struct niter_desc *desc;
10317 /* Assume that all labels inside the loop are used from inside the
10318 loop. If the loop has multiple entry points, it is unlikely to
10319 be unrolled anyway.
10320 Also assume that all calls are to different functions. That is
10321 somewhat pessimistic, but if you have lots of calls, unrolling the
10322 loop is not likely to gain you much in the first place. */
10323 i = loop->num_nodes - 1;
10324 for (insn = BB_HEAD (bbs[i]); ; )
10326 if (GET_CODE (insn) == CODE_LABEL)
10327 n_labels++;
10328 else if (GET_CODE (insn) == CALL_INSN)
10329 n_calls++;
10330 else if (GET_CODE (insn) == NOTE
10331 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_LOOP_BEG)
10332 n_inner_loops++;
10333 else if (GET_CODE (insn) == BARRIER)
10334 n_barriers++;
10335 if (insn != BB_END (bbs[i]))
10336 insn = NEXT_INSN (insn);
10337 else if (--i >= 0)
10338 insn = BB_HEAD (bbs[i]);
10339 else
10340 break;
10342 free (bbs);
10343 /* One label for the loop top is normal, and it won't be duplicated by
10344 unrolling. */
10345 if (n_labels <= 1)
10346 return max_unrolled_insns;
10347 if (n_inner_loops > 0)
10348 return 0;
10349 for (dest = loop->exit_labels; dest && n_exit_dest < 8;
10350 dest = LABEL_NEXTREF (dest))
10352 for (i = n_exit_dest - 1;
10353 i >= 0 && XEXP (dest, 0) != XEXP (exit_dest[i], 0); i--);
10354 if (i < 0)
10355 exit_dest[n_exit_dest++] = dest;
10357 /* If the loop top and call and exit destinations are enough to fill up
10358 the target registers, we're unlikely to do any more damage by
10359 unrolling. */
10360 if (n_calls + n_exit_dest >= 7)
10361 return max_unrolled_insns;
10363 /* ??? In the new loop unroller, there is no longer any strength
10364 reduction information available. Thus, when it comes to unrolling,
10365 we know the cost of everything, but we know the value of nothing. */
10366 #if 0
10367 if (strength_reduce_p
10368 && (unroll_type == LPT_UNROLL_RUNTIME
10369 || unroll_type == LPT_UNROLL_CONSTANT
10370 || unroll_type == LPT_PEEL_COMPLETELY))
10372 struct loop_ivs *ivs = LOOP_IVS (loop);
10373 struct iv_class *bl;
10375 /* We'll save one compare-and-branch in each loop body copy
10376 but the last one. */
10377 unroll_benefit = 1;
10378 /* Assess the benefit of removing biv & giv updates. */
10379 for (bl = ivs->list; bl; bl = bl->next)
10381 rtx increment = biv_total_increment (bl);
10382 struct induction *v;
10384 if (increment && GET_CODE (increment) == CONST_INT)
10386 unroll_benefit++;
10387 for (v = bl->giv; v; v = v->next_iv)
10389 if (! v->ignore && v->same == 0
10390 && GET_CODE (v->mult_val) == CONST_INT)
10391 unroll_benefit++;
10392 /* If this giv uses an array, try to determine
10393 a maximum iteration count from the size of the
10394 array. This need not be correct all the time,
10395 but should not be too far off the mark too often. */
10396 while (v->giv_type == DEST_ADDR)
10398 rtx mem = PATTERN (v->insn);
10399 tree mem_expr, type, size_tree;
10401 if (GET_CODE (SET_SRC (mem)) == MEM)
10402 mem = SET_SRC (mem);
10403 else if (GET_CODE (SET_DEST (mem)) == MEM)
10404 mem = SET_DEST (mem);
10405 else
10406 break;
10407 mem_expr = MEM_EXPR (mem);
10408 if (! mem_expr)
10409 break;
10410 type = TREE_TYPE (mem_expr);
10411 if (TREE_CODE (type) != ARRAY_TYPE
10412 || ! TYPE_SIZE (type) || ! TYPE_SIZE_UNIT (type))
10413 break;
10414 size_tree = fold_build2 (TRUNC_DIV_EXPR,
10415 bitsizetype,
10416 TYPE_SIZE (type),
10417 TYPE_SIZE_UNIT (type));
10418 if (TREE_CODE (size_tree) == INTEGER_CST
10419 && ! TREE_INT_CST_HIGH (size_tree)
10420 && TREE_INT_CST_LOW (size_tree) < max_iterations)
10421 max_iterations = TREE_INT_CST_LOW (size_tree);
10422 break;
10428 #else /* 0 */
10429 /* Assume there is at least some benefit. */
10430 unroll_benefit = 1;
10431 #endif /* 0 */
10433 desc = get_simple_loop_desc (loop);
10434 n_iterations = desc->const_iter ? desc->niter : 0;
10435 max_iterations
10436 = max_iterations < desc->niter_max ? max_iterations : desc->niter_max;
10438 if (! strength_reduce_p || ! n_iterations)
10439 need_precond = 1;
10440 if (! n_iterations)
10442 n_iterations
10443 = max_iterations < 3 ? max_iterations : max_iterations * 3 / 4;
10444 if (! n_iterations)
10445 return 0;
10447 #if 0 /* ??? See above - missing induction variable information. */
10448 while (unroll_benefit > 1) /* no loop */
10450 /* We include the benefit of biv/ giv updates. Check if some or
10451 all of these updates are likely to fit into a scheduling
10452 bubble of a load.
10453 We check for the following case:
10454 - All the insns leading to the first JUMP_INSN are in a strict
10455 dependency chain.
10456 - there is at least one memory reference in them.
10458 When we find such a pattern, we assume that we can hide as many
10459 updates as the total of the load latency is, if we have an
10460 unroll factor of at least two. We might or might not also do
10461 this without unrolling, so rather than considering this as an
10462 extra unroll benefit, discount it in the unroll benefits of unroll
10463 factors higher than two. */
10465 rtx set, last_set;
10467 insn = next_active_insn (loop->start);
10468 last_set = single_set (insn);
10469 if (! last_set)
10470 break;
10471 if (GET_CODE (SET_SRC (last_set)) == MEM)
10472 mem_latency += 2;
10473 for (insn = NEXT_INSN (insn); insn != end; insn = NEXT_INSN (insn))
10475 if (! INSN_P (insn))
10476 continue;
10477 if (GET_CODE (insn) == JUMP_INSN)
10478 break;
10479 if (! reg_referenced_p (SET_DEST (last_set), PATTERN (insn)))
10481 /* Check if this is a to-be-reduced giv insn. */
10482 struct loop_ivs *ivs = LOOP_IVS (loop);
10483 struct iv_class *bl;
10484 struct induction *v;
10485 for (bl = ivs->list; bl; bl = bl->next)
10487 if (bl->biv->insn == insn)
10488 goto is_biv;
10489 for (v = bl->giv; v; v = v->next_iv)
10490 if (v->insn == insn)
10491 goto is_giv;
10493 mem_latency--;
10494 is_biv:
10495 is_giv:
10496 continue;
10498 set = single_set (insn);
10499 if (! set)
10500 continue;
10501 if (GET_CODE (SET_SRC (set)) == MEM)
10502 mem_latency += 2;
10503 last_set = set;
10505 if (mem_latency < 0)
10506 mem_latency = 0;
10507 else if (mem_latency > unroll_benefit - 1)
10508 mem_latency = unroll_benefit - 1;
10509 break;
10511 #endif /* 0 */
10512 if (n_labels + (unroll_benefit + n_labels * 8) / n_iterations
10513 <= unroll_benefit)
10514 return max_unrolled_insns;
10516 n_dest = n_labels + n_calls + n_exit_dest;
10517 base_cost = n_dest <= 8 ? 0 : n_dest - 7;
10518 best_cost = 0;
10519 best_factor = 1;
10520 if (n_barriers * 2 > n_labels - 1)
10521 n_barriers = (n_labels - 1) / 2;
10522 for (factor = 2; factor <= 8; factor++)
10524 /* Bump up preconditioning cost for each power of two. */
10525 if (! (factor & (factor-1)))
10526 precond += 4;
10527 /* When preconditioning, only powers of two will be considered. */
10528 else if (need_precond)
10529 continue;
10530 n_dest = ((unroll_type != LPT_PEEL_COMPLETELY)
10531 + (n_labels - 1) * factor + n_calls + n_exit_dest
10532 - (n_barriers * factor >> 1)
10533 + need_precond);
10534 cost
10535 = ((n_dest <= 8 ? 0 : n_dest - 7)
10536 - base_cost * factor
10537 - ((factor > 2 ? unroll_benefit - mem_latency : unroll_benefit)
10538 * (factor - (unroll_type != LPT_PEEL_COMPLETELY)))
10539 + ((unroll_benefit + 1 + (n_labels - 1) * factor)
10540 / n_iterations));
10541 if (need_precond)
10542 cost += (precond + unroll_benefit * factor / 2) / n_iterations;
10543 if (cost < best_cost)
10545 best_cost = cost;
10546 best_factor = factor;
10549 threshold = best_factor * insn_count;
10550 if (max_unrolled_insns > threshold)
10551 max_unrolled_insns = threshold;
10553 return max_unrolled_insns;
10555 #endif /* TARGET_ADJUST_UNROLL_MAX */
10557 /* Replace any occurrence of FROM(n) in X with TO(n). The function does
10558 not enter into CONST_DOUBLE for the replace.
10560 Note that copying is not done so X must not be shared unless all copies
10561 are to be modified.
10563 This is like replace_rtx, except that we operate on N_REPLACEMENTS
10564 replacements simultaneously - FROM(n) is replacements[n*2] and TO(n) is
10565 replacements[n*2+1] - and that we take mode changes into account.
10567 If a replacement is ambiguous, return NULL_RTX.
10569 If MODIFY is zero, don't modify any rtl in place,
10570 just return zero or nonzero for failure / success. */
10573 replace_n_hard_rtx (rtx x, rtx *replacements, int n_replacements, int modify)
10575 int i, j;
10576 const char *fmt;
10578 /* The following prevents loops from occurring when we change a MEM in
10579 CONST_DOUBLE onto the same CONST_DOUBLE. */
10580 if (x != 0 && GET_CODE (x) == CONST_DOUBLE)
10581 return x;
10583 for (i = n_replacements - 1; i >= 0 ; i--)
10584 if (x == replacements[i*2] && GET_MODE (x) == GET_MODE (replacements[i*2+1]))
10585 return replacements[i*2+1];
10587 /* Allow this function to make replacements in EXPR_LISTs. */
10588 if (x == 0)
10589 return 0;
10591 if (GET_CODE (x) == SUBREG)
10593 rtx new = replace_n_hard_rtx (SUBREG_REG (x), replacements,
10594 n_replacements, modify);
10596 if (GET_CODE (new) == CONST_INT)
10598 x = simplify_subreg (GET_MODE (x), new,
10599 GET_MODE (SUBREG_REG (x)),
10600 SUBREG_BYTE (x));
10601 if (! x)
10602 abort ();
10604 else if (modify)
10605 SUBREG_REG (x) = new;
10607 return x;
10609 else if (GET_CODE (x) == REG)
10611 unsigned regno = REGNO (x);
10612 unsigned nregs = (regno < FIRST_PSEUDO_REGISTER
10613 ? HARD_REGNO_NREGS (regno, GET_MODE (x)) : 1);
10614 rtx result = NULL_RTX;
10616 for (i = n_replacements - 1; i >= 0; i--)
10618 rtx from = replacements[i*2];
10619 rtx to = replacements[i*2+1];
10620 unsigned from_regno, from_nregs, to_regno, new_regno;
10622 if (GET_CODE (from) != REG)
10623 continue;
10624 from_regno = REGNO (from);
10625 from_nregs = (from_regno < FIRST_PSEUDO_REGISTER
10626 ? HARD_REGNO_NREGS (from_regno, GET_MODE (from)) : 1);
10627 if (regno < from_regno + from_nregs && regno + nregs > from_regno)
10629 if (regno < from_regno
10630 || regno + nregs > from_regno + nregs
10631 || GET_CODE (to) != REG
10632 || result)
10633 return NULL_RTX;
10634 to_regno = REGNO (to);
10635 if (to_regno < FIRST_PSEUDO_REGISTER)
10637 new_regno = regno + to_regno - from_regno;
10638 if ((unsigned) HARD_REGNO_NREGS (new_regno, GET_MODE (x))
10639 != nregs)
10640 return NULL_RTX;
10641 result = gen_rtx_REG (GET_MODE (x), new_regno);
10643 else if (GET_MODE (x) <= GET_MODE (to))
10644 result = gen_lowpart_common (GET_MODE (x), to);
10645 else
10646 result = gen_lowpart_SUBREG (GET_MODE (x), to);
10649 return result ? result : x;
10651 else if (GET_CODE (x) == ZERO_EXTEND)
10653 rtx new = replace_n_hard_rtx (XEXP (x, 0), replacements,
10654 n_replacements, modify);
10656 if (GET_CODE (new) == CONST_INT)
10658 x = simplify_unary_operation (ZERO_EXTEND, GET_MODE (x),
10659 new, GET_MODE (XEXP (x, 0)));
10660 if (! x)
10661 abort ();
10663 else if (modify)
10664 XEXP (x, 0) = new;
10666 return x;
10669 fmt = GET_RTX_FORMAT (GET_CODE (x));
10670 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
10672 rtx new;
10674 if (fmt[i] == 'e')
10676 new = replace_n_hard_rtx (XEXP (x, i), replacements,
10677 n_replacements, modify);
10678 if (!new)
10679 return NULL_RTX;
10680 if (modify)
10681 XEXP (x, i) = new;
10683 else if (fmt[i] == 'E')
10684 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
10686 new = replace_n_hard_rtx (XVECEXP (x, i, j), replacements,
10687 n_replacements, modify);
10688 if (!new)
10689 return NULL_RTX;
10690 if (modify)
10691 XVECEXP (x, i, j) = new;
10695 return x;
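/* Return an rtx of mode MODE holding the (possibly truncated) value of X.
   An enclosing SIGN_EXTEND or ZERO_EXTEND is looked through; if
   NEED_SIGN_EXT is set, a narrower operand is only reused when it was
   sign-extended.  */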
10699 sh_gen_truncate (enum machine_mode mode, rtx x, int need_sign_ext)
10701 enum rtx_code code = TRUNCATE;
10703 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
10705 rtx inner = XEXP (x, 0);
10706 enum machine_mode inner_mode = GET_MODE (inner);
10708 if (inner_mode == mode)
10709 return inner;
10710 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
10711 x = inner;
10712 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
10713 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
10715 code = GET_CODE (x);
10716 x = inner;
10719 return gen_rtx_fmt_e (code, mode, x);
10722 /* Called via for_each_rtx after reload, to clean up truncates of
10723 registers that span multiple actual hard registers. */
10725 shmedia_cleanup_truncate (rtx *p, void *n_changes)
10727 rtx x = *p, reg;
10729 if (GET_CODE (x) != TRUNCATE)
10730 return 0;
10731 reg = XEXP (x, 0);
10732 if (GET_MODE_SIZE (GET_MODE (reg)) > 8 && GET_CODE (reg) == REG)
10734 enum machine_mode reg_mode = GET_MODE (reg);
10735 XEXP (x, 0) = simplify_subreg (DImode, reg, reg_mode,
10736 subreg_lowpart_offset (DImode, reg_mode));
10737 *(int*) n_changes += 1;
10738 return -1;
10740 return 0;
10743 /* Load and store depend on the highpart of the address. However,
10744 set_attr_alternative does not give well-defined results before reload,
10745 so we must look at the rtl ourselves to see if any of the feeding
10746 registers is used in a memref. */
10748 /* Called by sh_contains_memref_p via for_each_rtx. */
10749 static int
10750 sh_contains_memref_p_1 (rtx *loc, void *data ATTRIBUTE_UNUSED)
10752 return (GET_CODE (*loc) == MEM);
10755 /* Return nonzero iff INSN contains a MEM. */
10757 sh_contains_memref_p (rtx insn)
10759 return for_each_rtx (&PATTERN (insn), &sh_contains_memref_p_1, NULL);
10762 /* FNADDR is the MEM expression from a call expander. Return an address
10763 to use in an SHmedia insn pattern. */
10765 shmedia_prepare_call_address (rtx fnaddr, int is_sibcall)
10767 int is_sym;
10769 fnaddr = XEXP (fnaddr, 0);
10770 is_sym = GET_CODE (fnaddr) == SYMBOL_REF;
10771 if (flag_pic && is_sym)
10773 if (! SYMBOL_REF_LOCAL_P (fnaddr))
10775 rtx reg = gen_reg_rtx (Pmode);
10777 /* We must not use GOTPLT for sibcalls, because PIC_REG
10778 must be restored before the PLT code gets to run. */
10779 if (is_sibcall)
10780 emit_insn (gen_symGOT2reg (reg, fnaddr));
10781 else
10782 emit_insn (gen_symGOTPLT2reg (reg, fnaddr));
10783 fnaddr = reg;
10785 else
10787 fnaddr = gen_sym2PIC (fnaddr);
10788 PUT_MODE (fnaddr, Pmode);
10791 /* If ptabs might trap, make this visible to the rest of the compiler.
10792 We generally assume that symbols pertain to valid locations, but
10793 it is possible to generate invalid symbols with asm or linker tricks.
10794 In a list of functions where each returns its successor, an invalid
10795 symbol might denote an empty list. */
10796 if (!TARGET_PT_FIXED
10797 && (!is_sym || TARGET_INVALID_SYMBOLS)
10798 && (!REG_P (fnaddr) || ! TARGET_REGISTER_P (REGNO (fnaddr))))
10800 rtx tr = gen_reg_rtx (PDImode);
10802 emit_insn (gen_ptabs (tr, fnaddr));
10803 fnaddr = tr;
10805 else if (! target_reg_operand (fnaddr, Pmode))
10806 fnaddr = copy_to_mode_reg (Pmode, fnaddr);
10807 return fnaddr;
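/* Implement the secondary reload target hook: return the register class
   needed as an intermediate when copying X of mode MODE into (IN_P) or
   out of a register of class CLASS, or NO_REGS if a reload pattern
   recorded in SRI->icode can handle it directly.  */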
10810 enum reg_class
10811 sh_secondary_reload (bool in_p, rtx x, enum reg_class class,
10812 enum machine_mode mode, secondary_reload_info *sri)
10814 if (in_p)
10816 if (REGCLASS_HAS_FP_REG (class)
10817 && ! TARGET_SHMEDIA
10818 && immediate_operand ((x), mode)
10819 && ! ((fp_zero_operand (x) || fp_one_operand (x))
10820 && mode == SFmode && fldi_ok ()))
10821 switch (mode)
10823 case SFmode:
10824 sri->icode = CODE_FOR_reload_insf__frn;
10825 return NO_REGS;
10826 case DFmode:
10827 sri->icode = CODE_FOR_reload_indf__frn;
10828 return NO_REGS;
10829 case SImode:
10830 /* ??? If we knew that we were in the appropriate mode -
10831 single precision - we could use a reload pattern directly. */
10832 return FPUL_REGS;
10833 default:
10834 abort ();
10836 if (class == FPUL_REGS
10837 && ((GET_CODE (x) == REG
10838 && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
10839 || REGNO (x) == T_REG))
10840 || GET_CODE (x) == PLUS))
10841 return GENERAL_REGS;
10842 if (class == FPUL_REGS && immediate_operand (x, mode))
10844 if (GET_CODE (x) == CONST_INT && CONST_OK_FOR_I08 (INTVAL (x)))
10845 return GENERAL_REGS;
10846 sri->icode = CODE_FOR_reload_insi__i_fpul;
10847 return NO_REGS;
10849 if (class == FPSCR_REGS
10850 && ((GET_CODE (x) == REG && REGNO (x) >= FIRST_PSEUDO_REGISTER)
10851 || (GET_CODE (x) == MEM && GET_CODE (XEXP (x, 0)) == PLUS)))
10852 return GENERAL_REGS;
10853 if (REGCLASS_HAS_FP_REG (class)
10854 && TARGET_SHMEDIA
10855 && immediate_operand (x, mode)
10856 && x != CONST0_RTX (GET_MODE (x))
10857 && GET_MODE (x) != V4SFmode)
10858 return GENERAL_REGS;
10859 if ((mode == QImode || mode == HImode)
10860 && TARGET_SHMEDIA && inqhi_operand (x, mode))
10862 sri->icode = ((mode == QImode)
10863 ? CODE_FOR_reload_inqi : CODE_FOR_reload_inhi);
10864 return NO_REGS;
10866 if (TARGET_SHMEDIA && class == GENERAL_REGS
10867 && (GET_CODE (x) == LABEL_REF || PIC_DIRECT_ADDR_P (x)))
10868 return TARGET_REGS;
10869 } /* end of input-only processing. */
10871 if (((REGCLASS_HAS_FP_REG (class)
10872 && (GET_CODE (x) == REG
10873 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
10874 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
10875 && TARGET_FMOVD))))
10876 || (REGCLASS_HAS_GENERAL_REG (class)
10877 && GET_CODE (x) == REG
10878 && FP_REGISTER_P (REGNO (x))))
10879 && ! TARGET_SHMEDIA
10880 && (mode == SFmode || mode == SImode))
10881 return FPUL_REGS;
10882 if ((class == FPUL_REGS
10883 || (REGCLASS_HAS_FP_REG (class)
10884 && ! TARGET_SHMEDIA && mode == SImode))
10885 && (GET_CODE (x) == MEM
10886 || (GET_CODE (x) == REG
10887 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
10888 || REGNO (x) == T_REG
10889 || system_reg_operand (x, VOIDmode)))))
10891 if (class == FPUL_REGS)
10892 return GENERAL_REGS;
10893 return FPUL_REGS;
10895 if ((class == TARGET_REGS
10896 || (TARGET_SHMEDIA && class == SIBCALL_REGS))
10897 && !EXTRA_CONSTRAINT_Csy (x)
10898 && (GET_CODE (x) != REG || ! GENERAL_REGISTER_P (REGNO (x))))
10899 return GENERAL_REGS;
10900 if ((class == MAC_REGS || class == PR_REGS)
10901 && GET_CODE (x) == REG && ! GENERAL_REGISTER_P (REGNO (x))
10902 && class != REGNO_REG_CLASS (REGNO (x)))
10903 return GENERAL_REGS;
10904 if (class != GENERAL_REGS && GET_CODE (x) == REG
10905 && TARGET_REGISTER_P (REGNO (x)))
10906 return GENERAL_REGS;
10907 return NO_REGS;
10910 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
10912 #include "gt-sh.h"